使用sqoop1,将sqlserver数据导入hive
发布时间:2020-12-12 13:46:57 所属栏目:MsSql教程 来源:网络整理
导读:#!/bin/sh #数据库连接 sqlConnect="jdbc:sqlserver://172.16.177.45:1433; username=sa; password=123456; database=Test;" checkColumn="RecoID" condiColumn="[记录时间]" #导入数据的范围 endTime="2016-08-15" ? dstTabArray=( "000B_7F14_IDUState
#!/bin/sh
# Import rows from SQL Server tables into a partitioned Hive table via Sqoop 1.
#
# Flow, per source table:
#   1. sqoop free-form-query import  -> HDFS temp dir (tab/newline delimited text)
#   2. hive: external temp table over that dir -> dynamic-partition INSERT into
#      the target table, partitioned by year/month/day of RecoTime
#   3. drop temp table, remove temp dir
#
# NOTE(review): credentials are hard-coded in the JDBC string below; prefer
# sqoop's --password-file or an options file kept out of version control.

# SQL Server JDBC connection string.
sqlConnect="jdbc:sqlserver://172.16.177.45:1433; username=sa; password=123456; database=Test;"

# Column Sqoop uses with --split-by to parallelize the import.
checkColumn="RecoID"
# Source-side filter column ([记录时间] = record time; bracket-quoted for MSSQL).
condiColumn="[记录时间]"
# Lower bound (inclusive) of the record-time range to import.
# NOTE(review): despite the name "endTime", the query uses ${condiColumn} >= this
# value, so it behaves as a start time — confirm intent.
endTime="2016-08-15"

# Source tables to process; commented entries are currently disabled.
dstTabArray=(
    "000B_7F14_IDUStateInfo_RSimulateData"
    #"000B_7F14_IDUStateInfo_RSwitchData"
    #"000B_7F14_ODUStateInfo_DTUWRSimulateData"
    #"000B_7F14_ODUStateInfo_WRSimulateData"
    #"000B_7F14_ODUStateInfo_WRSwitchData"
)

# Hive database and table names.
hiveDbName=BIZ1
hiveTableName=000B_data
# Temporary staging table and its HDFS locations.
tempTabName=000B_dataTemp
tempTabPath=/user/hive/biz1/extend/wp04/${tempTabName}
hiveTablePath=/user/hive/biz1/extend/wp04/${hiveTableName}

# Column list for the Hive staging table; must line up 1:1 with the SELECT list
# built below (RowKey first, Comp2FreqProAlarm last).
hiveTableCols="RowKey string, RecoTime timestamp, ProjID bigint, DevID bigint, DevAddr int, FrameNO int, ReceiveTime timestamp, ModifyFlag string, TableName string, RecoID bigint, ProtocolVer_DB string, ModelID_DB string, RemoteOnOffFunc smallint, ForbidComp1 smallint, ForbidComp2 smallint, OnOffModeSet smallint, RunModeSet smallint, Chill_LWT_Set float, Comp2FreqProAlarm smallint"

# SELECT list (T-SQL) for the ODU WRSimulateData table. RowKey is synthesized as
# reversed zero-padded device id + HHMMSS of [记录时间] + frame tag '7F14' +
# zero-padded Reco_ID.
# NOTE(review): the select lists for the other four tables (RSI, RSW, DTU, WRSW)
# were lost/truncated in the source this script was recovered from — they must be
# restored before re-enabling those tables in dstTabArray.
WRSI="select reverse(right('0000000000'+ltrim((select top 1 c.[ProjDev_ID] from Proj_Dev c where c.Proj_ID=b.Proj_ID and c.[机组条码]='FFFFFFFFFFFFFFF')),10))+replace(CONVERT(varchar(12),a.[记录时间],108),':','')+'7F14'+right('000000'+ltrim([Reco_ID]),6)as RowKey ,a.[记录时间] as RecoTime ,b.Proj_ID as ProjID ,(select top 1 c.[ProjDev_ID] from Proj_Dev c where c.Proj_ID=b.Proj_ID and c.[机组条码]='FFFFFFFFFFFFFFF') as DevID ,'224' as DevAddr ,'' as FrameNO ,'' as ReceiveTime ,'' as ModifyFlag ,'7F14' as TableName ,a.[Reco_ID] as RecoID ,a.[协议版本] as ProtocolVer_DB ,a.[机型ID] as ModelID_DB ,[压缩机二频繁保护报警] as Comp2FreqProAlarm"

for dstTabName in "${dstTabArray[@]}"; do
    # Join each state table to Proj_Dev to resolve project/device identity.
    whereStr=" from [dbo].[${dstTabName}] a left join [dbo].[Proj_Dev] b on a.ProjDev_ID=b.ProjDev_ID"
    sql=""
    case ${dstTabName} in
        "000B_7F14_IDUStateInfo_RSimulateData" )
            sql=${RSI}${whereStr};;
        "000B_7F14_IDUStateInfo_RSwitchData" )
            sql=${RSW}${whereStr};;
        "000B_7F14_ODUStateInfo_DTUWRSimulateData" )
            sql=${DTU}${whereStr};;
        "000B_7F14_ODUStateInfo_WRSimulateData" )
            sql=${WRSI}${whereStr};;
        "000B_7F14_ODUStateInfo_WRSwitchData" )
            sql=${WRSW}${whereStr};;
    esac

    # Import into HDFS on the production queue with 20 mappers.
    # \$CONDITIONS must reach Sqoop literally (it is Sqoop's split-range
    # placeholder for free-form queries), hence the backslash escape.
    sqoop import -D mapred.job.queue.name=production --connect "${sqlConnect}" \
        --query "select t.* from (${sql} where ${condiColumn}>='${endTime}') t WHERE \$CONDITIONS" \
        --split-by ${checkColumn} \
        --fields-terminated-by '\t' \
        --lines-terminated-by '\n' \
        --delete-target-dir \
        --target-dir ${tempTabPath} \
        -m 20

    # Stage through an external table, then dynamic-partition insert by the
    # year/month/day derived from RecoTime.
    hive -e "
    use ${hiveDbName};
    drop table if exists ${tempTabName};
    create external table ${tempTabName} (${hiveTableCols})
    row format delimited fields terminated by '\t'
    location '${tempTabPath}';
    set mapreduce.job.queuename=production;
    set hive.execution.engine=mr;
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.compress.output=true;
    set mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
    set PARQUET_COMPRESSION_CODE=snappy;
    set mapred.max.split.size=10000000;
    insert into table ${hiveTableName} partition(year,month,day)
    select *,year(RecoTime),month(RecoTime),day(RecoTime) from ${tempTabName};
    drop table ${tempTabName};"

    # Clean up the staging directory left behind by the (now dropped) external table.
    hadoop fs -rm -r ${tempTabPath}

    echo "load ${dstTabName} Data over"
    echo ""
    echo ""
    echo "------------------------------------------------------------------------------------------"
done
echo "load Data over"
echo "hive完成----------------------"