sqoop1: importing SQL Server data into HBase
The script below drives the whole flow: it exports one day of SQL Server data at a time with Sqoop into a temporary HDFS directory, converts the output to HFiles with ImportTsv, and bulk-loads the HFiles into a per-day HBase table.

#!/bin/bash
# Database connection
srcConnect="jdbc:sqlserver://10.2.44.181:1433;username=mc_testor;password=0000002am;database=Gree_MonitorCenter_New"
# Source table name
dstTabName=wp04_0013_data
checkColumn="RowKey"
# Condition column ([记录时间] = record time)
condiColumn="[记录时间]"
startRecoid=2017-01-06
plus=1
endRecodid=2017-01-11
echo "from ${startRecoid} to ${endRecodid}" > log.out
# HBase table name prefix
hbaseTableName=wp04_0013_data_
# HDFS directory for the generated HFiles
hbaseTablePath=/user/hbase/biz1/wp04_0013_data
# Temporary HDFS directory for the Sqoop output
hbaseTempPath=/user/hive/biz1/${hbaseTableName}temp
# SQL to execute
sql="select reverse(right('0000000000'+ltrim(a.[ProjDev_ID]),10))+replace(CONVERT(varchar(12),a.[记录时间],108),':','')+'7F15'+right('000000'+ltrim([Reco_ID]),6) as RowKey, CONVERT(varchar(19),121)+','+cast(b.Proj_ID as varchar)+','+cast(a.[ProjDev_ID] as varchar)+','+'224'+','+''+','+''+','+'7F15'+','+cast(a.[Reco_ID] as varchar)+','+'' as c1 from [dbo].[0013_7F15_IDUStateInfo_RSimulateData] a left join Proj_Dev b on a.ProjDev_ID=b.ProjDev_ID "
# Import in daily batches
while [[ $startRecoid != $endRecodid ]]; do
    inter=`date -d "1 day $startRecoid" +%Y-%m-%d`
    temp_endRecodid=`echo $endRecodid | awk '{print $1}'`
    end_time_s=`date -d $temp_endRecodid +%s`
    temp_inter=`echo $inter | awk '{print $1}'`
    temphbase_inter=`echo $inter | awk '{print $1}' | tr -d "-"`
    inter_time_s=`date -d $temp_inter +%s`
    # Do not step past the end date
    if [ $inter_time_s -gt $end_time_s ]
    then
        inter=$endRecodid
    fi
    temp_startRecoid=`echo $startRecoid | awk '{print $1}' | tr -d "-"`
    starttime=${startRecoid}" 00:00:00:000"   # by ljn add
    endtime=${startRecoid}" 23:59:59:997"     # by ljn add, e.g. 2015-11-22 23:58:01.000 -> 23:59:59:997
    hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}
    #hadoop fs -mkdir ${hbaseTablePath} 1>/dev/null 2>&1
    hadoop fs -rm -r ${hbaseTempPath}${temp_startRecoid} 1>/dev/null 2>&1
    echo "Starting HBase import: ++++++++++++++++++++++++++++++++"
    sqoop import -D mapreduce.job.queuename=production --connect "${srcConnect}" \
        --query "select t.* from (${sql} where ${condiColumn}>='${starttime}' and ${condiColumn}<='${endtime}' ) t WHERE \$CONDITIONS" \
        --split-by ${checkColumn} --fields-terminated-by "\t" --lines-terminated-by "\n" \
        --target-dir ${hbaseTempPath}${temp_startRecoid} -m 15
    hadoop fs -test -e ${hbaseTempPath}${temp_startRecoid}
    if [ $? -ne 0 ]
    then
        echo "+++++++++++++++++++++++ output directory does not exist +++++++++++++++++++++++++"
        echo "+++++++++++++++++++++ stopped at date: $startRecoid +++++++++++++++"
        break
    fi
    startRecoid=$inter
    partf1=`hadoop fs -du -s ${hbaseTempPath}${temp_startRecoid} | awk '{print $1}'`
    if [ "$partf1" == "0" ]
    then
        hadoop fs -rm -r ${hbaseTempPath}${temp_startRecoid} 1>/dev/null 2>&1
        echo "--------------- file is null partf1: $partf1 -----------------------"
        continue
    fi
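    # Optional sanity check (not part of the original script; added here as a hedged
    # debugging aid): peek at the first few records of the Sqoop output to confirm each
    # line is "<rowkey><TAB><c1>" before HFiles are generated. The part-m-00000 name is
    # the usual Sqoop/MapReduce output file naming convention. Uncomment if needed.
    # hadoop fs -cat ${hbaseTempPath}${temp_startRecoid}/part-m-00000 2>/dev/null | head -n 3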
    # Generate HFiles from the tab-separated Sqoop output
    # (the tab separator matches --fields-terminated-by above; tab is also ImportTsv's default)
    hbase org.apache.hadoop.hbase.mapreduce.ImportTsv \
        -Dimporttsv.separator=$'\t' \
        -Dimporttsv.columns='HBASE_ROW_KEY,f1:c1' \
        -Dimporttsv.bulk.output=${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} \
        biz1:${hbaseTableName}${temp_startRecoid} \
        ${hbaseTempPath}${temp_startRecoid}
    echo "current query date: +++++++++++++++++++++++++++++ ${temp_startRecoid}"
    echo "HFile output directory: +++++++++++++++++++++++++++++ ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}"
    echo "target table name: +++++++++++++++++++++++++++++ ${hbaseTableName}${temp_startRecoid}"
    echo "temporary data directory: +++++++++++++++++++++++++++++ ${hbaseTempPath}${temp_startRecoid}"
    # Notes:
    # f1 is the column family name; the row key must be specified
    # -Dimporttsv.bulk.output sets the HFile output directory
    # smy_test is the table to write to (this step does not create the table)
    # /user/hdfs/sqlserver is the HDFS directory holding the data
    hadoop fs -chmod -R 777 ${hbaseTablePath}
    # Load the HFiles into the HBase table (this step can create the table)
    hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid} biz1:${hbaseTableName}${temp_startRecoid}
    hadoop fs -rm -r ${hbaseTablePath}/${hbaseTableName}${temp_startRecoid}
    hadoop fs -rm -r ${hbaseTempPath}${temp_startRecoid} 1>/dev/null 2>&1
    echo "------------- iteration finished ---------------------------"
done
echo "load Data all over" >> log.out
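Before running the full batch job, it can help to confirm that the JDBC connection string works and that the source table is reachable. A minimal sketch using sqoop eval (this check is not part of the original script; it reuses the same connection string and simply previews a handful of source rows, with column names taken from the query above):

sqoop eval --connect "jdbc:sqlserver://10.2.44.181:1433;username=mc_testor;password=0000002am;database=Gree_MonitorCenter_New" \
    --query "select top 5 a.ProjDev_ID, a.[记录时间], a.Reco_ID from [dbo].[0013_7F15_IDUStateInfo_RSimulateData] a"

After a day's partition has been bulk-loaded, the HBase row count can be compared with the source-side count for the same date range. A hedged example using the stock RowCounter job; the table name below assumes the 2017-01-06 partition produced by the script:

hbase org.apache.hadoop.hbase.mapreduce.RowCounter 'biz1:wp04_0013_data_20170106'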