hive的UDF 函数(User-Defined-Function)
关系运算等值比较: =
语法: A = B
hive> select 1 from dual where 1=1; 1
不等值比较: <>
语法: A <> B
hive> select 1 from dual where 1 <> 2; 1
小于比较: <
语法: A < B
hive> select 1 from dual where 1 < 2; 1
小于等于比较: <=
语法: A <= B
hive> select 1 from dual where 1 <= 1; 1
大于比较: >
语法: A > B
hive> select 1 from dual where 2 > 1; 1
大于等于比较: >=
语法: A >= B
hive> select 1 from dual where 1 >= 1; 1
String的比较要注意(经常使用的时间比较可以先to_date以后再比较)
hive> select * from udftest;
OK 2022222209 00:00:00 2022222209 hive> select a,b,ab,a=b from udftest; 2022222209 00:00:00 2022222209 false true false
空值判断: IS NULL
语法: A IS NULL
hive> select 1 from dual where null is null; 1
非空判断: IS NOT NULL
语法: A IS NOT NULL
hive> select 1 from dual where 1 is not null; 1
LIKE比较: LIKE
语法: A LIKE B
hive> select 1 from dual where 'football' like 'foot%'; 1 hive> select 1 from dual where 'football' like 'foot____'; 1
JAVA的LIKE操作: RLIKE
语法: A RLIKE B
hive> select 1 from dual where 'footbar' rlike '^f.*r$'; 1
REGEXP操作: REGEXP
语法: A REGEXP B
hive> select 1 from dual where 'footbar' REGEXP '^f.*r$'; 1
数学运算加法操作: +
语法: A + B
hive> select 1 + 9 from dual; 10 hive> create table udftest as select 1 + 1.2 from dual;
hive> describe udftest;
_c0 double
减法操作: -
语法: A – B
hive> select 10 – 5 from dual; 5 hive> create table udftest as select 5.6 – 4 from dual;
hive> describe udftest;
_c0 double
乘法操作: *
语法: A * B
hive> select 40 * 5 from dual; 200
除法操作: /
语法: A / B
hive> select 40 / 5 from dual; 8.0
取余操作: %
语法: A % B
hive> select 41 % 5 from dual; 1 hive> select 8.4 % 4 from dual; 0.40000000000000036
注:精度在hive中是个很大的问题,类似这样的操作最好通过round指定精度
hive> select round(8.4 % 4,2) from dual; 0.4
位与操作: &
语法: A & B
hive> select 4 & 8 from dual; 0 hive> select 6 & 4 from dual; 4
位或操作: |
语法: A | B
hive> select 4 | 8 from dual; 12 hive> select 6 | 8 from dual; 14
位异或操作: ^
语法: A ^ B
hive> select 4 ^ 8 from dual; 12 hive> select 6 ^ 4 from dual; 2
位取反操作: ~
语法: ~A
hive> select ~6 from dual;
-7 hive> select ~4 from dual;
-5
逻辑运算逻辑与操作: AND
语法: A AND B
hive> select 1 from dual where 1=1 and 2=2; 1
逻辑或操作: OR
语法: A OR B
hive> select 1 from dual where 1=2 or 2=2; 1
逻辑非操作: NOT
语法: NOT A
hive> select 1 from dual where not 1=2; 1
复合类型构建操作Map类型构建: map
语法: map (key1,value1,key2,value2,…)
hive> Create table udftest as select map('100','tom','200','mary') as t from dual;
hive> describe udftest;
t map<string,string>
hive> select t from udftest;
{"100″:"tom","200″:"mary"}
Struct类型构建: struct
语法: struct(val1,val2,val3,…)
hive> create table udftest as select struct('tom','mary','tim') as t from dual;
hive> describe udftest;
t struct
array类型构建: array
语法: array(val1,…)
hive> create table udftest as select array("tom","mary","tim") as t from dual;
hive> describe udftest;
t array<string> hive> select t from udftest;
["tom","tim"]
复杂类型访问操作map类型访问: M[key]
语法: M[key]
hive> Create table udftest as select map('100','mary') as t from dual;
hive> select t['200'],t['100'] from udftest;
mary tom
struct类型访问: S.x
语法: S.x
hive> create table udftest as select struct('tom',col3:string>
hive> select t.col1,t.col3 from udftest;
tom tim
array类型访问: A[n]
语法: A[n]
hive> create table udftest as select array("tom","tim") as t from dual;
hive> select t[0],t[1],t[2] from udftest;
tom mary tim
数值计算取整函数: round
语法: round(double a)
hive> select round(3.1415926) from dual; 3 hive> select round(3.5) from dual; 4 hive> create table udftest as select round(9542.158) from dual;
hive> describe udftest;
_c0 bigint
指定精度取整函数: round
语法: round(double a,int d)
hive> select round(3.1415926,4) from dual; 3.1416
向下取整函数: floor
语法: floor(double a)
hive> select floor(3.1415926) from dual; 3 hive> select floor(25) from dual; 25
向上取整函数: ceil
语法: ceil(double a)
hive> select ceil(3.1415926) from dual; 4 hive> select ceil(46) from dual; 46
向上取整函数: ceiling
语法: ceiling(double a)
hive> select ceiling(3.1415926) from dual; 4 hive> select ceiling(46) from dual; 46
取随机数函数: rand
语法: rand(),rand(int seed)
hive> select rand() from dual; 0.5577432776034763 hive> select rand() from dual; 0.6638336467363424 hive> select rand(100) from dual; 0.7220096548596434 hive> select rand(100) from dual; 0.7220096548596434
自然指数函数: exp
语法: exp(double a)
hive> select exp(2) from dual; 7.38905609893065
自然对数函数: ln
语法: ln(double a)
hive> select ln(7.38905609893065) from dual; 2.0
以10为底对数函数: log10
语法: log10(double a)
hive> select log10(100) from dual; 2.0
以2为底对数函数: log2
语法: log2(double a)
hive> select log2(8) from dual; 3.0
对数函数: log
语法: log(double base,double a) 幂运算函数: pow
语法: pow(double a,double p)
hive> select pow(2,4) from dual; 16.0
幂运算函数: power
语法: power(double a,double p)
hive> select power(2,4) from dual; 16.0
开平方函数: sqrt
语法: sqrt(double a)
hive> select sqrt(16) from dual; 4.0
2进制函数: bin
语法: bin(BIGINT a)
hive> select bin(7) from dual; 111
106进制函数: hex
语法: hex(BIGINT a)
hive> select hex(17) from dual; 11 hive> select hex('abc') from dual; 616263
反转106进制函数: unhex
语法: unhex(string a)
hive> select unhex('616263') from dual;
abc
hive> select unhex('11') from dual;
-
hive> select unhex(616263) from dual;
abc
进制转换函数: conv
语法: conv(BIGINT num,int from_base,int to_base)
hive> select conv(17,10,16) from dual; 11 hive> select conv(17,2) from dual; 10001
绝对值函数: abs
语法: abs(double a) abs(int a)
hive> select abs(-3.9) from dual; 3.9 hive> select abs(10.9) from dual; 10.9
正取余函数: pmod
语法: pmod(int a,int b),pmod(double a,double b)
hive> select pmod(9,4) from dual; 1 hive> select pmod(-9,4) from dual; 3
正弦函数: sin
语法: sin(double a)
hive> select sin(0.8) from dual; 0.7173560908995228
反正弦函数: asin
语法: asin(double a)
hive> select asin(0.7173560908995228) from dual; 0.8
余弦函数: cos
语法: cos(double a)
hive> select cos(0.9) from dual; 0.6216099682706644
反余弦函数: acos
语法: acos(double a)
hive> select acos(0.6216099682706644) from dual; 0.9
positive函数: positive
语法: positive(int a),positive(double a)
hive> select positive(-10) from dual;
-10 hive> select positive(12) from dual; 12
negative函数: negative
语法: negative(int a),negative(double a)
hive> select negative(-5) from dual; 5 hive> select negative(8) from dual;
-8
复杂类型长度统计函数Map类型长度函数: size(Map
hive> select size(map('100','101','mary')) from dual; 2
array类型长度函数: size(Array)
语法: size(Array)
hive> select size(array('100','101,'102','103')) from dual;
4
类型转换函数类型转换函数: cast
语法: cast(expr as )
hive> select cast(1 as bigint) from dual; 1
日期函数UNIX时间戳转日期函数: from_unixtime
语法: from_unixtime(bigint unixtime[,string format])
hive> select from_unixtime(1323308943,'yyyyMMdd') from dual; 20111208
获得当前UNIX时间戳函数: unix_timestamp
语法: unix_timestamp()
hive> select unix_timestamp() from dual; 1323309615
日期转UNIX时间戳函数: unix_timestamp
语法: unix_timestamp(string date)
hive> select unix_timestamp('2011⑴2-07 13:01:03') from dual; 1323234063
指定格式日期转UNIX时间戳函数: unix_timestamp
语法: unix_timestamp(string date,string pattern)
hive> select unix_timestamp('20111207 13:01:03','yyyyMMdd HH:mm:ss') from dual; 1323234063
日期时间转日期函数: to_date
语法: to_date(string timestamp)
hive> select to_date('2011⑴2-08 10:03:01') from dual; 2011-12-08
日期转年函数: year
语法: year(string date)
hive> select year('2011⑴2-08 10:03:01') from dual; 2011 hive> select year('2012⑴2-08') from dual; 2012
日期转月函数: month
语法: month (string date)
hive> select month('2011⑴2-08 10:03:01') from dual; 12 hive> select month('2011-08-08') from dual; 8
日期转天函数: day
语法: day (string date)
hive> select day('2011⑴2-08 10:03:01') from dual; 8 hive> select day('2011⑴2⑵4') from dual; 24
日期转小时函数: hour
语法: hour (string date)
hive> select hour('2011⑴2-08 10:03:01') from dual; 10
日期转分钟函数: minute
语法: minute (string date)
hive> select minute('2011⑴2-08 10:03:01') from dual; 3
日期转秒函数: second
语法: second (string date)
hive> select second('2011⑴2-08 10:03:01') from dual; 1
日期转周函数: weekofyear
语法: weekofyear (string date)
hive> select weekofyear('2011⑴2-08 10:03:01') from dual; 49
日期比较函数: datediff
语法: datediff(string enddate,string startdate)
hive> select datediff('2012⑴2-08','2012-05-09') from dual; 213
日期增加函数: date_add
语法: date_add(string startdate,int days)
hive> select date_add('2012⑴2-08',10) from dual; 2012-12-18
日期减少函数: date_sub
语法: date_sub (string startdate,int days)
hive> select date_sub('2012⑴2-08',10) from dual; 2012-11-2
条件函数If函数: if
语法: if(boolean testCondition,T valueTrue,T valueFalSEOrNull)
hive> select if(1=2,100,200) from dual; 200 hive> select if(1=1,200) from dual; 100
非空查找函数: COALESCE
语法: COALESCE(T v1,T v2,…)
hive> select COALESCE(null,'100','50') from dual; 100
条件判断函数1:CASE
语法: CASE a WHEN b THEN c [WHEN d THEN e]* [ELSE f] END
hive> Select case 100 when 50 then 'tom' when 100 then 'mary' else 'tim' end from dual;
mary
hive> Select case 200 when 50 then 'tom' when 100 then 'mary' else 'tim' end from dual;
tim
条件判断函数2:CASE
语法: CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END
hive> select case when 1=2 then 'tom' when 2=2 then 'mary' else 'tim' end from dual;
mary
hive> select case when 1=1 then 'tom' when 2=2 then 'mary' else 'tim' end from dual;
tom
字符串函数字符串长度函数:length
语法: length(string A)
hive> select length('abcedfg') from dual; 7
字符串反转函数:reverse
语法: reverse(string A)
hive> select reverse('abcedfg') from dual;
gfdecba
字符串联接函数:concat
语法: concat(string A,string B…)
hive> select concat('abc','def','gh') from dual;
abcdefgh
带分隔符字符串联接函数:concat_ws
语法: concat_ws(string SEP,string A,string B…)
hive> select concat_ws(',','abc','gh') from dual;
abc,def,gh
字符串截取函数1:substr,substring
语法: substr(string A,int start),substring(string A,int start)
hive> select substr('abcde',3) from dual;
cde
hive> select substring('abcde',3) from dual;
cde
hive> select substr('abcde',-1) from dual; (和ORACLE相同)
e
字符串截取函数2:substr,int start,int len),int len) |