大佬教程收集整理的这篇文章主要介绍了hive第四天,大佬教程大佬觉得挺不错的,现在分享给大家,也给大家做个参考。
// 等值比较 = == <=>
// 不等值比较 != <>
// 区间比较: SELEct * from default.students where id between 1500100001 and 1500100010;
// 空值/非空值判断:is null、is not null、nvl()、isnull()
// like、rlike、regexp用法
取整函数(四舍五入):round
向上取整:ceil
向下取整:floor
SELEct if(1>0,1,0);
SELEct if(1>0,if(-1>0,-1,1),0);
SELEct COALESCE(null,'1','2'); // 1 从左往右 一次匹配 直到非空为止
SELEct COALESCE('1',null,'2'); // 1
SELEct score
,case when score>120 then '优秀'
when score>100 then '良好'
when score>90 then '及格'
else '不及格'
end as pingfen
from default.score limit 20;
SELEct name
,case name when "施笑槐" then "槐ge"
when "吕金鹏" then "鹏ge"
when "单乐蕊" then "蕊jie"
else "算了不叫了"
end as nickname
from default.students limit 10;
注意条件的顺序
SELEct from_unixtime(1610611142,'YYYY/MM/dd HH:mm:ss');
SELEct from_unixtime(unix_timestamp(),'YYYY/MM/dd HH:mm:ss');
// '2021年01月14日' -> '2021-01-14'
SELEct from_unixtime(unix_timestamp('2021年01月14日','yyyy年MM月dd日'),'yyyy-MM-dd');
// "04牛2021数加16逼" -> "2021/04/16"
SELEct from_unixtime(unix_timestamp("04牛2021数加16逼","MM牛yyyy数加dd逼"),"yyyy/MM/dd");
concat('123','456'); // 123456
concat('123','456',null); // NULL
SELEct concat_ws('#','a','b','c'); // a#b#c
SELEct concat_ws('#','a','b','c',null); // a#b#c 可以指定分隔符,并且会自动忽略NULL
SELEct concat_ws("|",cast(id as String),name,cast(age as String),gender,clazz) from students limit 10;
SELEct subString("abcdefg",1); // abcdefg HQL中涉及到位置的时候 是从1开始计数
// '2021/01/14' -> '2021-01-14'
SELEct concat_ws("-",subString('2021/01/14',1,4),subString('2021/01/14',6,2),subString('2021/01/14',9,2));
SELEct split("abcde,fgh",","); // ["abcde","fgh"]
SELEct split("a,b,c,d,e,f",",")[2]; // c
SELEct explode(split("abcde,fgh",",")); // abcde
// fgh
// 解析json格式的数据
SELEct get_json_object('{"name":"zhangsan","age":18,"score":[{"course_name":"math","score":100},{"course_name":"english","score":60}]}',"$.score[0].score"); // 100
create table words(
words String
)row format delimited fields terminated by '|';
// 数据
Hello,java,Hello,java,scala,python
hbase,hadoop,hadoop,hdfs,hive,hive
hbase,hadoop,hadoop,hdfs,hive,hive
SELEct word,count(*) from (SELEct explode(split(words,',')) word from words) a group by a.word;
// 结果
hadoop 4
hbase 2
hdfs 2
Hello 2
hive 4
java 2
python 1
scala 1
好像给每一份数据 开一扇窗户 所以叫开窗函数
在sql中有一类函数叫做聚合函数,例如sum()、avg()、max()等等,这类函数可以将多行数据按照规则聚集为一行,一般来讲聚集后 的行数是要少于聚集前的行数的.但是有时我们想要既显示聚集前的数据,又要显示聚集后的数据,这时我们便引入了窗口函数.
111,69,class1,department1112,80,class1,department1113,74,class1,department1114,94,class1,department1115,93,class1,department1121,74,class2,department1122,86,class2,department1123,78,class2,department1124,70,class2,department1211,93,class1,department2212,83,class1,department2213,94,class1,department2214,94,class1,department2215,82,class1,department2216,74,class1,department2221,99,class2,department2222,78,class2,department2223,74,class2,department2224,80,class2,department2225,85,class2,department2
create table new_score( id int ,score int ,clazz String ,department String) row format delimited fields terminated by ",";
SELEct id ,score ,clazz ,department ,row_number() over (partition by clazz order by score desc) as row_number_rk ,dense_rank() over (partition by clazz order by score desc) as dense_rk ,rank() over (partition by clazz order by score desc) as rk ,percent_rank() over (partition by clazz order by score desc) as percent_rkfrom new_score;id score clazz department row_number_rk dense_rk rk percent_rk114 94 class1 department1 1 1 1 0.0214 94 class1 department2 2 1 1 0.0213 94 class1 department2 3 1 1 0.0211 93 class1 department2 4 2 4 0.3115 93 class1 department1 5 2 4 0.3212 83 class1 department2 6 3 6 0.5215 82 class1 department2 7 4 7 0.6112 80 class1 department1 8 5 8 0.7113 74 class1 department1 9 6 9 0.8216 74 class1 department2 10 6 9 0.8111 69 class1 department1 11 7 11 1.0221 99 class2 department2 1 1 1 0.0122 86 class2 department1 2 2 2 0.125225 85 class2 department2 3 3 3 0.25224 80 class2 department2 4 4 4 0.375123 78 class2 department1 5 5 5 0.5222 78 class2 department2 6 5 5 0.5121 74 class2 department1 7 6 7 0.75223 74 class2 department2 8 6 7 0.75124 70 class2 department1 9 7 9 1.0
SELEct id ,score ,clazz ,department ,lag(id,2) over (partition by clazz order by score desc) as lag_num ,LEAD(id,2) over (partition by clazz order by score desc) as lead_num ,FIRST_VALUE(id) over (partition by clazz order by score desc) as first_v_num ,LAST_VALUE(id) over (partition by clazz order by score desc) as last_v_num ,NTILE(3) over (partition by clazz order by score desc) as ntile_numfrom new_score;id score clazz department lag_num lead_num first_v_num last_v_num ntile_num114 94 class1 department1 NULL 213 114 213 1214 94 class1 department2 NULL 211 114 213 1213 94 class1 department2 114 115 114 213 1211 93 class1 department2 214 212 114 115 1115 93 class1 department1 213 215 114 115 2212 83 class1 department2 211 112 114 212 2215 82 class1 department2 115 113 114 215 2112 80 class1 department1 212 216 114 112 2113 74 class1 department1 215 111 114 216 3216 74 class1 department2 112 NULL 114 216 3111 69 class1 department1 113 NULL 114 111 3221 99 class2 department2 NULL 225 221 221 1122 86 class2 department1 NULL 224 221 122 1225 85 class2 department2 221 123 221 225 1224 80 class2 department2 122 222 221 224 2123 78 class2 department1 225 121 221 222 2222 78 class2 department2 224 223 221 222 2121 74 class2 department1 123 124 221 223 3223 74 class2 department2 222 NULL 221 223 3124 70 class2 department1 121 NULL 221 124 3
https://blog.csdn wangt.cc /qq_26937525/article/details/54925827
lateral view explode
create table testArray2( name String, weight array<String>)row format delimited fields terminated by 't'COLLECTION ITEMS terminated by ',';志凯 "150","170","180"上单 "150","180","190"SELEct name,col1 from testarray2 lateral view explode(weight) t1 as col1;志凯 150志凯 170志凯 180上单 150上单 180上单 190SELEct key from (SELEct explode(map('key1',1,'key2',2,'key3',3)) as (key,value)) t;key1key2key3SELEct name,col1,col2 from testarray2 lateral view explode(map('key1',1,'key2',2,'key3',3)) t1 as col1,col2;志凯 key1 1志凯 key2 2志凯 key3 3上单 key1 1上单 key2 2上单 key3 3SELEct name,pos,col1 from testarray2 lateral view posexplode(weight) t1 as pos,col1;志凯 0 150志凯 1 170志凯 2 180上单 0 150上单 1 180上单 2 190
// testLieToLinename col1志凯 150志凯 170志凯 180上单 150上单 180上单 190create table testLieToLine( name String, col1 int)row format delimited fields terminated by 't';SELEct name,collect_list(col1) from testLieToLine group by name;// 结果上单 ["150","180","190"]志凯 ["150","170","180"]SELEct t1.name ,collect_list(t1.col1) from ( SELEct name ,col1 from testarray2 lateral view explode(weight) t1 as col1) t1 group by t1.name;
<dependency> <groupId>org.apache.hive</groupId> <artifactId>hive-exec</artifactId> <version>1.2.1</version> </dependency>
import org.apache.hadoop.hive.ql.exec.UDF;public class HiveUDF extends UDF { // hadoop => #hadoop# public String evaluate(String col1) { // 给传进来的数据 左边加上 # 号 右边加上 $ String result = "#" + col1 + "$"; return result; }}
add jar 路径
将jar包作为资源添加到hive环境中add jar /usr/local/soft/jars/HiveUDF2-1.0.jar;
create temporary function fxxx1 as 'MyUDF';
SELEct fxx1(Name) as fxx_name from students limit 10;#施笑槐$#吕金鹏$#单乐蕊$#葛德曜$#宣谷芹$#边昂雄$#尚孤风$#符半双$#沈德昌$#羿彦昌$
"key1:value1,key2:value2,key3:value3"
key1 value1
key2 value2
key3 value3
SELEct split(t.col1,":")[0],split(t.col1,":")[1] from (SELEct explode(split("key1:value1,key2:value2,key3:value3",",")) as col1) t;
import org.apache.hadoop.hive.ql.exec.UDFArgumentexception;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;import org.apache.hadoop.hive.serde2.objecTinspector.objecTinspector;import org.apache.hadoop.hive.serde2.objecTinspector.objecTinspectorFactory;import org.apache.hadoop.hive.serde2.objecTinspector.StructObjecTinspector;import org.apache.hadoop.hive.serde2.objecTinspector.primitive.PrimitiveObjecTinspectorFactory;import java.util.ArrayList;public class HiveUDTF extends GenericUDTF { // 指定输出的列名 及 类型 @Override public StructObjecTinspector initialize(StructObjecTinspector argOIs) throws UDFArgumentexception { ArrayList<String> filedNames = new ArrayList<String>(); ArrayList<ObjecTinspector> filedObj = new ArrayList<ObjecTinspector>(); filedNames.add("col1"); filedObj.add(PrimitiveObjecTinspectorFactory.javaStringObjecTinspector); filedNames.add("col2"); filedObj.add(PrimitiveObjecTinspectorFactory.javaStringObjecTinspector); return ObjecTinspectorFactory.getStandardStructObjecTinspector(filedNames, filedObj); } // 处理逻辑 my_udtf(col1,col2,col3) // "key1:value1,key2:value2,key3:value3" // my_udtf("key1:value1,key2:value2,key3:value3") public void process(Object[] objects) throws HiveException { // objects 表示传入的N列 String col = objects[0].toString(); // key1:value1 key2:value2 key3:value3 String[] splits = col.split(","); for (String str : splits) { String[] cols = str.split(":"); // 将数据输出 forWARD(cols); } } // 在UDTF结束时调用 public void close() throws HiveException { }}
SELEct my_udtf("key1:value1,key2:value2,key3:value3");
字段:id,col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12 共13列
数据:
a,1,2,3,4,5,6,7,8,9,10,11,12
b,11,12,13,14,15,16,17,18,19,20,21,22
c,21,22,23,24,25,26,27,28,29,30,31,32
转成3列:id,hours,value
例如:
a,1,2,3,4,5,6,7,8,9,10,11,12
a,0时,1
a,2时,2
a,4时,3
a,6时,4
......
create table udtfData( id String ,col1 String ,col2 String ,col3 String ,col4 String ,col5 String ,col6 String ,col7 String ,col8 String ,col9 String ,col10 String ,col11 String ,col12 String)row format delimited fields terminated by ',';
import org.apache.hadoop.hive.ql.exec.UDFArgumentexception;import org.apache.hadoop.hive.ql.metadata.HiveException;import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;import org.apache.hadoop.hive.serde2.objecTinspector.objecTinspector;import org.apache.hadoop.hive.serde2.objecTinspector.objecTinspectorFactory;import org.apache.hadoop.hive.serde2.objecTinspector.StructObjecTinspector;import org.apache.hadoop.hive.serde2.objecTinspector.primitive.PrimitiveObjecTinspectorFactory;import java.util.ArrayList;public class HiveUDTF2 extends GenericUDTF { @Override public StructObjecTinspector initialize(StructObjecTinspector argOIs) throws UDFArgumentexception { ArrayList<String> filedNames = new ArrayList<String>(); ArrayList<ObjecTinspector> fieldObj = new ArrayList<ObjecTinspector>(); filedNames.add("col1"); fieldObj.add(PrimitiveObjecTinspectorFactory.javaStringObjecTinspector); filedNames.add("col2"); fieldObj.add(PrimitiveObjecTinspectorFactory.javaStringObjecTinspector); return ObjecTinspectorFactory.getStandardStructObjecTinspector(filedNames, fieldObj); } public void process(Object[] objects) throws HiveException { int hours = 0; for (Object obj : objects) { hours = hours + 1; String col = obj.toString(); ArrayList<String> cols = new ArrayList<String>(); cols.add(hours + "时"); cols.add(col); forWARD(cols); } } public void close() throws HiveException { }}
添加jar资源:
add jar /usr/local/soft/HiveUDF2-1.0.jar;
注册udtf函数:
create temporary function my_udtf as 'MyUDTF';
SQL:
SELEct id,hours,value from udtfData lateral view my_udtf(col1,col2,col3,col4,col5,col6,col7,col8,col9,col10,col11,col12) t as hours,value ;
hive -e "SELEct * FROM test1.students limit 10"
hive -f hql文件路径
将HQL写在一个文件里,再使用 -f 参数指定该文件
以上是大佬教程为你收集整理的hive第四天全部内容,希望文章能够帮你解决hive第四天所遇到的程序开发问题。
如果觉得大佬教程网站内容还不错,欢迎将大佬教程推荐给程序员好友。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
如您有任何意见或建议可联系处理。小编QQ:384754419,请注明来意。