Hive|Hive DML HiveDML

Load 数据文件

语法：LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
说明：带 LOCAL 表示 filepath 是本地文件系统路径，文件会被复制到 HDFS 上的 Hive 表目录；不带 LOCAL 时 filepath 是 HDFS 路径，文件会被移动到 HDFS 上的 Hive 表目录。INTO 是追加，OVERWRITE INTO 是覆盖。

Insert 插入数据

hive> create table student2(id int,name string) partitioned by(month string, day string) row format delimited fields terminated by ' ';
示例1，普通插入：
hive> insert into table student2 partition(month=202001,day=01) values(1,'baozi1'),(2,'baozi2'),(3,'baozi3');
示例2，从查询结果插入：
hive> insert into table student2 partition(month=202001,day=02) select * from student;
示例3，同一表查询结果多插入（只扫描一次源表，把结果写入多个分区）：
hive> from student insert into table student2 partition(month=202001,day=03) select * insert into table student2 partition(month=202001,day=04) select *;

Truncate 清空表数据

hive> truncate table student2;

Select 查询

select 语法与 MySQL 基本相同。
hive> select * from stu_partition;
hive> select * from stu_partition where month=202001;

like / rlike：like 按通配符匹配，rlike 按正则表达式匹配。
hive> select * from stu_partition where name like '%2';
stu_partition.id    stu_partition.name    stu_partition.month
2    baozi2    202001
2    baozi2    202002
hive> select * from stu_partition where name rlike '[2]';
stu_partition.id    stu_partition.name    stu_partition.month
2    baozi2    202001
2    baozi2    202002

排序
--------------------------------------
order by：全局排序，一个Reduce。
sort by：“分块”排序，每个Reduce内排序。
distribute by：指定分区（按哪个字段把数据分发到各个Reduce），本身不排序，配合sort by实现“分块”排序。联合使用：distribute by分区，sort by对分区数据排序，可以是不同的字段。
cluster by：等于相同字段的distribute by + sort by（只能升序）。

查询桶表
--------------------------------------
语法： tablesample(bucket x out of y on ...)
x：从第几个bucket开始。
y：从x开始，下一个就是x+y，x+y+y.....
取几个：bucket数/y，y必须是bucket数的倍数或者因子。
* x必须小于等于y
示例：
hive> select * from stu_bucket tablesample(bucket 1 out of 4 on id);
stu_bucket.id    stu_bucket.name
1016    ss16
1012    ss12
1008    ss8
1004    ss4
hive> select * from stu_bucket tablesample(bucket 1 out of 1 on id);
stu_bucket.id    stu_bucket.name
1016    ss16
1012    ss12
1008    ss8
1004    ss4
1009    ss9
1005    ss5
1001    ss1
1013    ss13
1010    ss10
1002    ss2
1006    ss6
1014    ss14
1003    ss3
1011    ss11
1007    ss7
1015    ss15
hive> select * from stu_bucket tablesample(bucket 1 out of 2 on id);
stu_bucket.id    stu_bucket.name
1016    ss16
1012    ss12
1008    ss8
1004    ss4
1010    ss10
1002    ss2
1006    ss6
1014    ss14

case when 示例
--------------------------------------
emp_gender.txt
悟空 A 男
大海 A 男
宋宋 B 男
凤姐 A 女
婷姐 B 女
婷婷 B 女
hive> create table emp_gender(name string, dept_no string, gender string) row format delimited fields terminated by ' ';
hive> load data local inpath '/home/user000/data/emp_gender.txt' into table emp_gender;
hive> select dept_no, sum(case gender when '男' then 1 else 0 end) male, sum(case gender when '女' then 1 else 0 end) female from emp_gender group by dept_no;
结果
A 2 1
B 1 2

行转列示例
--------------------------------------
CONCAT(colA, str, colB)
CONCAT(colA, colB)
CONCAT_WS(separator, str1, str2, ...)
COLLECT_SET(col)：汇总字段值（去重）产生array。
person_info.txt
孙悟空 白羊座 A
大海 射手座 A
宋宋 白羊座 B
猪八戒 白羊座 A
凤姐 射手座 A
hive> create table person_info(name string, constellation string, blood_type string) row format delimited fields terminated by ' ';
hive> load data local inpath '/home/user000/data/person_info.txt' into table person_info;
hive> select t1.c1, concat_ws('|',collect_set(t1.name) ) from(select concat(constellation,',',blood_type) c1, name from person_info) t1 group by t1.c1;
结果
射手座,A 大海|凤姐
白羊座,A 孙悟空|猪八戒
白羊座,B 宋宋

列转行示例
--------------------------------------
EXPLODE(col)：将map、array拆分成多行。
LATERAL VIEW：
语法：LATERAL VIEW udtf(expression) tableAlias AS columnAlias
解释：和explode等UDTF一起使用（常先用split把字符串拆成array），对拆分后的数据，根据数据原本的对应关系，与原始行的其他字段进行关联（聚合）。
film_info.txt（电影名与类别之间以制表符分隔，类别之间以逗号分隔）
《疑犯追踪》	悬疑,动作,科幻,剧情
《Lie to me》	悬疑,警匪,动作,心理,剧情
《战狼 2》	战争,动作,灾难
hive> create table file_info(movie string, category array<string>) row format delimited fields terminated by '\t' collection items terminated by ',';
hive> load data local inpath '/home/user000/data/film_info.txt' into table file_info;
hive> select movie,category_name from file_info lateral view explode(category) tmpTable as category_name;
+--------------+----------------+--+
| movie        | category_name  |
+--------------+----------------+--+
| 《疑犯追踪》   | 悬疑           |
| 《疑犯追踪》   | 动作           |
| 《疑犯追踪》   | 科幻           |
| 《疑犯追踪》   | 剧情           |
| 《Lie to me》 | 悬疑           |
| 《Lie to me》 | 警匪           |
| 《Lie to me》 | 动作           |
| 《Lie to me》 | 心理           |
| 《Lie to me》 | 剧情           |
| 《战狼 2》    | 战争           |
| 《战狼 2》    | 动作           |
| 《战狼 2》    | 灾难           |
+--------------+----------------+--+