Hive DDL数据操作

鸡汤: 坚持自己的梦想,即使没有翅膀也能飞翔。

1. 数据导入

  • 语法
load data [local] inpath '/cmz/student.txt' overwrite | into table student [partition (partcol1=val1,…)];

详解

1. load data: 表示加载数据
2. local: 表示从本地加载数据到hive表;否则从HDFS加载数据到hive表
3. inpath: 表示加载数据的路径
4. overwrite: 表示覆盖表中已有数据,否则表示追加
5. into table: 表示加载到哪张表
6. student: 表示具体的表
7. partition: 表示上传到指定分区

1.1 load

cat>students.txt<<EOF
1,张三
2,李四
3,王五
4,赵六
EOF

create table if not exists student(
    id int,
    name string
) row format delimited fields terminated by ','
stored as textfile;

# 加载本地数据到hive
load data local inpath '/root/hive/sgg/students.txt' into table student;
详细操作
[root@master sgg]# cat>students.txt<<EOF
1,张三
2,李四
3,王五
4,赵六
EOF
[root@master sgg]# cat students.txt 
1,张三
2,李四
3,王五
4,赵六

[root@master sgg]# pwd
/root/hive/sgg
[root@master sgg]# ls /root/hive/sgg/students.txt 
/root/hive/sgg/students.txt

# 创建表
1: jdbc:hive2://master:10000> create database cmz_test;
No rows affected (0.266 seconds)
1: jdbc:hive2://master:10000> use cmz_test;
No rows affected (0.03 seconds)
1: jdbc:hive2://master:10000> 
1: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
+-----------+--+
No rows selected (0.031 seconds)
1: jdbc:hive2://master:10000> create table if not exists student(
1: jdbc:hive2://master:10000>     id int,
1: jdbc:hive2://master:10000>     name string
1: jdbc:hive2://master:10000> ) row format delimited fields terminated by ','
1: jdbc:hive2://master:10000> stored as textfile;
No rows affected (0.149 seconds)
1: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
+-----------+--+

# 查看表结构
1 row selected (0.033 seconds)
1: jdbc:hive2://master:10000> desc student;
+-----------+------------+----------+--+
| col_name  | data_type  | comment  |
+-----------+------------+----------+--+
| id        | int        |          |
| name      | string     |          |
+-----------+------------+----------+--+
2 rows selected (0.092 seconds)

# 导入数据
1: jdbc:hive2://master:10000> load data local inpath '/root/hive/sgg/students.txt' into table student;
No rows affected (0.405 seconds)
1: jdbc:hive2://master:10000> select * from student;
+-------------+---------------+--+
| student.id  | student.name  |
+-------------+---------------+--+
| 1           | 张三            |
| 2           | 李四            |
| 3           | 王五            |
| 4           | 赵六            |
+-------------+---------------+--+
4 rows selected (0.118 seconds)

1.3 insert

show tables;
create table student1(
    id int, 
    name string
) partitioned by (month string) 
row format delimited fields terminated by '\t';
show tables;
desc student1;
insert into table student1 partition(month='201906') values(1,'cmz');
insert into table student1 partition(month='201907') values(1,"cmz1");
insert into table student1 partition(month='201908') values(1,'cmz2');
desc student1;
select * from student1;

insert 会执行MapReduce任务,所以会比较慢一点。所有不推荐insert 插入数据。

详细操作
1: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
+-----------+--+
1 row selected (0.056 seconds)
1: jdbc:hive2://master:10000> create table student1(
1: jdbc:hive2://master:10000>     id int, 
1: jdbc:hive2://master:10000>     name string
1: jdbc:hive2://master:10000> ) partitioned by (month string) 
1: jdbc:hive2://master:10000> row format delimited fields terminated by '\t';
No rows affected (0.083 seconds)
1: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
| student1  |
+-----------+--+
2 rows selected (0.032 seconds)

0: jdbc:hive2://master:10000> desc student1;
+--------------------------+-----------------------+-----------------------+--+
|         col_name         |       data_type       |        comment        |
+--------------------------+-----------------------+-----------------------+--+
| id                       | int                   |                       |
| name                     | string                |                       |
| month                    | string                |                       |
|                          | NULL                  | NULL                  |
| # Partition Information  | NULL                  | NULL                  |
| # col_name               | data_type             | comment               |
|                          | NULL                  | NULL                  |
| month                    | string                |                       |
+--------------------------+-----------------------+-----------------------+--+


# 卡成狗,我虚拟机太卡,我就插入一条数据
1: jdbc:hive2://master:10000> insert into table student1 partition(month='201906') values(1,'cmz');

0: jdbc:hive2://master:10000> select * from student1;
+--------------+----------------+-----------------+--+
| student1.id  | student1.name  | student1.month  |
+--------------+----------------+-----------------+--+
| 1            | cmz            | 201906          |
+--------------+----------------+-----------------+--+
1 row selected (0.092 seconds)
0: jdbc:hive2://master:10000> select * from student1 where month=201906;
+--------------+----------------+-----------------+--+
| student1.id  | student1.name  | student1.month  |
+--------------+----------------+-----------------+--+
| 1            | cmz            | 201906          |
+--------------+----------------+-----------------+--+
1 row selected (0.183 seconds)

1.4 As Select

  根据查询结果创建表(查询的结果会添加到新创建的表中)。

create table if not exists student2
    as select id, name from student;
详细操作
0: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
| student1  |
+-----------+--+
2 rows selected (0.037 seconds)
0: jdbc:hive2://master:10000> create table if not exists student2
0: jdbc:hive2://master:10000>     as select id, name from student;
No rows affected (12.782 seconds)
0: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
| student1  |
| student2  |
+-----------+--+
3 rows selected (0.035 seconds)
0: jdbc:hive2://master:10000> select * from student2;
+--------------+----------------+--+
| student2.id  | student2.name  |
+--------------+----------------+--+
| 1            | 张三             |
| 2            | 李四             |
| 3            | 王五             |
| 4            | 赵六             |
+--------------+----------------+--+
4 rows selected (0.108 seconds)

1.5 location

   创建表时通过Location指定加载数据路径.

cat>student3.txt<<EOF
1,张三
2,李四
3,王五
4,赵六
EOF

hadoop fs -mkdir /data
hadoop fs -put student3.txt /data

create table if not exists student3(
    id int, 
    name string
)
row format delimited fields terminated by ','
location '/data/';

location 后面是 hdfs上对应的 目录

??? note "详细操作"
```
[root@master sgg]# hadoop fs -mkdir /data;
[root@master sgg]# cat>student3.txt<<EOF
> 1,张三
> 2,李四
> 3,王五
> 4,赵六
> EOF
[root@master sgg]# hadoop fs -put student3.txt /data
[root@master sgg]# hadoop fs -text /data/*
1,张三
2,李四
3,王五
4,赵六

0: jdbc:hive2://master:10000> create table if not exists student3(
0: jdbc:hive2://master:10000>     id int, 
0: jdbc:hive2://master:10000>     name string
0: jdbc:hive2://master:10000> )
0: jdbc:hive2://master:10000> row format delimited fields terminated by ','
0: jdbc:hive2://master:10000> location '/data/';
No rows affected (0.08 seconds)
0: jdbc:hive2://master:10000> select * from student3;
+--------------+----------------+--+
| student3.id  | student3.name  |
+--------------+----------------+--+
+--------------+----------------+--+
No rows selected (0.05 seconds)
0: jdbc:hive2://master:10000> show tables;
+-----------+--+
| tab_name  |
+-----------+--+
| student   |
| student1  |
| student3  |
+-----------+--+
3 rows selected (0.041 seconds)

0: jdbc:hive2://master:10000> select * from student3;
+--------------+----------------+--+
| student3.id  | student3.name  |
+--------------+----------------+--+
| 1            | 张三             |
| 2            | 李四             |
| 3            | 王五             |
| 4            | 赵六             |
+--------------+----------------+--+
4 rows selected (0.116 seconds)
```

1.6 import/export

不推荐使用。

2. 数据导出

2.1 insert

   insert不光能导入数据,还能导出数据。

insert overwrite local directory '/root/hive/sgg/cmz'
    select * from student;


# 输出数据格式化  
insert overwrite local directory '/root/hive/sgg/cmz'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'             
        select * from student;

不常用

详细操作
[root@master cmz]# pwd
/root/hive/sgg/cmz
[root@master cmz]# ls
1: jdbc:hive2://master:10000> insert overwrite local directory '/root/hive/sgg/cmz'
1: jdbc:hive2://master:10000>     select * from student;
No rows affected (13.466 seconds)
[root@master cmz]# ls
000000_0
[root@master cmz]# cat 000000_0 
1张三
2李四
3王五
4赵六

2.2 get

hadoop fs -get /user/hive/warehouse/cmz_test.db/student/students.txt .
详细操作
[root@master cmz]# hadoop fs -ls /user/hive/warehouse/cmz_test.db/student;
Found 1 items
-rwxr-xr-x   3 root supergroup         36 2019-08-05 14:04 /user/hive/warehouse/cmz_test.db/student/students.txt
[root@master cmz]# ls
000000_0
[root@master cmz]# hadoop fs -get /user/hive/warehouse/cmz_test.db/student/students.txt .
[root@master cmz]# ls
000000_0  students.txt
[root@master cmz]# cat students.txt 
1,张三
2,李四
3,王五
4,赵六

2.3 shell

hive -e 'select * from cmz_test.student;' >/root/hive/sgg/cmz/s.txt
详细操作
[root@master cmz]# hive -e 'select * from cmz_test.student;' >/root/hive/sgg/cmz/s.txt
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hadoop-2.6.5/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/spark-1.6.3-bin-hadoop2.6/lib/spark-assembly-1.6.3-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hadoop-2.6.5/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/spark-1.6.3-bin-hadoop2.6/lib/spark-assembly-1.6.3-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]

Logging initialized using configuration in file:/usr/local/apache-hive-1.2.2-bin/conf/hive-log4j.properties
OK
Time taken: 1.814 seconds, Fetched: 4 row(s)
[root@master cmz]# ls
000000_0  students.txt  s.txt
[root@master cmz]# cat s.txt 
student.id  student.name
1   张三
2   李四
3   王五
4   赵六

3. 清空表

  清空表数据,truncate类似MySQL

truncate  table student3;
详细操作
 1: jdbc:hive2://master:10000> select * from student3;
+--------------+----------------+--+
| student3.id  | student3.name  |
+--------------+----------------+--+
| 1            | 张三             |
| 2            | 李四             |
| 3            | 王五             |
| 4            | 赵六             |
+--------------+----------------+--+
4 rows selected (0.12 seconds)

1: jdbc:hive2://master:10000> truncate  table student3;
No rows affected (0.082 seconds)
1: jdbc:hive2://master:10000> select * from student3;
+--------------+----------------+--+
| student3.id  | student3.name  |
+--------------+----------------+--+
+--------------+----------------+--+
No rows selected (0.068 seconds)