*命令wiki超链接:*https://cwiki.apache.org/confluence/display/Hive/LanguageManual
创建
-
1.创建新表
create table t_hive ( a int , b int , c int) row format delimited fields terminated by '\t';
-
2.创建分区表
drop table if exists t_hft; create table t_hft( securityid string, tradetime string, preclosepx double ) partitioned by (tradedate int) row format delimited fields terminated by ',';
-
3.创建视图
create view v_hive as select a,b from t_hive;
删除
-
删除数据库(级联删除)
drop database if exists db_name;
-
删除表
drop table if exists tbname ;
-
删除表数据
truncate table tablename;
-
修改表名
alter table tbname rename to newtbname
-
删除分区数据
alter table dbname.tablename drop if exists partition (dtd='20180724');
查询
-
查看数据库信息
desc database extended dbname;
-
模糊查询表名
show tables '*t*';
-
查询表结构
desc formatted table_name
-
查看表分区
show partitions tbname;
数据加载
-
加载本地文件到hive表
load data local inpath '/opt/mydata/xxx.txt' into table idb_name.xxx ;
-
加载hdfs文件到hive中
load data inpath '/user/beifeng/hive/mydata/table_name.txt' into table db_name.table_name;
-
加载数据覆盖表中已经有的数据
load data inpath '/user/beifeng/hive/mydata/table_name.txt' overwrite into table db_name.table_name;
-
创建表是通过insert加载
create table db_name.newtable_name like oldtablename; insert into table db_name.newtable_name select * from db_name.oldtable_name;
-
从其他表导入数据
insert overwrite table t_hive2 select * from t_hive ;
-
创建表并从其他表导入数据
create table t_hive as select * from t_hive2 ;
-
仅复制表结构不导数据
create table t_hive3 like t_hive;
-
通过hive导出到本地文件系统
insert overwrite local directory '/tmp/t_hive' select * from t_hive;
-
导入分区数据
load data local inpath '/home/p70_g108/pzj/tmp/dd' overwrite into table proj_china_ucenter_item.shi_t_member_daily_activity_pcmanag_clean partition(dt=2018-12-18);
-
查询结果生成指定分隔符文件
insert overwrite local directory "/home/bfdetl/tmp_data/20180630/" row format delimited fields terminated by "\u0005"
修改/更新
-
加一个字段或者多个字段
alter table t_hive add columns (new_col string);
-
更新列/属性
alter table base_user_event change access_to_registration time_visit_registered string;
-
重命令表名
alter table t_hive rename to t_hadoop;
-
时间格式互转:yyyymmdd/yyyy-mm-dd
方法1: from_unixtime+ unix_timestamp
--20171205转成2017-12-05 select from_unixtime(unix_timestamp('20171205','yyyymmdd'),'yyyy-mm-dd') from dual;
--2017-12-05转成20171205 select from_unixtime(unix_timestamp('2017-12-05','yyyy-mm-dd'),'yyyymmdd') from dual;
方法2: substr + concat
--20171205转成2017-12-05 select concat(substr('20171205',1,4),'-',substr('20171205',5,2),'-',substr('20171205',7,2)) from dual;
--2017-12-05转成20171205 select concat(substr('2017-12-05',1,4),substr('2017-12-05',6,2),substr('2017-12-05',9,2)) from dual;
-
截取字符串
substr(register_date,1,10) ='2018-07-04'
-
case when length(t1.loginname)=11 then t1.loginname else '' end ticket_phone_number
-
替换字符串
SELECT regexp_replace(substr(register_date,1,10),'-','') as A ,register_date from proj_gucp_ods.lenovoid_regrister_user_etl_info_two limit 1
-
计算时间差值
SELECT datediff('2013-10-15', '2013-9-15') FROM words LIMIT 1; >>30
-
保留后两位小数并百分率输出
SELECT concat(round(123/221,4)*100,'%') as lc
-
查询除某一列的所有字段
set hive.support.quoted.identifiers=None; select `(name|id|pwd)?+.+` from tableName;
-
LUDP(注意:走MR生成的是yyyy_mm_dd格式 走spark生成的是时间戳的格式)
with dates as ( select date_add("${start_day}", a.pos) as d from (select posexplode(split(repeat("o", datediff("${end_day}", "${start_day}")), "o"))) a) select * from dates;
-
列转行
with top_n as( select concat('【register_source】:',register_source, '\n【matched_regapp】: ',matched_regapp, '\n【total】: ' total) as a from proj_gucp_ods.pei_lxs_source_monitor order by total desc limit 10) select concat_ws('\n',collect_list(top_n.a)) as info from top_n;
-
切换表的数据库
use dbname1; alter table dbname1.${table_name} rename to dbanme2.${table_name};
-
列的增删
CREATE TABLE test ( creatingTs BIGINT, a STRING, b BIGINT, c STRING, d STRING, e BIGINT, f BIGINT); --若删除column f 列,可以使用以下语句: ALTER TABLE test REPLACE COLUMNS ( creatingTs BIGINT, a STRING, b BIGINT, c STRING, d STRING, e BIGINT); --若增加列: alter table of_test columns (judegment int)