Alter Column

-- Modify column attributes (name, type, comment):
ALTER TABLE table_name     
    CHANGE [COLUMN] col_old_name col_new_name column_type [COMMENT col_comment] [FIRST|AFTER column_name] 

-- Add/replace columns (REPLACE can be used to drop unwanted columns)
ALTER TABLE table_name   
    ADD|REPLACE COLUMNS (
        col_name data_type [COMMENT col_comment], 
        ...
    )  
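
For example, renaming a column and then dropping one via REPLACE COLUMNS (a sketch with hypothetical columns c1/c2/c3):

-- Rename c1 to c1_new, keep its type, and move it after c2:
ALTER TABLE table_name CHANGE COLUMN c1 c1_new string AFTER c2;

-- REPLACE COLUMNS redefines the whole column list; anything omitted (here c3) is dropped:
ALTER TABLE table_name REPLACE COLUMNS (
    c1_new string COMMENT 'renamed',
    c2     bigint COMMENT 'kept'
);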

Using Hive UDFs

add jar /data/udfbak/dcdnIpdbUdf-2.9.jar;              -- local path
add jar /user/dcdn/warehouse/udf/dcdnIpdbUdf-2.6.jar;  -- HDFS path
create temporary function mysplit as 'com.dcdn.udf.UDFMySplit';
select mysplit('10.11.12.13',",") a from dual;  -- assumes a one-row dual helper table exists
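
A TEMPORARY function only lives for the current session. On Hive 0.13+ a permanent function can be registered instead (a sketch, reusing the HDFS jar path above):

create function mysplit as 'com.dcdn.udf.UDFMySplit'
    using jar 'hdfs:///user/dcdn/warehouse/udf/dcdnIpdbUdf-2.6.jar';
drop function mysplit;  -- remove it when no longer needed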

Creating an external table over an existing directory

CREATE EXTERNAL TABLE test_min_part(
    stime  timestamp, 
    tid    int, 
    tmsg   string
) PARTITIONED BY ( 
    pdate string, 
    phour string,
    pmin  string
)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\u0001'
COLLECTION ITEMS TERMINATED BY '\u0002' 
LINES TERMINATED BY '\n' 
STORED AS textfile
LOCATION '/user/dcdn/mysql_backup/dim_lib.db/test_min_part';

Adding a partition to the external table (a relative LOCATION is resolved against the table's LOCATION):

alter table test_min_part add partition(pdate='20170412',phour='10',pmin='00') location '20170412/10/00';
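
If the partition directories followed Hive's key=value naming (pdate=20170412/phour=10/pmin=00) instead of the bare 20170412/10/00 layout above, all of them could be registered in one pass:

-- Only discovers directories named key=value, not the custom layout above:
MSCK REPAIR TABLE test_min_part;
SHOW PARTITIONS test_min_part;  -- verify what was registered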

Creating an external table

create external table tb_test(
    logtime            bigint,
    bussinessname      string,
    appname            string,
    streamname         string,
    connectiontotal    bigint,
    outbandwidthdelta  bigint,
    srvtime            bigint,
    source             int
)
partitioned by(
    pdate bigint
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '\t'
LINES TERMINATED BY '\n'
STORED AS textfile
LOCATION '/user/dcdn/mysql_backup/live_stat.db/tb_test';

-- Drop partitions:
alter table tb1 drop partition(data_date='2016-06-15',data_hour='04');  
alter table tb2 drop partition(data_date=20170309); 
-- Re-create the partitions:
alter table tb1 add partition(data_date='2016-06-15',data_hour='04') location '20160615/04/';
alter table tb2 add partition(data_date='2016-11-17',data_hour='12') location '20161117/12/';
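
Several partitions can also be added in a single statement (a sketch continuing the example above):

alter table tb1 add
    partition(data_date='2016-06-15',data_hour='05') location '20160615/05/'
    partition(data_date='2016-06-15',data_hour='06') location '20160615/06/';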

-- Load local files into a specific Hive partition
load data local inpath '/data/19/*' overwrite into table test.tb4 partition(data_date='2016-11-27',data_hour='19');

-- Write query results to a local directory
INSERT OVERWRITE LOCAL DIRECTORY '/tmp/httphive18.log'
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '\t'
select  gcid,
        province,
        isp,
        sum(1) devicenum 
  from  http_hub_deploy  
 where data_date='2016-04-19' 
   and domain='iqiyi.x.00cdn.com' 
 group by gcid,province,isp;
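
Dropping the LOCAL keyword writes to an HDFS directory instead (a sketch with a hypothetical output path; ROW FORMAT on INSERT ... DIRECTORY needs Hive 0.11+):

INSERT OVERWRITE DIRECTORY '/user/dcdn/tmp/httphive18_out'
    ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
select gcid, province, isp, sum(1) devicenum
  from http_hub_deploy
 where data_date='2016-04-19'
   and domain='iqiyi.x.00cdn.com'
 group by gcid, province, isp;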

-- Create a table from an existing table (CTAS)
create table if not exists r_live_oss_stat_flow 
    as 
select hostname,sum(1) cnt                
  from dcdn_standard_access_log 
 where data_date='2016-05-29' 
   and data_hour='20'   
   and host='get3.xiaopi.com'               
   and status=404                          
 group by hostname;
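
To copy only the schema of an existing table, with no data, CREATE TABLE ... LIKE is simpler than CTAS (the _bak name is hypothetical):

create table if not exists r_live_oss_stat_flow_bak like r_live_oss_stat_flow;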


-- Full CREATE TABLE statement for a SequenceFile-backed external table
CREATE EXTERNAL TABLE o_vod_scheduledns_log(
    stime     timestamp,
    hostname  string,  
    ms        double
)
PARTITIONED BY ( 
    data_date string, 
    data_hour string
)
ROW FORMAT DELIMITED 
FIELDS TERMINATED BY '\u0001' 
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.SequenceFileInputFormat' 
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
LOCATION 'hdfs://master.xldw.xunlei.com:xxxx/user/dcdn/warehouse/flume_hive/o_vod_scheduledns_log';

create table jsontest(
    id  int,
    jl   string
)
row format delimited
fields terminated by '\u0001'
escaped by '\\'
collection items terminated by '\u0002'
stored as textfile;
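
A sample row to drive the JSON examples that follow (assumed data; INSERT ... VALUES needs Hive 0.14+):

insert into table jsontest values
    (1,'[{"ts":1490000000,"id":"a1","log":"ok"},{"ts":1490000060,"id":"a2","log":"fail"}]');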

CREATE TABLE jt_reg 
    AS
select regexp_replace(regexp_replace(value,'\\}\\,\\{','\\}\\\n\\{'),'\\[|\\]','') as valuereg  
  from jt;

CREATE TABLE logs 
    AS 
SELECT get_json_object(jt_reg.valuereg,'$.ts') AS ts, 
       get_json_object(jt_reg.valuereg,'$.id') AS id,
       get_json_object(jt_reg.valuereg,'$.log') AS log
  FROM jt_reg;

SELECT get_json_object(single_json_table.single_json,'$.ts') AS ts,
       get_json_object(single_json_table.single_json,'$.id') AS id,
       get_json_object(single_json_table.single_json,'$.log') AS log
  FROM (SELECT explode(split(regexp_replace(substr(json_array_col,2,
                        length(json_array_col)-2),'"}","','"}",,,,"'),',,,,'))
                  as single_json
          FROM src_table
        ) single_json_table;

Lateral View and JSON usage examples

select json_tuple(d,'ts','id','log') 
  from (SELECT explode(split(regexp_replace(substr(jl,2,length(jl)-2),'},\\{','}@@@@\\{'),'@@@@')) as d 
          FROM jsontest
       ) tmp;

select a.id,
       json_tuple(b.jl,'ts','id','log') jld
  from jsontest a 
LATERAL VIEW explode(split(regexp_replace(substr(jl,2,length(jl)-2),'},\\{','}@@@@\\{'),'@@@@')) b as jl; 

explain 
select t1.id,t2.*
  from (select a.id  id,
               b.dmc dmc
          from jsontest a 
               LATERAL VIEW explode(split(regexp_replace(substr(jl,2,length(jl)-2),'},\\{','}@@@@\\{'),'@@@@')) b as dmc
          where jl!='[]'
        ) t1 
  LATERAL VIEW json_tuple(dmc,'ts','id','log') t2 as ts,tid,log; 

-- jontest is assumed to already contain the exploded dmc column (like t1 above)
create table jond2 
    as 
select a.id,b.*
  from jontest a 
  LATERAL VIEW json_tuple(dmc,'ts','id','log') b as ts,tid,log;

Hive session parameters

set hive.exec.compress.output=true;            -- compress the final query output
set mapred.job.queue.name=dcdn;                -- submit jobs to the dcdn scheduler queue
set mapred.max.split.size=512000000;           -- max combined input split size (~512 MB)
set mapred.min.split.size.per.node=512000000;  -- min split size per node when combining small files
set mapred.min.split.size.per.rack=512000000;  -- min split size per rack when combining small files
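
Dynamic-partition inserts need two more switches; a minimal sketch (tb_test_staging is a hypothetical source table):

set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
-- partition values are taken from the trailing select column(s):
insert overwrite table tb_test partition(pdate)
select logtime,bussinessname,appname,streamname,
       connectiontotal,outbandwidthdelta,srvtime,source,pdate
  from tb_test_staging;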