Hive Learning in Practice
--------------------------------------------------------------
Hive from Beginner to Practice [40 Lectures] --- study notes
--------------------------------------------------------------
Hive commands
1. show tables;
2. show databases;
3. desc login;            -- view the table structure
4. show partitions test5; -- view the table's partitions
create table login_struct(
ip string,
user struct<uid:bigint,name:string>
)
partitioned by (dt string)
row format delimited
fields terminated by ','
collection items terminated by '|'
stored as textfile;
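The notes don't show login_struct.txt itself; given the delimiters above, a matching line would look like this (sample values assumed), with the struct's two members separated by the collection-items delimiter '|':
192.168.1.0,101|tom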
load data local inpath
'/hadoop/hive0.9/testHive/login_struct.txt'
into table login_struct partition (dt='2015-02-07');
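Struct members are then read with dot notation; a quick check might look like this (a sketch, not from the course; note that later Hive versions reserve the word user, so the column would need backticks there):
select ip, user.uid, user.name
from login_struct
where dt='2015-02-07';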
Day 20:
Nested complex types
Suppose a game-login table:
create table login_game_complex(
ip string,
uid string,
gameinfo map<bigint, struct<name:string,score:bigint,level:string>>
)
partitioned by (dt string)
row format delimited
stored as textfile;
A nested complex type like this means each user in the login table carries game info, and one user may have several games: the map key is the game id, and the value is a struct holding the game's name, score, and level. Loading data directly in this complex format is a hassle. When the nesting runs deep, producing a correctly formatted input file gets complicated and error-prone: the delimiter changes with every additional nesting level of the complex type, defaulting to \001, \002, \003 and so on. Generating such a file from the shell and loading it with LOAD DATA LOCAL is not covered here; instead, load a flat table and convert it with INSERT ... SELECT, as below.
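For illustration only, here is roughly what one raw line for login_game_complex would have to look like under the default delimiters, writing ^A..^D for \001..\004 (a sketch of the format; no such file appears in the course):
192.168.1.0^A110^A1^Cwow^D100^Dv1^B2^Ccaojian^D100^Dv2
Fields are split by \001, map entries by \002, a key from its value by \003, and the struct's members by \004. This is exactly the fiddly format the flat-table detour below avoids.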
create table login_game_simple2(
ip string,
uid string,
gameid bigint,
gamename string,
gamescore bigint,
gamelevel string
)
partitioned by (dt string)
row format delimited
fields terminated by ','
stored as textfile;
Sample data (login_simple.txt):
192.168.1.0,110,1,wow,100,v1
192.168.1.0,110,2,caojian,100,v2
192.168.1.0,110,3,曹健,100,v3
192.168.1.0,110,1,崔,100,v3
192.168.1.0,111,2,接w,100,v4
192.168.1.1,111,3,刚w,1002,v
192.168.1.1,111,1,w私会w,1020,v1
192.168.1.1,111,1,w,1010,v6
192.168.1.1,112,1,wtouceng,1100,v1
192.168.1.1,112,1,wow,100,v7
load data local inpath
'/hadoop/hive0.9/testHive/login_simple.txt'
into table login_game_simple2 partition (dt='2015-02-07');
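A quick sanity check on the load (sketch):
select * from login_game_simple2 where dt='2015-02-07' limit 3;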
The conversion maps the flat table's columns
ip string, uid string, gameid bigint, gamename string, gamescore bigint, gamelevel string
onto the complex table's columns
ip string, uid string, gameinfo map<bigint, struct<name:string,score:bigint,level:string>>
with gameid becoming the map key and gamename, gamescore, gamelevel the struct fields:
insert overwrite table login_game_complex
partition (dt='2015-02-07')
select ip, uid, map(gameid, named_struct(
'name', gamename, 'score', gamescore, 'level', gamelevel))
from login_game_simple2 where dt='2015-02-07';
Result of select * from login_game_complex (columns: ip, uid, gameinfo, dt):
NULL         NULL  {}                                                 2015-02-07
192.168.1.0  110   {1:{"name":"wow","score":100,"level":"v1"}}        2015-02-07
192.168.1.0  110   {2:{"name":"caojian","score":100,"level":"v2"}}    2015-02-07
192.168.1.0  110   {3:{"name":"曹健","score":100,"level":"v3"}}       2015-02-07
192.168.1.0  110   {1:{"name":"崔","score":100,"level":"v3"}}         2015-02-07
192.168.1.0  111   {2:{"name":"接w","score":100,"level":"v4"}}        2015-02-07
192.168.1.1  111   {3:{"name":"刚w","score":1002,"level":"v"}}        2015-02-07
192.168.1.1  111   {1:{"name":"w私会w","score":1020,"level":"v1"}}    2015-02-07
192.168.1.1  111   {1:{"name":"w","score":1010,"level":"v6"}}         2015-02-07
192.168.1.1  112   {1:{"name":"wtouceng","score":1100,"level":"v1"}}  2015-02-07
192.168.1.1  112   {1:{"name":"wow","score":100,"level":"v7"}}        2015-02-07
NULL         NULL  {}                                                 2015-02-07
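Map entries are read by key and struct members with dot notation. For example, to pull out game id 1 for each row (a sketch; rows whose map lacks key 1 return NULL):
select ip, uid, gameinfo[1].name, gameinfo[1].score, gameinfo[1].level
from login_game_complex
where dt='2015-02-07';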
Regular expressions (RegexSerDe)
RegexSerDe ships in hive-contrib, so register the jar before creating the table:
add jar /hadoop/hive0.9/hive/lib/hive-contrib-0.9.0.jar;
create table test_serde(
c0 string,
c1 string,
c2 string
)
row format
serde 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
with serdeproperties
('input.regex' = '([^ ]*) ([^ ]*) ([^ ]*)',
'output.format.string' = '%1$s %2$s %3$s')
stored as textfile;
load data local inpath
'/hadoop/hive0.9/hive/lib/r.txt'
into table test_serde ;
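Each line of r.txt should be three space-separated fields for the regex to match; lines that don't match come back as all-NULL rows. A quick check (sketch, assuming input lines such as `a b c`):
select c0, c1, c2 from test_serde;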