1、HBase中建表
hbase(main):056:0> create 'movie','analyse'
2、Flume配置
# Flume agent "a1": spooling-directory source -> memory channel -> HBase sink.
a1.sources = r1
a1.channels = c1
a1.sinks = k1

# ---- Source r1: picks up files dropped into a local directory ----
a1.sources.r1.type = spooldir
a1.sources.r1.channels = c1
# Local directory that is watched for incoming files
a1.sources.r1.spoolDir = /root/flume_test
a1.sources.r1.deletePolicy = never
a1.sources.r1.fileHeader = true

# ---- Channel c1: buffers events in memory between source and sink ----
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# ---- Sink k1: writes each event into HBase ----
a1.sinks.k1.type = org.apache.flume.sink.hbase.HBaseSink
a1.sinks.k1.channel = c1
# Target table
a1.sinks.k1.table = movie
# Target column family
a1.sinks.k1.columnFamily = analyse
a1.sinks.k1.serializer = org.apache.flume.sink.hbase.RegexHbaseEventSerializer
# Regular expression (not a wildcard) that splits one input line into groups;
# expected line format: <id>::<name>::<rating>, e.g. 1001::Tom and Jerry::9
a1.sinks.k1.serializer.regex = (.*)::(.*)::(.*)
# One name per regex group; ROW_KEY is a special placeholder, not a real column
a1.sinks.k1.serializer.colNames = ROW_KEY,movie_name,rating
# Index (0-based) of the entry in colNames that is used as the row key
a1.sinks.k1.serializer.rowKeyIndex = 0
3、启动flume
[root@quickstart]# flume-ng agent --conf /etc/flume-ng/conf --conf-file /etc/flume-ng/conf/flume.conf --name a1 -Dflume.root.logger=INFO,console
4、导入数据到 /root/flume_test/,文件内容为
1001::tom::3
1002::jerry::5
1003::jack::4
5、查看HBase表
hbase(main):066:0> scan 'movie'
ROW COLUMN+CELL
1001 column=analyse:movie_name, timestamp=1545212471611, value=tom
1001 column=analyse:rating, timestamp=1545212471611, value=3
1002 column=analyse:movie_name, timestamp=1545212471611, value=jerry
1002 column=analyse:rating, timestamp=1545212471611, value=5
1003 column=analyse:movie_name, timestamp=1545212471611, value=jack
1003 column=analyse:rating, timestamp=1545212471611, value=4
3 row(s) in 0.0440 seconds