A Demo Combining HBase and WordCount
Copyright notice: this is the blogger's original article; please cite this link when reprinting. If you find any errors in the article, please point them out so that others are not misled. https://blog.csdn.net/gnail_oug/article/details/47085913

Prerequisites for running the example:

The Hadoop and HBase clusters are both up and running, and the source file for the word count has already been uploaded to HDFS.

For installing Hadoop, see: hadoop-1.2.1 installation guide

For installing HBase, see: hbase-0.94 installation guide

For preparing the word-count source file, see: Hadoop's first program, WordCount

Note: this example was tested on hadoop-1.2.1 and hbase-0.94.
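
For orientation: if the uploaded source file contained, say, the single line "hello world hello hbase" (a made-up sample; any plain text file works), the finished job would leave one row per distinct word in the wordcount table, with the total stored as a string under the content:count column:

row key    content:count
hbase      "1"
hello      "2"
world      "1"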


package com.songguoliang.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

/**
 * A demo combining HBase and WordCount: counts the words in a text file on HDFS
 * and writes the totals into an HBase table.
 * @date 2015-07-27 11:21:48
 * @author sgl
 */
public class WordCountHBase {
	/**
	 * Map: splits each input line into words and emits (word, 1) pairs
	 * @date 2015-07-27 11:24:04
	 * @author sgl
	 */
	public static class Map extends Mapper<LongWritable, Text, Text, IntWritable>{
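		//Reusable "1", emitted once per word occurrence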
		private IntWritable one=new IntWritable(1);
		/*
		 * Override the map method
		 * (non-Javadoc)
		 * @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
		 * @date 2015-07-27 11:29:48
		 * @author sgl
		 */
		@Override
		protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
			//Split each input line on whitespace; \s+ avoids the empty tokens that
			//consecutive spaces would produce (an empty word would later become an
			//illegal empty row key in the reducer)
			String[] words=value.toString().trim().split("\\s+");
			for(String word:words){
				context.write(new Text(word), one);
			}
		}
	}
	/**
	 * Reduce: sums the counts for each word and writes one HBase row per word
	 * @date 2015-07-27 11:36:03
	 * @author sgl
	 * @version $Id: WordCountHBase.java, v 0.1 2015-07-27 11:36:03 sgl Exp $
	 */
	public static class Reduce extends TableReducer<Text, IntWritable, NullWritable>{
		/*
		 * Override the reduce method
		 * (non-Javadoc)
		 * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
		 * @date 2015-07-27 11:36:12
		 * @author sgl
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, NullWritable, Writable>.Context context) throws IOException, InterruptedException {
			int sum=0;
			for(IntWritable value:values){
				sum+=value.get();
			}
			//One Put per word: the word itself is the row key
			Put put=new Put(Bytes.toBytes(key.toString()));
			//Column family "content", qualifier "count", value is the total count as a string
			put.add(Bytes.toBytes("content"), Bytes.toBytes("count"), Bytes.toBytes(String.valueOf(sum)));
			context.write(NullWritable.get(), put);
		}
		
	}
	/**
	 * Create the output table in HBase, dropping any existing table with the same name
	 * @date 2015-07-27 11:50:42
	 * @author sgl
	 * @param tableName the name of the table to create
	 * @throws IOException
	 */
	public static void createHBaseTable(String tableName) throws IOException{
		HTableDescriptor tableDescriptor=new HTableDescriptor(tableName);
		HColumnDescriptor columnDescriptor=new HColumnDescriptor("content");
		tableDescriptor.addFamily(columnDescriptor);
		Configuration conf=HBaseConfiguration.create();
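		//ZooKeeper quorum hosts of the author's cluster; adjust for your environment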
		conf.set("hbase.zookeeper.quorum", "sdw1,sdw2");
		HBaseAdmin admin=new HBaseAdmin(conf);
		if(admin.tableExists(tableName)){
			System.out.println("表已存在,正在尝试重新创建表!");
			admin.disableTable(tableName);
			admin.deleteTable(tableName);
		}
		System.out.println("创建新表:"+tableName);
		admin.createTable(tableDescriptor);
	}
	
	public static void main(String[] args) {
		try {
			String tableName="wordcount";
			createHBaseTable(tableName);
			
			Configuration conf=new Configuration();
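			//Point the job at HBase: the output table name and the ZooKeeper quorum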
			conf.set(TableOutputFormat.OUTPUT_TABLE, tableName);
			conf.set("hbase.zookeeper.quorum", "sdw1,sdw2");
			String input=args[0];
			Job job=new Job(conf, "WordCount table with "+input);
			job.setJarByClass(WordCountHBase.class);
			job.setMapperClass(Map.class);
			job.setReducerClass(Reduce.class);
			job.setMapOutputKeyClass(Text.class);
			job.setMapOutputValueClass(IntWritable.class);
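			//Plain text lines in; TableOutputFormat sends each Put from the reducer to the wordcount table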
			job.setInputFormatClass(TextInputFormat.class);
			job.setOutputFormatClass(TableOutputFormat.class);
			FileInputFormat.addInputPath(job, new Path(input));
			System.exit(job.waitForCompletion(true) ? 0 : 1);
		} catch (Exception e) {
			e.printStackTrace();
		}
		
	}
	
}
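
To read the results back without the HBase shell, a small client can scan the wordcount table. The following is a minimal sketch against the same HBase 0.94 client API the demo uses; the class name WordCountVerify is invented here, and sdw1,sdw2 are the same placeholder quorum hosts as above:

package com.songguoliang.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class WordCountVerify {
	public static void main(String[] args) throws IOException {
		Configuration conf=HBaseConfiguration.create();
		conf.set("hbase.zookeeper.quorum", "sdw1,sdw2");
		HTable table=new HTable(conf, "wordcount");
		try{
			//Full table scan: one row per distinct word
			ResultScanner scanner=table.getScanner(new Scan());
			for(Result result:scanner){
				String word=Bytes.toString(result.getRow());
				String count=Bytes.toString(result.getValue(Bytes.toBytes("content"), Bytes.toBytes("count")));
				System.out.println(word+"\t"+count);
			}
			scanner.close();
		}finally{
			table.close();
		}
	}
}

To run the whole demo, pack the classes into a jar (the jar name below is hypothetical) and submit it with the HDFS path of the source file as the only argument:

hadoop jar wordcount-hbase.jar com.songguoliang.hbase.WordCountHBase <hdfs-input-path>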




