{"rsdb":{"rid":"309083","subhead":"","postdate":"0","aid":"224245","fid":"118","uid":"1","topic":"1","content":"
\n\t\t\t\t\t\t\t\t\n \t\t\t\t\t\t\t\t \n\t\t\t\t\t\t
\n

HBase MapReduce Examples

http://hadoop.apache.org/hbase/docs/current/api/org/apache/hadoop/hbase/mapreduce/package-summary.html#package_description

http://wiki.apache.org/hadoop/Hbase/MapReduce (Deprecated)


Approach that requires restarting Hadoop

The configuration change must be made on every machine.

1: Edit $HADOOP_HOME/conf/hadoop-env.sh and add the HBase library references:

export HBASE_HOME=/home/iic/hbase-0.20.3
export HADOOP_CLASSPATH=$HBASE_HOME/hbase-0.20.3.jar:$HBASE_HOME/hbase-0.20.3-test.jar:$HBASE_HOME/conf:${HBASE_HOME}/lib/zookeeper-3.3.0.jar


Approach that does not require restarting Hadoop (package the dependency libraries into the job jar's lib/ directory, and call job.setJarByClass(XXX.class); in the code):

Another possibility, if for example you do not have access to hadoop-env.sh or are unable to restart the Hadoop cluster, is bundling the HBase jars into a MapReduce job jar: add them and their dependencies under the job jar's lib/ directory, and put the HBase conf into the job jar's top-level directory.
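A minimal driver sketch of that bundled-jar setup, assuming the Hadoop 0.20.x mapreduce API; the class name MyHBaseJob and the job name are placeholders rather than part of the original example:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.mapreduce.Job;

public class MyHBaseJob {
  public static void main(String[] args) throws Exception {
    Job job = new Job(new HBaseConfiguration(), "my hbase job");
    // Hadoop ships the jar containing this class to the cluster; with the
    // HBase jars bundled under that jar's lib/ directory and the HBase conf
    // at its top level, no hadoop-env.sh change or cluster restart is needed.
    job.setJarByClass(MyHBaseJob.class);
    // ... set the mapper/reducer here, then submit:
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}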


Testing produced an exception: java.lang.OutOfMemoryError: Java heap space

bin/hadoop org.apache.hadoop.hbase.PerformanceEvaluation sequentialWrite 4

HBase MapReduce Example 2

This example reverses the value of the contents column in table mrtest and stores the result in the text column.

bin/hbase shell
create 'mrtest', 'contents', 'text'
put 'mrtest', '1', 'contents:', 'content'
put 'mrtest', '1', 'text:', 'text'
get 'mrtest', '1'


The class com.test.hadoop.hbase.HBaseTest generates 1,000,000 rows of test data.


/home/iic/hadoop-0.20.2/bin/hadoop jar examples/examples_1.jar examples.TestTableMapReduce
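For reference, a minimal sketch of what such a reverse-and-store job can look like with the 0.20.x TableMapper/TableMapReduceUtil API; this is an illustration under those assumptions, not the examples.TestTableMapReduce source (the empty qualifier matches the 'contents:' column created above):

import java.io.IOException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Job;

public class ReverseContents extends TableMapper<ImmutableBytesWritable, Put> {

  public void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
    // Read contents:, reverse its bytes, and write the result to text:.
    byte[] contents = value.getValue(Bytes.toBytes("contents"), Bytes.toBytes(""));
    if (contents == null) return;
    byte[] reversed = new byte[contents.length];
    for (int i = 0; i < contents.length; i++) {
      reversed[i] = contents[contents.length - 1 - i];
    }
    Put put = new Put(row.get());
    put.add(Bytes.toBytes("text"), Bytes.toBytes(""), reversed);
    context.write(row, put);
  }

  public static void main(String[] args) throws Exception {
    Job job = new Job(new HBaseConfiguration(), "reverse mrtest contents");
    job.setJarByClass(ReverseContents.class);  // required when bundling, see above
    TableMapReduceUtil.initTableMapperJob("mrtest", new Scan(),
        ReverseContents.class, ImmutableBytesWritable.class, Put.class, job);
    TableMapReduceUtil.initTableReducerJob("mrtest", IdentityTableReducer.class, job);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}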


HBase bundled examples

hbase-0.20.3/src/test

Counting the total number of rows in a table (org.apache.hadoop.hbase.mapreduce.RowCounter)

bin/hadoop jar /home/iic/hbase-0.20.3/hbase-0.20.3.jar rowcounter scores grade

Result:

10/04/12 17:08:05 INFO mapred.JobClient: ROWS=2


Building a Lucene index over HBase columns (examples.TestTableIndex)

This indexes the contents column of table mrtest using lucene-core-2.2.0.jar, which must be added to the classpath: put lucene-core-2.2.0.jar into the examples.zip lib/ directory, and the code must call job.setJarByClass(TestTableIndex.class); otherwise the Lucene classes are not picked up.


bin/hadoop fs -rmr testindex

bin/hadoop jar examples.zip examples.TestTableIndex


Generate HBase-ready HFiles from a file first, then load them into HBase, to speed up bulk imports

examples.TestHFileOutputFormat

The input data is generated automatically by the example; each key is a ten-digit, zero-padded number such as "0000000001".

Output data directory: /user/iic/hbase-hfile-test

bin/hadoop fs -rmr hbase-hfile-test

bin/hadoop jar examples.zip examples.TestHFileOutputFormat

Load the generated data into HBase (JRuby must be installed first):

export PATH=$PATH:/home/iic/jruby-1.4.0/bin/
echo $PATH


vi bin/loadtable.rb

require '/home/iic/hbase-0.20.3/hbase-0.20.3.jar'
require '/home/iic/hadoop-0.20.2/hadoop-0.20.2-core.jar'
require '/home/iic/hadoop-0.20.2/lib/log4j-1.2.15.jar'
require '/home/iic/hadoop-0.20.2/lib/commons-logging-1.0.4.jar'
require '/home/iic/hbase-0.20.3/lib/zookeeper-3.3.0.jar'
require '/home/iic/hbase-0.20.3/lib/commons-cli-2.0-SNAPSHOT.jar'

$CLASSPATH << '/home/iic/hbase-0.20.3/conf'


First delete the table "hbase-test" if it already exists.


jruby bin/loadtable.rb hbase-test /user/iic/hbase-hfile-test


To see its usage, run it under JRuby with no arguments:

bin/hbase org.jruby.Main bin/loadtable.rb
Usage: loadtable.rb TABLENAME HFILEOUTPUTFORMAT_OUTPUT_DIR


Note: this approach requires solving a few problems.

1: Your MapReduce job must ensure a total ordering among all keys. By default, MapReduce distributes keys among reducers with a Partitioner that hashes the map task output key: (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks.

With that default hash Partitioner, if the keys are 0 through 4 and there are 2 reduce tasks, reducer 0 gets keys 0, 2, and 4 while reducer 1 gets keys 1 and 3 (each in order), so the start and end keys of the generated blocks come out interleaved and disordered.


System.out.println((new ImmutableBytesWritable("0".getBytes()).hashCode() & Integer.MAX_VALUE));
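Extending that check into a loop (a hypothetical helper, not part of the original example), this prints which reducer the default hash formula picks for each single-digit row key when there are two reduce tasks:

import org.apache.hadoop.hbase.io.ImmutableBytesWritable;

public class HashPartitionDemo {
  public static void main(String[] args) {
    int numReduceTasks = 2;
    for (String key : new String[] {"0", "1", "2", "3", "4"}) {
      // The same formula the default hash partitioning applies.
      int partition = (new ImmutableBytesWritable(key.getBytes()).hashCode()
          & Integer.MAX_VALUE) % numReduceTasks;
      System.out.println(key + " -> reducer " + partition);
    }
  }
}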


So you need to implement your own Partitioner so that the key ranges are contiguous: reducer 0 gets keys 0-2 and reducer 1 gets keys 3-4 (see TotalOrderPartitioner in Hadoop for more on what this means).
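A minimal sketch of such a range partitioner for this five-key illustration, assuming single-byte row keys '0' through '4' and two reducers; a real bulk load would instead sample the key space and use TotalOrderPartitioner's split points:

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class RangePartitioner extends Partitioner<ImmutableBytesWritable, KeyValue> {
  @Override
  public int getPartition(ImmutableBytesWritable key, KeyValue value, int numPartitions) {
    // Keys "0".."2" go to reducer 0 and "3".."4" to reducer 1, so each
    // reducer writes one contiguous, sorted key range into its HFiles.
    return (key.get()[0] <= '2') ? 0 : 1;
  }
}

It would be enabled with job.setPartitionerClass(RangePartitioner.class).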


Verify the number of imported rows:

bin/hadoop jar /home/iic/hbase-0.20.3/hbase-0.20.3.jar rowcounter hbase-test info


HFile generation example 2

This kind of example is only suitable for the initial bulk import of data, because bin/loadtable.rb replaces all existing files each time it runs.

For subsequent data operations, you can map over the text file and insert directly into the HBase table (see the sketch after this list).

Or guarantee that the newly added keys do not conflict with the existing ones, and append new blocks following the logic of bin/loadtable.rb.
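A minimal sketch of that direct-insert path, assuming the 0.20.x client API; the table name, the info:content column, and the sample line reuse names from this post and are otherwise illustrative:

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class DirectInsertSketch {
  public static void main(String[] args) throws Exception {
    HTable table = new HTable(new HBaseConfiguration(), "hbase-test2");
    // One line of the generated log, e.g. "0,content0,1271222976817".
    String[] parts = "0,content0,1271222976817".split(",");
    byte[] row = Bytes.toBytes(String.format("%010d", Long.parseLong(parts[0])));
    Put put = new Put(row);
    put.add(Bytes.toBytes("info"), Bytes.toBytes("content"), Bytes.toBytes(parts[1]));
    table.put(put);        // goes through the region servers: no key-order constraint
    table.flushCommits();  // push any buffered writes
  }
}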


Generate test_1kw.log with 10,000,000 rows of the form 0,content0,1271222976817:

/home/bmb/jdk1.6.0_16/bin/java -cp examples.zip examples.CreateLogFile test_1kw.log 10000000

bin/hadoop fs -put test_1kw.log hadoop-performance-test

Use only 1 reduce task to avoid the total key ordering problem:

bin/hadoop jar examples.zip examples.TestCreateHFileMR hadoop-performance-test hadoop-hbase-hfile-test 1

Generating the HBase HFiles took only a little time (10/04/15 14:22:59 to 10/04/15 14:25:22), far faster than the performance test that wrote 10,000,000 rows into HBase directly.

Import into HBase:

jruby bin/loadtable.rb hbase-test2 hadoop-hbase-hfile-test


Verify the number of imported rows:

bin/hadoop jar /home/iic/hbase-0.20.3/hbase-0.20.3.jar rowcounter hbase-test2 info


Other HBase MapReduce examples:
http://www.hadoop.org.cn/mapreduce/hbase-mapreduce/

http://www.spicylogic.com/allenday/blog/2008/08/28/hbase-bulk-load-import-example/