{"rsdb":{"rid":"309114","subhead":"","postdate":"0","aid":"224276","fid":"114","uid":"1","topic":"1","content":"
\n
\n \n \u7248\u6743\u58f0\u660e\uff1a\u4e2a\u4eba\u539f\u521b\uff0c\u8f6c\u8f7d\u8bf7\u6807\u6ce8\uff01 https:\/\/blog.csdn.net\/Z_Date\/article\/details\/83863790 <\/div>\n \n \n
\n

\u9700\u6c42\uff1a<\/strong><\/p>\n\n

\u6587\u7ae0\u53ca\u5176\u5185\u5bb9<\/strong>\uff1a index.html : hadoop is good hadoop hadoop is ok page.html : hadoop has hbase hbase is good hbase and hive content.html : hadoop spark hbase are good ok<\/p>\n\n

\u8f93\u51fa\uff1a<\/strong> and page.html:1 are content.html:1 hadoop index.html:3;page.html:1;content.html:1 hbase page.html:3;content.html:1<\/p>\n\n

DescSortCombiner<\/strong><\/p>\n\n

\nimport java.io.IOException;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.Reducer;\n\npublic class DescSortCombiner extends Reducer<Text, Text, Text, Text>{\n\n\t\/**\n\t * index.html_hadoop list(1,1,1)\n\t * index.html_is list(1,1)\n\t * index.html_good list(1)\n\t * index.html_ok list(1)\n\t * page.html_hadoop list(1)\n\t * \n\t * \n\t * hadoop index.html:3\n\t * hadoop page.html:1\n\t * \n\t * \n\t *\/\n\t@Override\n\tprotected void reduce(Text key, Iterable<Text> value,Context context) throws IOException,\n\t\t\tInterruptedException {\n\t\t int counter = 0;\n\t\t Text k = new Text();\n\t\t Text v = new Text();\n\t\tString s [] = key.toString().split(\"_\");\n\t\tfor (Text t : value) {\n\t\t\tcounter += Integer.parseInt(t.toString());\n\t\t}\n\t\tk.set(s[1]);\n\t\tv.set(s[0]+\":\"+counter);\n\t\tcontext.write(k, v);\n\t}\n\t\n}<\/code><\/pre>\n\n

DescSort <\/strong><\/p>\n\n

\nimport java.io.IOException;\nimport java.util.ArrayList;\nimport java.util.List;\nimport org.apache.hadoop.conf.Configuration;\nimport org.apache.hadoop.conf.Configured;\nimport org.apache.hadoop.fs.FileSystem;\nimport org.apache.hadoop.fs.Path;\nimport org.apache.hadoop.io.LongWritable;\nimport org.apache.hadoop.io.Text;\nimport org.apache.hadoop.mapreduce.InputSplit;\nimport org.apache.hadoop.mapreduce.Job;\nimport org.apache.hadoop.mapreduce.Mapper;\nimport org.apache.hadoop.mapreduce.Reducer;\nimport org.apache.hadoop.mapreduce.lib.input.FileInputFormat;\nimport org.apache.hadoop.mapreduce.lib.input.FileSplit;\nimport org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;\nimport org.apache.hadoop.util.GenericOptionsParser;\nimport org.apache.hadoop.util.Tool;\nimport org.apache.hadoop.util.ToolRunner;\n\n\/**\n * \n * @author lyd\n *\n *\u5012\u6392\u7d22\u5f15\uff1a\n *\n *\/\npublic class DescSort  extends Configured implements Tool{\n\t\/**\n\t * \u81ea\u5b9a\u4e49\u7684myMapper\n\t * @author lyd\n\t *\n\t *\/\n\tstatic class MyMapper extends Mapper<LongWritable, Text, Text, Text>{\n\n\t\t@Override\n\t\tprotected void setup(Context context)throws IOException, InterruptedException {\n\t\t}\n\n\t\t@Override\n\t\tprotected void map(LongWritable key, Text value,Context context)\n\t\t\t\tthrows IOException, InterruptedException {\n\t\t\tString line = value.toString();\n\t\t\t\/\/\u83b7\u53d6\u6587\u4ef6\u540d\n\t\t\tInputSplit is = context.getInputSplit();\n\t\t\tString fileName = ((FileSplit)is).getPath().getName();\n\t\t\tString lines [] = line.split(\" \");\n\t\t\tfor (String s: lines) {\n\t\t\t\tcontext.write(new Text(fileName+\"_\"+s), new Text(1+\"\"));\n\t\t\t}\n\t\t\t\/**\n\t\t\t * index.html_hadoop 1\n\t\t\t * index.html_is 1\n\t\t\t * index.html_good 1\n\t\t\t * index.html_hadoop 1\n\t\t\t * index.html_hadoop 1\n\t\t\t * index.html_is 1\n\t\t\t * index.html_ok 1\n\t\t\t * page.html_hadoop 1\n\t\t\t *\/\n\t\t}\n\n\t\t@Override\n\t\tprotected 
void cleanup(Context context)throws IOException, InterruptedException {\n\t\t}\n\t\t\n\t}\n\t\n\t\/**\n\t * \u81ea\u5b9a\u4e49MyReducer\n\t * @author lyd\n\t *\n\t *\/\n\tstatic class MyReducer extends Reducer<Text, Text, Text, Text>{\n\n\t\t@Override\n\t\tprotected void setup(Context context)throws IOException, InterruptedException {\n\t\t}\n\t\t\n\t\tList<String> li = new ArrayList<String>();\n\t\t@Override\n\t\tprotected void reduce(Text key, Iterable<Text> value,Context context)\n\t\t\t\tthrows IOException, InterruptedException {\n\t\t\n\t\t\t\/**\n\t\t\t * index.html_hadoop list(1,1,1)\n\t\t\t * index.html_is list(1,1)\n\t\t\t * index.html_good list(1)\n\t\t\t * index.html_ok list(1)\n\t\t\t * page.html_hadoop list(1)\n\t\t\t * \n\t\t\t * \n\t\t\t *hadoop list(index.html:3,page.html:1)\n\t\t\t *\/\n\t\t\t\n\t\t\t\/*\n\t\t\t int counter = 0;\n\t\t\t for (Text t : value) {\n\t\t\t\tcounter += Integer.parseInt(t.toString());\n\t\t\t}\n\t\t\tString s [] = key.toString().split(\"_\");\n\t\t\tli.add(s[1]+\" \"+s[0]+\":\"+counter);*\/\n\t\t\tString v = \"\";\n\t\t\tfor (Text t : value) {\n\t\t\t\tv += t.toString() +\";\";\n\t\t\t}\n\t\t\tcontext.write(key, new Text(v.substring(0, v.length()-1)));\n\t\t}\n\t\t\n\t\t@Override\n\t\tprotected void cleanup(Context context)throws IOException, InterruptedException {\n\t\t\t\/*for (String s : li) {\n\t\t\t\tString ss [] = s.split(\" \");\n\t\t\t}*\/\n\t\t}\n\t}\n\t\n\t@Override\n\tpublic int run(String[] args) throws Exception {\n\t\t\/\/1\u3001\u83b7\u53d6conf\u5bf9\u8c61\n\t\tConfiguration conf = super.getConf();\n\t\t\/\/2\u3001\u521b\u5efajob\n\t\tJob job = Job.getInstance(conf, \"model03\");\n\t\t\/\/3\u3001\u8bbe\u7f6e\u8fd0\u884cjob\u7684class\n\t\tjob.setJarByClass(DescSort.class);\n\t\t\/\/4\u3001\u8bbe\u7f6emap\u76f8\u5173\u5c5e\u6027\n\t\tjob.setMapperClass(MyMapper.class);\n\t\tjob.setMapOutputKeyClass(Text.class);\n\t\tjob.setMapOutputValueClass(Text.class);\n\t\tFileInputFormat.addInputPath(job, new 
Path(args[0]));\n\t\t\n\t\t\/\/\u8bbe\u7f6ecombiner\n\t\tjob.setCombinerClass(DescSortCombiner.class);\n\t\t\n\t\t\/\/5\u3001\u8bbe\u7f6ereduce\u76f8\u5173\u5c5e\u6027\n\t\tjob.setReducerClass(MyReducer.class);\n\t\tjob.setOutputKeyClass(Text.class);\n\t\tjob.setOutputValueClass(Text.class);\n\t\t\/\/\u5224\u65ad\u8f93\u51fa\u76ee\u5f55\u662f\u5426\u5b58\u5728\uff0c\u82e5\u5b58\u5728\u5219\u5220\u9664\n\t\tFileSystem fs = FileSystem.get(conf);\n\t\tif(fs.exists(new Path(args[1]))){\n\t\t\tfs.delete(new Path(args[1]), true);\n\t\t}\n\t\tFileOutputFormat.setOutputPath(job, new Path(args[1]));\n\t\t\n\t\t\/\/6\u3001\u63d0\u4ea4\u8fd0\u884cjob\n\t\tint isok = job.waitForCompletion(true) ? 0 : 1;\n\t\treturn isok;\n\t}\n\t\n\t\/**\n\t * job\u7684\u4e3b\u5165\u53e3\n\t * @param args\n\t *\/\n\tpublic static void main(String[] args) {\n\t\ttry {\n\t\t\t\/\/\u5bf9\u8f93\u5165\u53c2\u6570\u4f5c\u89e3\u6790\n\t\t\tString [] argss = new GenericOptionsParser(new Configuration(), args).getRemainingArgs();\n\t\t\tSystem.exit(ToolRunner.run(new DescSort(), argss));\n\t\t} catch (Exception e) {\n\t\t\te.printStackTrace();\n\t\t}\n\t}\n}\n<\/code><\/pre>\n\n

<\/p> <\/div>\n <\/div>","orderid":"0","title":"\u7ed3\u5408\u6848\u4f8b\u8bb2\u89e3MapReduce\u91cd\u8981\u77e5\u8bc6\u70b9 -----------   \u5012\u6392\u5e8f","smalltitle":"","mid":"0","fname":"Hadoop","special_id":"0","bak_id":"0","info":"0","hits":"537","pages":"1","comments":"0","posttime":"2019-05-16 00:26:42","list":"1557937602","username":"admin","author":"","copyfrom":"","copyfromurl":"","titlecolor":"","fonttype":"0","titleicon":"0","picurl":"https:\/\/www.cppentry.com\/upload_files\/","ispic":"0","yz":"1","yzer":"","yztime":"0","levels":"0","levelstime":"0","keywords":"\u7ed3\u5408<\/A> \u6848\u4f8b<\/A> \u8bb2\u89e3<\/A> MapReduce<\/A> \u91cd\u8981<\/A> \u77e5\u8bc6\u70b9<\/A> -----------<\/A>  <\/A> \u6392\u5e8f<\/A>","jumpurl":"","iframeurl":"","style":"","template":"a:3:{s:4:\"foot\";s:0:\"\";s:8:\"bencandy\";s:0:\"\";s:4:\"head\";s:0:\"\";}","target":"0","ip":"47.106.78.186","lastfid":"0","money":"0","buyuser":"","passwd":"","allowdown":"","allowview":"","editer":"","edittime":"0","begintime":"0","endtime":"0","description":" \u7248\u6743\u58f0\u660e\uff1a\u4e2a\u4eba\u539f\u521b\uff0c\u8f6c\u8f7d\u8bf7\u6807\u6ce8\uff01 https:\/\/blog.csdn.net\/Z_Date\/article\/details\/83863790 \u9700\u6c42\uff1a\u6587\u7ae0\u53ca\u5176\u5185\u5bb9\uff1a index.html : hadoop is go..","lastview":"1719085352","digg_num":"0","digg_time":"0","forbidcomment":"0","ifvote":"0","heart":"","htmlname":"","city_id":"0"},"page":"1"}