\u4e00\u3001\u5ef6\u8fdf\u8ba1\u7b97<\/h3> \n
RDD \u4ee3\u8868\u7684\u662f\u5206\u5e03\u5f0f\u6570\u636e\u5f62\u6001\uff0c\u56e0\u6b64\uff0cRDD \u5230 RDD \u4e4b\u95f4\u7684\u8f6c\u6362\uff0c\u672c\u8d28\u4e0a\u662f\u6570\u636e\u5f62\u6001\u4e0a\u7684\u8f6c\u6362\uff08Transformations\uff09<\/p> \n
\u5728 RDD \u7684\u7f16\u7a0b\u6a21\u578b\u4e2d\uff0c\u4e00\u5171\u6709\u4e24\u79cd\u7b97\u5b50\uff0cTransformations \u7c7b\u7b97\u5b50\u548c Actions \u7c7b\u7b97\u5b50\u3002\u5f00\u53d1\u8005\u9700\u8981\u4f7f\u7528 Transformations \u7c7b\u7b97\u5b50\uff0c\u5b9a\u4e49\u5e76\u63cf\u8ff0\u6570\u636e\u5f62\u6001\u7684\u8f6c\u6362\u8fc7\u7a0b\uff0c\u7136\u540e\u8c03\u7528 Actions \u7c7b\u7b97\u5b50\uff0c\u5c06\u8ba1\u7b97\u7ed3\u679c\u6536\u96c6\u8d77\u6765\u3001\u6216\u662f\u7269\u5316\u5230\u78c1\u76d8\u3002<\/p> \n
\u5728\u8fd9\u6837\u7684\u7f16\u7a0b\u6a21\u578b\u4e0b\uff0cSpark \u5728\u8fd0\u884c\u65f6\u7684\u8ba1\u7b97\u88ab\u5212\u5206\u4e3a\u4e24\u4e2a\u73af\u8282\u3002<\/p> \n
- \n
- \u57fa\u4e8e\u4e0d\u540c\u6570\u636e\u5f62\u6001\u4e4b\u95f4\u7684\u8f6c\u6362\uff0c\u6784\u5efa\u8ba1\u7b97\u6d41\u56fe\uff08DAG\uff0cDirected Acyclic Graph\uff09<\/li> \n
- \u901a\u8fc7 Actions \u7c7b\u7b97\u5b50\uff0c\u4ee5\u56de\u6eaf\u7684\u65b9\u5f0f\u53bb\u89e6\u53d1\u6267\u884c\u8fd9\u4e2a\u8ba1\u7b97\u6d41\u56fe<\/li> \n <\/ol> \n
\u6362\u53e5\u8bdd\u8bf4\uff0c\u5f00\u53d1\u8005\u8c03\u7528\u7684\u5404\u7c7b Transformations \u7b97\u5b50\uff0c\u5e76\u4e0d\u7acb\u5373\u6267\u884c\u8ba1\u7b97\uff0c\u5f53\u4e14\u4ec5\u5f53\u5f00\u53d1\u8005\u8c03\u7528 Actions \u7b97\u5b50\u65f6\uff0c\u4e4b\u524d\u8c03\u7528\u7684\u8f6c\u6362\u7b97\u5b50\u624d\u4f1a\u4ed8\u8bf8\u6267\u884c\u3002\u5728\u4e1a\u5185\uff0c\u8fd9\u6837\u7684\u8ba1\u7b97\u6a21\u5f0f\u6709\u4e2a\u4e13\u95e8\u7684\u672f\u8bed\uff0c\u53eb\u4f5c\u201c\u5ef6\u8fdf\u8ba1\u7b97\u201d\uff08Lazy Evaluation\uff09\u3002<\/p> \n
\u4e8c\u3001Spark\u7b97\u5b50\u5206\u7c7b<\/h3> \n
\u5728 RDD \u7684\u5f00\u53d1\u6846\u67b6\u4e0b\uff0c\u54ea\u4e9b\u7b97\u5b50\u5c5e\u4e8e Transformations \u7b97\u5b50\uff0c\u54ea\u4e9b\u7b97\u5b50\u662f Actions \u7b97\u5b50\u5462\uff1f<\/p> \n
\u8fd9\u91cc\u7ed9\u51fa\u4e00\u5f20\u81ea\u5df1\u5728\u6781\u5ba2\u770b\u7684\u8bfe\u7a0b\u4e2d\u7684\u56fe<\/p> \n
<\/p> \n
\u4e09\u3001Transform\u7b97\u5b50\u6267\u884c\u6d41\u7a0b(\u6e90\u7801)<\/h3> \n
Map\u8f6c\u6362\u7b97\u662f RDD \u7684\u7ecf\u5178\u8f6c\u6362\u64cd\u4f5c\u4e4b\u4e00\u4e86.\u5c31\u4ee5\u5b83\u5f00\u5934.Map\u7684\u6e90\u7801\u5982\u4e0b\uff1a<\/p> \n
<\/p> \n
1. sc.clean(f)<\/h4> \n
\u9996\u5148\u8c03\u7528\u4e86\u4e00\u4e2asc.clean(f), \u6211\u4eec\u8fdb\u5230clean\u51fd\u6570\u91cc\u770b\u4e0b:<\/p> \n
<\/p> \n
\u6ce8\u91ca\u4e2d\u660e\u786e\u63d0\u5230\u4e86\u8fd9\u4e2a\u51fd\u6570\u7684\u529f\u80fd\uff1aclean \u6574\u7406\u4e00\u4e2a\u95ed\u5305\uff0c\u4f7f\u5176\u53ef\u4ee5\u5e8f\u5217\u5316\u5e76\u53d1\u9001\u5230\u4efb\u52a1.<\/p> \n
\u8fd9\u91cc\u7684\u4ee3\u7801\u6709\u4e9b\u591a\uff0c\u5927\u6982\u77e5\u9053\u8fd9\u4e2a\u51fd\u6570\u7684\u529f\u80fd\u662f\u8fd9\u6837\u5c31ok\u4e86\uff0c\u95ed\u5305\u7684\u95ee\u9898\u4f1a\u5728\u53e6\u4e00\u7bc7\u6587\u7ae0\u91cc\u4ed4\u7ec6\u4ecb\u7ecd<\/p> \n
2. MapPartitionsRDD<\/h4> \n
\u8fdb\u5165\u5230\u51fd\u6570\u540e\u6e90\u7801\u5982\u4e0b\uff1a<\/p> \n
<\/p> \n
\u8fd9\u662f\u4e00\u4e2aMapPartitionsRDD\u3002\u6211\u4eec\u4ed4\u7ec6\u770b\u5b83\u7684\u6784\u6210\uff0c\u4ece\u800c\u6765\u7406\u89e3\u5b83\u662f\u5982\u4f55\u63cf\u8ff0MapPartitionsRDD\u7684.<\/p> \n
2.1 var prev<\/font>\uff1aRDD[T]<\/h5> \n
\u8fd9\u91cc\u7684 prev<\/font> \u5c31\u662f\u7236RDD\uff0cf <\/font> \u5219\u662fMap\u4e2d\u4f20\u5165\u7684\u5904\u7406\u51fd\u6570\uff0c\u9664\u4e86\u8fd9\u4e24\u4e2a\u5c31\u6ca1\u6709\u4e86\uff0c\u4e5f\u5c31\u662f\u8bf4\u660e RDD\u4e2d\u6ca1\u6709\u5b58\u50a8\u5177\u4f53\u7684\u6570\u636e\u672c\u8eab<\/p> \n
\u8fd9\u518d\u6b21\u5370\u8bc1\u4e86\u8f6c\u6362\u4e0d\u4f1a\u4ea7\u751f\u4efb\u4f55\u6570\u636e.\u5b83\u53ea\u662f\u5355\u7eaf\u5730\u8bb0\u5f55\u7236RDD\u4ee5\u53ca\u5982\u4f55\u8f6c\u6362\u7684\u8fc7\u7a0b\u5c31\u5b8c\u4e86,\u4e0d\u4f1a\u5728\u8f6c\u6362\u9636\u6bb5\u4ea7\u751f\u4efb\u4f55\u6570\u636e\u96c6<\/strong><\/font><\/p> \n
2.2 preservesPartitioning<\/h5> \n
preservesPartitioning \u8868\u793a\u662f\u5426\u4fdd\u6301\u7236RDD\u7684\u5206\u533a\u4fe1\u606f. <\/font>
\u5982\u679c\u4e3afalse(\u9ed8\u8ba4\u4e3afalse),\u5219\u4f1a\u5bf9\u7ed3\u679c\u91cd\u65b0\u5206\u533a.\u4e5f\u5c31\u662fMap\u7cfb\u9ed8\u8ba4\u90fd\u4f1a\u5206\u533a
\u5982\u679c\u4e3atrue,\u4fdd\u7559\u5206\u533a. \u5219\u6309\u7167 firstParent \u4fdd\u7559\u5206\u533a\u3000\u3000\u3000<\/p> \n<\/p> \n
\u53ef\u4ee5\u770b\u5230\u6839\u636e dependencies \u627e\u5230\u5176\u7b2c\u4e00\u4e2a\u7236 RDD<\/p> \n
<\/p> \n
2.3 compute \u8ba1\u7b97\u903b\u8f91<\/h5> \n
2.3.1 compute\u65b9\u6cd5<\/h6> \n
RDD<\/code> \u62bd\u8c61\u7c7b\u8981\u6c42\u5176\u6240\u6709\u5b50\u7c7b\u90fd\u5fc5\u987b\u5b9e\u73b0
compute<\/code> \u65b9\u6cd5\uff0c\u8be5\u65b9\u6cd5\u63a5\u53d7\u7684\u53c2\u6570\u4e4b\u4e00\u662f\u4e00\u4e2a
Partition<\/code> \u5bf9\u8c61\uff0c\u76ee\u7684\u662f\u8ba1\u7b97\u8be5\u5206\u533a\u4e2d\u7684\u6570\u636e\u3002<\/p> \n
override def compute(split: Partition, context: TaskContext): Iterator[U] =\n f(context, split.index, firstParent[T].iterator(split, context))\n<\/code><\/pre> \n
\u53ef\u4ee5\u770b\u5230\uff0ccompute \u65b9\u6cd5\u8c03\u7528\u5f53\u524d RDD \u5185\u7684\u7b2c\u4e00\u4e2a\u7236 RDD \u7684 iterator<\/font> \u65b9\u6cd5\uff0c\u8be5\u65b9\u6cd5\u7684\u76ee\u7684\u662f\u62c9\u53d6\u7236
RDD<\/code> \u5bf9\u5e94\u5206\u533a\u5185\u7684\u6570\u636e\u3002<\/p> \n
iterator<\/code> \u65b9\u6cd5\u4f1a\u8fd4\u56de\u4e00\u4e2a\u8fed\u4ee3\u5668\u5bf9\u8c61\uff0c\u8fed\u4ee3\u5668\u5185\u90e8\u5b58\u50a8\u7684\u6bcf\u4e2a\u5143\u7d20\u5373\u7236 RDD \u5bf9\u5e94\u5206\u533a\u5185\u5df2\u7ecf\u8ba1\u7b97\u5b8c\u6bd5\u7684\u6570\u636e\u8bb0\u5f55<\/strong><\/font>\u3002\u5f97\u5230\u7684\u8fed\u4ee3\u5668\u4f5c\u4e3a
f<\/code> \u65b9\u6cd5\u7684\u4e00\u4e2a\u53c2\u6570\u3002
f<\/code> \u5728
RDD<\/code> \u7c7b\u7684
map<\/code> \u65b9\u6cd5\u4e2d\u6307\u5b9a\uff0c\u5373\u5b9e\u9645\u7684\u8f6c\u6362\u51fd\u6570\u3002<\/p> \n
compute<\/code> \u65b9\u6cd5\u4f1a\u5c06\u8fed\u4ee3\u5668\u4e2d\u7684\u8bb0\u5f55\u4e00\u4e00\u8f93\u5165
f<\/code> \u65b9\u6cd5\uff0c\u5f97\u5230\u7684\u65b0\u8fed\u4ee3\u5668\u5373\u4e3a\u6240\u6c42\u5206\u533a\u4e2d\u7684\u6570\u636e\u3002<\/p> \n
\u5176\u4ed6
RDD<\/code> \u5b50\u7c7b\u7684
compute<\/code> \u65b9\u6cd5\u4e0e\u4e4b\u7c7b\u4f3c\uff0c\u5728\u9700\u8981\u7528\u5230\u7236 RDD \u7684\u5206\u533a\u6570\u636e\u65f6\u5019\uff0c\u5c31\u4f1a\u8c03\u7528
iterator<\/code> \u65b9\u6cd5\uff0c\u7136\u540e\u6839\u636e\u9700\u6c42\u5728\u5f97\u5230\u7684\u6570\u636e\u4e4b\u4e0a\u6267\u884c\u7c97\u7c92\u5ea6\u7684\u64cd\u4f5c\u3002\u6362\u53e5\u8bdd\u8bf4\uff0c
compute<\/code> \u51fd\u6570\u8d1f\u8d23\u7684\u662f\u7236
RDD<\/code> \u5206\u533a\u6570\u636e\u5230\u5b50
RDD<\/code> \u5206\u533a\u6570\u636e\u7684\u53d8\u6362\u903b\u8f91\u3002<\/strong><\/p> \n
2.3.2 iterator\u65b9\u6cd5<\/h6> \n
\u6b64\u65b9\u6cd5\u7684\u5b9e\u73b0\u5728 RDD \u8fd9\u4e2a\u62bd\u8c61\u7c7b\u4e2d<\/p> \n
\/**\n * Internal method to this RDD; will read from cache if applicable, or otherwise compute it.\n * This should ''not'' be called by users directly, but is available for implementers of custom\n * subclasses of RDD.\n *\/\nfinal def iterator(split: Partition, context: TaskContext): Iterator[T] = {\n if (storageLevel != StorageLevel.NONE) {\n getOrCompute(split, context)\n } else {\n computeOrReadCheckpoint(split, context)\n }\n}\n<\/code><\/pre> \n
iterator\u9996\u5148\u68c0\u67e5 \u5b58\u50a8\u7ea7\u522b storageLevel<\/font>\uff1a\u6b64\u5904\u53ef\u53c2\u8003RDD\u6301\u4e45\u5316<\/a><\/p> \n
\u5982\u679c\u5b58\u50a8\u7ea7\u522b\u4e0d\u662fNONE, \u8bf4\u660e\u5206\u533a\u7684\u6570\u636e\u8981\u4e48\u5df2\u7ecf\u5b58\u50a8\u5728\u6587\u4ef6\u7cfb\u7edf\u5f53\u4e2d\uff0c\u8981\u4e48\u5f53\u524d RDD \u66fe\u7ecf\u6267\u884c\u8fc7
cache<\/code>\u3001
persist<\/code> \u7b49\u6301\u4e45\u5316\u64cd\u4f5c\uff0c\u6b64\u65f6\u9700\u8981\u4ece\u5b58\u50a8\u7a7a\u95f4\u8bfb\u53d6\u5206\u533a\u6570\u636e\uff0c\u8c03\u7528 getOrCompute<\/font> \u65b9\u6cd5<\/p> \n
<\/p> \n
getOrCompute \u65b9\u6cd5\u4f1a\u6839\u636e RDD \u7f16\u53f7\uff1aid<\/font> \u4e0e \u5206\u533a\u7f16\u53f7\uff1apartition.index<\/font> \u8ba1\u7b97\u5f97\u5230\u5f53\u524d\u5206\u533a\u5728\u5b58\u50a8\u5c42\u5bf9\u5e94\u7684\u5757\u7f16\u53f7\uff1ablockId<\/font>\uff0c\u901a\u8fc7\u5b58\u50a8\u5c42\u63d0\u4f9b\u7684\u6570\u636e\u8bfb\u53d6\u63a5\u53e3\u63d0\u53d6\u51fa\u5757\u7684\u6570\u636e\u3002<\/p> \n
\u4ee3\u7801\u4e2d\u7684\u8fd9\u51e0\u53e5\u6ce8\u91ca\u7ed9\u7684\u975e\u5e38\u5230\u4f4d\uff0c\u5927\u81f4\u7684\u5224\u65ad\u987a\u5e8f\u5982\u4e0b\uff1a<\/p> \n
- \n
- \u5757\u547d\u4e2d\u7684\u60c5\u51b5\uff1a\u4e5f\u5c31\u662f\u6570\u636e\u4e4b\u524d\u5df2\u7ecf\u6210\u529f\u5b58\u50a8\u5230\u4ecb\u8d28\u4e2d\uff0c\u8fd9\u5176\u4e2d\u53ef\u80fd\u662f\u6570\u636e\u672c\u8eab\u5c31\u5728\u5b58\u50a8\u4ecb\u8d28\u4e2d\uff08\u6bd4\u5982\u901a\u8fc7\u8bfb\u53d6HDFS\u521b\u5efa\u7684RDD\uff09\uff0c\u4e5f\u53ef\u80fd\u662f RDD \u5728\u7ecf\u8fc7\u6301\u4e45\u5316\u64cd\u4f5c\u5e76\u4e14\u7ecf\u5386\u4e86\u4e00\u6b21\u8ba1\u7b97\u8fc7\u7a0b\uff0c\u8fd9\u4e2a\u65f6\u5019\u6211\u4eec\u5c31\u80fd\u6210\u529f\u8bfb\u53d6\u6570\u636e\u5e76\u5c06\u5176\u8fd4\u56de<\/li> \n
- \u5757\u672a\u547d\u4e2d\u7684\u60c5\u51b5\uff1a\u53ef\u80fd\u662f\u6570\u636e\u5df2\u7ecf\u4e22\u5931\uff0c\u6216\u8005 RDD \u7ecf\u8fc7\u6301\u4e45\u5316\u64cd\u4f5c\uff0c\u4f46\u662f\u5f53\u524d\u5206\u533a\u6570\u636e\u662f\u7b2c\u4e00\u6b21\u88ab\u8ba1\u7b97\uff0c\u56e0\u6b64\u4f1a\u51fa\u73b0\u62c9\u53d6\u5f97\u5230\u6570\u636e\u4e3a
None<\/code> \u7684\u60c5\u51b5\u3002\u8fd9\u5c31\u610f\u5473\u7740\u6211\u4eec\u9700\u8981\u8ba1\u7b97\u5206\u533a\u6570\u636e\uff0c\u7ee7\u7eed\u8c03\u7528
RDD<\/code> \u7c7b
computeOrReadCheckpoint<\/code> \u65b9\u6cd5\u6765\u8ba1\u7b97\u6570\u636e\uff0c\u5e76\u5c06\u8ba1\u7b97\u5f97\u5230\u7684\u6570\u636e\u7f13\u5b58\u5230\u5b58\u50a8\u4ecb\u8d28\u4e2d\uff0c\u4e0b\u6b21\u5c31\u65e0\u9700\u518d\u91cd\u590d\u8ba1\u7b97\u3002<\/li> \n <\/ul> \n
\u5982\u679c\u5f53\u524dRDD\u7684\u5b58\u50a8\u7ea7\u522b\u4e3a
None<\/code>\uff0c\u8bf4\u660e\u4e3a\u672a\u7ecf\u6301\u4e45\u5316\u7684
RDD<\/code>\uff0c\u9700\u8981\u91cd\u65b0\u8ba1\u7b97 RDD \u5185\u7684\u6570\u636e\uff0c\u8fd9\u65f6\u5019\u8c03\u7528
RDD<\/code> \u7c7b\u7684
computeOrReadCheckpoint<\/code> \u65b9\u6cd5\uff0c\u8be5\u65b9\u6cd5\u4e5f\u5728\u6301\u4e45\u5316 RDD \u7684\u5206\u533a\u83b7\u53d6\u6570\u636e\u5931\u8d25\u65f6\u88ab\u8c03\u7528\u3002<\/p> \n
<\/p> \n
computeOrReadCheckpoint<\/font> \u65b9\u6cd5\u4f1a\u68c0\u67e5\u5f53\u524d RDD \u662f\u5426\u5df2\u7ecf\u88ab\u6807\u8bb0\u6210\u68c0\u67e5\u70b9\uff0c\u5982\u679c\u672a\u88ab\u6807\u8bb0\u6210\u68c0\u67e5\u70b9\uff0c\u5219\u6267\u884c\u81ea\u8eab\u7684
compute<\/code> \u65b9\u6cd5\u6765\u8ba1\u7b97\u5206\u533a\u6570\u636e\uff0c\u5426\u5219\u5c31\u76f4\u63a5\u62c9\u53d6\u7236 RDD \u5206\u533a\u5185\u7684\u6570\u636e\u3002<\/p> \n
\u9700\u8981\u6ce8\u610f\u7684\u662f\uff0c\u5bf9\u4e8e\u6807\u8bb0\u6210\u68c0\u67e5\u70b9\u7684\u60c5\u51b5\uff0c\u5f53\u524d RDD \u7684\u7236 RDD \u4e0d\u518d\u662f\u539f\u5148\u8f6c\u6362\u64cd\u4f5c\u4e2d\u63d0\u4f9b\u6570\u636e\u7684\u7236 RDD\uff0c\u800c\u662f\u88ab Apache Spark \u66ff\u6362\u6210\u4e00\u4e2a CheckpointRDD<\/font> \u5bf9\u8c61\uff0c\u8be5\u5bf9\u8c61\u4e2d\u7684\u6570\u636e\u5b58\u653e\u5728\u6587\u4ef6\u7cfb\u7edf\u4e2d\uff0c\u56e0\u6b64\u6700\u7ec8\u8be5\u5bf9\u8c61\u4f1a\u4ece\u6587\u4ef6\u7cfb\u7edf\u4e2d\u8bfb\u53d6\u6570\u636e\u5e76\u8fd4\u56de\u7ed9
computeOrReadCheckpoint<\/code> \u65b9\u6cd5<\/p> \n
\u53c2\u8003\u6587\u7ae0\uff1a<\/p> \n
Cache \u548c Checkpoint<\/a><\/p> \n<\/div>","orderid":"0","title":"Spark\u6e90\u7801\u89e3\u6790\uff08\u4e00\uff09\uff1aRDD\u4e4bTransfrom\u7b97\u5b50","smalltitle":"","mid":"0","fname":"Scala","special_id":"0","bak_id":"0","info":"0","hits":"319","pages":"1","comments":"0","posttime":"2023-07-23 13:24:52","list":"1690089892","username":"admin","author":"","copyfrom":"","copyfromurl":"","titlecolor":"","fonttype":"0","titleicon":"0","picurl":"https:\/\/www.cppentry.com\/upload_files\/","ispic":"0","yz":"1","yzer":"","yztime":"0","levels":"0","levelstime":"0","keywords":"Spark<\/A> \u89e3\u6790<\/A> RDD<\/A> Transfrom<\/A> \u7b97\u5b50<\/A>","jumpurl":"","iframeurl":"","style":"","template":"a:3:{s:4:\"head\";s:0:\"\";s:4:\"foot\";s:0:\"\";s:8:\"bencandy\";s:0:\"\";}","target":"0","ip":"119.59.235.169","lastfid":"0","money":"0","buyuser":"","passwd":"","allowdown":"","allowview":"","editer":"","edittime":"0","begintime":"0","endtime":"0","description":"Spark\u6e90\u7801\u89e3\u6790\uff08\u4e00\uff09\uff1aRDD\u4e4bTransfrom\u7b97\u5b50","lastview":"1716082052","digg_num":"0","digg_time":"0","forbidcomment":"0","ifvote":"0","heart":"","htmlname":"","city_id":"0"},"page":"1"}