// Build two sample string RDDs. rdd1 contains a duplicate element ("coffee"),
// and the two RDDs share some elements ("coffee", "monkey").
JavaRDD<String> rdd1 = jsc.parallelize(
        Arrays.asList("coffee", "coffee", "panda", "monkey", "tea"));
JavaRDD<String> rdd2 = jsc.parallelize(
        Arrays.asList("coffee", "monkey", "kitty"));
// distinct(): remove duplicate elements from rdd1, then print the collected result.
JavaRDD<String> distinctRdd = rdd1.distinct();
System.out.println(distinctRdd.collect());
// union(): concatenate rdd1 and rdd2 into one RDD; duplicates are NOT removed.
JavaRDD<String> unionRdd = rdd1.union(rdd2);
System.out.println(unionRdd.collect());
// intersection(): elements present in both rdd1 and rdd2; the result is de-duplicated.
JavaRDD<String> intersectionRdd = rdd1.intersection(rdd2);
System.out.println(intersectionRdd.collect());
// subtract(): elements that appear in rdd1 but not in rdd2; the result is not de-duplicated.
JavaRDD<String> subtractedRdd = rdd1.subtract(rdd2);
System.out.println(subtractedRdd.collect());
// cartesian(): Cartesian product of rdd1 and rdd2, i.e. every (rdd1 element, rdd2 element) pair.
JavaPairRDD<String, String> cartesianRdd = rdd1.cartesian(rdd2);
System.out.println(cartesianRdd.collect());
// reduce(): compute the sum of the integers in the RDD by combining elements pairwise.
JavaRDD<Integer> rdd = jsc.parallelize(Arrays.asList(1, 2, 3, 4));
// Binary addition function handed to reduce(); it combines two elements into one.
Function2<Integer, Integer, Integer> adder = new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer left, Integer right) throws Exception {
        return left + right;
    }
};
Integer sum = rdd.reduce(adder);
System.out.println(sum);
输出结果:
[monkey, coffee, panda, tea]
[coffee, coffee, panda, monkey, tea, coffee, monkey, kitty]
[monkey, coffee]
[tea, panda]
[(coffee,coffee), (coffee,monkey), (coffee,kitty), (coffee,coffee), (coffee,monkey), (coffee,kitty), (panda,coffee), (panda,monkey), (panda,kitty), (monkey,coffee), (monkey,monkey), (monkey,kitty), (tea,coffee), (tea,monkey), (tea,kitty)]
10