// 版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Java_Soldier/article/details/80562937
// Create the streaming context that drives this job.
JavaStreamingContext jsc = createJavaStreamingContext(null, appName, batchDurationWithSeconds);

// kafkaParams: Kafka consumer configuration, copied verbatim from the input section of the job config.
final Map<String, Object> kafkaParams = new HashMap<>();
for (Map.Entry<?, ?> entry : config.getInputConfigs().entrySet()) {
    kafkaParams.put(entry.getKey().toString(), entry.getValue());
}
System.out.println("inputParam: " + kafkaParams);

// outputConfig: output-side settings (Redis connection info etc.), values normalized to strings.
final HashMap<String, String> outputConfig = new HashMap<>();
for (Map.Entry<Object, Object> entry : config.getOutputConfigs().entrySet()) {
    outputConfig.put(entry.getKey().toString(), entry.getValue().toString());
}
System.out.println("outputConfig: " + outputConfig);

// Restore per-partition starting offsets from Redis so the stream resumes where it left off.
Map<TopicPartition, Long> fromOffsets = new HashMap<>();
ShardedJedis redis = RedisSingleton.instance(outputConfig).getResource();
Map<String, String> kafkaOffsets;
try {
    // Hash layout: field = "<partition>:<topic>", value = offset, e.g. "0:SGN_DATA" -> "12541".
    kafkaOffsets = redis.hgetAll("POSITION_KAFKA_OFFSETS");
} finally {
    // Always return the connection to the pool, even if the read fails.
    redis.close();
}
Pattern patter4value = Pattern.compile(Pattern.quote(":"));
for (Map.Entry<String, String> entry : kafkaOffsets.entrySet()) {
    // Split into at most two parts: partition id and topic name.
    String[] info = patter4value.split(entry.getKey(), 2);
    if (info.length == 2) {
        // Warm start: seed the consumer position from the persisted offset.
        fromOffsets.put(new TopicPartition(info[1], Integer.parseInt(info[0])),
                Long.valueOf(Long.parseLong(entry.getValue())));
    }
}
// Cold start: nothing persisted in Redis yet, so begin every partition of every topic at offset 0.
// `topics` maps topic name -> partition count.
if (fromOffsets.isEmpty()) {
    for (Map.Entry<String, Integer> entry : topics.entrySet()) {
        int partitions = entry.getValue().intValue();
        for (int i = 0; i < partitions; i++) {
            fromOffsets.put(new TopicPartition(entry.getKey(), i), Long.valueOf(0L));
        }
    }
}
System.out.println("fromOffsets: " + fromOffsets);

// Direct (receiver-less) Kafka stream pinned to the restored offsets.
JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
        jsc,
        LocationStrategies.PreferConsistent(),
        ConsumerStrategies.<String, String>Assign(fromOffsets.keySet(), kafkaParams, fromOffsets));

// For every batch, persist each partition's offset to Redis, then map records to (key, value) pairs.
JavaPairDStream<String, String> mapedDatas = stream.transform(
        new Function<JavaRDD<ConsumerRecord<String, String>>, JavaRDD<ConsumerRecord<String, String>>>() {
            private static final long serialVersionUID = -6012908518603112999L;

            @Override
            public JavaRDD<ConsumerRecord<String, String>> call(JavaRDD<ConsumerRecord<String, String>> rdd)
                    throws Exception {
                // The underlying KafkaRDD exposes the offset range of every partition in this batch.
                OffsetRange[] offsetRanges = ((HasOffsetRanges) rdd.rdd()).offsetRanges();
                ShardedJedis redis = RedisSingleton.instance(outputConfig).getResource();
                try {
                    for (OffsetRange offset : offsetRanges) {
                        // NOTE(review): this stores fromOffset(), i.e. the position BEFORE the batch is
                        // processed, so a restart will replay the in-flight batch (at-least-once).
                        // Confirm this is intended rather than storing untilOffset().
                        redis.hset("POSITION_KAFKA_OFFSETS",
                                offset.partition() + ":" + offset.topic(),
                                offset.fromOffset() + "");
                    }
                } finally {
                    redis.close();
                }
                return rdd;
            }
        }).mapToPair(new PhonePairFunction2());

// Drop records with a null tuple, null key, or null value, then group by key for downstream processing.
JavaPairDStream<String, Iterable<String>> groupedData = mapedDatas.filter(
        new Function<Tuple2<String, String>, Boolean>() {
            private static final long serialVersionUID = 6634203502402639467L;

            @Override
            public Boolean call(Tuple2<String, String> record) throws Exception {
                return Boolean.valueOf(record != null && record._1 != null && record._2 != null);
            }
        }).groupByKey();

// Hand each grouped batch to the sink processor, which writes results using outputConfig.
groupedData.foreachRDD(new RddProcessor(outputConfig));