The code below walks through every step of the pipeline: ingesting data from Kafka, processing it with Spark operators, recording Kafka offsets, applying backpressure, and batch-inserting the results into MySQL.
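Before the full listing, here is a minimal, self-contained sketch of the spark-streaming-kafka-0-10 direct-stream setup the code builds on. The broker address, topic, and group id are placeholders rather than the production values, and the manual offset bookkeeping and MySQL writes that the real job performs are deliberately omitted:

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.ConsumerStrategies;
import org.apache.spark.streaming.kafka010.KafkaUtils;
import org.apache.spark.streaming.kafka010.LocationStrategies;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class DirectStreamSketch {
    public static void main(String[] args) throws InterruptedException {
        SparkConf conf = new SparkConf().setAppName("DirectStreamSketch").setMaster("local[2]");
        conf.set("spark.streaming.backpressure.enabled", "true"); // let intake adapt to processing speed
        JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(10));

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "IP:9092");  // placeholder brokers
        kafkaParams.put(ConsumerConfig.GROUP_ID_CONFIG, "groupId");           // placeholder group id
        kafkaParams.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        kafkaParams.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);     // offsets are managed by hand

        // Direct stream: each Spark partition maps 1:1 to a Kafka partition, no receivers
        JavaInputDStream<ConsumerRecord<String, String>> stream = KafkaUtils.createDirectStream(
                jssc,
                LocationStrategies.PreferConsistent(),
                ConsumerStrategies.<String, String>Subscribe(Arrays.asList("topics"), kafkaParams));

        stream.foreachRDD(rdd -> rdd.foreach(record -> System.out.println(record.value())));

        jssc.start();
        jssc.awaitTermination();
    }
}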
package com.e_data;
import com.util.DateUtil;
import com.util.JDBCUtils3;
import com.util.RedisUtil;
import org.apache.commons.collections.IteratorUtils;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.api.java.function.VoidFunction;
import org.apache.spark.streaming.Durations;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaInputDStream;
import org.apache.spark.streaming.api.java.JavaPairDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;
import org.apache.spark.streaming.kafka010.*;
import scala.Tuple2;
import java.io.Serializable;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.*;
/**
* Most stable version.
* Author: liuwunan. Designed so that not a single record is lost.
*/
public class HuaLong_RealTime_ETC implements Serializable {
//Exception flag: if an exception occurs, record the batch's starting offsets; if processing succeeds, record the ending offsets
public static Boolean ExceptionFlag = true;
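//(In other words: offsets are persisted only after a batch's writes succeed, so a failed
//batch is re-read from its starting offsets on the next run: at-least-once delivery.)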
//Kafka parameters (brokers, consumer group, topic)
private static String topics = "topics"; //topic to consume
private static String groupId = "groupId";//consumer group id
private static String offset = "offset";
private static String brokers = "IP:9092,IP:9092,IP:9092";//Kafka broker addresses
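//(Assumption: "offset" is the Redis key under which the last committed offsets are stored via the imported RedisUtil.)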
public static void main(String[] args) {
//Set the HDFS replication factor to 1 (Hadoop defaults to 3) to reduce data-sync latency
//Configuration hdfs = new Configuration();
//hdfs.set("dfs.replication","1");
SparkConf conf = new SparkConf().setAppName("RealTimeE")
.set("spark.dynamicAllocation.enabled", "false");//disable dynamic resource allocation
conf.set("spark.serializer","org.apache.spark.serializer.KryoSerializer");
conf.setMaster("local[5]"); //number of local threads; never use 1 (streaming needs spare cores for processing), "*" is allowed
conf.set("spark.streaming.backpressure.enabled", "true");//enable backpressure
conf.set("spark.streaming.backpressure.pid.minRate","1");//minimum ingestion rate (records per second)
co