Requirement: use Spark to sync Hive data into MySQL. The MySQL table has a unique index on three of its fields; rows that do not hit the index are inserted directly, while rows that conflict on it only have a few of their fields updated.
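This "insert if new, update on conflict" behavior maps to MySQL's INSERT ... ON DUPLICATE KEY UPDATE. Below is a minimal sketch of that write path, assuming a hypothetical target table pay_retained with a unique index on (game_id, package_id, `date`) and plain JDBC inside foreachPartition; the real table, column names and connection handling (e.g. via MySQLUtils) will differ.

import java.sql.{Connection, DriverManager, PreparedStatement}
import org.apache.spark.sql.{DataFrame, Row}

object UpsertSketch {
  // Hypothetical columns: (game_id, package_id, `date`) form the unique index,
  // pay_count / retained_count are the metrics refreshed on conflict.
  def upsert(df: DataFrame, url: String, user: String, password: String): Unit = {
    val sql =
      """
        |insert into pay_retained (game_id, package_id, `date`, pay_count, retained_count)
        |values (?, ?, ?, ?, ?)
        |on duplicate key update
        |  pay_count = values(pay_count),
        |  retained_count = values(retained_count)
        |""".stripMargin

    df.foreachPartition { (rows: Iterator[Row]) =>
      val conn: Connection = DriverManager.getConnection(url, user, password)
      val stmt: PreparedStatement = conn.prepareStatement(sql)
      conn.setAutoCommit(false)
      try {
        rows.foreach { row =>
          stmt.setLong(1, row.getAs[Long]("game_id"))
          stmt.setLong(2, row.getAs[Long]("package_id"))
          stmt.setString(3, row.getAs[String]("date"))
          stmt.setLong(4, row.getAs[Long]("pay_count"))
          stmt.setLong(5, row.getAs[Long]("retained_count"))
          stmt.addBatch()
        }
        stmt.executeBatch()
        conn.commit()
      } finally {
        stmt.close()
        conn.close()
      }
    }
  }
}

Appending rewriteBatchedStatements=true to the JDBC URL usually makes the batched writes noticeably faster with MySQL Connector/J.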
Main class
import java.util.Properties
import com.sm.conf.ConfigManager
import com.sm.constants.Constants
import com.sm.utils.{DateUtils, MySQLUtils}
import org.apache.log4j.Level
import org.apache.spark.sql.{SaveMode, SparkSession}
import org.slf4j.LoggerFactory
/**
 * Sync Hive dws payment and sv_key retained data to MySQL
 *
 * created by LiuJinHe 2020/4/1
 */
object ExportPayRetainedToMysql {
  private val warehouseLocation = "hdfs://cdh-slave01:9870/user/hive/warehouse"
  private var prop: Properties = _
  private var hiveDB: String = _
  private var database: String = _
  private var url: String = _
  private var hiveTable: String = _
  private var destTable: String = _
  private var logger: org.slf4j.Logger = _
  private var yesterday: String = _
  private var start: Long = 0L
  private var sqlStr: String = _
  def main(args: Array[String]): Unit = {
    logger = LoggerFactory.getLogger(this.getClass)
    org.apache.log4j.Logger.getLogger("org.apache.hadoop").setLevel(Level.WARN)
    org.apache.log4j.Logger.getLogger("org.apache.spark").setLevel(Level.INFO)
    org.apache.log4j.Logger.getLogger("org.spark_project.jetty").setLevel(Level.WARN)
    val spark = initSparkSession
    start = System.currentTimeMillis()
    // Date of the data to pull, in the format 2020-01-01; defaults to the previous day
    yesterday = DateUtils.getYesterdayDate
    if (args.length == 1) {
      yesterday = args(0)
    }
pro