1、DataX的安装参考前面的文章:
Linux下DataX的安装
2、查看streamreader----->streamwriter的模版:
[root@localhost datax]# python /datax/datax/datax/bin/datax.py -r streamreader -w streamwriter
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
Please refer to the streamreader document:
https://github.com/alibaba/DataX/blob/master/streamreader/doc/streamreader.md
Please refer to the streamwriter document:
https://github.com/alibaba/DataX/blob/master/streamwriter/doc/streamwriter.md
Please save the following configuration as a json file and use
python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json
to run the job.
{
"job": {
"content": [
{
"reader": {
"name": "streamreader", #读取端
"parameter": {
"column": [], # 同步的列名(*表示所有)
"sliceRecordCount": "" #打印数量
}
},
"writer": {
"name": "streamwriter", #写入端
"parameter": {
"encoding": "", #编码
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": "" # 并发(即sliceRecordCount*channel=结果)
}
}
}
}
3、mysql---->mysql的模版和例子:
模版:
[root@localhost ~]# python /datax/datax/datax/bin/datax.py -r mysqlreader -w mysqlwriter
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
Please refer to the mysqlreader document:
https://github.com/alibaba/DataX/blob/master/mysqlreader/doc/mysqlreader.md
Please refer to the mysqlwriter document:
https://github.com/alibaba/DataX/blob/master/mysqlwriter/doc/mysqlwriter.md
Please save the following configuration as a json file and use
python {DATAX_HOME}/bin/datax.py {JSON_FILE_NAME}.json
to run the job.
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader", #读取端
"parameter": {
"column": [], #需要同步的列(*表示所有列)
"connection": [
{
"jdbcUrl": [], #连接信息
"table": [] #表名
}
],
"password": "", #连接用户密码
"username": "", #连接用户
"where": "" #筛选条件
}
},
"writer": {
"name": "mysqlwriter", #写入端
"parameter": {
"column": [], #需要同步的列(*表示所有列)
"connection": [
{
"jdbcUrl": "", #连接信息
"table": [] #表名
}
],
"password": "", #连接用户密码
"preSql": [], #同步前需要做的事
"session": [],
"username": "", #连接用户
"writeMode": "" #操作类型
}
}
}
],
"setting": {
"speed": {
"channel": "" #指定并发数
}
}
}
}
例子:
vi /datax/test/mysql-mysql-01.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": ["CustomerId","configKey"],
"password": "tdHTC2d6ufn@bQqk",
"username": "user_pi",
"splitPK":"Id",
"connection": [
{
"jdbcUrl": ["jdbc:mysql://121.37.168.175:3306/preinstall"],
"table": ["sys_config"]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["CustomerId","configKey"],
"password": "tdHTC2d6ufn@bQqk",
"username": "user_ubi",
"writermode":"insert",
"preSql":["truncate tm_1"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://121.37.168.175:3306/ubicenter",
"table": ["tm_1"]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
运行结果:
[root@localhost test]# python /datax/datax/datax/bin/datax.py /datax/test/mysql-mysql-01.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
2023-11-02 16:50:16.667 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2023-11-02 16:50:16.682 [main] INFO Engine - the machine info =>
osInfo: Oracle Corporation 1.8 25.291-b10
jvmInfo: Linux amd64 3.10.0-229.el7.x86_64
cpu num: 2
totalPhysicalMemory: -0.00G
freePhysicalMemory: -0.00G
maxFileDescriptorCount: -1
currentOpenFileDescriptorCount: -1
GC Names [PS MarkSweep, PS Scavenge]
MEMORY_NAME | allocation_size | init_size
PS Eden Space | 256.00MB | 256.00MB
Code Cache | 240.00MB | 2.44MB
Compressed Class Space | 1,024.00MB | 0.00MB
PS Survivor Space | 42.50MB | 42.50MB
PS Old Gen | 683.00MB | 683.00MB
Metaspace | -0.00MB | 0.00MB
2023-11-02 16:50:16.718 [main] INFO Engine -
{
"content":[
{
"reader":{
"name":"mysqlreader",
"parameter":{
"column":[
"CustomerId",
"configKey"
],
"connection":[
{
"jdbcUrl":[
"jdbc:mysql://121.37.168.175:3306/preinstall"
],
"table":[
"sys_config"
]
}
],
"password":"****************",
"splitPK":"Id",
"username":"user_pi"
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"column":[
"CustomerId",
"configKey"
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://121.37.168.175:3306/ubicenter",
"table":[
"tm_1"
]
}
],
"password":"****************",
"preSql":[
"truncate tm_1"
],
"username":"user_ubi",
"writermode":"insert"
}
}
}
],
"setting":{
"speed":{
"channel":"1"
}
}
}
2023-11-02 16:50:16.754 [main] WARN Engine - prioriy set to 0, because NumberFormatException, the value is: null
2023-11-02 16:50:16.758 [main] INFO PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0
2023-11-02 16:50:16.758 [main] INFO JobContainer - DataX jobContainer starts job.
2023-11-02 16:50:16.762 [main] INFO JobContainer - Set jobId = 0
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
2023-11-02 16:50:17.502 [job-0] INFO OriginalConfPretreatmentUtil - Available jdbcUrl:jdbc:mysql://121.37.168.175:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.
2023-11-02 16:50:17.735 [job-0] INFO OriginalConfPretreatmentUtil - table:[sys_config] has columns:[Id,CustomerId,configKey,ConfigValue,CreatedAt,CreatedById,UpdatedAt,UpdatedById,Deleted,DeletedAt,DeletedById,JsonString,DataProvider].
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
2023-11-02 16:50:18.269 [job-0] INFO OriginalConfPretreatmentUtil - table:[tm_1] all columns:[
Id,CustomerId,configKey,ConfigValue,CreatedAt,CreatedById,UpdatedAt,UpdatedById,Deleted,DeletedAt,DeletedById,JsonString,DataProvider
].
2023-11-02 16:50:18.484 [job-0] INFO OriginalConfPretreatmentUtil - Write data [
INSERT INTO %s (CustomerId,configKey) VALUES(?,?)
], which jdbcUrl like:[jdbc:mysql://121.37.168.175:3306/ubicenter?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true]
2023-11-02 16:50:18.485 [job-0] INFO JobContainer - jobContainer starts to do prepare ...
2023-11-02 16:50:18.486 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] do prepare work .
2023-11-02 16:50:18.487 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] do prepare work .
2023-11-02 16:50:18.671 [job-0] INFO CommonRdbmsWriter$Job - Begin to execute preSqls:[truncate tm_1]. context info:jdbc:mysql://121.37.168.175:3306/ubicenter?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.
2023-11-02 16:50:18.737 [job-0] INFO JobContainer - jobContainer starts to do split ...
2023-11-02 16:50:18.741 [job-0] INFO JobContainer - Job set Channel-Number to 1 channels.
2023-11-02 16:50:18.748 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] splits to [1] tasks.
2023-11-02 16:50:18.749 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] splits to [1] tasks.
2023-11-02 16:50:18.787 [job-0] INFO JobContainer - jobContainer starts to do schedule ...
2023-11-02 16:50:18.794 [job-0] INFO JobContainer - Scheduler starts [1] taskGroups.
2023-11-02 16:50:18.798 [job-0] INFO JobContainer - Running by standalone Mode.
2023-11-02 16:50:18.815 [taskGroup-0] INFO TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.
2023-11-02 16:50:18.822 [taskGroup-0] INFO Channel - Channel set byte_speed_limit to -1, No bps activated.
2023-11-02 16:50:18.822 [taskGroup-0] INFO Channel - Channel set record_speed_limit to -1, No tps activated.
2023-11-02 16:50:18.845 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started
2023-11-02 16:50:18.851 [0-0-0-reader] INFO CommonRdbmsReader$Task - Begin to read record by Sql: [select CustomerId,configKey from sys_config
] jdbcUrl:[jdbc:mysql://121.37.168.175:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2023-11-02 16:50:19.099 [0-0-0-reader] INFO CommonRdbmsReader$Task - Finished read record by Sql: [select CustomerId,configKey from sys_config
] jdbcUrl:[jdbc:mysql://121.37.168.175:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2023-11-02 16:50:19.347 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[511]ms
2023-11-02 16:50:19.347 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] completed it's tasks.
2023-11-02 16:50:28.827 [job-0] INFO StandAloneJobContainerCommunicator - Total 8 records, 111 bytes | Speed 11B/s, 0 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2023-11-02 16:50:28.828 [job-0] INFO AbstractScheduler - Scheduler accomplished all tasks.
2023-11-02 16:50:28.829 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] do post work.
2023-11-02 16:50:28.829 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] do post work.
2023-11-02 16:50:28.830 [job-0] INFO JobContainer - DataX jobId [0] completed successfully.
2023-11-02 16:50:28.831 [job-0] INFO HookInvoker - No hook invoked, because base dir not exists or is a file: /datax/datax/datax/hook
2023-11-02 16:50:28.833 [job-0] INFO JobContainer -
[total cpu info] =>
averageCpu | maxDeltaCpu | minDeltaCpu
-1.00% | -1.00% | -1.00%
[total gc info] =>
NAME | totalGCCount | maxDeltaGCCount | minDeltaGCCount | totalGCTime | maxDeltaGCTime | minDeltaGCTime
PS MarkSweep | 0 | 0 | 0 | 0.000s | 0.000s | 0.000s
PS Scavenge | 0 | 0 | 0 | 0.000s | 0.000s | 0.000s
2023-11-02 16:50:28.834 [job-0] INFO JobContainer - PerfTrace not enable!
2023-11-02 16:50:28.835 [job-0] INFO StandAloneJobContainerCommunicator - Total 8 records, 111 bytes | Speed 11B/s, 0 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2023-11-02 16:50:28.837 [job-0] INFO JobContainer -
任务启动时刻 : 2023-11-02 16:50:16
任务结束时刻 : 2023-11-02 16:50:28
任务总计耗时 : 12s
任务平均流量 : 11B/s
记录写入速度 : 0rec/s
读出记录总数 : 8
读写失败总数 : 0
4、mysql---->tidb的例子:
vi /datax/test/mysql-tidb.json
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": ["CustomerId","configKey"],
"password": "pi123",
"username": "user_pi",
"splitPK":"Id",
"connection": [
{
"jdbcUrl": ["jdbc:mysql://121.37.168.175:3306/preinstall"],
"table": ["sys_config"]
}
]
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": ["CustomerId","configKey"],
"password": "tidb123",
"username": "user_tidb",
"writermode":"insert",
"preSql":["truncate tm_1"],
"connection": [
{
"jdbcUrl": "jdbc:mysql://192.168.200.57:4000/test01",
"table": ["tm_1"]
}
]
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
[root@localhost test]# python /datax/datax/datax/bin/datax.py /datax/test/mysql-tidb.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
2023-11-02 17:16:00.209 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2023-11-02 17:16:00.247 [main] INFO Engine - the machine info =>
osInfo: Oracle Corporation 1.8 25.291-b10
jvmInfo: Linux amd64 3.10.0-229.el7.x86_64
cpu num: 2
totalPhysicalMemory: -0.00G
freePhysicalMemory: -0.00G
maxFileDescriptorCount: -1
currentOpenFileDescriptorCount: -1
GC Names [PS MarkSweep, PS Scavenge]
MEMORY_NAME | allocation_size | init_size
PS Eden Space | 256.00MB | 256.00MB
Code Cache | 240.00MB | 2.44MB
Compressed Class Space | 1,024.00MB | 0.00MB
PS Survivor Space | 42.50MB | 42.50MB
PS Old Gen | 683.00MB | 683.00MB
Metaspace | -0.00MB | 0.00MB
2023-11-02 17:16:00.319 [main] INFO Engine -
{
"content":[
{
"reader":{
"name":"mysqlreader",
"parameter":{
"column":[
"CustomerId",
"configKey"
],
"connection":[
{
"jdbcUrl":[
"jdbc:mysql://192.168.200.57:3306/preinstall"
],
"table":[
"sys_config"
]
}
],
"password":"*****",
"splitPK":"Id",
"username":"user_pi"
}
},
"writer":{
"name":"mysqlwriter",
"parameter":{
"column":[
"CustomerId",
"configKey"
],
"connection":[
{
"jdbcUrl":"jdbc:mysql://192.168.200.57:4000/test01",
"table":[
"tm_1"
]
}
],
"password":"*******",
"preSql":[
"truncate tm_1"
],
"username":"user_ubi",
"writermode":"insert"
}
}
}
],
"setting":{
"speed":{
"channel":"1"
}
}
}
2023-11-02 17:16:00.360 [main] WARN Engine - prioriy set to 0, because NumberFormatException, the value is: null
2023-11-02 17:16:00.364 [main] INFO PerfTrace - PerfTrace traceId=job_-1, isEnable=false, priority=0
2023-11-02 17:16:00.364 [main] INFO JobContainer - DataX jobContainer starts job.
2023-11-02 17:16:00.368 [main] INFO JobContainer - Set jobId = 0
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
2023-11-02 17:16:01.901 [job-0] INFO OriginalConfPretreatmentUtil - Available jdbcUrl:jdbc:mysql://192.168.200.57:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.
2023-11-02 17:16:02.047 [job-0] INFO OriginalConfPretreatmentUtil - table:[sys_config] has columns:[Id,CustomerId,configKey,ConfigValue,CreatedAt,CreatedById,UpdatedAt,UpdatedById,Deleted,DeletedAt,DeletedById,JsonString,DataProvider].
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary.
2023-11-02 17:16:02.516 [job-0] INFO OriginalConfPretreatmentUtil - table:[tm_1] all columns:[
Id,CustomerId,configKey,ConfigValue,CreatedAt,CreatedById,UpdatedAt,UpdatedById,Deleted,DeletedAt,DeletedById,JsonString,DataProvider
].
2023-11-02 17:16:02.528 [job-0] INFO OriginalConfPretreatmentUtil - Write data [
INSERT INTO %s (CustomerId,configKey) VALUES(?,?)
], which jdbcUrl like:[jdbc:mysql://192.168.200.57:4000/test01?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true]
2023-11-02 17:16:02.529 [job-0] INFO JobContainer - jobContainer starts to do prepare ...
2023-11-02 17:16:02.530 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] do prepare work .
2023-11-02 17:16:02.530 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] do prepare work .
2023-11-02 17:16:02.544 [job-0] INFO CommonRdbmsWriter$Job - Begin to execute preSqls:[truncate tm_1]. context info:jdbc:mysql://192.168.200.57:4000/test01?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true.
2023-11-02 17:16:02.685 [job-0] INFO JobContainer - jobContainer starts to do split ...
2023-11-02 17:16:02.685 [job-0] INFO JobContainer - Job set Channel-Number to 1 channels.
2023-11-02 17:16:02.695 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] splits to [1] tasks.
2023-11-02 17:16:02.696 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] splits to [1] tasks.
2023-11-02 17:16:02.737 [job-0] INFO JobContainer - jobContainer starts to do schedule ...
2023-11-02 17:16:02.743 [job-0] INFO JobContainer - Scheduler starts [1] taskGroups.
2023-11-02 17:16:02.746 [job-0] INFO JobContainer - Running by standalone Mode.
2023-11-02 17:16:02.769 [taskGroup-0] INFO TaskGroupContainer - taskGroupId=[0] start [1] channels for [1] tasks.
2023-11-02 17:16:02.775 [taskGroup-0] INFO Channel - Channel set byte_speed_limit to -1, No bps activated.
2023-11-02 17:16:02.775 [taskGroup-0] INFO Channel - Channel set record_speed_limit to -1, No tps activated.
2023-11-02 17:16:02.804 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[0] attemptCount[1] is started
2023-11-02 17:16:02.813 [0-0-0-reader] INFO CommonRdbmsReader$Task - Begin to read record by Sql: [select CustomerId,configKey from sys_config
] jdbcUrl:[jdbc:mysql://192.168.200.57:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2023-11-02 17:16:02.881 [0-0-0-reader] INFO CommonRdbmsReader$Task - Finished read record by Sql: [select CustomerId,configKey from sys_config
] jdbcUrl:[jdbc:mysql://192.168.200.57:3306/preinstall?yearIsDateType=false&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false&rewriteBatchedStatements=true].
2023-11-02 17:16:03.006 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] taskId[0] is successed, used[213]ms
2023-11-02 17:16:03.007 [taskGroup-0] INFO TaskGroupContainer - taskGroup[0] completed it's tasks.
2023-11-02 17:16:12.789 [job-0] INFO StandAloneJobContainerCommunicator - Total 8 records, 111 bytes | Speed 11B/s, 0 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2023-11-02 17:16:12.790 [job-0] INFO AbstractScheduler - Scheduler accomplished all tasks.
2023-11-02 17:16:12.791 [job-0] INFO JobContainer - DataX Writer.Job [mysqlwriter] do post work.
2023-11-02 17:16:12.791 [job-0] INFO JobContainer - DataX Reader.Job [mysqlreader] do post work.
2023-11-02 17:16:12.791 [job-0] INFO JobContainer - DataX jobId [0] completed successfully.
2023-11-02 17:16:12.793 [job-0] INFO HookInvoker - No hook invoked, because base dir not exists or is a file: /datax/datax/datax/hook
2023-11-02 17:16:12.795 [job-0] INFO JobContainer -
[total cpu info] =>
averageCpu | maxDeltaCpu | minDeltaCpu
-1.00% | -1.00% | -1.00%
[total gc info] =>
NAME | totalGCCount | maxDeltaGCCount | minDeltaGCCount | totalGCTime | maxDeltaGCTime | minDeltaGCTime
PS MarkSweep | 1 | 1 | 1 | 0.072s | 0.072s | 0.072s
PS Scavenge | 1 | 1 | 1 | 0.028s | 0.028s | 0.028s
2023-11-02 17:16:12.795 [job-0] INFO JobContainer - PerfTrace not enable!
2023-11-02 17:16:12.796 [job-0] INFO StandAloneJobContainerCommunicator - Total 8 records, 111 bytes | Speed 11B/s, 0 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.000s | All Task WaitReaderTime 0.000s | Percentage 100.00%
2023-11-02 17:16:12.797 [job-0] INFO JobContainer -
任务启动时刻 : 2023-11-02 17:16:00
任务结束时刻 : 2023-11-02 17:16:12
任务总计耗时 : 12s
任务平均流量 : 11B/s
记录写入速度 : 0rec/s
读出记录总数 : 8
读写失败总数 : 0