Project scenario:
DataX sync task JSON templates:
mysql -> kudu
Kudu table creation
DROP TABLE dim.content_test;
CREATE TABLE dim.content_test(
id string NOT NULL,
title string NOT NULL,
PRIMARY KEY (id)
)
PARTITION BY HASH (id) PARTITIONS 3
STORED AS KUDU TBLPROPERTIES ('kudu.master_addresses'='xxx:7051,xxx:7051,xxx:7051');
json
{
"core": {
"transport": {
"channel": {
"speed": {
"byte": 104857600
}
}
}
},
"job": {
"setting": {
"speed": {
"channel": 10,
"byte": 1048576000
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "xxx",
"password": "xxx",
"splitPk": "",
"connection": [
{
"querySql": [
"select ID,TITLE from content_test;"
],
"jdbcUrl": [
"jdbc:mysql://xxx:3306/yy"
]
}
]
}
},
"writer": {
"name": "kuduwriter",
"parameter": {
"batchSize": 1024,
"bufferSize": 2048,
"skipFail": false,
"encoding": "UTF-8",
"kuduConfig": {
"kudu.master_addresses": "xxx:7051,xxx:7051,xxx:7051"
},
"table": "impala::dim.content_test",
"truncate": false,
"writeMode": "upsert",
"column": [
{
"index": 0,
"name": "id",
"type": "int",
"comment": "内容id",
"primaryKey": true
},
{
"index": 1,
"name": "title",
"type": "string",
"comment": "内容名称"
}
],
"primaryKey": [
{
"index": 0,
"name": "id",
"type": "int"
}
]
}
}
}
]
}
}
Reference: https://github.com/alibaba/DataX/blob/master/kuduwriter/doc/kuduwirter.md
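For context, the querySql above assumes a MySQL source table shaped roughly like the following (this DDL is only an illustration, not taken from the original environment). Note that both columns are strings, which is why the kuduwriter column and primaryKey types above are declared as string to match the Kudu table. A job file like this is launched with DataX's standard entry script, e.g. python ${DATAX_HOME}/bin/datax.py mysql2kudu.json (the job file name here is just an example).
CREATE TABLE content_test (
ID VARCHAR(64) NOT NULL COMMENT 'content id',
TITLE VARCHAR(255) NOT NULL COMMENT 'content title',
PRIMARY KEY (ID)
);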
mysql -> hdfs
json
{
"core": {
"transport": {
"channel": {
"speed": {
"byte": 104857600
}
}
}
},
"job": {
"setting": {
"speed": {
"channel": 10,
"byte": 1048576000
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "xxx",
"password": "xxx",
"column": [
"`id`",
"`ww`"
],
"splitPk": "",
"connection": [
{
"table": [
"comp_tx"
],
"jdbcUrl": [
"jdbc:mysql://XXX:3312/ddd"
]
}
]
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://name1",
"nullFormat": "\\N",
"hadoopConfig": {
"dfs.nameservices": "nameservice1",
"dfs.ha.namenodes.nameservice1": "aa,bb",
"dfs.namenode.rpc-address.nameservice1.aa": "hdfs://XXX:8020",
"dfs.namenode.rpc-address.nameservice1.bb": "hdfs://XXX:8020",
"dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"fileType": "orc",
"path": "/user/hive/dim/XXX",
"fileName": "XXX",
"writeMode": "append",
"fieldDelimiter": "\u0001",
"column": [
{
"name": "id",
"type": "string"
},
{
"name": "ww",
"type": "string"
}
]
}
}
}
]
}
}
Reference: https://github.com/alibaba/DataX/blob/master/hdfswriter/doc/hdfswriter.md
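Note that hdfswriter only writes files into the given path; it does not create or update any Hive metadata, and the target path is expected to exist already. To make the ORC files written above queryable, a matching Hive table is needed. A minimal sketch, with a hypothetical table name and the two string columns from the writer config:
CREATE EXTERNAL TABLE dim.comp_tx_orc (
id string,
ww string
)
STORED AS ORC
LOCATION '/user/hive/dim/XXX';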
hdfs -> clickhouse
json
{
"core": {
"transport": {
"channel": {
"speed": {
"byte": 104857600
}
}
}
},
"job": {
"setting": {
"speed": {
"channel": 10,
"byte": 1048576000
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/test01",
"nullFormat": "\\N",
"defaultFS": "hdfs://name1",
"hadoopConfig": {
"dfs.nameservices": "xx",
"dfs.ha.namenodes.nameservice1": "xx01,xx02",
"dfs.namenode.rpc-address.nameservice1.xx01": "hdfs://xxx:8020",
"dfs.namenode.rpc-address.nameservice1.xx02": "hdfs://xxx:8020",
"dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"fileType": "orc",
"fieldDelimiter": "\u0001",
"skipHeader": true,
"column": [
{
"index": "0",
"type": "string"
},
{
"index": "1",
"type": "string"
},
{
"index": "2",
"type": "string"
},
{
"index": "3",
"type": "string"
}
]
}
},
"writer": {
"name": "clickhousewriter",
"parameter": {
"username": "xxxx",
"password": "xxxx",
"column": [
"event",
"aa",
"bb",
"dt"
],
"connection": [
{
"table": [
"ww"
],
"jdbcUrl": "jdbc:clickhouse://xxx:8123/test"
}
]
}
}
}
]
}
}
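clickhousewriter inserts into an existing table, so the target table ww must be created in ClickHouse beforehand. A minimal sketch of what it might look like, assuming all four columns are plain String and MergeTree ordering by dt (column names come from the writer config above; the types are assumptions):
CREATE TABLE test.ww (
event String,
aa String,
bb String,
dt String
) ENGINE = MergeTree
ORDER BY dt;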
hdfs -> local textfile
json
{
"core": {
"transport": {
"channel": {
"speed": {
"byte": 104857600
}
}
}
},
"job": {
"setting": {
"speed": {
"channel": 10,
"byte": 1048576000
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/warehouse/*.0.",
"defaultFS": "hdfs://NameServiceHA",
"hadoopConfig": {
"dfs.nameservices": "NameServiceHA",
"dfs.ha.namenodes.NameServiceHA": "data01,data02",
"dfs.namenode.rpc-address.NameServiceHA.data01": "hdfs://XXX:8020",
"dfs.namenode.rpc-address.NameServiceHA.data02": "hdfs://XXX:8020",
"dfs.client.failover.proxy.provider.NameServiceHA": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"column": [
"*"
],
"fileType": "TEXT",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "txtfilewriter",
"parameter": {
"path": "/data/team/",
"fileName": "test",
"writeMode": "truncate",
"dateFormat": "yyyy-MM-dd"
}
}
}
]
}
}
hdfs -> ftp
json
"core": {
"transport": {
"channel": {
"speed": {
"byte": 104857600
}
}
}
},
"job": {
"setting": {
"speed": {
"channel": 10,
"byte": 1048576000
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/warehouse/*.0.",
"defaultFS": "hdfs://NameServiceHA",
"hadoopConfig": {
"dfs.nameservices": "NameServiceHA",
"dfs.ha.namenodes.NameServiceHA": "data01,data02",
"dfs.namenode.rpc-address.NameServiceHA.data01": "hdfs://xxx:8020",
"dfs.namenode.rpc-address.NameServiceHA.data02": "hdfs://xxx:8020",
"dfs.client.failover.proxy.provider.NameServiceHA": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"column": [
"*"
],
"fileType": "TEXT",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "ftpwriter",
"parameter": {
"protocol": "ftp",
"host": "xxx",
"port": 21,
"username": "xxx",
"password": "xxx",
"connectPattern": "PASV",
"path": "/user/20220705/",
"fileName": "test",
"writeMode": "truncate",
"fieldDelimiter": ",",
"timeout": "3600000",
"encoding": "UTF-8",
"nullFormat": "\\N",
"dateFormat": "yyyy-MM-dd",
"fileFormat": "text",
"suffix": ".text",
"header": []
}
}
}
]
}
}
mysql -> clickhouse
json
{
"job": {
"setting": {
"speed": {
"channel": 3,
"byte": 1048576
},
"errorLimit": {
"record": 0,
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "xxx",
"password": "xxx",
"splitPk": "",
"connection": [
{
"querySql": [
"select id,code FROM test.aa"
],
"jdbcUrl": [
"jdbc:mysql://xxx:3306/test"
]
}
]
}
},
"writer": {
"name": "clickhousewriter",
"parameter": {
"username": "xxx",
"password": "xxx",
"column": [
"id",
"code"
],
"connection": [
{
"table": [
"test01"
],
"jdbcUrl": "jdbc:clickhouse://xxx:8123/test"
}
]
}
}
}
]
}
}
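As in the previous ClickHouse example, the target table test01 has to exist before the job runs; a minimal sketch with assumed column types:
CREATE TABLE test.test01 (
id Int64,
code String
) ENGINE = MergeTree
ORDER BY id;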
hdfs -> hdfs
json
{
"job": {
"setting": {
"speed": {
"channel": 10
}
},
"content": [
{
"reader": {
"name": "hdfsreader",
"parameter": {
"path": "/user/hive/warehouse/*.0.parq",
"defaultFS": "hdfs://NameServiceHA",
"hadoopConfig": {
"dfs.nameservices": "NameServiceHA",
"dfs.ha.namenodes.NameServiceHA": "test01,test02",
"dfs.namenode.rpc-address.NameServiceHA.data01": "hdfs://xxx:8020",
"dfs.namenode.rpc-address.NameServiceHA.data02": "hdfs://xxx:8020",
"dfs.client.failover.proxy.provider.NameServiceHA": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"column": [
"*"
],
"fileType": "PARQUET",
"fieldDelimiter": "\u0001"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"defaultFS": "hdfs://test",
"hadoopConfig": {
"dfs.nameservices": "test",
"dfs.ha.namenodes.nameservice1": "test01,test02",
"dfs.namenode.rpc-address.nameservice1.test01": "hdfs://xxx:8020",
"dfs.namenode.rpc-address.nameservice1.test02": "hdfs://xxx:8020",
"dfs.client.failover.proxy.provider.nameservice1": "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider",
"dfs.ha.automatic-failover.enabled.yournamespace": "true"
},
"fileType": "TEXT",
"path": "/newtv/hive/test_temp",
"fileName": "test_temp",
"column": [
{
"name": "aa",
"type": "string"
},
{
"name": "bb",
"type": "string"
}
],
"writeMode": "append",
"fieldDelimiter": "^"
}
}
}
]
}
}
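This writer produces plain text files under /newtv/hive/test_temp with '^' as the field delimiter. If those files are meant to back a Hive table, the table definition has to use the same delimiter; a hypothetical example:
CREATE EXTERNAL TABLE test_temp (
aa string,
bb string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '^'
STORED AS TEXTFILE
LOCATION '/newtv/hive/test_temp';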
Copyright notice: This is an original article by weixin_51485976, licensed under CC 4.0 BY-SA. Please include a link to the original source and this notice when reposting.