# Flink SQL Configs
配置项 | 是否必须 | 默认值 | 配置说明 |
---|---|---|---|
path | Y | N/A | Hudi表的 base path,如果不存在会创建,否则应是一个已初始化成功的 hudi 表 |
read.end-commit | Y | N/A | |
read.start-commit | Y | N/A | |
read.tasks | Y | N/A | |
write.tasks | Y | N/A | |
write.partition.format | Y | N/A | 分区路径格式,仅 write.datetime.partitioning 为 true 时有效。两种默认值:1、yyyyMMddHH,当分区字段类型为 timestamp(3) WITHOUT TIME ZONE、LONG、FLOAT、DOUBLE、DECIMAL 时;2、yyyyMMdd,当分区字段类型为 DATE 和 INT 时。 |
write.bucket_assign.tasks | Y | N/A | |
archive.max_commits | N | 50 | |
archive.min_commits | N | 40 | |
cdc.enabled | N | false | |
changelog.enabled | N | false | |
clean.async.enabled | N | true | |
clean.policy | N | KEEP_LATEST_COMMITS | 清理策略,可取值:KEEP_LATEST_COMMITS、KEEP_LATEST_FILE_VERSIONS、KEEP_LATEST_BY_HOURS,默认为 KEEP_LATEST_COMMITS |
clean.retain_commits | N | 30 | |
clean.retain_file_versions | N | 5 | |
clean.retain_hours | N | 24 | |
clustering.async.enabled | N | false | |
clustering.delta_commits | N | 4 | |
clustering.plan.partition.filter.mode | N | NONE | 可取值:NONE, RECENT_DAYS, SELECTED_PARTITIONS, DAY_ROLLING |
clustering.plan.strategy.class | N | org.apache.hudi.client.clustering.plan.strategy.FlinkSizeBasedClusteringPlanStrategy | |
clustering.tasks | Y | N/A | |
clustering.schedule.enabled | N | false | |
compaction.async.enabled | N | true | |
compaction.delta_commits | N | 5 | |
compaction.delta_seconds | N | 3600 | |
compaction.max_memory | N | 100 | |
compaction.schedule.enabled | N | true | |
compaction.target_io | N | 512000 | |
compaction.timeout.seconds | N | 1200 | |
compaction.trigger.strategy | N | num_commits | 可取值:num_commits, time_elapsed, num_or_time |
hive_sync.conf.dir | Y | N/A | |
hive_sync.table_properties | Y | N/A | |
hive_sync.assume_date_partitioning | N | false | 假定分区为 yyyy/mm/dd 格式 |
hive_sync.auto_create_db | N | true | 自动创建不存在的数据库 |
hive_sync.db | N | default | |
hive_sync.table | N | unknown | |
hive_sync.table.strategy | N | ALL | |
hive_sync.enabled | N | false | |
hive_sync.file_format | N | PARQUET | |
hive_sync.jdbc_url | N | jdbc:hive2://localhost:10000 | |
hive_sync.metastore.uris | N | '' | Hive Metastore uris |
hive_sync.mode | N | HMS | |
hive_sync.partition_fields | N | '' | |
hive_sync.password | N | hive | |
hive_sync.support_timestamp | N | true | |
hive_sync.use_jdbc | N | true | |
hive_sync.username | N | hive | |
hoodie.bucket.index.hash.field | N | '' | 桶(BUCKET)的 key,必须为 recordKey 的子集,或者就是 recordKey,默认为空时使用 recordKey |
hoodie.bucket.index.num.buckets | N | 4 | |
hoodie.datasource.merge.type | N | payload_combine | |
hoodie.datasource.query.type | N | snapshot | |
hoodie.datasource.write.hive_style_partitioning | N | false | |
hoodie.datasource.write.keygenerator.type | N | SIMPLE | |
hoodie.datasource.write.partitionpath.field | N | '' | |
hoodie.datasource.write.recordkey.field | N | uuid | |
hoodie.datasource.write.partitionpath.urlencode | N | false | |
hoodie.database.name | Y | N/A | |
hoodie.table.name | Y | N/A | |
hoodie.datasource.write.keygenerator.class | Y | N/A | |
index.bootstrap.enabled | N | false | |
index.global.enabled | N | true | |
index.partition.regex | N | * | |
index.state.ttl | N | 0.0 | |
index.type | N | FLINK_STATE | 取值有:BUCKET,FLINK_STATE,BLOOM,GLOBAL_BLOOM,GLOBAL_SIMPLE,HBASE,INMEMORY,SIMPLE,默认为 FLINK_STATE,详情参见 https://github.com/apache/hudi/blob/master/hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/configuration/FlinkOptions.java 或者 https://github.com/apache/hudi/blob/master/hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieIndexConfig.java |
metadata.enabled | N | false | |
metadata.compaction.delta_commits | N | 10 | |
partition.default_name | N | \_\_HIVE\_DEFAULT\_PARTITION | |
payload.class | N | org.apache.hudi.common.model.EventTimeAvroPayload | |
precombine.field | N | ts | |
read.streaming.enabled | N | false | |
read.streaming.skip_compaction | N | false | |
read.streaming.skip_clustering | N | false | |
read.utc-timezone | N | true | |
record.merger.impls | N | org.apache.hudi.common.model.HoodieAvroRecordMerger | |
record.merger.strategy | N | eeb8d96f-b1e4-49fd-bbf8-28ac514178e5 | |
table.type | N | COPY_ON_WRITE | 指定表类型,可取:COPY_ON_WRITE 或 MERGE_ON_READ |
write.batch.size | N | 256.0 | |
write.commit.ack.timeout | N | -1 | |
write.ignore.failed | N | false | |
write.insert.cluster | N | false | |
write.log.max.size | N | 1024 | |
write.log_block.size | N | 128 | |
write.merge.max_memory | N | 100 | 单位:MB |
write.operation | N | upsert | 可取值:insert、upsert 或 bulk_insert,默认 upsert |
write.precombine | N | false | 是否在 insert 和 upsert 前删除重复数据 |
write.parquet.block.size | N | 120 | |
write.rate.limit | N | 0 | 每秒写入的数据条数。默认 0 表示没有限制 |
write.retry.interval.ms | N | 2000 | |
write.retry.times | N | 3 | |
write.sort.memory | N | 128 | 单位:MB |
write.task.max.size | N | 1024.0 | 单位:MB |
版权声明:本文为Aquester原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。