准备
-
不要在KETTLE打开要操作的转换、作业!!!
-
原理
- 先拿到保存日志的数据库的连接配置,然后采用文本编辑,添加/替换到
.ktr
、
.kjb
文件里面;- 最后把日志配置关联上数据库连接+本身配置信息,添加/替换到
.ktr
、
.kjb
文件里面
安装sublime
指定目录下,搜索文件的指定内容,能满足这个要求的文本编辑器,都可以
日志表的建表语句
-
基于oracle数据库
作业日志表
-- Create table
create table T_KETTLE_JOB_LOG
(
id_job INTEGER,
channel_id VARCHAR2(255),
jobname VARCHAR2(255),
status VARCHAR2(15),
lines_read INTEGER,
lines_written INTEGER,
lines_updated INTEGER,
lines_input INTEGER,
lines_output INTEGER,
lines_rejected INTEGER,
errors INTEGER,
startdate DATE,
enddate DATE,
logdate DATE,
depdate DATE,
replaydate DATE,
log_field CLOB,
executing_server VARCHAR2(255),
executing_user VARCHAR2(255),
start_job_entry VARCHAR2(255),
client VARCHAR2(255)
)
tablespace RHIN_CDR;
-- Add comments to the table
comment on table T_KETTLE_JOB_LOG
is 'KETTLE作业日志';
-- Add comments to the columns
comment on column T_KETTLE_JOB_LOG.id_job
is '批次ID(即作业ID),自递增,主键';
comment on column T_KETTLE_JOB_LOG.channel_id
is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_JOB_LOG.jobname
is '作业名称';
comment on column T_KETTLE_JOB_LOG.status
is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_JOB_LOG.lines_read
is '最后一个转换,读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_written
is '最后一个转换,写入的行数';
comment on column T_KETTLE_JOB_LOG.lines_updated
is '最后一个转换,更新的行数';
comment on column T_KETTLE_JOB_LOG.lines_input
is '最后一个转换,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_JOB_LOG.lines_output
is '最后一个转换,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_JOB_LOG.lines_rejected
is '最后一个转换,因错误处理导致拒绝的行数';
comment on column T_KETTLE_JOB_LOG.errors
is '发生的错误数';
comment on column T_KETTLE_JOB_LOG.startdate
is '开始执行时间(kettle的bug,始终是1900-01-01 7:00:00)';
comment on column T_KETTLE_JOB_LOG.enddate
is '结束执行时间';
comment on column T_KETTLE_JOB_LOG.logdate
is '最后记录日志的时间';
comment on column T_KETTLE_JOB_LOG.depdate
is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_JOB_LOG.replaydate
is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_JOB_LOG.log_field
is '详细日志内容';
comment on column T_KETTLE_JOB_LOG.executing_server
is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_JOB_LOG.executing_user
is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_JOB_LOG.start_job_entry
is '当前作业,从哪个转换开始执行(为空代表是从Start开始)';
comment on column T_KETTLE_JOB_LOG.client
is '客户端(SPOON、PAN、KITCHEN、CARTE)';
-- Create/Recreate indexes
create index IDX_T_KETTLE_JOB_LOG_TABLE_1 on T_KETTLE_JOB_LOG (ID_JOB)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_JOB_LOG_TABLE_2 on T_KETTLE_JOB_LOG (ERRORS, STATUS, JOBNAME)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_JOB_LOG_TABLE_3 on T_KETTLE_JOB_LOG (JOBNAME, LOGDATE)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_JOB_LOG_TABLE_4 on T_KETTLE_JOB_LOG (CHANNEL_ID)
tablespace RHIN_CDR;
作业日志通道表
-- Create table
create table T_KETTLE_JOB_CHANNEL_LOG
(
id_batch INTEGER,
channel_id VARCHAR2(255),
log_date DATE,
logging_object_type VARCHAR2(255),
object_name VARCHAR2(255),
object_copy VARCHAR2(255),
repository_directory VARCHAR2(255),
filename VARCHAR2(255),
object_id VARCHAR2(255),
object_revision VARCHAR2(255),
parent_channel_id VARCHAR2(255),
root_channel_id VARCHAR2(255)
)
tablespace RHIN_CDR;
-- Add comments to the columns
comment on column T_KETTLE_JOB_CHANNEL_LOG.id_batch
is '批次ID(即作业ID),与t_kettle_job_log的id_batch字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.channel_id
is '日志通道ID(GUID),跟t_kettle_step_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.log_date
is '最后记录日志的时间';
comment on column T_KETTLE_JOB_CHANNEL_LOG.logging_object_type
is '被记录对象的类型(如JOB、JOBENTRY、DATABASE、STEP、TRANS)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_name
is '被记录对象的名称(跟LOGGING_OBJECT_TYPE是一对)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_copy
is '被记录步骤对象的复制(不懂)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.repository_directory
is '资源库(或存储)的目录,貌似是JOB、TRANS才有';
comment on column T_KETTLE_JOB_CHANNEL_LOG.filename
is 'JOB、JOBENTRY、TRANS的路径(只有作业、转换才有)';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_id
is '当前对象ID';
comment on column T_KETTLE_JOB_CHANNEL_LOG.object_revision
is '当前对象版本';
comment on column T_KETTLE_JOB_CHANNEL_LOG.parent_channel_id
is '所属转换日志的日志通道ID,跟t_kettle_transformation_log的channel_id字段有关联';
comment on column T_KETTLE_JOB_CHANNEL_LOG.root_channel_id
is '所属作业日志的日志通道ID,跟t_kettle_job_log的channel_id字段有关联';
-- Create/Recreate indexes
create index IDX_T_KETTLE_JOB_CHANNEL_LOG_TABLE_1 on T_KETTLE_JOB_CHANNEL_LOG (CHANNEL_ID)
tablespace RHIN_CDR;
转换日志表
-- Create table
create table T_KETTLE_TRANSFORMATION_LOG
(
id_batch INTEGER,
channel_id VARCHAR2(255),
transname VARCHAR2(255),
status VARCHAR2(15),
lines_read INTEGER,
lines_written INTEGER,
lines_updated INTEGER,
lines_input INTEGER,
lines_output INTEGER,
lines_rejected INTEGER,
errors INTEGER,
startdate DATE,
enddate DATE,
logdate DATE,
depdate DATE,
replaydate DATE,
log_field CLOB,
executing_server VARCHAR2(255),
executing_user VARCHAR2(255),
client VARCHAR2(255)
)
tablespace RHIN_CDR;
-- Add comments to the table
comment on table T_KETTLE_TRANSFORMATION_LOG
is 'KETTLE转换日志';
-- Add comments to the columns
comment on column T_KETTLE_TRANSFORMATION_LOG.id_batch
is '批次ID(即转换ID),自递增';
comment on column T_KETTLE_TRANSFORMATION_LOG.channel_id
is '日志通道ID(GUID),跟Logging channel log table有关联';
comment on column T_KETTLE_TRANSFORMATION_LOG.transname
is '转换名称';
comment on column T_KETTLE_TRANSFORMATION_LOG.status
is '执行状态(start、end、stop、running)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_read
is '特定步骤,读取的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_written
is '特定步骤,写入的行数(没数据,没用的)';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_updated
is '特定步骤,执行的更新语句的数量';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_input
is '特定步骤,从存储或网络(如文件、数据库等)读取的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_output
is '特定步骤,输出到存储或网络(如文件、数据库等)的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.lines_rejected
is '特定步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_TRANSFORMATION_LOG.errors
is '发生的错误数';
comment on column T_KETTLE_TRANSFORMATION_LOG.startdate
is '开始执行时间(kettle的bug,有些是1900-01-01 7:00:00)';
comment on column T_KETTLE_TRANSFORMATION_LOG.enddate
is '结束执行时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.logdate
is '最后记录日志的时间';
comment on column T_KETTLE_TRANSFORMATION_LOG.depdate
is '依赖日期,作业设置中的依赖规则计算的最大日期。(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.replaydate
is '重播日期,跟STARTDATE是同义词(不懂)';
comment on column T_KETTLE_TRANSFORMATION_LOG.log_field
is '详细日志内容';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_server
is '哪个服务器在执行当前作业(主机名)';
comment on column T_KETTLE_TRANSFORMATION_LOG.executing_user
is '采用存储库方式,则是登录存储库的用户;否则为当前系统的登录用户';
comment on column T_KETTLE_TRANSFORMATION_LOG.client
is '客户端(SPOON、PAN、KITCHEN、CARTE)';
-- Create/Recreate indexes
create index IDX_T_KETTLE_TRANSFORMATION_1 on T_KETTLE_TRANSFORMATION_LOG (ID_BATCH)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_TRANSFORMATION_2 on T_KETTLE_TRANSFORMATION_LOG (ERRORS, STATUS, TRANSNAME)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_TRANSFORMATION_3 on T_KETTLE_TRANSFORMATION_LOG (TRANSNAME, LOGDATE)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_TRANSFORMATION_4 on T_KETTLE_TRANSFORMATION_LOG (CHANNEL_ID)
tablespace RHIN_CDR;
转换步骤日志表
-- Create table
create table T_KETTLE_STEP_LOG
(
id_batch INTEGER,
channel_id VARCHAR2(255),
transname VARCHAR2(255),
stepname VARCHAR2(255),
step_copy INTEGER,
lines_read INTEGER,
lines_written INTEGER,
lines_updated INTEGER,
lines_input INTEGER,
lines_output INTEGER,
lines_rejected INTEGER,
errors INTEGER,
log_field CLOB,
log_date DATE
)
tablespace RHIN_CDR;
-- Add comments to the table
comment on table T_KETTLE_STEP_LOG
is 'KETTLE转换-步骤日志';
-- Add comments to the columns
comment on column T_KETTLE_STEP_LOG.id_batch
is '批次ID(转换ID),自递增,主键';
comment on column T_KETTLE_STEP_LOG.channel_id
is '日志通道ID(GUID),跟t_kettle_job_channel_log的channel_id字段有关联';
comment on column T_KETTLE_STEP_LOG.transname
is '转换名称';
comment on column T_KETTLE_STEP_LOG.stepname
is '步骤名称';
comment on column T_KETTLE_STEP_LOG.step_copy
is '当前步骤复制的数量';
comment on column T_KETTLE_STEP_LOG.lines_read
is '从上一个步骤读取的数量';
comment on column T_KETTLE_STEP_LOG.lines_written
is '输出到跟随的步骤(因为支持并发多个步骤)。假设输入1w条数据,且跟随两个步骤,那么LINES_WRITTEN是2w(1w * 2)';
comment on column T_KETTLE_STEP_LOG.lines_updated
is '当前步骤,执行的更新语句的数量';
comment on column T_KETTLE_STEP_LOG.lines_input
is '当前步骤,从来源(文件、数据库、网络等),读取的行数';
comment on column T_KETTLE_STEP_LOG.lines_output
is '当前步骤,输出到输出端(文件、数据库、网络等)的行数';
comment on column T_KETTLE_STEP_LOG.lines_rejected
is '当前步骤,因错误处理导致拒绝的行数';
comment on column T_KETTLE_STEP_LOG.errors
is '当前步骤,发生的错误数';
comment on column T_KETTLE_STEP_LOG.log_field
is '当前步骤,产生的详细日志内容';
comment on column T_KETTLE_STEP_LOG.log_date
is '当前步骤,最后记录日志的时间';
-- Create/Recreate indexes
create index IDX_T_KETTLE_STEP_1 on T_KETTLE_STEP_LOG (ID_BATCH, TRANSNAME, LOG_DATE)
tablespace RHIN_CDR;
create index IDX_T_KETTLE_STEP_2 on T_KETTLE_STEP_LOG (ERRORS)
tablespace RHIN_CDR;
获取数据库连接配置
-
文本编辑器,随便打开一个作业或转换(配置过我们数据库的连接,即可),用于保存日志
找到如下内容(后面会用到):
<connection>
<name>xxxDataBaseName</name>
<server>xxxIP</server>
<type>ORACLE</type>
<access>Native</access>
<database>/xxxServiceName</database>
<port>xxxPort</port>
<username>xxxUserName</username>
<password>Encrypted xxx密码加密</password>
<servername/>
<data_tablespace/>
<index_tablespace/>
<attributes>
<attribute>
<code>FORCE_IDENTIFIERS_TO_LOWERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>FORCE_IDENTIFIERS_TO_UPPERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>IS_CLUSTERED</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>PORT_NUMBER</code>
<attribute>1521</attribute>
</attribute>
<attribute>
<code>PRESERVE_RESERVED_WORD_CASE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>QUOTE_ALL_FIELDS</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>STRICT_NUMBER_38_INTERPRETATION</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>SUPPORTS_BOOLEAN_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>SUPPORTS_TIMESTAMP_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>USE_POOLING</code>
<attribute>N</attribute>
</attribute>
</attributes>
</connection>
转换
-
使用sublime文本编辑器的全局搜索/替换(Find in Files),快捷键:ctrl+shift+F
-
搜索路径
xxx转换的存储目录,*.ktr
配置数据库连接
搜索关键词:
</notepads>
替换
</notepads>
数据库连接配置的内容粘贴过来
像这样:
</notepads>
<connection>
<name>xxxDataBaseName</name>
<server>xxxIP</server>
<type>ORACLE</type>
<access>Native</access>
<database>/xxxServiceName</database>
<port>xxxPort</port>
<username>xxxUserName</username>
<password>Encrypted xxx密码加密</password>
<servername/>
<data_tablespace/>
<index_tablespace/>
<attributes>
<attribute>
<code>FORCE_IDENTIFIERS_TO_LOWERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>FORCE_IDENTIFIERS_TO_UPPERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>IS_CLUSTERED</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>PORT_NUMBER</code>
<attribute>1521</attribute>
</attribute>
<attribute>
<code>PRESERVE_RESERVED_WORD_CASE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>QUOTE_ALL_FIELDS</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>STRICT_NUMBER_38_INTERPRETATION</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>SUPPORTS_BOOLEAN_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>SUPPORTS_TIMESTAMP_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>USE_POOLING</code>
<attribute>N</attribute>
</attribute>
</attributes>
</connection>
配置日志表
配置转换日志表
正则搜索关键词:
<trans-log-table>
<connection\s*/>
<schema\s*/>
<table\s*/>
<size_limit_lines\s*/>
<interval\s*/>
<timeout_days\s*/>
替换
<trans-log-table>
<connection>xxxDatabaseName</connection>
<schema>xxxSchema</schema>
<table>t_kettle_transformation_log</table>
<size_limit_lines/>
<interval/>
<timeout_days>14</timeout_days>
connection:
创建的数据库连接的名称
schema:
所属数据库
table:
日志表名
size_limit_lines:
记录多少行日志。截取行,不设置则完整日志
interval:
多久记录一次(单位:秒)。执行比较久时,可以指定这个配置,隔一段时间,就将当前日志记录下来。不设置则跑完了才记录
timeout_days:
保留多少天的日志。每次运行时,自动判断+自动清理
修改字段
搜索关键词:
<field>
<id>EXECUTING_SERVER</id>
<enabled>N</enabled>
<name>EXECUTING_SERVER</name>
</field>
<field>
<id>EXECUTING_USER</id>
<enabled>N</enabled>
<name>EXECUTING_USER</name>
</field>
<field>
<id>CLIENT</id>
<enabled>N</enabled>
<name>CLIENT</name>
</field>
</trans-log-table>
替换
<field>
<id>EXECUTING_SERVER</id>
<enabled>Y</enabled>
<name>EXECUTING_SERVER</name>
</field>
<field>
<id>EXECUTING_USER</id>
<enabled>Y</enabled>
<name>EXECUTING_USER</name>
</field>
<field>
<id>CLIENT</id>
<enabled>Y</enabled>
<name>CLIENT</name>
</field>
</trans-log-table>
配置转换步骤日志表
正则搜索关键词:
<step-log-table>
<connection\s*/>
<schema\s*/>
<table\s*/>
<timeout_days\s*/>
替换
<step-log-table>
<connection>xxxDatabaseName</connection>
<schema>xxxSchema</schema>
<table>t_kettle_step_log</table>
<timeout_days>14</timeout_days>
connection:
创建的数据库连接的名称
schema:
所属数据库
table:
日志表名
timeout_days:
保留多少天的日志。每次运行时,自动判断+自动清理
修改字段
搜索关键词:
<field>
<id>LOG_FIELD</id>
<enabled>N</enabled>
<name>LOG_FIELD</name>
</field>
</step-log-table>
替换
<field>
<id>LOG_FIELD</id>
<enabled>Y</enabled>
<name>LOG_FIELD</name>
</field>
</step-log-table>
作业
-
使用sublime文本编辑器的全局搜索/替换(Find in Files),快捷键:ctrl+shift+F
-
搜索路径
xxx转换的存储目录,*.kjb
配置数据库连接
搜索关键词:
<slaveservers>
</slaveservers>
<slaveservers>
</slaveservers>
像这样:
<slaveservers>
</slaveservers>
<connection>
<name>xxxDataBaseName</name>
<server>xxxIP</server>
<type>ORACLE</type>
<access>Native</access>
<database>/xxxServiceName</database>
<port>xxxPort</port>
<username>xxxUserName</username>
<password>Encrypted xxx密码加密</password>
<servername/>
<data_tablespace/>
<index_tablespace/>
<attributes>
<attribute>
<code>FORCE_IDENTIFIERS_TO_LOWERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>FORCE_IDENTIFIERS_TO_UPPERCASE</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>IS_CLUSTERED</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>PORT_NUMBER</code>
<attribute>1521</attribute>
</attribute>
<attribute>
<code>PRESERVE_RESERVED_WORD_CASE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>QUOTE_ALL_FIELDS</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>STRICT_NUMBER_38_INTERPRETATION</code>
<attribute>N</attribute>
</attribute>
<attribute>
<code>SUPPORTS_BOOLEAN_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>SUPPORTS_TIMESTAMP_DATA_TYPE</code>
<attribute>Y</attribute>
</attribute>
<attribute>
<code>USE_POOLING</code>
<attribute>N</attribute>
</attribute>
</attributes>
</connection>
配置日志表
配置作业日志表
正则搜索关键词:
<job-log-table>
<connection\s*/>
<schema\s*/>
<table\s*/>
<size_limit_lines\s*/>
<interval\s*/>
<timeout_days\s*/>
替换
<job-log-table>
<connection>xxxDatabaseName</connection>
<schema>xxxSchema</schema>
<table>t_kettle_job_log</table>
<size_limit_lines/>
<interval/>
<timeout_days>14</timeout_days>
connection:
创建的数据库连接的名称
schema:
所属数据库
table:
日志表名
size_limit_lines:
记录多少行日志。截取行,不设置则完整日志
interval:
多久记录一次(单位:秒)。执行比较久时,可以指定这个配置,隔一段时间,就将当前日志记录下来。不设置则跑完了才记录
timeout_days:
保留多少天的日志。每次运行时,自动判断+自动清理
修改字段
搜索关键词
<field>
<id>EXECUTING_SERVER</id>
<enabled>N</enabled>
<name>EXECUTING_SERVER</name>
</field>
<field>
<id>EXECUTING_USER</id>
<enabled>N</enabled>
<name>EXECUTING_USER</name>
</field>
<field>
<id>START_JOB_ENTRY</id>
<enabled>N</enabled>
<name>START_JOB_ENTRY</name>
</field>
<field>
<id>CLIENT</id>
<enabled>N</enabled>
<name>CLIENT</name>
</field>
</job-log-table>
替换
<field>
<id>EXECUTING_SERVER</id>
<enabled>Y</enabled>
<name>EXECUTING_SERVER</name>
</field>
<field>
<id>EXECUTING_USER</id>
<enabled>Y</enabled>
<name>EXECUTING_USER</name>
</field>
<field>
<id>START_JOB_ENTRY</id>
<enabled>Y</enabled>
<name>START_JOB_ENTRY</name>
</field>
<field>
<id>CLIENT</id>
<enabled>Y</enabled>
<name>CLIENT</name>
</field>
</job-log-table>
配置作业日志通道表
正则搜索关键词:
<channel-log-table>
<connection\s*/>
<schema\s*/>
<table\s*/>
<timeout_days\s*/>
替换
<channel-log-table>
<connection>xxxDatabaseName</connection>
<schema>xxxSchema</schema>
<table>t_kettle_job_channel_log</table>
<timeout_days>14</timeout_days>
connection:
创建的数据库连接的名称
schema:
所属数据库
table:
日志表名
timeout_days:
保留多少天的日志。每次运行时,自动判断+自动清理