–》1.0 hive命令
DESCRIBE FORMATTED dwd.dwd_mm_inv_batch_stock;
–》2.0 连接hive元数据库mysql, sql查询
SELECT d.name,
t.tbl_name,
tp.param_value '普通表行数',
t1.表的行数 '分区表总行数',
t1.表的分区数,
coalesce(tp2.数据量gb, t1.数据量gb) '数据量GB',
coalesce(tp2.数据量mb, t1.数据量mb) '数据量MB',
coalesce(tp3.param_value, t1.文件数) '文件数',
concat('ANALYZE TABLE ',
d.name,
'.',
t.tbl_name,
' COMPUTE STATISTICS; ') '表统计信息收集'
FROM hive.dbs d
JOIN hive.tbls t
ON t.db_id = d.db_id
LEFT JOIN hive.table_params tp
ON tp.tbl_id = t.tbl_id
AND tp.param_key = 'numRows'
LEFT JOIN hive.table_params tp3
ON tp3.tbl_id = t.tbl_id
AND tp3.param_key = 'numFiles'
LEFT JOIN (SELECT tbl_id,
param_value / 1024 / 1024 / 1024 '数据量GB',
param_value / 1024 / 1024 '数据量MB'
FROM hive.table_params
WHERE param_key = 'totalSize'
GROUP BY tbl_id) tp2
ON tp2.tbl_id = t.tbl_id
LEFT JOIN (SELECT tbl.tbl_name,
d.name db_name,
SUM(CASE
WHEN param_key = 'numRows' THEN
param_value
ELSE
0
END) '表的行数',
SUM(CASE
WHEN param_key = 'numRows' THEN
1
ELSE
0
END) '表的分区数',
SUM(CASE
WHEN param_key = 'totalSize' THEN
param_value
ELSE
0
END) / 1024 / 1024 / 1024 '数据量GB',
SUM(CASE
WHEN param_key = 'totalSize' THEN
param_value
ELSE
0
END) / 1024 / 1024 '数据量MB',
SUM(CASE
WHEN param_key = 'numFiles' THEN
param_value
ELSE
0
END) '文件数'
FROM hive.partitions pt
INNER JOIN hive.partition_params ptp
ON pt.part_id = ptp.part_id
INNER JOIN hive.tbls tbl
ON pt.tbl_id = tbl.tbl_id
INNER JOIN hive.dbs d
ON tbl.db_id = d.db_id
WHERE d.name = 'ods'
GROUP BY tbl.tbl_name) t1
ON t1.db_name = d.name
AND t1.tbl_name = t.tbl_name
WHERE d.name = 'ods'
-- and t.TBL_NAME = 'ods_s4_matdoc'
ORDER BY CAST(tp.param_value AS signed) DESC;
版权声明:本文为qq_42828748原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。