hive-表对应hdsf文件数量,可用于查hdfs小文件数量的

  • Post author:
  • Post category:其他


–》1.0 hive命令

DESCRIBE FORMATTED dwd.dwd_mm_inv_batch_stock;

–》2.0 连接hive元数据库mysql, sql查询

SELECT d.name,
       t.tbl_name,
       tp.param_value '普通表行数',
       t1.表的行数 '分区表总行数',
       t1.表的分区数,
       coalesce(tp2.数据量gb, t1.数据量gb) '数据量GB',
       coalesce(tp2.数据量mb, t1.数据量mb) '数据量MB',
       coalesce(tp3.param_value, t1.文件数) '文件数',
       concat('ANALYZE TABLE ',
              d.name,
              '.',
              t.tbl_name,
              ' COMPUTE STATISTICS; ') '表统计信息收集'
  FROM hive.dbs d
  JOIN hive.tbls t
    ON t.db_id = d.db_id
  LEFT JOIN hive.table_params tp
    ON tp.tbl_id = t.tbl_id
   AND tp.param_key = 'numRows'
  LEFT JOIN hive.table_params tp3
    ON tp3.tbl_id = t.tbl_id
   AND tp3.param_key = 'numFiles'
  LEFT JOIN (SELECT tbl_id,
                    param_value / 1024 / 1024 / 1024 '数据量GB',
                    param_value / 1024 / 1024 '数据量MB'
               FROM hive.table_params
              WHERE param_key = 'totalSize'
              GROUP BY tbl_id) tp2
    ON tp2.tbl_id = t.tbl_id
  LEFT JOIN (SELECT tbl.tbl_name,
                    d.name db_name,
                    SUM(CASE
                          WHEN param_key = 'numRows' THEN
                           param_value
                          ELSE
                           0
                        END) '表的行数',
                    SUM(CASE
                          WHEN param_key = 'numRows' THEN
                           1
                          ELSE
                           0
                        END) '表的分区数',
                    SUM(CASE
                          WHEN param_key = 'totalSize' THEN
                           param_value
                          ELSE
                           0
                        END) / 1024 / 1024 / 1024 '数据量GB',
                    SUM(CASE
                          WHEN param_key = 'totalSize' THEN
                           param_value
                          ELSE
                           0
                        END) / 1024 / 1024 '数据量MB',
                    SUM(CASE
                          WHEN param_key = 'numFiles' THEN
                           param_value
                          ELSE
                           0
                        END) '文件数'
               FROM hive.partitions pt
              INNER JOIN hive.partition_params ptp
                 ON pt.part_id = ptp.part_id
              INNER JOIN hive.tbls tbl
                 ON pt.tbl_id = tbl.tbl_id
              INNER JOIN hive.dbs d
                 ON tbl.db_id = d.db_id
              WHERE d.name = 'ods'
              GROUP BY tbl.tbl_name) t1
    ON t1.db_name = d.name
   AND t1.tbl_name = t.tbl_name
 WHERE d.name = 'ods'
-- and t.TBL_NAME  = 'ods_s4_matdoc'
 ORDER BY CAST(tp.param_value AS signed) DESC;



版权声明:本文为qq_42828748原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。