将多个CSV文件按照分类汇总

  • Post author:
  • Post category:其他


import os

import csv

# Folder containing the input CSV files, the column used for grouping, and the
# number of rows buffered per category before they are flushed to disk.
folder_path = '/path/to/csv_folder'
category_column_name = 'Category'
buffer_size = 1000  # tune as needed

# Columns included in the output files.
output_column_names = ['Name', 'Category', 'Value']


def _append_rows(output_path, rows, fieldnames, encoding=None):
    """Append *rows* to the CSV at *output_path*, writing a header if it is empty."""
    with open(output_path, 'a', newline='', encoding=encoding) as output_file:
        # extrasaction='ignore': input rows may carry columns that are not part
        # of the output; the default ('raise') would abort with ValueError.
        writer = csv.DictWriter(
            output_file, fieldnames=fieldnames, extrasaction='ignore'
        )
        # Nothing has been written through this handle yet, so the on-disk
        # size tells us whether the file was just created (or was empty).
        if os.path.getsize(output_path) == 0:
            writer.writeheader()
        writer.writerows(rows)


def split_csv_by_category(folder_path, category_column_name,
                          output_column_names, buffer_size=1000, *,
                          output_dir=None, encoding=None):
    """Split every ``*.csv`` file in a folder into one CSV file per category.

    Rows are grouped by the value of ``category_column_name`` and appended to
    ``<category>.csv``. Rows are buffered per category and flushed whenever a
    buffer reaches ``buffer_size`` rows; leftovers are flushed at the end.

    Args:
        folder_path: Directory that is scanned (non-recursively) for files
            whose name ends with ``.csv``.
        category_column_name: Header of the column whose value selects the
            output file.
        output_column_names: Columns written to the output files, in order.
            Other input columns are dropped; missing ones are written empty.
        buffer_size: Number of rows held per category before being written.
        output_dir: Directory for the per-category files. Defaults to
            ``folder_path``, matching the original script.
        encoding: Text encoding for reading and writing; ``None`` uses the
            platform default.

    Returns:
        A dict mapping each category to the number of rows written for it.

    Raises:
        KeyError: If an input file has no ``category_column_name`` column.

    NOTE: output files are opened in append mode. When ``output_dir`` is the
    input folder, running the function again will read the previously written
    ``<category>.csv`` files as input; pass a separate ``output_dir`` to avoid
    that.
    """
    if output_dir is None:
        output_dir = folder_path
    else:
        os.makedirs(output_dir, exist_ok=True)

    pending_by_category = {}  # category -> rows not yet written
    written = {}              # category -> total rows seen so far

    def flush(category):
        rows = pending_by_category[category]
        output_path = os.path.join(output_dir, f'{category}.csv')
        _append_rows(output_path, rows, output_column_names, encoding)
        rows.clear()

    # os.listdir is evaluated once, so files created below are not picked up
    # during this run; sorting makes the row order in the outputs reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue
        input_path = os.path.join(folder_path, filename)
        # newline='' lets the csv module handle quoted embedded newlines.
        with open(input_path, 'r', newline='', encoding=encoding) as file:
            for row in csv.DictReader(file):
                category = row[category_column_name]
                pending_by_category.setdefault(category, []).append(row)
                written[category] = written.get(category, 0) + 1
                if len(pending_by_category[category]) >= buffer_size:
                    flush(category)

    # Write whatever is still buffered for each category.
    for category, rows in pending_by_category.items():
        if rows:
            flush(category)

    return written


if __name__ == '__main__':
    split_csv_by_category(
        folder_path, category_column_name, output_column_names, buffer_size
    )



版权声明:本文为2301_77925375原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。