import os
import csv
# Folder that holds the input CSV files, the column whose value selects the
# output file, and how many rows to buffer per category before flushing.
folder_path = '/path/to/csv_folder'
category_column_name = 'Category'
buffer_size = 1000  # tune as needed
# Columns written to every output file, in this order.
output_column_names = ['Name', 'Category', 'Value']


def _append_rows(output_path, rows, fieldnames):
    """Append *rows* to the CSV at *output_path*, adding a header if it is empty."""
    with open(output_path, 'a', newline='') as output_file:
        # The input rows carry every column of their source file;
        # extrasaction='ignore' keeps only *fieldnames* instead of raising
        # ValueError on the first extra column.
        writer = csv.DictWriter(
            output_file, fieldnames=fieldnames, extrasaction='ignore'
        )
        # Append mode has already created the file if it was missing, so a
        # size of zero means no header has been written yet.
        if os.path.getsize(output_path) == 0:
            writer.writeheader()
        writer.writerows(rows)


def split_csv_by_category(folder_path, category_column_name,
                          output_column_names, buffer_size=1000):
    """Split every ``*.csv`` file in *folder_path* into one file per category.

    Each input row is appended to ``<category>.csv`` inside *folder_path*,
    where ``<category>`` is the row's value in *category_column_name*. Rows
    are buffered per category and flushed whenever a buffer reaches
    *buffer_size* rows; whatever remains is flushed after all inputs are read.

    NOTE: output files are created in the same folder that is scanned for
    inputs. The folder is listed once before any output is written, so files
    created during a run are not re-read by that run, but a later run on the
    same folder will pick them up as inputs.

    Args:
        folder_path: Directory containing the input CSV files.
        category_column_name: Header of the column to split on.
        output_column_names: Columns to write, in order; other input columns
            are dropped and missing ones are written as empty strings.
        buffer_size: Number of rows buffered per category before a flush.

    Returns:
        A dict mapping each category to the number of rows written for it.

    Raises:
        KeyError: If an input file has rows but no *category_column_name*
            column.
    """
    pending = {}  # category -> rows not yet written
    written = {}  # category -> rows written so far

    def flush(category):
        rows = pending[category]
        if not rows:
            return
        output_path = os.path.join(folder_path, f'{category}.csv')
        _append_rows(output_path, rows, output_column_names)
        written[category] = written.get(category, 0) + len(rows)
        pending[category] = []

    # Sorted so that the row order in the outputs is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue
        input_path = os.path.join(folder_path, filename)
        # newline='' lets the csv module handle embedded newlines itself.
        with open(input_path, 'r', newline='') as file:
            for row in csv.DictReader(file):
                category = row[category_column_name]
                pending.setdefault(category, []).append(row)
                if len(pending[category]) >= buffer_size:
                    flush(category)

    # Write out whatever is still buffered once every input has been read.
    for category in pending:
        flush(category)

    return written


if __name__ == '__main__':
    split_csv_by_category(
        folder_path, category_column_name, output_column_names, buffer_size
    )