"""
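Split each source recording into several clips according to the annotation
results stored in the CSV files. A segment whose text is '无' is ignored;
every other segment is cut from sTime to sTime + dTime (both multiplied by
1000 to obtain the millisecond offsets used for slicing).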
"""
import os
import pandas as pd
import json
from pydub import AudioSegment
# Input/output locations. Backslashes are escaped explicitly so the values
# stay exactly the same without relying on invalid escape sequences
# (``\c``, ``\w`` ...), which newer Python versions warn about.
csv_url = "D:\\csv\\"
wav_url = "D:\\标注音频与示例\\"
save_wav = "D:\\wav\\new_wav\\"


def iter_labelled_segments(result):
    """Yield ``(segment_id, start_ms, end_ms)`` for every labelled segment.

    Args:
        result: Decoded ``finalResult`` JSON. Its ``'text'`` entry is
            iterated; each item must provide ``'id'`` and a ``'defData'``
            mapping with ``'text'``, ``'sTime'`` and ``'dTime'``.

    Yields:
        One tuple per segment whose ``defData['text']`` is not ``'无'``.
        ``sTime`` and ``dTime`` are multiplied by 1000, and the end offset
        is the start offset plus the scaled duration.

    Raises:
        KeyError: If a segment lacks one of the keys listed above.
    """
    for segment in result['text']:
        def_data = segment['defData']
        if def_data['text'] == '无':
            # '无' marks a segment that must not be exported.
            continue
        start_ms = def_data['sTime'] * 1000
        end_ms = start_ms + def_data['dTime'] * 1000
        yield segment['id'], start_ms, end_ms


def split_recording(name, raw_result, wav_dir=wav_url, out_root=save_wav):
    """Export the labelled segments of one recording as separate WAV files.

    A sub-directory named after the recording (file name without its
    extension) is created under ``out_root``. It receives a ``.txt`` file
    holding the recording name and the raw annotation JSON, plus one
    ``<stem>-<segment id>.wav`` file per labelled segment.

    Args:
        name: File name of the recording, looked up inside ``wav_dir``.
        raw_result: Annotation JSON text for this recording.
        wav_dir: Directory containing the source WAV files.
        out_root: Directory under which the per-recording folder is created.
    """
    # splitext drops only the final extension, so names containing extra
    # dots (e.g. "a.b.wav") are no longer truncated at the first dot.
    stem = os.path.splitext(name)[0]
    out_dir = os.path.join(out_root, stem)
    os.makedirs(out_dir, exist_ok=True)

    # Decode before writing anything so malformed JSON fails early.
    result = json.loads(raw_result)

    txt_path = os.path.join(out_dir, stem + ".txt")
    with open(txt_path, "w", encoding='utf-8') as fn:
        fn.write(name + "\n")
        fn.write(raw_result)

    audio = AudioSegment.from_wav(os.path.join(wav_dir, name))
    for segment_id, start_ms, end_ms in iter_labelled_segments(result):
        # Cut the audio from the start time to the end time.
        audio_chunk = audio[start_ms:end_ms]
        chunk_path = os.path.join(out_dir, stem + "-" + str(segment_id) + ".wav")
        audio_chunk.export(chunk_path, format="wav")


def split_annotated_audio(csv_dir=csv_url, wav_dir=wav_url, out_root=save_wav):
    """Process every file in ``csv_dir`` and split the recordings it lists.

    Each file is read as a UTF-8 CSV with the columns ``storeFileName``
    (recording file name) and ``finalResult`` (annotation JSON text).

    Args:
        csv_dir: Directory whose entries are all read as annotation CSVs.
        wav_dir: Directory containing the source WAV files.
        out_root: Directory under which the split clips are written.
    """
    for csv_name in os.listdir(csv_dir):
        data_frame = pd.read_csv(os.path.join(csv_dir, csv_name), encoding='utf-8')
        rows = zip(data_frame["storeFileName"], data_frame["finalResult"])
        for name, raw_result in rows:
            split_recording(name, raw_result, wav_dir, out_root)


if __name__ == "__main__":
    split_annotated_audio()
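# Copyright notice: this is an original article by sinat_35395498, released
# under the CC 4.0 BY-SA license. When reposting, please include a link to the
# original source and this notice.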