Last active
October 25, 2023 07:01
-
-
Save i-sync/79d30960e5fae8bd18d27e4adafa9fd5 to your computer and use it in GitHub Desktop.
Audio Split 音频文件分割
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
# Root directory that is walked for input audio files.
base_directory = "/path/to/source"
# Directory under which the split audio files are written.
output_directory = "/path/to/dest"
# Length of each segment, in seconds.
segment_duration = 180
def get_audio_encoding(input_path):
    """Return the codec name of the first audio stream of *input_path*.

    The name is whatever ``ffprobe`` prints for ``stream=codec_name``
    (for example ``"mp3"`` or ``"aac"``), stripped of surrounding whitespace.

    Returns an empty string when ffprobe prints nothing on stdout (unreadable
    file, no audio stream) or when ffprobe cannot be started at all.
    """
    # Pass the arguments as a list so the path is never parsed by a shell:
    # quotes, "$" or backticks in a file name stay literal.
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        "-i", input_path,
    ]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError:
        # ffprobe missing or not executable: report "unknown", as the
        # shell-based version did.
        return ""
    return completed.stdout.decode(errors="replace").strip()
def _segments_exist(existing_names, filename_prefix, extensions):
    """Return True if *existing_names* already holds a segment of this file.

    A segment is named ``<filename_prefix>-<digits><extension>``. Matching the
    whole shape (rather than a bare ``startswith``) keeps ``track1`` from
    being mistaken for ``track10``.
    """
    head = f"{filename_prefix}-"
    for name in existing_names:
        if not name.startswith(head):
            continue
        number, extension = os.path.splitext(name[len(head):])
        if number.isdigit() and extension in extensions:
            return True
    return False


def _write_segments(source_path, target_directory, filename_prefix, extension):
    """Cut *source_path* into ``segment_duration``-second files via stream copy."""
    # The output path is an ffmpeg filename pattern, so a literal "%" in the
    # directory or file name has to be doubled.
    literal_part = os.path.join(target_directory, filename_prefix).replace("%", "%%")
    subprocess.run(
        [
            "ffmpeg",
            "-i", source_path,
            "-f", "segment",
            "-segment_time", str(segment_duration),
            # The segment muxer's numbering option is "segment_start_number".
            "-segment_start_number", "1",
            "-c", "copy",
            f"{literal_part}-%02d{extension}",
        ],
        check=True,
    )


def _split_one_file(root, file):
    """Split the single audio file ``root/file`` into the output tree."""
    import tempfile

    input_path = os.path.join(root, file)
    filename_prefix, file_extension = os.path.splitext(file)

    # Outputs are grouped by the name of the directory the source file is in.
    directory_name = os.path.basename(os.path.normpath(root))
    output_directory_path = os.path.join(output_directory, directory_name)
    os.makedirs(output_directory_path, exist_ok=True)

    # Skip files that already have segments (either in their own extension or
    # as .mp3 when they had to be re-encoded).
    if _segments_exist(
        os.listdir(output_directory_path), filename_prefix, {file_extension, ".mp3"}
    ):
        output_filepath_pattern = os.path.join(
            output_directory_path, f"{filename_prefix}-%02d{file_extension}"
        )
        print(f"目标文件 {output_filepath_pattern} 已存在,跳过当前文件")
        return

    encoding = get_audio_encoding(input_path)
    codec_matches_extension = (encoding == "aac" and file.endswith(".m4a")) or (
        encoding == "mp3" and file.endswith(".mp3")
    )
    if codec_matches_extension:
        # The stream can be copied straight into files of the same type.
        _write_segments(input_path, output_directory_path, filename_prefix, file_extension)
        return

    # Codec and extension disagree: re-encode to MP3 first, then split the
    # temporary MP3. A unique temp name avoids clobbering an existing file.
    handle, temp_path = tempfile.mkstemp(suffix=".mp3", dir=output_directory)
    os.close(handle)
    try:
        subprocess.run(
            # -y: mkstemp already created the (empty) file, so overwrite it.
            ["ffmpeg", "-y", "-i", input_path, "-c:a", "libmp3lame", "-vn", temp_path],
            check=True,
        )
        # The data is MP3 now, so the segments get an .mp3 extension.
        _write_segments(temp_path, output_directory_path, filename_prefix, ".mp3")
    finally:
        # Remove the temporary file even when ffmpeg fails.
        os.remove(temp_path)


def split_audio_files(directory):
    """Split every ``.mp3`` / ``.m4a`` file below *directory* into segments.

    Walks *directory* recursively. Each matching file is cut into
    ``segment_duration``-second pieces named ``<name>-NN<ext>`` (numbered from
    01) inside ``output_directory/<name of the file's directory>``. Files that
    already have segments there are skipped. A file whose codec does not match
    its extension is re-encoded to MP3 first and produces ``.mp3`` segments.

    Note: only the last component of the source directory is used for the
    output folder, so source directories sharing a name share an output folder.

    Raises:
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    print(directory)
    for root, dirs, files in os.walk(directory):
        print(root, dirs)
        for file in files:
            print(file)
            if file.endswith((".mp3", ".m4a")):
                _split_one_file(root, file)
    print("音频文件切割完成!")
# Script entry point: split everything below the configured source directory.
if __name__ == "__main__":
    split_audio_files(base_directory)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import shutil | |
import subprocess | |
# Root directory that is walked for input audio files.
base_directory = "/xxx"
# Directory under which the split audio files are written.
output_directory = "/yyy"
# Length of each segment, in seconds; shorter files are copied unchanged.
segment_duration = 180
def get_audio_encoding(input_path):
    """Return the codec name of the first audio stream of *input_path*.

    The name is whatever ``ffprobe`` prints for ``stream=codec_name``
    (for example ``"mp3"`` or ``"aac"``), stripped of surrounding whitespace.

    Returns an empty string when ffprobe prints nothing on stdout (unreadable
    file, no audio stream) or when ffprobe cannot be started at all.
    """
    # Pass the arguments as a list so the path is never parsed by a shell:
    # quotes, "$" or backticks in a file name stay literal.
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        "-i", input_path,
    ]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError:
        # ffprobe missing or not executable: report "unknown", as the
        # shell-based version did.
        return ""
    return completed.stdout.decode(errors="replace").strip()
def get_audio_duration(input_path):
    """Return the duration of *input_path* in seconds, as reported by ffprobe.

    Raises:
        ValueError: if ffprobe cannot be started or does not print a number
            for ``format=duration`` (missing or unreadable file, unknown
            duration).
    """
    # Pass the arguments as a list so the path is never parsed by a shell.
    command = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        "-i", input_path,
    ]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Keep the exception type callers already see for a failed probe.
        raise ValueError(f"could not run ffprobe on {input_path!r}: {exc}") from exc

    reported = completed.stdout.decode(errors="replace").strip()
    try:
        return float(reported)
    except ValueError:
        detail = completed.stderr.decode(errors="replace").strip() or repr(reported)
        raise ValueError(
            f"ffprobe reported no usable duration for {input_path!r}: {detail}"
        ) from None
def _already_processed(existing_names, file, filename_prefix, extensions):
    """Return True if the output folder already holds results for *file*.

    Results are either a verbatim copy named exactly *file* (short input) or
    segments named ``<filename_prefix>-<digits><extension>``. Matching the
    whole shape (rather than a bare ``startswith``) keeps ``track1`` from
    being mistaken for ``track10``.
    """
    head = f"{filename_prefix}-"
    for name in existing_names:
        if name == file:
            return True
        if not name.startswith(head):
            continue
        number, extension = os.path.splitext(name[len(head):])
        if number.isdigit() and extension in extensions:
            return True
    return False


def _write_segments(source_path, target_directory, filename_prefix, extension):
    """Cut *source_path* into ``segment_duration``-second files via stream copy."""
    # The output path is an ffmpeg filename pattern, so a literal "%" in the
    # directory or file name has to be doubled.
    literal_part = os.path.join(target_directory, filename_prefix).replace("%", "%%")
    subprocess.run(
        [
            "ffmpeg",
            "-i", source_path,
            "-f", "segment",
            "-segment_time", str(segment_duration),
            # The segment muxer's numbering option is "segment_start_number".
            "-segment_start_number", "1",
            "-c", "copy",
            f"{literal_part}-%02d{extension}",
        ],
        check=True,
    )


def _split_one_file(root, file):
    """Split (or copy, when short) the single audio file ``root/file``."""
    import tempfile

    input_path = os.path.join(root, file)
    filename_prefix, file_extension = os.path.splitext(file)

    # Outputs are grouped by the name of the directory the source file is in.
    directory_name = os.path.basename(os.path.normpath(root))
    output_directory_path = os.path.join(output_directory, directory_name)
    os.makedirs(output_directory_path, exist_ok=True)

    # Skip files that already have a copy or segments in the output folder.
    if _already_processed(
        os.listdir(output_directory_path),
        file,
        filename_prefix,
        {file_extension, ".mp3"},
    ):
        output_filepath_pattern = os.path.join(
            output_directory_path, f"{filename_prefix}-%02d{file_extension}"
        )
        print(f"目标文件 {output_filepath_pattern} 已存在,跳过当前文件")
        return

    # Files shorter than one segment are copied unchanged.
    duration = get_audio_duration(input_path)
    if duration < segment_duration:
        shutil.copy2(input_path, os.path.join(output_directory_path, file))
        print(f"音频文件 {file} 长度小于切割长度,直接复制到目标目录")
        return

    encoding = get_audio_encoding(input_path)
    codec_matches_extension = (encoding == "aac" and file.endswith(".m4a")) or (
        encoding == "mp3" and file.endswith(".mp3")
    )
    if codec_matches_extension:
        # The stream can be copied straight into files of the same type.
        _write_segments(input_path, output_directory_path, filename_prefix, file_extension)
        return

    # Codec and extension disagree: re-encode to MP3 first, then split the
    # temporary MP3. A unique temp name avoids clobbering an existing file.
    handle, temp_path = tempfile.mkstemp(suffix=".mp3", dir=output_directory)
    os.close(handle)
    try:
        subprocess.run(
            # -y: mkstemp already created the (empty) file, so overwrite it.
            ["ffmpeg", "-y", "-i", input_path, "-c:a", "libmp3lame", "-vn", temp_path],
            check=True,
        )
        # The data is MP3 now, so the segments get an .mp3 extension.
        _write_segments(temp_path, output_directory_path, filename_prefix, ".mp3")
    finally:
        # Remove the temporary file even when ffmpeg fails.
        os.remove(temp_path)


def split_audio_files(directory):
    """Split every ``.mp3`` / ``.m4a`` file below *directory* into segments.

    Walks *directory* recursively. Each matching file is cut into
    ``segment_duration``-second pieces named ``<name>-NN<ext>`` (numbered from
    01) inside ``output_directory/<name of the file's directory>``. Files
    shorter than ``segment_duration`` are copied unchanged, and files that
    already have output there are skipped. A file whose codec does not match
    its extension is re-encoded to MP3 first and produces ``.mp3`` segments.

    Note: only the last component of the source directory is used for the
    output folder, so source directories sharing a name share an output folder.

    Raises:
        ValueError: if the duration of a file cannot be determined.
        subprocess.CalledProcessError: if an ffmpeg invocation fails.
    """
    print(directory)
    for root, dirs, files in os.walk(directory):
        print(root, dirs)
        for file in files:
            print(file)
            if file.endswith((".mp3", ".m4a")):
                _split_one_file(root, file)
    print("音频文件切割完成!")
# Script entry point: split everything below the configured source directory.
if __name__ == "__main__":
    split_audio_files(base_directory)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import subprocess | |
import argparse | |
def _probe_codec(file_path):
    """Return ffprobe's codec name for the first audio stream, or "" on failure."""
    # Arguments go in as a list so the path is never parsed by a shell, and
    # only stdout is read so ffprobe error text cannot end up in the result.
    command = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "a:0",
        "-show_entries", "stream=codec_name",
        "-of", "default=noprint_wrappers=1:nokey=1",
        "-i", file_path,
    ]
    try:
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError:
        return ""
    if completed.returncode != 0:
        return ""
    return completed.stdout.decode(errors="replace").strip()


def _extension_for_codec(codec):
    """Map an ffprobe codec name to the file extension expected for it."""
    if codec == "aac":
        return ".m4a"
    if codec == "vorbis":
        return ".ogg"
    if codec.startswith("pcm_"):
        # Every PCM variant (pcm_s16le, pcm_s24le, ...) belongs in a .wav file.
        return ".wav"
    return f".{codec}"


# Recursively find audio files and report (optionally fix) wrong extensions.
def process_directory(directory, *, dry_run=True):
    """Check that audio files below *directory* carry the extension of their codec.

    Every ``.mp3`` / ``.wav`` / ``.ogg`` / ``.aac`` file is probed with
    ffprobe. When the extension expected for the detected codec differs from
    the current one (compared case-insensitively), a message is printed. Files
    that cannot be probed are reported and skipped.

    Args:
        directory: Root directory to walk recursively.
        dry_run: When True (the default) nothing is renamed and mismatches are
            only reported. When False the file is renamed, unless a file with
            the new name already exists.
    """
    for root, dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(('.mp3', '.wav', '.ogg', '.aac')):
                continue
            file_path = os.path.join(root, file)

            codec = _probe_codec(file_path)
            if not codec:
                print(f"Skipped '{file}': could not determine the audio codec")
                continue

            stem, extension = os.path.splitext(file)
            new_extension = _extension_for_codec(codec)
            if extension.lower() == new_extension.lower():
                continue

            if not dry_run:
                new_file_path = os.path.join(root, stem + new_extension)
                if os.path.exists(new_file_path):
                    # Never overwrite another file with the rename.
                    print(f"Skipped '{file}': '{stem + new_extension}' already exists")
                    continue
                os.rename(file_path, new_file_path)
            print(f"Modified extension of '{file}' to '{new_extension}'")
    print("Extension modification complete.")
def main():
    """Parse the command line and check the directory given as its argument."""
    parser = argparse.ArgumentParser()
    parser.add_argument("directory", help="目录路径")
    args = parser.parse_args()
    process_directory(args.directory)


# Only parse arguments when run as a script, so importing this module has no
# side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment