Last active
December 9, 2023 00:16
-
-
Save shiguruikai/1b549ebbc7c902628f3cbaa3a305073f to your computer and use it in GitHub Desktop.
rvcやsvc用の学習データを作成するためのスクリプト。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Requires -Version 7.1

<#
.SYNOPSIS
Creates training data for RVC / SVC voice models from a directory of audio files.

.DESCRIPTION
Each file found directly in SrcDir is converted with ffmpeg to a mono WAV file in DstDir.
The files in DstDir are then loudness-normalized, split at silent passages, stripped of
silence, filtered by length (very short clips are deleted, short clips are concatenated)
and finally renamed to zero-padded sequential numbers.

The script invokes "ffmpeg" and "ffprobe" by name, so both must be resolvable from PATH.
#>
[CmdletBinding()]
param (
    # Directory containing the source audio files (not searched recursively).
    [Parameter(Mandatory)]
    [string]
    $SrcDir,

    # Directory that receives the generated WAV files; created when it does not exist.
    [Parameter(Mandatory)]
    [string]
    $DstDir,

    # Output sampling rate in Hz (passed to ffmpeg -ar).
    [Parameter()]
    [int]
    $SamplingRate = 44100,

    # Channel used for the mono downmix: "left" (FC+FL), "right" (FC+FR) or "mix" (FC + half of FL and FR).
    # "rigth" is the original misspelling and is still accepted for backward compatibility.
    [Parameter()]
    [ValidateSet("left", "right", "rigth", "mix")]
    [string]
    $MonoChannel = "left",

    # Cut-off frequency of the high-pass filter (highpass f=).
    [Parameter()]
    [double]
    $HPF = 80,

    # Value of the high-pass filter's "n" (normalize) option.
    [Parameter()]
    [boolean]
    $NormalizeDC = $true,

    # Denoising strength passed to anlmdn (s=).
    # https://ffmpeg.org/ffmpeg-filters.html#anlmdn
    [Parameter()]
    [ValidateRange(0.00001, 10000)]
    [double]
    $NoiseReductionLevel = 0.01,

    # loudnorm integrated loudness target (I=).
    [Parameter()]
    [int]
    $LoudnormI = -14,

    # loudnorm true peak target (TP=).
    [Parameter()]
    [int]
    $LoudnormTP = -2,

    # loudnorm loudness range target (LRA=).
    [Parameter()]
    [int]
    $LoudnormLRA = 11,

    # Silence threshold in dB used by silencedetect / silenceremove.
    # The range is negative: the previous 0..97 range rejected every value at or below the default of -40.
    [Parameter()]
    [ValidateRange(-97, 0)]
    [int]
    $SilenceThreshold = -40,

    # Minimum length in seconds for a quiet passage to count as silence.
    [Parameter()]
    [double]
    $MinSilenceDuration = 0.2,

    # Minimum length in seconds of every segment produced by the silence split.
    [Parameter()]
    [double]
    $MinSplitDuration = 2.0,

    # Files whose duration is at most this many seconds are deleted.
    [Parameter()]
    [double]
    $SkipTotalDuration = 0.4,

    # Files longer than SkipTotalDuration but at most this many seconds are concatenated
    # until the combined length reaches this value.
    [Parameter()]
    [double]
    $ConcatDuration = 2,

    # Codec of the final files (written by the silence-removal step).
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $OutputCodec = "pcm_s16le",

    # Codec of the intermediate files written by the first two steps.
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $TempCodec = "pcm_f32le",

    # Log level passed to ffmpeg (-v).
    [Parameter()]
    [ValidateSet("quiet", "panic", "fatal", "error", "warning", "info", "verbose", "debug", "trace")]
    [string]
    $FFmpegLogLevel = "error"
)

$ErrorActionPreference = 'Stop'

# The processing steps compare against the historical spelling "rigth",
# so the correctly spelled value is mapped onto it here.
if ($MonoChannel -eq 'right') {
    $MonoChannel = 'rigth'
}

$SrcDir = Resolve-Path $SrcDir
# New-Item instead of mkdir: outside Windows "mkdir" is the native tool and does not understand -Force.
$DstDir = (New-Item -ItemType Directory -Path $DstDir -Force).FullName

if (Get-ChildItem -LiteralPath $DstDir) {
    Write-Host "[warn] File exists in destination directory." -ForegroundColor Yellow
}

# Number of logical cores. Environment.ProcessorCount is a single integer on every platform,
# whereas Win32_Processor is Windows-only and yields one value per CPU socket.
$ncpus = [Environment]::ProcessorCount

# Number of parallel workers: about 80% of the logical cores, but at least one.
$parallelLimit = [Math]::Max(1, [int]($ncpus * 0.8))
# Step 1. Downmix to mono, apply the high-pass filter and reduce noise.
# Every file in $SrcDir is converted in parallel into "<BaseName>.wav" inside $DstDir.
$i = 0
$inputFiles = Get-ChildItem $SrcDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $source = $_

    # Script-scope values have to be imported explicitly into the parallel runspace.
    $targetDir = $using:DstDir
    $rate = $using:SamplingRate
    $channel = $using:MonoChannel
    $cutoff = $using:HPF
    $normalize = $using:NormalizeDC
    $strength = $using:NoiseReductionLevel
    $codec = $using:TempCodec
    $logLevel = $using:FFmpegLogLevel

    # Pick the pan expression for the requested channel; anything other than left/rigth mixes both sides.
    $pan = switch ($channel) {
        'left' { 'pan=mono|FC=FC+FL' }
        'rigth' { 'pan=mono|FC=FC+FR' }
        default { 'pan=mono|FC=FC+FL*0.5+FR*0.5' }
    }

    $filterChain = @(
        $pan
        "highpass=f=$($cutoff):n=$([int]$normalize)"
        "anlmdn=s=$($strength)"
    ) -join ','

    $ffmpegArgs = @(
        '-v', $logLevel
        '-i', $source.FullName
        '-af', $filterChain
        '-ac', 1
        '-ar', $rate
        '-acodec', $codec
        (Join-Path $targetDir "$($source.BaseName).wav")
        '-y'
    )
    & ffmpeg @ffmpegArgs 2>&1 | Write-Host

    # Emit the processed item so the progress stage downstream can count it.
    $source
} | ForEach-Object {
    $i++
    Write-Progress -Activity "Step 1. Mono & HPF & Noise Reduction" -Status "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 2. Loudness normalization (two-pass loudnorm).
# Pass 1 measures each file, pass 2 applies loudnorm with the measured values and
# replaces the file in $DstDir with the normalized version.
$i = 0
$inputFiles = Get-ChildItem -LiteralPath $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $I = $using:LoudnormI
    $TP = $using:LoudnormTP
    $LRA = $using:LoudnormLRA
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $TempCodec = $using:TempCodec
    $inputFile = $_

    # Pass 1: loudnorm prints its measurements as a JSON object after a "[Parsed_loudnorm...]" tag.
    $measured = ffmpeg `
        -i $inputFile.FullName `
        -af "loudnorm=I=$($I):TP=$($TP):LRA=$($LRA):print_format=json" `
        -f null - 2>&1 |
        Out-String |
        Select-String "\[Parsed_loudnorm.*?\]\s*(\{[\s\S]*?\})" |
        ForEach-Object { $_.Matches.Groups[1].Value } |
        ConvertFrom-Json

    # Without measurements the second pass cannot run; keep the file instead of destroying it.
    if (-not $measured) {
        Write-Warning "Step 2: could not measure loudness of '$($inputFile.FullName)'; file left unchanged."
        return $inputFile
    }

    $loudnormFilter = "loudnorm=I=$($I):TP=$($TP):LRA=$($LRA)" `
        + ":measured_I=$($measured.input_i):measured_TP=$($measured.input_tp):measured_LRA=$($measured.input_lra)" `
        + ":measured_thresh=$($measured.input_thresh):offset=$($measured.target_offset)"

    # Pass 2: write the normalized audio next to the original.
    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af $loudnormFilter `
        -ac 1 -ar $SamplingRate -acodec $TempCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the original once ffmpeg has succeeded and actually produced the output.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Write-Warning "Step 2: loudness normalization failed for '$($inputFile.FullName)'; file left unchanged."
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        return $inputFile
    }

    # -LiteralPath keeps names containing wildcard characters such as [ ] working.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 2. Loudness Normalization" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 3. Split at silences.
# Each file is cut in the middle of detected silences, provided every resulting
# segment is at least $MinSplitDuration seconds long. Split files replace the original.
$i = 0
$inputFiles = Get-ChildItem -LiteralPath $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $MinSplitDuration = $using:MinSplitDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $inputFile = $_

    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 |
        ConvertFrom-Json
    $totalDuration = [double]$audioInfo.format.duration

    # Files shorter than the minimum segment length are not split.
    if ($totalDuration -lt $MinSplitDuration) {
        return $inputFile
    }

    # Candidate cut points: the midpoint of every detected silence.
    # The values are matched by name rather than by token position so that extra
    # text on the log line cannot shift them.
    [double[]]$silenceTimes = ffmpeg `
        -i $inputFile.FullName `
        -af "silencedetect=n=$($SilenceThreshold)dB:d=$($MinSilenceDuration)" `
        -f null - 2>&1 |
        ForEach-Object {
            if ("$_" -match 'silence_end:\s*(\S+)\s*\|\s*silence_duration:\s*(\S+)') {
                ([double]$Matches[1]) - (([double]$Matches[2]) / 2)
            }
        }

    # Keep a cut point only when both the segment before it and the remainder
    # after it are at least the minimum length.
    [double[]]$segmentTimes = @()
    $previousCut = 0.0
    foreach ($st in $silenceTimes) {
        if (($st - $previousCut) -ge $MinSplitDuration -and ($totalDuration - $st) -ge $MinSplitDuration) {
            $segmentTimes += $st
            $previousCut = $st
        }
    }

    if (-not $segmentTimes.Count) {
        return $inputFile
    }

    # "%" is special in the segment file name pattern, so literal ones in the name are doubled.
    $patternBaseName = $inputFile.BaseName.Replace('%', '%%')
    $outputFilePath = Join-Path $DstDir "$($patternBaseName)_%04d$($inputFile.Extension)"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -f segment -segment_times ($segmentTimes -join ',') `
        -reset_timestamps 1 `
        -map 0:a `
        -c:a copy $outputFilePath `
        -y 2>&1 | Write-Host

    # Never delete the unsplit original when the split itself failed.
    if ($LASTEXITCODE -ne 0) {
        Write-Warning "Step 3: splitting failed for '$($inputFile.FullName)'; original file kept."
        return $inputFile
    }

    # -LiteralPath keeps names containing wildcard characters such as [ ] working.
    Remove-Item -LiteralPath $inputFile.FullName
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 3. Split By Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 4. Remove silence.
# Each file is re-encoded with $OutputCodec through a chain of silenceremove filters
# and then replaces the original in $DstDir.
$i = 0
$inputFiles = Get-ChildItem -LiteralPath $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $OutputCodec = $using:OutputCodec
    $inputFile = $_

    # Removes silence at the start of the stream.
    $removeStart = "silenceremove=start_periods=1:start_silence=0.02:start_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"
    $filters = @(
        # Remove all digital silence.
        "silenceremove=window=0:detection=peak:stop_mode=all:start_mode=all:stop_periods=-1:stop_threshold=0",
        # Remove leading silence.
        $removeStart,
        # Remove trailing silence (reverse -> remove leading silence -> reverse back).
        "areverse", $removeStart, "areverse",
        # Remove every silence that is below the threshold for at least the minimum duration.
        "silenceremove=stop_periods=-1:stop_duration=$($MinSilenceDuration):stop_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"
    )

    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af ($filters -join ',') `
        -ac 1 -ar $SamplingRate -acodec $OutputCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the original once ffmpeg has succeeded and actually produced the output.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Write-Warning "Step 4: silence removal failed for '$($inputFile.FullName)'; file left unchanged."
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        return $inputFile
    }

    # -LiteralPath keeps names containing wildcard characters such as [ ] working.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 4. Remove Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 5. Measure the duration of every file in $DstDir.
# Produces $removeTargets (duration <= $SkipTotalDuration) and $concatTargets
# (duration > $SkipTotalDuration and <= $ConcatDuration), both ordered by full path.
$audioInfoList = [System.Collections.Generic.List[psobject]]::new()
$i = 0
$inputFiles = Get-ChildItem -LiteralPath $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $inputFile = $_
    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 |
        ConvertFrom-Json

    return [PSCustomObject]@{
        FullName = $inputFile.FullName
        BaseName = $inputFile.BaseName
        Duration = [double]$audioInfo.format.duration
    }
} | ForEach-Object {
    # Add() keeps the generic list; "+=" would replace it with a freshly copied array on every item.
    $audioInfoList.Add($_)
    $i++
    Write-Progress "Step 5. Check audio duration" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}

$audioInfoList = @($audioInfoList | Sort-Object FullName)

# @() guarantees arrays, so .Count and indexing behave the same for zero, one or many matches.
$removeTargets = @($audioInfoList | Where-Object { $_.Duration -le $SkipTotalDuration })
$concatTargets = @($audioInfoList | Where-Object { $_.Duration -gt $SkipTotalDuration -and $_.Duration -le $ConcatDuration })
# Step 6. Delete the very short files collected in $removeTargets.
$i = 0
$removeTargets | ForEach-Object {
    $i++
    # -LiteralPath: with the positional (wildcard) path, a name containing [ ] is treated as a pattern
    # and the file is silently left behind.
    Remove-Item -LiteralPath $_.FullName
    Write-Progress "Step 6. Remove very short files" "$i / $($removeTargets.Count)" -PercentComplete ($i / $removeTargets.Count * 100)
}
# Step 7. Concatenate short files.
# Consecutive entries of $concatTargets are joined, separated by 20 ms of silence,
# until each group reaches $ConcatDuration seconds; the joined inputs are then deleted.
$silentFilePath = Join-Path $DstDir "silent.temp.wav"
try {
    # Create the silent spacer. It is stream-copied together with the clips, so it must use
    # the same codec as they do ($OutputCodec) rather than the WAV default.
    ffmpeg `
        -v $FFmpegLogLevel `
        -f lavfi `
        -i "anullsrc=cl=mono:r=$($SamplingRate)" `
        -t 0.02 `
        -c:a $OutputCodec `
        $silentFilePath `
        -y 2>&1 | Write-Host

    # Builds one "file '...'" line of the concat list; a single quote inside the path
    # has to be written as '\'' in that syntax.
    $toConcatEntry = {
        param([string]$path)
        "file '" + $path.Replace("'", "'\''") + "'"
    }
    $silentEntry = & $toConcatEntry $silentFilePath

    $filePaths = [System.Collections.Generic.List[string]]::new()
    $i = 0
    while ($true) {
        # Collect the next group of files until the target duration is reached.
        $filePaths.Clear()
        $totalDuration = 0.0
        for (; $totalDuration -lt $ConcatDuration -and $i -lt $concatTargets.Count; $i++) {
            $t = $concatTargets[$i]
            $filePaths.Add($t.FullName)
            $totalDuration += $t.Duration
        }
        if (-not $filePaths.Count) {
            break
        }

        $outputFilePath = Join-Path $DstDir "$(Split-Path $filePaths[0] -LeafBase)_$($i).concat.wav"
        # Interleave the clips with the silent spacer.
        $fileList = ($filePaths | ForEach-Object { & $toConcatEntry $_ }) -join "`n$($silentEntry)`n"
        $fileList | ffmpeg `
            -v $FFmpegLogLevel `
            -protocol_whitelist 'pipe,file' `
            -safe 0 `
            -f concat `
            -i pipe:0 `
            -c:a copy $outputFilePath `
            -y 2>&1 | Write-Host

        if ($LASTEXITCODE -eq 0) {
            # -LiteralPath keeps names containing wildcard characters such as [ ] working.
            Remove-Item -LiteralPath $filePaths.ToArray()
        }
        else {
            # Keep the inputs when the concatenation failed and drop any partial output.
            Write-Warning "Step 7: concatenation into '$outputFilePath' failed; source files kept."
            Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        }

        Write-Progress "Step 7. Concat short files" "$i / $($concatTargets.Count)" -PercentComplete ($i / $concatTargets.Count * 100)
    }
}
finally {
    # Delete the silent spacer file again.
    Remove-Item -LiteralPath $silentFilePath -ErrorAction SilentlyContinue
}
# Step 8. Rename the results to zero-padded sequential numbers.
# Pass (a) first gives every file a ".temp" infix so that pass (b) can never collide
# with a file that already carries one of the target names.
$dstFiles = Get-ChildItem $DstDir -File
$i = 0
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $temporaryName = "$($file.BaseName).temp$($file.Extension)"
    $file | Rename-Item -NewName $temporaryName
    Write-Progress -Activity "Step 8a. Rename files" -Status "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}

# Pass (b): number the files in name order, padded to the digit count of the total.
$dstFiles = Get-ChildItem $DstDir -File
$padWidth = $dstFiles.Count.ToString().Length
$i = 0
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $sequentialName = $i.ToString("D$padWidth") + $file.Extension
    $file | Rename-Item -NewName $sequentialName
    Write-Progress -Activity "Step 8b. Rename files" -Status "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment