Skip to content

Instantly share code, notes, and snippets.

@shiguruikai
Last active December 9, 2023 00:16
Show Gist options
  • Save shiguruikai/1b549ebbc7c902628f3cbaa3a305073f to your computer and use it in GitHub Desktop.
Save shiguruikai/1b549ebbc7c902628f3cbaa3a305073f to your computer and use it in GitHub Desktop.
rvcやsvc用の学習データを作成するためのスクリプト。
#Requires -Version 7.1
# Builds training data (e.g. for RVC / SVC) from a directory of audio files.
# The files in SrcDir are converted with ffmpeg into DstDir and then processed in place there:
# mono + high-pass + noise reduction, loudness normalization, splitting at silences,
# silence removal, deletion of very short files, concatenation of short files and a final
# sequential rename.
[CmdletBinding()]
param (
    # Directory whose files are used as input.
    [Parameter(Mandatory)]
    [string]
    $SrcDir,
    # Directory that receives the processed files (created when missing).
    [Parameter(Mandatory)]
    [string]
    $DstDir,
    # Output sampling rate passed to ffmpeg (-ar).
    [Parameter()]
    [int]
    $SamplingRate = 44100,
    # Channel used for the mono downmix. "rigth" is the historical misspelling of "right"
    # and is still accepted so that existing callers keep working.
    [Parameter()]
    [ValidateSet("left", "right", "rigth", "mix")]
    [string]
    $MonoChannel = "left",
    # Cut-off frequency of the high-pass filter (highpass f=...).
    [Parameter()]
    [double]
    $HPF = 80,
    # Passed as the "n" option of the highpass filter (1 when true, 0 when false).
    [Parameter()]
    [boolean]
    $NormalizeDC = $true,
    # Strength ("s") of the anlmdn denoiser.
    # https://ffmpeg.org/ffmpeg-filters.html#anlmdn
    [Parameter()]
    [ValidateRange(0.00001, 10000)]
    [double]
    $NoiseReductionLevel = 0.01,
    # loudnorm target integrated loudness (I).
    [Parameter()]
    [int]
    $LoudnormI = -14,
    # loudnorm target true peak (TP).
    [Parameter()]
    [int]
    $LoudnormTP = -2,
    # loudnorm target loudness range (LRA).
    [Parameter()]
    [int]
    $LoudnormLRA = 11,
    # Silence threshold in dB, used by silencedetect and silenceremove.
    # The value is written as "<value>dB", so it has to be zero or negative.
    [Parameter()]
    [ValidateRange(-97, 0)]
    [int]
    $SilenceThreshold = -40,
    # Minimum length of a silence, used by silencedetect (d) and silenceremove (stop_duration).
    [Parameter()]
    [double]
    $MinSilenceDuration = 0.2,
    # Minimum length every segment must keep when a file is split at silences.
    [Parameter()]
    [double]
    $MinSplitDuration = 2.0,
    # Files whose duration is less than or equal to this value are deleted.
    [Parameter()]
    [double]
    $SkipTotalDuration = 0.4,
    # Files whose duration is less than or equal to this value are concatenated until the
    # accumulated duration reaches it.
    [Parameter()]
    [double]
    $ConcatDuration = 2,
    # Codec of the final files.
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $OutputCodec = "pcm_s16le",
    # Codec of the intermediate files.
    [Parameter()]
    [ValidateSet("pcm_s16le", "pcm_s24le", "pcm_s32le", "pcm_f32le", "pcm_f64le")]
    [string]
    $TempCodec = "pcm_f32le",
    # Log level passed to ffmpeg (-v).
    [Parameter()]
    [ValidateSet("quiet", "panic", "fatal", "error", "warning", "info", "verbose", "debug", "trace")]
    [string]
    $FFmpegLogLevel = "error"
)
$ErrorActionPreference = 'Stop'

# The rest of the script compares against the legacy spelling, so map the correct
# spelling onto it instead of silently falling through to the "mix" branch.
if ($MonoChannel -eq 'right') {
    $MonoChannel = 'rigth'
}

$SrcDir = Resolve-Path $SrcDir
# New-Item instead of "mkdir": on non-Windows systems "mkdir" is the native command and
# returns no directory object.
$DstDir = (New-Item -ItemType Directory -Path $DstDir -Force).FullName
if (Get-ChildItem $DstDir) {
    Write-Host "[warn] File exists in destination directory." -ForegroundColor Yellow
}

# Number of logical cores. [Environment]::ProcessorCount works on every platform and is a
# single number even on multi-socket machines (Win32_Processor returns one entry per socket).
$ncpus = [Environment]::ProcessorCount
# Number of ffmpeg jobs run in parallel (about 80 % of the cores, at least one).
$parallelLimit = [Math]::Max(1, [int]($ncpus * 0.8))
# Step 1. Downmix to mono, apply the high-pass filter and the noise reduction.
# Every file of $SrcDir is written to $DstDir as "<BaseName>.wav" using the temporary codec.
$i = 0
$inputFiles = Get-ChildItem $SrcDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    # Script-level variables have to be imported explicitly into the parallel runspace.
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $MonoChannel = $using:MonoChannel
    $HPF = $using:HPF
    $NormalizeDC = $using:NormalizeDC
    $NoiseReductionLevel = $using:NoiseReductionLevel
    $TempCodec = $using:TempCodec
    $FFmpegLogLevel = $using:FFmpegLogLevel

    $sourceFile = $_
    $targetPath = Join-Path $DstDir "$($sourceFile.BaseName).wav"

    # Pick the pan expression for the requested channel; anything else is the 50/50 mix.
    $panFilter = switch ($MonoChannel) {
        'left' { 'pan=mono|FC=FC+FL' }
        'rigth' { 'pan=mono|FC=FC+FR' }
        default { 'pan=mono|FC=FC+FL*0.5+FR*0.5' }
    }

    $filterChain = @(
        $panFilter
        "highpass=f=$($HPF):n=$([int]$NormalizeDC)"
        "anlmdn=s=$($NoiseReductionLevel)"
    ) -join ','

    $ffmpegArgs = @(
        '-v', $FFmpegLogLevel
        '-i', $sourceFile.FullName
        '-af', $filterChain
        '-ac', 1, '-ar', $SamplingRate, '-acodec', $TempCodec
        $targetPath
        '-y'
    )
    ffmpeg @ffmpegArgs 2>&1 | Write-Host

    # Emit the processed item so the outer pipeline can count it.
    $sourceFile
} | ForEach-Object {
    $i++
    Write-Progress `
        -Activity "Step 1. Mono & HPF & Noise Reduction" `
        -Status "$i / $($inputFiles.Count)" `
        -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 2. Loudness normalization (two-pass loudnorm).
# Pass 1 measures each file, pass 2 applies loudnorm with the measured values and the
# result replaces the file as "<BaseName>.wav".
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $targetI = $using:LoudnormI
    $targetTP = $using:LoudnormTP
    $targetLRA = $using:LoudnormLRA
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $TempCodec = $using:TempCodec
    $inputFile = $_

    # Pass 1: let loudnorm print its measurements as JSON and turn them into the
    # filter string for pass 2.
    $loudnormFilter = ffmpeg `
        -i $inputFile.FullName `
        -af "loudnorm=I=$($targetI):TP=$($targetTP):LRA=$($targetLRA):print_format=json" `
        -f null - 2>&1 `
    | Out-String `
    | Select-String "\[Parsed_loudnorm.*?\]\s*(\{[\s\S]*?\})" `
    | ForEach-Object { $_.Matches.Groups[1].Value } `
    | ConvertFrom-Json `
    | ForEach-Object {
        "loudnorm=I=$($targetI):TP=$($targetTP):LRA=$($targetLRA)" `
            + ":measured_I=$($_.input_i):measured_TP=$($_.input_tp):measured_LRA=$($_.input_lra)" `
            + ":measured_thresh=$($_.input_thresh):offset=$($_.target_offset)"
    }

    # Without measurements there is nothing to apply; keep the file instead of deleting it.
    if (-not $loudnormFilter) {
        Write-Warning "Step 2: could not measure the loudness of '$($inputFile.FullName)'; the file was left unchanged."
        return $inputFile
    }

    # Pass 2: write the normalized audio to a temporary file.
    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af $loudnormFilter `
        -ac 1 -ar $SamplingRate -acodec $TempCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the original when ffmpeg really produced the normalized file.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        Write-Warning "Step 2: loudness normalization of '$($inputFile.FullName)' failed; the file was left unchanged."
        return $inputFile
    }

    # -LiteralPath keeps names containing [ ] from being treated as wildcard patterns.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 2. Loudness Normalization" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 3. Split at silences.
# Each file is cut in the middle of detected silences, as long as every resulting segment
# stays at least $MinSplitDuration long. The original is removed once the segments exist.
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $MinSplitDuration = $using:MinSplitDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $inputFile = $_

    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 `
    | ConvertFrom-Json
    $totalDuration = [double]$audioInfo.format.duration

    # Files shorter than the minimum segment length are not split.
    if ($totalDuration -lt $MinSplitDuration) {
        return $inputFile
    }

    # Candidate cut points: the middle of every detected silence.
    # The values are picked by label rather than by token position, so a different log
    # prefix does not shift the fields.
    [double[]]$silenceTimes = ffmpeg `
        -i $inputFile.FullName `
        -af "silencedetect=n=$($SilenceThreshold)dB:d=$($MinSilenceDuration)" `
        -f null - 2>&1 `
    | Select-String "silence_end" `
    | ForEach-Object {
        if ($_.Line -match 'silence_end:\s*(\S+)\s*\|\s*silence_duration:\s*(\S+)') {
            $silenceEnd = [double]$Matches[1]
            $silenceDuration = [double]$Matches[2]
            $silenceEnd - ($silenceDuration / 2)
        }
    }

    # Keep a cut point only when both the segment before it and the remainder after it
    # are at least $MinSplitDuration long.
    [double[]]$segmentTimes = @()
    $lastCut = 0.0
    foreach ($st in $silenceTimes) {
        if (($st - $lastCut) -ge $MinSplitDuration `
                -and ($totalDuration - $st) -ge $MinSplitDuration) {
            $segmentTimes += $st
            $lastCut = $st
        }
    }
    if (-not $segmentTimes.Count) {
        return $inputFile
    }

    # "%" is doubled so that only the trailing %04d is treated as the segment counter.
    $safeBaseName = $inputFile.BaseName.Replace('%', '%%')
    $outputFilePath = Join-Path $DstDir "$($safeBaseName)_%04d$($inputFile.Extension)"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -f segment -segment_times ($segmentTimes -join ',') `
        -reset_timestamps 1 `
        -map 0:a `
        -c:a copy $outputFilePath `
        -y 2>&1 | Write-Host

    # Do not throw the original away when the split did not succeed.
    if ($LASTEXITCODE -ne 0) {
        Write-Warning "Step 3: splitting '$($inputFile.FullName)' failed; the file was kept."
        return $inputFile
    }

    # -LiteralPath keeps names containing [ ] from being treated as wildcard patterns.
    Remove-Item -LiteralPath $inputFile.FullName
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 3. Split By Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 4. Remove silence.
# Each file is re-encoded with the output codec through a chain of silenceremove filters
# and the result replaces the file as "<BaseName>.wav".
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $DstDir = $using:DstDir
    $SamplingRate = $using:SamplingRate
    $SilenceThreshold = $using:SilenceThreshold
    $MinSilenceDuration = $using:MinSilenceDuration
    $FFmpegLogLevel = $using:FFmpegLogLevel
    $OutputCodec = $using:OutputCodec
    $inputFile = $_

    # Removes the silence at the start of the stream.
    $removeStart = "silenceremove=start_periods=1:start_silence=0.02:start_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"
    $filters = @(
        # Remove all digital silence.
        "silenceremove=window=0:detection=peak:stop_mode=all:start_mode=all:stop_periods=-1:stop_threshold=0",
        # Remove the leading silence.
        $removeStart,
        # Remove the trailing silence (reverse -> remove leading silence -> reverse again).
        "areverse", $removeStart, "areverse",
        # Remove every silence below the threshold that lasts at least the minimum duration.
        "silenceremove=stop_periods=-1:stop_duration=$($MinSilenceDuration):stop_threshold=$($SilenceThreshold)dB:detection=rms:window=0.01"
    )

    $outputFilePath = Join-Path $DstDir "$($inputFile.BaseName).temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -i $inputFile.FullName `
        -af ($filters -join ',') `
        -ac 1 -ar $SamplingRate -acodec $OutputCodec `
        $outputFilePath `
        -y 2>&1 | Write-Host

    # Only replace the original when ffmpeg really produced the processed file.
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path -LiteralPath $outputFilePath)) {
        Remove-Item -LiteralPath $outputFilePath -ErrorAction SilentlyContinue
        Write-Warning "Step 4: silence removal of '$($inputFile.FullName)' failed; the file was left unchanged."
        return $inputFile
    }

    # -LiteralPath keeps names containing [ ] from being treated as wildcard patterns.
    Remove-Item -LiteralPath $inputFile.FullName
    Rename-Item -LiteralPath $outputFilePath -NewName "$($inputFile.BaseName).wav"
    return $inputFile
} | ForEach-Object {
    $i++
    Write-Progress "Step 4. Remove Silence" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# Step 5. Measure the duration of every file.
# Produces $removeTargets (duration <= $SkipTotalDuration) and $concatTargets
# ($SkipTotalDuration < duration <= $ConcatDuration), both sorted by full path.
$audioInfoList = [System.Collections.Generic.List[psobject]]::new()
$i = 0
$inputFiles = Get-ChildItem $DstDir -File
$inputFiles | ForEach-Object -ThrottleLimit $parallelLimit -Parallel {
    $inputFile = $_
    $audioInfo = ffprobe `
        -v error `
        -i $inputFile.FullName `
        -print_format json -show_entries format=duration 2>&1 `
    | ConvertFrom-Json
    return [PSCustomObject]@{
        FullName = $inputFile.FullName
        BaseName = $inputFile.BaseName
        Duration = [double]$audioInfo.format.duration
    }
} | ForEach-Object {
    # Add() keeps the generic list; "+=" would replace it with an array that is
    # re-allocated for every element.
    $audioInfoList.Add($_)
    $i++
    Write-Progress "Step 5. Check audio duration" "$i / $($inputFiles.Count)" -PercentComplete ($i / $inputFiles.Count * 100)
}
# @() guarantees arrays, so .Count and indexing behave the same for 0, 1 or many matches.
$audioInfoList = @($audioInfoList | Sort-Object FullName)
$removeTargets = @($audioInfoList | Where-Object { $_.Duration -le $SkipTotalDuration })
$concatTargets = @($audioInfoList | Where-Object { $_.Duration -gt $SkipTotalDuration -and $_.Duration -le $ConcatDuration })
# Step 6. Delete the files that are too short to be useful ($removeTargets).
$i = 0
$removeTargets | ForEach-Object {
    $i++
    # -LiteralPath: a file name containing [ ] must not be interpreted as a wildcard pattern.
    Remove-Item -LiteralPath $_.FullName
    Write-Progress "Step 6. Remove very short files" "$i / $($removeTargets.Count)" -PercentComplete ($i / $removeTargets.Count * 100)
}
# Step 7. Concatenate short files.
# Consecutive entries of $concatTargets are grouped until the accumulated duration reaches
# $ConcatDuration. Each group is joined (stream copy) with a 0.02 s silent spacer between
# the clips and written as "<first BaseName>_<index>.concat.wav"; the group's source files
# are removed afterwards.
try {
    # Create the silent spacer. It is stream-copied together with the clips, so it has to
    # use the same codec as they do ($OutputCodec).
    $silentFilePath = Join-Path $DstDir "silent.temp.wav"
    ffmpeg `
        -v $FFmpegLogLevel `
        -f lavfi `
        -i "anullsrc=cl=mono:r=$($SamplingRate)" `
        -t 0.02 `
        -acodec $OutputCodec `
        $silentFilePath `
        -y 2>&1 | Write-Host

    # Builds one "file '...'" line for the concat demuxer; a single quote inside the path
    # has to be written as '\'' there.
    $toConcatEntry = {
        param([string]$path)
        $escaped = $path.Replace("'", "'\''")
        "file '$escaped'"
    }
    $silentEntry = & $toConcatEntry $silentFilePath

    # Force an array so that .Count and indexing also work for a single target.
    $targets = @($concatTargets)
    $filePaths = [System.Collections.Generic.List[string]]::new()
    $i = 0
    while ($true) {
        # Collect the next group of files.
        $filePaths.Clear()
        $totalDuration = 0.0
        for (; $totalDuration -lt $ConcatDuration -and $i -lt $targets.Count; $i++) {
            $t = $targets[$i]
            $filePaths.Add($t.FullName)
            $totalDuration += $t.Duration
        }
        if (-not $filePaths.Count) {
            break
        }

        $outputFilePath = Join-Path $DstDir "$(Split-Path $filePaths[0] -LeafBase)_$($i).concat.wav"
        # The list is fed to ffmpeg through stdin, with the spacer between every two clips.
        $fileList = ($filePaths | ForEach-Object { & $toConcatEntry $_ }) -join "`n$silentEntry`n"
        $fileList | ffmpeg `
            -v $FFmpegLogLevel `
            -protocol_whitelist 'pipe,file' `
            -safe 0 `
            -f concat `
            -i pipe:0 `
            -c:a copy $outputFilePath `
            -y 2>&1 | Write-Host

        if ($LASTEXITCODE -eq 0) {
            # -LiteralPath: names containing [ ] must not be treated as wildcard patterns.
            Remove-Item -LiteralPath $filePaths.ToArray()
        }
        else {
            # Keep the sources when the concatenated file could not be written.
            Write-Warning "Step 7: ffmpeg failed to create '$outputFilePath'; the source files were kept."
        }
        Write-Progress "Step 7. Concat short files" "$i / $($targets.Count)" -PercentComplete ($i / $targets.Count * 100)
    }
}
finally {
    # Delete the silent spacer again.
    if ($silentFilePath) {
        Remove-Item -LiteralPath $silentFilePath -ErrorAction SilentlyContinue
    }
}
# Step 8. Rename the results to zero-padded sequence numbers.
# 8a: give every file a ".temp" name first, so the numbered names assigned in 8b cannot
#     collide with a file that has not been renamed yet.
$i = 0
$dstFiles = Get-ChildItem $DstDir -File
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $temporaryName = "$($file.BaseName).temp$($file.Extension)"
    $file | Rename-Item -NewName $temporaryName
    Write-Progress -Activity "Step 8a. Rename files" -Status "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}

# 8b: number the files in name order, padded to the width of the total count.
$i = 0
$dstFiles = Get-ChildItem $DstDir -File
$padWidth = "$($dstFiles.Count)".Length
foreach ($file in ($dstFiles | Sort-Object Name)) {
    $i++
    $numberedName = $i.ToString("D$padWidth") + $file.Extension
    $file | Rename-Item -NewName $numberedName
    Write-Progress -Activity "Step 8b. Rename files" -Status "$i / $($dstFiles.Count)" -PercentComplete ($i / $dstFiles.Count * 100)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment