Created
April 5, 2025 15:12
-
-
Save anotherlab/3067de2646f2278930c35fddfdfa52d3 to your computer and use it in GitHub Desktop.
PowerShell script to convert VTT files to SRT
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reads VTT and writes SRT
<#
.SYNOPSIS
Converts every WebVTT (.vtt) file found under a path into a SubRip (.srt) file.

.PARAMETER localPath
Folder (or wildcard path) that is searched for *.vtt files. The path must
exist; validation fails up front instead of letting the script run and
silently convert nothing.
#>
param (
    [Parameter(Mandatory = $true, HelpMessage = 'Folder that contains the .vtt files to convert')]
    [ValidateScript({ Test-Path -Path $_ })]
    [string]$localPath
)
# Function to convert VTT to SRT
function Convert-VttToSrt {
    <#
    .SYNOPSIS
    Converts a single WebVTT file into a SubRip (.srt) file with the same base name.

    .DESCRIPTION
    Reads the file as UTF-8, checks that the first line starts with "WEBVTT",
    and then walks the remaining lines. Every cue timing line
    ("start --> end [settings]") becomes a numbered SRT cue; the lines that
    follow it, up to the next blank line, are passed through Update-NameFormat
    and written as the cue text. Anything else (cue identifiers, NOTE/STYLE
    blocks, header metadata) is ignored. Files that are empty or do not start
    with "WEBVTT" are skipped without writing any output.

    .PARAMETER filePath
    Path of the .vtt file. It is used literally, so names containing [ or ]
    are not interpreted as wildcards.
    #>
    param (
        [string]$filePath
    )

    # @() guarantees an array even when the file is empty or has a single line
    $file_data = @(Get-Content -LiteralPath $filePath -Encoding UTF8)

    # Verify that we are reading a WebVTT file
    if ($file_data.Count -eq 0 -or $file_data[0] -notlike 'WEBVTT*') {
        return
    }

    # Generate the destination filename
    $srtFilename = [io.path]::ChangeExtension($filePath, 'srt')

    # Holds the lines of the SRT file in output order
    $new_data = New-Object System.Collections.Generic.List[string]

    $fdl = $file_data.Count
    $lineIndex = 2      # skip the WEBVTT header line and the line after it
    $Counter = 1        # SRT cue numbers start at 1

    while ($lineIndex -lt $fdl) {
        # Split on any run of whitespace so extra spaces, tabs and trailing
        # cue settings (e.g. "align:start") do not hide a timing line
        $parts = @($file_data[$lineIndex].Trim() -split '\s+')

        if ($parts.Count -ge 3 -and $parts[1] -eq '-->') {
            # Convert both timestamps from VTT to SRT notation
            $stamps = foreach ($stamp in $parts[0], $parts[2]) {
                # VTT may omit the hours field; SRT always needs hh:mm:ss,mmm
                if ($stamp.Split(':').Length -eq 2) {
                    $stamp = "00:$stamp"
                }
                $stamp.Replace('.', ',')
            }

            $new_data.Add([string]$Counter)
            $Counter++
            $new_data.Add("$($stamps[0]) --> $($stamps[1])")
            $lineIndex++

            # Everything up to the next blank line (or end of file) is caption text
            while ($lineIndex -lt $fdl) {
                $caption = Update-NameFormat -inputString $file_data[$lineIndex]
                if ($caption -eq '') {
                    break
                }
                $new_data.Add($caption)
                $lineIndex++
            }

            # Blank separator line that terminates the SRT cue
            $new_data.Add('')
        }

        $lineIndex++
    }

    # Write out the converted data as UTF-8 so players read it correctly
    Set-Content -LiteralPath $srtFilename -Value $new_data -Encoding UTF8
}
function Convert-HtmlEntities {
    <#
    .SYNOPSIS
    Replaces a small set of common HTML entities with the characters they stand for.

    .PARAMETER inputString
    Text that may contain &lt; &gt; &quot; &#39; &apos; &nbsp; or &amp;.

    .OUTPUTS
    System.String. The input with those entities decoded; all other text is unchanged.
    #>
    param (
        [string]$inputString
    )
    $decodedString = $inputString

    # Ordered so the replacements run in a fixed sequence. '&amp;' must be
    # last: decoding it first would turn '&amp;lt;' into '&lt;' and then
    # wrongly into '<'.
    $htmlEntities = [ordered]@{
        '&lt;'   = '<'
        '&gt;'   = '>'
        '&quot;' = '"'
        '&#39;'  = "'"
        '&apos;' = "'"
        '&nbsp;' = ' '
        '&amp;'  = '&'
    }

    # Replace each entity with its corresponding character
    foreach ($entity in $htmlEntities.Keys) {
        $decodedString = $decodedString -replace [regex]::Escape($entity), $htmlEntities[$entity]
    }
    return $decodedString
}
function Update-NameFormat{
    <#
    .SYNOPSIS
    Rewrites a caption line that starts with a WebVTT voice tag as "Name: text".

    .DESCRIPTION
    A line beginning with "<v " has that prefix removed and the first ">" that
    follows replaced by ": ". A closing "</v>" at the very end of the line is
    then dropped, HTML entities are decoded through Convert-HtmlEntities, and
    surrounding whitespace is trimmed.

    .PARAMETER inputString
    One caption line from the VTT file.

    .OUTPUTS
    System.String. The cleaned-up caption line.
    #>
    param (
        [string]$inputString
    )

    $text = $inputString

    # Voice tag at the start of the line: "<v Name>caption"
    if ($text -match "^<v ") {
        # Drop the three matched characters "<v "
        $text = $text.Substring(3)

        # The first '>' closes the tag; swap it for the "Name: " separator
        $closeAt = $text.IndexOf('>')
        if ($closeAt -ge 0) {
            $text = $text.Remove($closeAt, 1).Insert($closeAt, ": ")
        }
    }

    # Strip a closing voice tag that ends the line
    $text = $text -replace "</v>$", ''

    # Decode HTML entities, then trim surrounding whitespace
    $decoded = Convert-HtmlEntities -inputString $text
    return $decoded.Trim()
}
# Turn non-terminating cmdlet errors (missing path, unreadable file, failed
# write) into terminating ones so the catch block below actually sees them
$ErrorActionPreference = 'Stop'

try
{
    # Get all matching .vtt files; @() keeps this an array even when there is
    # exactly one match or none at all
    $vttFiles = @(Get-ChildItem -Path $localPath -Filter "*.vtt" -File)

    # Get the count of files to process so we can display a progress bar
    $FileCount = $vttFiles.Count
    $CurrentFile = 0

    # Walk through the list of files
    foreach ($file in $vttFiles) {
        $percent = [int]($CurrentFile * 100 / $FileCount)
        $CurrentFile++
        Write-Progress -Activity "VTT to SRT" -PercentComplete $percent -Status "$($file.Name)"
        Convert-VttToSrt -filePath $file.FullName
    }

    # Remove the progress bar once every file has been handled
    Write-Progress -Activity "VTT to SRT" -Completed
} catch {
    Write-Host "Error: $_"
    exit 1
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment