Skip to content

Instantly share code, notes, and snippets.

@anotherlab
Created April 5, 2025 15:12
Show Gist options
  • Save anotherlab/3067de2646f2278930c35fddfdfa52d3 to your computer and use it in GitHub Desktop.
Save anotherlab/3067de2646f2278930c35fddfdfa52d3 to your computer and use it in GitHub Desktop.
PowerShell script to convert VTT files to SRT
# Reads VTT and writes SRT
#
# Script parameter:
#   localPath  (required) the path handed to Get-ChildItem when the script
#              looks for *.vtt files to convert.
param (
    [Parameter(Mandatory = $true)]
    [string]
    $localPath
)
# Function to convert VTT to SRT
function Convert-VttToSrt {
    <#
    .SYNOPSIS
    Converts one WebVTT file into a SubRip (.srt) file with the same base name.

    .DESCRIPTION
    Reads the file at -filePath. If its first line does not start with
    "WEBVTT" the function returns without writing anything. Otherwise every
    cue timing line ("start --> end", optionally followed by cue settings) is
    turned into a numbered SRT cue, and the following lines up to the next
    blank line are passed through Update-NameFormat and written as the caption.
    Lines that are not part of a cue (header lines, cue identifiers, NOTE
    blocks without a timing line) are skipped.

    .PARAMETER filePath
    Path of the .vtt file. The .srt file is written next to it.
    #>
    param (
        [string]$filePath
    )

    # Resolve to an absolute provider path: the .NET file APIs used below
    # resolve relative paths against the process directory, not the current
    # PowerShell location. -LiteralPath keeps '[' and ']' in file names from
    # being treated as wildcards.
    $fullPath = (Resolve-Path -LiteralPath $filePath -ErrorAction Stop).ProviderPath

    # WebVTT is defined as UTF-8, so read it explicitly as UTF-8 instead of
    # relying on the host's default encoding. ReadAllLines always returns an
    # array, even for an empty or single-line file.
    $file_data = [System.IO.File]::ReadAllLines($fullPath, [System.Text.Encoding]::UTF8)

    # Verify that we are reading a WebVTT file
    if ($file_data.Length -eq 0 -or $file_data[0] -notlike 'WEBVTT*') {
        return
    }

    # Generate the destination filename
    $srtFilename = [System.IO.Path]::ChangeExtension($fullPath, 'srt')

    # A VTT timestamp is [hh:]mm:ss.ttt; a timing line is
    # "<start> --> <end>" optionally followed by cue settings.
    $stamp = '(?:\d{2,}:)?\d{2}:\d{2}\.\d{3}'
    $cuePattern = '^\s*(?<start>' + $stamp + ')[ \t]+-->[ \t]+(?<end>' + $stamp + ')(?:[ \t].*)?$'

    # Create a new list to hold the converted data
    $new_data = New-Object System.Collections.Generic.List[string]

    $lineCount = $file_data.Length
    $index = 1          # line 0 is the WEBVTT signature
    $cueNumber = 1

    while ($index -lt $lineCount) {
        if ($file_data[$index] -match $cuePattern) {
            # Convert both timestamps to SRT form: comma before the
            # milliseconds and an explicit hours field. Only the timestamps
            # are rewritten; cue settings are dropped because SRT has no
            # equivalent.
            $times = foreach ($raw in @($Matches['start'], $Matches['end'])) {
                $srt = $raw.Replace('.', ',')
                if ($srt.Split(':').Length -eq 2) {
                    $srt = '00:' + $srt
                }
                $srt
            }

            $new_data.Add($cueNumber.ToString())
            $cueNumber++
            $new_data.Add($times[0] + ' --> ' + $times[1])
            $index++

            # Treat the following lines as caption text until the next
            # blank line or the end of the file.
            while ($index -lt $lineCount) {
                $caption = Update-NameFormat -inputString $file_data[$index]
                if ($caption -eq '') {
                    break
                }
                $new_data.Add($caption)
                $index++
            }

            # SRT cues are separated by a blank line
            $new_data.Add('')
        }
        $index++
    }

    # Write out the updated data to the SRT file as UTF-8 without a BOM,
    # independent of the PowerShell version's default output encoding.
    $utf8NoBom = New-Object System.Text.UTF8Encoding($false)
    [System.IO.File]::WriteAllLines($srtFilename, $new_data.ToArray(), $utf8NoBom)
}
function Convert-HtmlEntities {
    <#
    .SYNOPSIS
    Replaces a small set of HTML character entities with their characters.

    .PARAMETER inputString
    Text that may contain &lt; &gt; &quot; &apos; &nbsp; or &amp;.

    .OUTPUTS
    The input text with those entities decoded exactly once.
    #>
    param (
        [string]$inputString
    )
    $decodedString = $inputString

    # Define common HTML escape sequences and their replacements.
    # The table is ordered and "&amp;" comes last on purpose: decoding it
    # earlier would turn an escaped entity such as "&amp;lt;" into "&lt;"
    # and then into "<", i.e. decode the text twice.
    $htmlEntities = [ordered]@{
        "&lt;"   = "<"
        "&gt;"   = ">"
        "&quot;" = '"'
        "&apos;" = "'"
        "&nbsp;" = " "
        "&amp;"  = "&"
    }

    # Replace each entity with its corresponding character
    foreach ($entity in $htmlEntities.Keys) {
        $decodedString = $decodedString -replace [regex]::Escape($entity), $htmlEntities[$entity]
    }
    return $decodedString
}
function Update-NameFormat{
    <#
    .SYNOPSIS
    Rewrites one WebVTT caption line into plain SRT text.

    .DESCRIPTION
    A leading voice tag "<v Name>" (optionally with classes, "<v.loud Name>")
    becomes a "Name: " prefix, a closing "</v>" at the end of the line is
    removed, HTML entities are decoded through Convert-HtmlEntities, and the
    result is trimmed.

    .PARAMETER inputString
    A single caption line. An empty or missing value yields an empty string.
    #>
    param (
        [string]$inputString
    )
    # Opening voice tag at the start of the line, with optional ".class" part
    $vpattern = '^<v(?:\.[^\s>]+)?\s+'
    $trailingPattern = '</v>$'

    # Trim first so surrounding whitespace cannot hide the tags from the
    # anchored patterns above.
    $outputString = $inputString.Trim()

    # Does the string start with <v ?
    if ($outputString -match $vpattern) {
        # Remove the opening "<v " part, leaving "Name>text"
        $outputString = $outputString -replace $vpattern, ''
        # The first '>' closes the voice tag; everything before it is the name
        $index = $outputString.IndexOf('>')
        if ($index -ge 0) {
            $outputString = $outputString.Substring(0, $index) + ": " + $outputString.Substring($index + 1)
        }
    }

    # Remove any trailing </v>
    $outputString = $outputString -replace $trailingPattern, ''

    # Decode HTML entities
    $outputString = Convert-HtmlEntities -inputString $outputString
    return $outputString.Trim()
}
# Convert every .vtt file found under $localPath, showing a progress bar.
# Any failure stops the run, prints the error and exits with code 1.
$CurrentFile = 0
try
{
    # Cmdlet errors are non-terminating by default and would bypass the
    # catch block below; make them terminating for the whole conversion run.
    $ErrorActionPreference = 'Stop'

    # Get all matching .vtt files. @() guarantees an array (and therefore a
    # usable Count) even when exactly one file matches.
    $vttFiles = @(Get-ChildItem -Path $localPath -Filter "*.vtt" -File)

    # Get the count of files to process so we can display a progress bar
    $FileCount = $vttFiles.Count

    # walk through the list of files
    foreach ($file in $vttFiles) {
        # Percentage is based on the number of files already finished
        $percent = [int]($CurrentFile * 100 / $FileCount)
        $CurrentFile++
        Write-Progress -Activity "VTT to SRT" -PercentComplete $percent -Status "$($file.Name)"
        Convert-VttToSrt -filePath $file.FullName
    }

    # Remove the progress bar once all files are done
    Write-Progress -Activity "VTT to SRT" -Completed
} catch {
    Write-Progress -Activity "VTT to SRT" -Completed
    Write-Host "Error: $_"
    exit 1
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment