Created
February 25, 2026 15:03
-
-
Save elico/e5b0d018f8ba16636bbefc09390a2123 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the unique text lines from an SRT subtitle file.
#
# Reads $inputFile, discards cue-number lines, timestamp lines (those containing
# '-->') and blank lines, and writes every remaining line to $outputFile the
# first time it is seen. A line that repeats anywhere later in the file is
# dropped, not only when it repeats consecutively.
$inputFile = '1-he.srt'
$outputFile = '1-he.txt'

# Create a 'memory' of lines we've already processed
$seenLines = New-Object System.Collections.Generic.HashSet[string]

Get-Content $inputFile | ForEach-Object {
    # Trim BEFORE filtering so the checks below judge exactly the text that
    # would be written. Filtering the raw line let whitespace-only lines and
    # cue numbers with stray surrounding spaces (e.g. "12 ") slip through.
    $_.Trim()
} | Where-Object {
    # 1. Filter out numbers, timestamps, and empty lines
    $_ -ne '' -and $_ -notmatch '^\d+$' -and $_ -notmatch '-->'
} | Where-Object {
    # 2. Only pass the line forward if we haven't seen it yet.
    #    HashSet.Add returns $true only when the value was not already present.
    $seenLines.Add($_)
} | Out-File -FilePath $outputFile -Encoding UTF8
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment