Created
April 26, 2012 00:50
-
-
Save rcoup/2494882 to your computer and use it in GitHub Desktop.
WScript to strip UTF-8 BOM from a text/CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
' Usage: strip-utf8-bom.vbs file.csv
'
' Purpose:
'   If the given file starts with a UTF-8 byte order mark (EF BB BF), rewrite
'   the file in place without those three bytes. Files without a BOM are not
'   modified.
'
' Notes:
'   This isn't suitable for large files unless you have a lot of memory -
'   ADODB.Stream reads the entire file into memory, then builds the output
'   buffer in memory as well.

' ADO enum values. A plain .vbs script does not load the ADO type library, so
' these names must be declared here; left undeclared they silently evaluate
' to Empty (0).
Const adTypeBinary = 1
Const adModeReadWrite = 3
Const adSaveCreateOverWrite = 2

' Length in bytes of the UTF-8 BOM.
Const BOM_LENGTH = 3

If WScript.Arguments.Count <> 1 Then
    WScript.Echo "Usage: strip-utf8-bom.vbs file.csv"
    WScript.Quit
End If

Dim fIn, fOut, sFilename, sBOM, bHasBOM
sFilename = WScript.Arguments(0)

' Load the whole file into an in-memory binary stream.
' NOTE(review): read/write mode is used for the input stream as well because
' LoadFromFile fills the stream's contents; a read-only mode may reject that -
' confirm against the ADO documentation before tightening it.
Set fIn = CreateObject("adodb.stream")
fIn.Type = adTypeBinary
fIn.Mode = adModeReadWrite
fIn.Open
fIn.LoadFromFile sFilename

' Only inspect the leading bytes when the file is long enough to hold a BOM.
' Read returns Null on an empty stream and VBScript's And does not
' short-circuit, so an unguarded AscB/MidB check fails on short files.
bHasBOM = False
If fIn.Size >= BOM_LENGTH Then
    sBOM = fIn.Read(BOM_LENGTH)
    ' UTF8 BOM is 0xEF,0xBB,0xBF (decimal 239, 187, 191)
    bHasBOM = (AscB(MidB(sBOM, 1, 1)) = 239 _
        And AscB(MidB(sBOM, 2, 1)) = 187 _
        And AscB(MidB(sBOM, 3, 1)) = 191)
End If

If bHasBOM Then
    WScript.Echo sFilename & ": UTF-8 BOM found... removing"
    fIn.Position = BOM_LENGTH ' Skip BOM

    ' Copy everything after the BOM into a second stream and overwrite the
    ' original file with it.
    Set fOut = CreateObject("adodb.stream")
    fOut.Type = adTypeBinary
    fOut.Mode = adModeReadWrite
    fOut.Open
    fIn.CopyTo fOut
    fOut.SaveToFile sFilename, adSaveCreateOverWrite
    fOut.Close
Else
    WScript.Echo sFilename & ": No UTF-8 BOM found"
End If

' Release the input stream on both paths.
fIn.Close
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment