Created
May 7, 2019 15:13
-
-
Save LarryWeiss/296061f477615dc1160e4e45f339fd8a to your computer and use it in GitHub Desktop.
Show-FileEncoding function written in PowerShell
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################################################################## | |
# | |
# .SYNOPSIS | |
# | |
# PowerShell function that returns the file encoding name as a string. | |
# | |
# _____ _ ______ _ _ ______ _ _ | |
# / ____| | | ____(_) | | ____| | (_) | |
# | (___ | |__ _____ ______| |__ _| | ___| |__ _ __ ___ ___ __| |_ _ __ __ _ | |
# \___ \| '_ \ / _ \ \ /\ / /____| __| | | |/ _ \ __| | '_ \ / __/ _ \ / _` | | '_ \ / _` | | |
# ____) | | | | (_) \ V V / | | | | | __/ |____| | | | (_| (_) | (_| | | | | | (_| | | |
# |_____/|_| |_|\___/ \_/\_/ |_| |_|_|\___|______|_| |_|\___\___/ \__,_|_|_| |_|\__, | | |
# __/ | | |
# |___/ | |
# .DESCRIPTION | |
# | |
# The Show-FileEncoding function determines encoding by looking at Byte Order Mark (BOM), or | |
# if no BOM then by reading the file to determine if it is ASCII or UTF-8 | |
# | |
# .PARAMETER Path | |
# | |
# Path to the file to be examined | |
# | |
# .EXAMPLE | |
# | |
# Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Show-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
# This command gets ps1 files in current directory where encoding is not ASCII | |
# | |
# .EXAMPLE | |
# | |
# Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Show-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
# Same as previous example but fixes encoding using set-content | |
# | |
# .INPUTS | |
# | |
# Objects with a Path property. The Path parameter is bound from the pipeline by property name.
# | |
# .OUTPUTS | |
# | |
# String | |
# | |
# .NOTES | |
# | |
# Derived by Larry Weiss ([email protected]) from | |
# a version by Franck Richard at | |
# http://franckrichard.blogspot.de/2010/08/powershell-get-encoding-file-type.html | |
# that was based on a port of C# code by Rick Strahl from | |
# http://www.west-wind.com/Weblog/posts/197245.aspx | |
# | |
# .LINK | |
# | |
# References: | |
# http://unicode.org/faq/utf_bom.html | |
# http://en.wikipedia.org/wiki/Byte_order_mark | |
# | |
######################################################################################################## | |
function Show-FileEncoding {
    # Emits the name of the encoding of the file at $Path as a string.
    #
    # The first (up to) four bytes are compared against a table of known Byte
    # Order Marks. When no BOM matches, the whole file is scanned: if any byte
    # is above 0x7f the result is 'UTF8!' followed by the decimal value of the
    # first such byte, otherwise the result is 'ASCII'. An empty file is 'ASCII'.
    #
    # NOTE(review): the BOM-less branch does not validate that the bytes are
    # well-formed UTF-8; any file containing a byte above 0x7f is labelled UTF8.
    #
    # If $Path cannot be resolved, Resolve-Path reports the error and nothing
    # is emitted (rather than mis-reporting a missing file as 'ASCII').
    [CmdletBinding()]
    param (
        [string]
        [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
        $Path
    )

    # A process block is required so that every object arriving on the
    # pipeline is examined, not only the last one.
    process {
        $resolved = Resolve-Path -Path $Path | Select-Object -First 1
        if ( -not $resolved ) {
            return
        }
        # .NET file APIs need the provider (file system) path, not a PSDrive path.
        $fullName = $resolved.ProviderPath

        # Read the leading bytes through .NET: 'Get-Content -Encoding Byte' is
        # not available on PowerShell 6+, and this avoids loading a large file
        # just to look at its BOM.
        [byte[]] $head = New-Object byte[] 4
        $headLength = 0
        $stream = [System.IO.File]::Open($fullName, [System.IO.FileMode]::Open, [System.IO.FileAccess]::Read, [System.IO.FileShare]::ReadWrite)
        try {
            # Stream.Read may return fewer bytes than requested, so loop.
            while ( $headLength -lt $head.Length ) {
                $chunk = $stream.Read($head, $headLength, $head.Length - $headLength)
                if ( $chunk -le 0 ) {
                    break
                }
                $headLength += $chunk
            }
        }
        finally {
            $stream.Dispose()
        }

        if ( $headLength -eq 0 ) {
            'ASCII'
            return
        }

        # Known BOM signatures, tested in order. UTF-32 LE (FF FE 00 00) must be
        # tested before UTF-16 LE (FF FE) because the latter is a prefix of the
        # former; the remaining signatures do not overlap.
        $signatures = @(
            @{ Name = 'UTF8';                         Bytes = [byte[]](0xef, 0xbb, 0xbf) },
            @{ Name = 'UTF32 Little-Endian';          Bytes = [byte[]](0xff, 0xfe, 0x00, 0x00) },
            @{ Name = 'Unicode UTF-16 Big-Endian';    Bytes = [byte[]](0xfe, 0xff) },
            @{ Name = 'Unicode UTF-16 Little-Endian'; Bytes = [byte[]](0xff, 0xfe) },
            @{ Name = 'UTF32 Big-Endian';             Bytes = [byte[]](0x00, 0x00, 0xfe, 0xff) },
            @{ Name = 'UTF7';                         Bytes = [byte[]](0x2b, 0x2f, 0x76, 0x38) },
            @{ Name = 'UTF7';                         Bytes = [byte[]](0x2b, 0x2f, 0x76, 0x39) },
            @{ Name = 'UTF7';                         Bytes = [byte[]](0x2b, 0x2f, 0x76, 0x2b) },
            @{ Name = 'UTF7';                         Bytes = [byte[]](0x2b, 0x2f, 0x76, 0x2f) },
            @{ Name = 'UTF-1';                        Bytes = [byte[]](0xf7, 0x64, 0x4c) },
            @{ Name = 'UTF-EBCDIC';                   Bytes = [byte[]](0xdd, 0x73, 0x66, 0x73) },
            @{ Name = 'SCSU';                         Bytes = [byte[]](0x0e, 0xfe, 0xff) },
            @{ Name = 'BOCU-1';                       Bytes = [byte[]](0xfb, 0xee, 0x28) },
            @{ Name = 'GB-18030';                     Bytes = [byte[]](0x84, 0x31, 0x95, 0x33) }
        )

        foreach ( $signature in $signatures ) {
            $bom = $signature.Bytes
            # A file shorter than the signature cannot carry it.
            if ( $bom.Length -gt $headLength ) {
                continue
            }
            $matched = $true
            for ( $i = 0; $i -lt $bom.Length; $i++ ) {
                if ( $head[$i] -ne $bom[$i] ) {
                    $matched = $false
                    break
                }
            }
            if ( $matched ) {
                $signature.Name
                return
            }
        }

        # No BOM: classify by content. The result format for non-ASCII content
        # ('UTF8!' + first byte above 0x7f, in decimal) is kept unchanged for
        # backward compatibility with existing callers.
        foreach ( $byte in [System.IO.File]::ReadAllBytes($fullName) ) {
            if ( $byte -gt 0x7f ) {
                "UTF8" + "!" + $byte
                return
            }
        }
        'ASCII'
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment