Created
April 17, 2012 14:54
-
-
Save jpoehls/2406504 to your computer and use it in GitHub Desktop.
Convert-FileEncoding and Get-FileEncoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<#
.SYNOPSIS
Converts files to the given encoding.
.DESCRIPTION
Searches recursively under -Path for files whose names match -Include,
asks Get-FileEncoding for each file's current encoding, and rewrites every
file whose reported encoding differs from -Encoding using Out-File.
A summary line with the number of rewritten files is written to the host.
.PARAMETER Include
Name pattern of the files to convert (for example *.js).
.PARAMETER Path
Directory under which files are searched recursively.
.PARAMETER Encoding
Target encoding name, passed to Out-File -Encoding. Defaults to 'UTF8'.
.EXAMPLE
Convert-FileEncoding -Include *.js -Path scripts -Encoding UTF8
.NOTES
Files are selected by comparing the string returned by Get-FileEncoding with
-Encoding, so a file is rewritten whenever those two strings differ.
#>
function Convert-FileEncoding([string]$Include, [string]$Path, [string]$Encoding='UTF8') {
  $count = 0

  Get-ChildItem -Include $Include -Recurse -Path $Path |
    # Directories can match the include pattern too; only files have content to convert.
    Where-Object { -not $_.PSIsContainer } |
    Select-Object FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} |
    Where-Object { $_.Encoding -ne $Encoding } |
    ForEach-Object {
      # The parentheses make Get-Content finish reading before Out-File
      # starts overwriting the very same file.
      (Get-Content $_.FullName) | Out-File $_.FullName -Encoding $Encoding
      $count++
    }

  Write-Host "$count $Include file(s) converted to $Encoding in $Path."
}
# http://franckrichard.blogspot.com/2010/08/powershell-get-encoding-file-type.html
<#
.SYNOPSIS
Gets file encoding.
.DESCRIPTION
The Get-FileEncoding function determines encoding by looking at Byte Order Mark (BOM).
It reads at most the first four bytes of the file and compares them with a list of
known BOM signatures. When no signature matches (including for an empty file),
'ASCII' is returned.
Based on port of C# code from http://www.west-wind.com/Weblog/posts/197245.aspx
.PARAMETER Path
Path of the file to inspect.
.OUTPUTS
System.String. One of: 'UTF8', 'UTF32 Big-Endian', 'UTF32 Little-Endian',
'Unicode UTF-16 Big-Endian', 'Unicode UTF-16 Little-Endian', 'UTF7', 'UTF-1',
'UTF-EBCDIC', 'SCSU', 'BOCU-1', 'GB-18030', 'ASCII'.
.EXAMPLE
Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'}
This command gets ps1 files in current directory where encoding is not ASCII
.EXAMPLE
Get-ChildItem *.ps1 | select FullName, @{n='Encoding';e={Get-FileEncoding $_.FullName}} | where {$_.Encoding -ne 'ASCII'} | foreach {(get-content $_.FullName) | set-content $_.FullName -Encoding ASCII}
Same as previous example but fixes encoding using set-content
.NOTES
Modified by F.RICHARD August 2010: add comment + more BOM
http://unicode.org/faq/utf_bom.html
http://en.wikipedia.org/wiki/Byte_order_mark

Do this next line before or add function in Profile.ps1
Import-Module .\Get-FileEncoding.ps1
#>
function Get-FileEncoding
{
    [CmdletBinding()]
    Param (
        [Parameter(Mandatory = $True, ValueFromPipelineByPropertyName = $True)]
        [string]$Path
    )

    # A process block is required so that every object bound from the pipeline
    # is inspected, not just the last one.
    process
    {
        # Read up to the first four bytes. Windows PowerShell exposes raw bytes via
        # '-Encoding Byte'; PowerShell 6+ replaced that with '-AsByteStream'.
        # ErrorAction Stop: an unreadable file raises instead of being reported as ASCII.
        $readArgs = @{ Path = $Path; ReadCount = 4; TotalCount = 4; ErrorAction = 'Stop' }
        if ($PSVersionTable.PSVersion.Major -ge 6) { $readArgs['AsByteStream'] = $true }
        else { $readArgs['Encoding'] = 'Byte' }

        [byte[]]$byte = Get-Content @readArgs
        # An empty file yields no output at all; normalise to an empty array.
        if ($null -eq $byte) { $byte = @() }

        # Fixed BOM signatures, checked in order. The four-byte UTF-32 marks must be
        # tested before the two-byte UTF-16 marks: FF FE 00 00 (UTF-32 LE) starts
        # with FF FE (UTF-16 LE). A UTF-16 LE file whose first character is U+0000
        # is therefore indistinguishable from UTF-32 LE and is reported as UTF-32 LE.
        $signatures = @(
            @{ Name = 'UTF8';                         Bom = @(0xef, 0xbb, 0xbf) },        # EF BB BF
            @{ Name = 'UTF32 Big-Endian';             Bom = @(0x00, 0x00, 0xfe, 0xff) },  # 00 00 FE FF
            @{ Name = 'UTF32 Little-Endian';          Bom = @(0xff, 0xfe, 0x00, 0x00) },  # FF FE 00 00
            @{ Name = 'Unicode UTF-16 Big-Endian';    Bom = @(0xfe, 0xff) },              # FE FF
            @{ Name = 'Unicode UTF-16 Little-Endian'; Bom = @(0xff, 0xfe) },              # FF FE
            @{ Name = 'UTF-1';                        Bom = @(0xf7, 0x64, 0x4c) },        # F7 64 4C
            @{ Name = 'UTF-EBCDIC';                   Bom = @(0xdd, 0x73, 0x66, 0x73) },  # DD 73 66 73
            @{ Name = 'SCSU';                         Bom = @(0x0e, 0xfe, 0xff) },        # 0E FE FF
            @{ Name = 'BOCU-1';                       Bom = @(0xfb, 0xee, 0x28) },        # FB EE 28
            @{ Name = 'GB-18030';                     Bom = @(0x84, 0x31, 0x95, 0x33) }   # 84 31 95 33
        )

        foreach ($signature in $signatures)
        {
            $bom = $signature.Bom
            # A file shorter than the signature cannot carry it.
            if ($byte.Length -lt $bom.Length) { continue }

            $matched = $true
            for ($i = 0; $i -lt $bom.Length; $i++)
            {
                if ($byte[$i] -ne $bom[$i]) { $matched = $false; break }
            }

            if ($matched)
            {
                Write-Output $signature.Name
                return
            }
        }

        # UTF-7: 2B 2F 76 followed by one of 38 | 39 | 2B | 2F
        if ($byte.Length -ge 4 -and
            $byte[0] -eq 0x2b -and $byte[1] -eq 0x2f -and $byte[2] -eq 0x76 -and
            (@(0x38, 0x39, 0x2b, 0x2f) -contains $byte[3]))
        {
            Write-Output 'UTF7'
            return
        }

        # No recognised BOM.
        Write-Output 'ASCII'
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
There is one error in the 11th line, the "$pattern" should be "$include". But still, thanks for your help.