Last active
March 5, 2025 00:33
-
-
Save ninmonkey/4cebda2f4e5e68b2086d8e03ec9c006c to your computer and use it in GitHub Desktop.
Assumes PowerShell 7 (pwsh). Decodes a few strings whose encodings were mismatched (mojibake).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assumes pwsh7
# tip: pwsh 7 added overloads
# Sample mojibake: the UTF-8 bytes of 'ö' (0xC3 0xB6) misread as Windows-1251 (Cyrillic) display as 'Г¶'
$StrIn = 'Г¶'
# Re-encode with the code page the text was wrongly decoded with to get the original bytes back.
# Windows-1252 has no 'Г' and would substitute '?', so 1251 is the code page needed here.
$bytes = [Text.Encoding]::GetEncoding(1251).GetBytes( $StrIn )
# Decode the recovered bytes as UTF-8.
# The result gets its own name: PowerShell variable names are case-insensitive,
# so assigning to $Bytes would overwrite the byte array held in $bytes.
$StrOut = [Text.Encoding]::GetEncoding('utf-8').GetString( $bytes )
# and also
# EnumerateRunes is a method; without '()' PowerShell prints its overload definitions instead of the runes
$StrIn.EnumerateRunes() | Ft -auto
function Enc {
    # Get encoding by name or id
    #   $NameOrId: encoding name ('utf-8'), code page number (1252),
    #              numeric string ('1252'), or an existing [Text.Encoding]
    #   Output:    the matching [Text.Encoding] instance
    param( $NameOrId )

    # Already an encoding: hand it back unchanged
    if ( $NameOrId -is [Text.Encoding] ) {
        return $NameOrId
    }

    # A digits-only string would bind to GetEncoding(string) and be treated as a name;
    # cast it so the GetEncoding(int) code-page overload is used instead
    if ( $NameOrId -is [string] -and $NameOrId -match '^\d+$' ) {
        $NameOrId = [int] $NameOrId
    }

    [Text.Encoding]::GetEncoding( $NameOrId )
}
function CompareEnc {
    # quick sugar to decode with a few encodings:
    # encodes $Text to bytes using $FromEncoding, then decodes those same bytes
    # with several other encodings and emits one object holding every result
    param(
        # String to re-encode
        [string] $Text,
        # What encoding to start with (name or code page id, resolved by Enc)
        [Parameter(Mandatory)]
        [ArgumentCompletions('utf-8', 'utf-16le', '1251', '1252')]
        [object] $FromEncoding
    )
    $bytes = (Enc $FromEncoding).GetBytes( $Text )
    [pscustomobject]@{
        Original        = $Text
        InitialEncoding = $FromEncoding
        FromU8          = (Enc 'utf-8').GetString( $bytes )
        # Cyrillic is code page 1251; 1252 is the Western European code page
        FromCyrillic    = (Enc 1251).GetString( $bytes )
        # The 1252 decoding is kept as well, under a name that matches its code page
        FromWin1252     = (Enc 1252).GetString( $bytes )
        FromAscii       = (Enc 'Ascii').GetString( $bytes )
    }
}
# UTF-8 encoding, for decoding the recovered bytes
$u8 = Enc 'utf-8'
# Windows-1251 (Cyrillic): the code page that can represent the 'Г' in $StrIn.
# Windows-1252 cannot, and would replace it with '?'.
$Win = Enc 1251
# Bytes of the mojibake sample as encoded by that code page
$bytes = $Win.GetBytes( $StrIn )
# ... | |
function TryMany {
    # Run CompareEnc over $Text once per candidate starting encoding
    # and emit one result object per attempt.
    #   $Text: the string to re-encode
    param( [string] $Text )

    # 1251 is included so that Cyrillic-mojibake input (e.g. 'Г¶') has a pass that can recover it
    foreach ( $from in 'utf-8', 1251, 1252, 'utf-16le' ) {
        CompareEnc $Text $from
    }
}
# Show every decoding attempt for $StrIn as an auto-sized table
TryMany -Text $StrIn | Format-Table -AutoSize
# hr
# Same comparison for text containing an emoji
TryMany -Text 'Hi 🐒 world' | Format-Table -AutoSize
# hr
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment