Created
January 2, 2022 20:23
-
-
Save rodion-m/59a54bfeac642e59da7f4bbc80b9853c to your computer and use it in GitHub Desktop.
Encoding Detector (for example, for recovering corrupted ZIP file names)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System;
using System.Collections.Generic;
using System.Text;
namespace ZipFilesEncodingFixer
{
    /// <summary>
    /// Brute-force helper for diagnosing mis-decoded text (for example, garbled
    /// file names inside a ZIP archive): given a garbled sample and the text it
    /// should have been, it searches every available encoding pair that maps
    /// one onto the other.
    /// </summary>
    public static class EncodingDetector
    {
        static EncodingDetector()
        {
            // Register the code-page provider so that its encodings (cp866, IBM437, ...)
            // can be resolved through the Encoding class.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        }

        /// <summary>
        /// Finds every (source, target) encoding pair for which encoding
        /// <paramref name="sourceSample"/> with the source encoding and decoding the
        /// resulting bytes with the target encoding yields <paramref name="destinationSample"/>.
        /// </summary>
        /// <param name="sourceSample">The garbled text as it is currently displayed.</param>
        /// <param name="destinationSample">The text that <paramref name="sourceSample"/> should read as.</param>
        /// <returns>
        /// A lazily evaluated sequence of matching encoding pairs. Source encodings that
        /// cannot represent <paramref name="sourceSample"/> without loss are skipped.
        /// </returns>
        /// <example>
        /// For example:
        /// DetectEncoding(sourceSample: "êìö", destinationSample: "ИНФ");
        /// returns:
        /// Source Encoding: IBM865 TargetEncoding: cp866
        /// Source Encoding: IBM865 TargetEncoding: x-mac-cyrillic
        /// Source Encoding: IBM865 TargetEncoding: x-mac-ukrainian
        /// Source Encoding: IBM437 TargetEncoding: cp866
        /// Source Encoding: IBM437 TargetEncoding: x-mac-cyrillic
        /// Source Encoding: IBM437 TargetEncoding: x-mac-ukrainian
        /// Source Encoding: x-Europa TargetEncoding: cp866
        /// Source Encoding: x-Europa TargetEncoding: x-mac-cyrillic
        /// Source Encoding: x-Europa TargetEncoding: x-mac-ukrainian
        /// Source Encoding: ibm850 TargetEncoding: cp866
        /// Source Encoding: ibm850 TargetEncoding: x-mac-cyrillic
        /// Source Encoding: ibm850 TargetEncoding: x-mac-ukrainian
        /// Source Encoding: IBM00858 TargetEncoding: cp866
        /// Source Encoding: IBM00858 TargetEncoding: x-mac-cyrillic
        /// Source Encoding: IBM00858 TargetEncoding: x-mac-ukrainian
        /// </example>
        public static IEnumerable<(Encoding sourceEncoding, Encoding targetEncoding)>
            DetectEncoding(string sourceSample, string destinationSample)
        {
            // Resolve every available encoding once instead of re-resolving it
            // on each iteration of the nested loops.
            Encoding[] candidates = Array.ConvertAll(Encoding.GetEncodings(), info => info.GetEncoding());

            foreach (Encoding sourceEncoding in candidates)
            {
                byte[] bytes = sourceEncoding.GetBytes(sourceSample);

                // GetBytes silently substitutes characters the encoding cannot represent.
                // If the bytes do not decode back to the original sample, the sample could
                // not have been produced by this encoding, so any match would be a coincidence.
                if (!string.Equals(sourceEncoding.GetString(bytes), sourceSample, StringComparison.Ordinal))
                {
                    continue;
                }

                foreach (Encoding targetEncoding in candidates)
                {
                    if (string.Equals(targetEncoding.GetString(bytes), destinationSample, StringComparison.Ordinal))
                    {
                        yield return (sourceEncoding, targetEncoding);
                    }
                }
            }
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment