Created
June 2, 2020 11:36
-
-
Save rohansen/791b124b0b53f86b4958c4a21fed5333 to your computer and use it in GitHub Desktop.
Converting characters and accents to "regular characters"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Script entry point: folds the sample word "PIŁKI" with
/// <c>RemoveDiacritics2</c> and writes the result to the console.
/// </summary>
void Main()
{
    string folded = RemoveDiacritics2("PIŁKI");
    Console.WriteLine(folded);
}
/// <summary>
/// Converts accented / non-ASCII letters in <paramref name="text"/> to plain ASCII
/// look-alikes.
/// </summary>
/// <remarks>
/// The text is encoded to ISO-8859-8, relying on the runtime's default best-fit
/// mapping for that code page to substitute base letters, and the bytes are then
/// read back as ASCII. If that conversion had to introduce '?' placeholders, or the
/// code page is not available on the current runtime, the method falls back to
/// <c>NormalizeStr</c>.
/// </remarks>
/// <param name="text">Text to convert; may be null or empty.</param>
/// <returns>
/// The converted text. Null or empty input is returned unchanged.
/// </returns>
public static string RemoveDiacritics2(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    Encoding bestFit;
    try
    {
        bestFit = Encoding.GetEncoding("ISO-8859-8");
    }
    catch (ArgumentException)
    {
        // The code page is not registered on every runtime; degrade to plain
        // Unicode normalization instead of failing the whole call.
        return NormalizeStr(text);
    }

    // Decode as ASCII, not UTF-8: bytes above 0x7F are not valid UTF-8 on their own
    // and would decode to U+FFFD, which the '?' check below could never notice.
    string folded = Encoding.ASCII.GetString(bestFit.GetBytes(text));

    // Only '?' characters that the conversion itself introduced indicate a failed
    // mapping; question marks already present in the input must not count.
    int questionMarksBefore = 0;
    foreach (char ch in text)
    {
        if (ch == '?')
        {
            questionMarksBefore++;
        }
    }

    int questionMarksAfter = 0;
    foreach (char ch in folded)
    {
        if (ch == '?')
        {
            questionMarksAfter++;
        }
    }

    return questionMarksAfter > questionMarksBefore ? NormalizeStr(text) : folded;
}
/// <summary>
/// Removes combining diacritical marks from <paramref name="text"/> using Unicode
/// normalization.
/// </summary>
/// <remarks>
/// The text is decomposed (FormD), every character whose Unicode category is
/// <c>NonSpacingMark</c> is dropped, and the remainder is recomposed (FormC) so that
/// characters which were merely decomposed, not stripped, are returned in their
/// composed form. Letters without a canonical decomposition are left untouched.
/// </remarks>
/// <param name="text">Text to process; may be null or empty.</param>
/// <returns>
/// The text without non-spacing marks. Null or empty input is returned unchanged.
/// </returns>
public static string NormalizeStr(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    string decomposed = text.Normalize(NormalizationForm.FormD);
    StringBuilder kept = new StringBuilder(decomposed.Length);

    foreach (char ch in decomposed)
    {
        if (System.Globalization.CharUnicodeInfo.GetUnicodeCategory(ch) != System.Globalization.UnicodeCategory.NonSpacingMark)
        {
            kept.Append(ch);
        }
    }

    // Recompose: FormD also splits characters that carry no diacritic at all
    // (for example precomposed Hangul syllables); FormC puts those back together.
    return kept.ToString().Normalize(NormalizationForm.FormC);
}
/// <summary>
/// Converts accented / non-ASCII letters in <paramref name="text"/> to plain ASCII
/// look-alikes.
/// </summary>
/// <remarks>
/// The text is encoded to ISO-8859-8, relying on the runtime's default best-fit
/// mapping for that code page to substitute base letters, and the bytes are then
/// decoded as ASCII. If the conversion had to introduce '?' placeholders, or the
/// code page is not available on the current runtime, the method falls back to
/// <c>NormalizeStr</c>.
/// </remarks>
/// <param name="text">Text to convert; may be null or empty.</param>
/// <returns>
/// The converted text. Null or empty input is returned unchanged.
/// </returns>
public static string RemoveDiacritics(string text)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    Encoding codePage;
    try
    {
        codePage = Encoding.GetEncoding("ISO-8859-8");
    }
    catch (ArgumentException)
    {
        // The code page is not registered on every runtime; fall back to plain
        // Unicode normalization rather than throwing.
        return NormalizeStr(text);
    }

    string ascii = Encoding.ASCII.GetString(codePage.GetBytes(text));

    // A '?' only signals a failed mapping when the conversion added it; count the
    // ones already in the input so a literal question mark is not a false positive.
    int inputMarks = 0;
    foreach (char c in text)
    {
        if (c == '?')
        {
            inputMarks++;
        }
    }

    int outputMarks = 0;
    foreach (char c in ascii)
    {
        if (c == '?')
        {
            outputMarks++;
        }
    }

    if (outputMarks > inputMarks)
    {
        return NormalizeStr(text);
    }

    return ascii;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment