Created
September 3, 2019 18:22
-
-
Save MihaZupan/2240e24ff571ec7bdbed1cd9e9ab2bcc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Branch-light character classification helpers. Most predicates are implemented as a
/// range check followed by a lookup in a 64-bit mask whose bit <c>n</c> tells whether the
/// character at offset <c>n</c> belongs to the set.
/// </summary>
public static class CharHelper
{
    /// <summary>
    /// Tests whether <paramref name="c"/> is one of the Roman numeral letters I, V or X in
    /// either case. The letters L, C, D and M are intentionally not supported.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for 'I', 'V', 'X', 'i', 'v' and 'x'; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterPartial(char c)
    {
        // Bit n is set when (char)('I' + n) is accepted: I, V, X in the low half,
        // and i, v, x exactly 32 positions higher.
        const ulong AcceptedLetters = 0x0000A0010000A001UL;

        // The unsigned conversion turns "below 'I'" into a huge value, so a single
        // comparison rejects everything outside 'I'..'x' (offsets 0..47).
        uint offset = (uint)(c - 'I');
        return offset <= 47u && ((AcceptedLetters >> (int)offset) & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is one of the lowercase Roman numeral letters
    /// i, v or x. The letters l, c, d and m are intentionally not supported.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for 'i', 'v' and 'x'; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterLowerPartial(char c)
    {
        // Bits 0, 13 and 15 relative to 'i' select i, v and x.
        const int AcceptedLetters = 0xA001;

        uint offset = (uint)(c - 'i');
        return offset <= 15u && ((AcceptedLetters >> (int)offset) & 1) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is one of the uppercase Roman numeral letters
    /// I, V or X. The letters L, C, D and M are intentionally not supported.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for 'I', 'V' and 'X'; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsRomanLetterUpperPartial(char c)
    {
        // Bits 0, 13 and 15 relative to 'I' select I, V and X.
        const int AcceptedLetters = 0xA001;

        uint offset = (uint)(c - 'I');
        return offset <= 15u && ((AcceptedLetters >> (int)offset) & 1) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is a whitespace character as defined in section
    /// "2.1 Characters and lines": space (U+0020), tab (U+0009), newline (U+000A),
    /// line tabulation (U+000B), form feed (U+000C) or carriage return (U+000D).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is one of the six whitespace characters.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsWhitespace(this char c)
    {
        // Bits 9..13 (tab through carriage return) and bit 32 (space).
        const ulong WhitespaceBits = 0x0000000100003E00UL;

        return c <= ' ' && ((WhitespaceBits >> c) & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is below the space character or is reported as a
    /// control character by <see cref="char.IsControl(char)"/>.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when either condition holds; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsControl(this char c)
    {
        if (c < ' ')
        {
            return true;
        }

        return char.IsControl(c);
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is a symbol that may be backslash-escaped: any
    /// printable ASCII character that is not a letter or digit (U+0021..U+007E), or the
    /// bullet character.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is escapable; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsEscapableSymbol(this char c)
    {
        // char.IsSymbol is not used here because it also accepts Unicode symbols that
        // cannot be escaped according to the specification.

        // Characters 33..47 and 58..63.
        const ulong SymbolsBelow64 = 0xFC00FFFE00000000UL;
        // Relative to '@' (64): '@' itself, 91..96 and 123..126.
        const ulong SymbolsFrom64 = 0x78000001F8000001UL;

        if (c > '~')
        {
            // Outside ASCII, the bullet is the only escapable symbol.
            return c == '•';
        }

        ulong shifted = c < '@' ? SymbolsBelow64 >> c : SymbolsFrom64 >> (c - '@');
        return (shifted & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is a whitespace character (see
    /// <see cref="IsWhitespace(char)"/>) or the NUL character (U+0000).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for NUL and the six whitespace characters; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsWhiteSpaceOrZero(this char c)
    {
        // Same as the whitespace mask (bits 9..13 and 32) with bit 0 added for NUL.
        const ulong WhitespaceOrZeroBits = 0x0000000100003E01UL;

        return c <= ' ' && ((WhitespaceOrZeroBits >> c) & 1UL) != 0;
    }

    /// <summary>
    /// Classifies <paramref name="c"/> as space and/or punctuation.
    /// </summary>
    /// <remarks>
    /// The ampersand (U+0026) is deliberately not reported as punctuation: it introduces
    /// HTML entities, so an ampersand is assumed to be more likely followed by a valid
    /// entity that represents a non-punctuation character.
    /// The NUL character (U+0000) is reported as both space and punctuation.
    /// </remarks>
    /// <param name="c">The character to classify.</param>
    /// <param name="space">Receives <c>true</c> when the character counts as a space.</param>
    /// <param name="punctuation">Receives <c>true</c> when the character counts as punctuation.</param>
    public static void CheckUnicodeCategory(this char c, out bool space, out bool punctuation)
    {
        // Credits: code from CommonMark.NET
        // Copyright (c) 2014, Kārlis Gaņģis All rights reserved.
        // See license for details: https://github.com/Knagis/CommonMark.NET/blob/master/LICENSE.md

        if (c > 'ÿ')
        {
            // Beyond Latin-1 the decision is delegated to the Unicode general category.
            // The space and punctuation categories are disjoint, so at most one flag is set.
            switch (CharUnicodeInfo.GetUnicodeCategory(c))
            {
                case UnicodeCategory.SpaceSeparator:
                case UnicodeCategory.LineSeparator:
                case UnicodeCategory.ParagraphSeparator:
                    space = true;
                    punctuation = false;
                    return;

                case UnicodeCategory.ConnectorPunctuation:
                case UnicodeCategory.DashPunctuation:
                case UnicodeCategory.OpenPunctuation:
                case UnicodeCategory.ClosePunctuation:
                case UnicodeCategory.InitialQuotePunctuation:
                case UnicodeCategory.FinalQuotePunctuation:
                case UnicodeCategory.OtherPunctuation:
                    space = false;
                    punctuation = true;
                    return;

                default:
                    space = false;
                    punctuation = false;
                    return;
            }
        }

        if (c > '~')
        {
            // U+007F..U+00FF: only no-break space and next-line count as space,
            // and nothing in this range is reported as punctuation.
            space = c == '\u00a0' || c == '\u0085';
            punctuation = false;
            return;
        }

        // NUL (bit 0), tab..carriage return (bits 9..13) and space (bit 32).
        const ulong SpaceBits = 0x0000000100003E01UL;
        // NUL, 33..47 without '&' (38), and 58..63.
        const ulong PunctuationBelow64 = 0xFC00FFBE00000001UL;
        // Relative to '@' (64): '@' itself, 91..96 and 123..126.
        const ulong PunctuationFrom64 = 0x78000001F8000001UL;

        space = c <= ' ' && ((SpaceBits >> c) & 1UL) != 0;

        ulong shifted = c < '@' ? PunctuationBelow64 >> c : PunctuationFrom64 >> (c - '@');
        punctuation = (shifted & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> would be reported as space or punctuation by
    /// <see cref="CheckUnicodeCategory(char, out bool, out bool)"/>, without computing the
    /// two flags separately.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is a space or a punctuation character.</returns>
    internal static bool IsSpaceOrPunctuation(this char c)
    {
        if (c > 'ÿ')
        {
            // Beyond Latin-1 the decision is delegated to the Unicode general category.
            switch (CharUnicodeInfo.GetUnicodeCategory(c))
            {
                case UnicodeCategory.SpaceSeparator:
                case UnicodeCategory.LineSeparator:
                case UnicodeCategory.ParagraphSeparator:
                case UnicodeCategory.ConnectorPunctuation:
                case UnicodeCategory.DashPunctuation:
                case UnicodeCategory.OpenPunctuation:
                case UnicodeCategory.ClosePunctuation:
                case UnicodeCategory.InitialQuotePunctuation:
                case UnicodeCategory.FinalQuotePunctuation:
                case UnicodeCategory.OtherPunctuation:
                    return true;

                default:
                    return false;
            }
        }

        if (c > '~')
        {
            // U+007F..U+00FF: only no-break space and next-line qualify.
            return c == '\u00a0' || c == '\u0085';
        }

        // Union of the space set (NUL, 9..13, 32) and the punctuation set
        // (33..47 without '&', 58..63) for characters below '@'.
        const ulong SpaceOrPunctuationBelow64 = 0xFC00FFBF00003E01UL;
        // Relative to '@' (64): '@' itself, 91..96 and 123..126.
        const ulong PunctuationFrom64 = 0x78000001F8000001UL;

        ulong shifted = c < '@' ? SpaceOrPunctuationBelow64 >> c : PunctuationFrom64 >> (c - '@');
        return (shifted & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is an ASCII letter (a-z or A-Z).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII letters; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAlpha(this char c)
    {
        // Relative to 'A': bits 0..25 are A-Z and bits 32..57 are a-z.
        const ulong LetterBits = 0x03FFFFFF03FFFFFFUL;

        // The unsigned conversion rejects everything outside 'A'..'z' (offsets 0..57)
        // with a single comparison.
        uint offset = (uint)(c - 'A');
        return offset <= 57u && ((LetterBits >> (int)offset) & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is an ASCII letter (a-z or A-Z) or an ASCII
    /// digit (0-9).
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII letters and digits; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAlphaNumeric(this char c)
    {
        // Characters 48..57, i.e. the digits.
        const ulong DigitBits = 0x03FF000000000000UL;
        // Relative to '@' (64): bits 1..26 are A-Z and bits 33..58 are a-z.
        const ulong LetterBits = 0x07FFFFFE07FFFFFEUL;

        if (c > 'z')
        {
            return false;
        }

        ulong shifted = c < '@' ? DigitBits >> c : LetterBits >> (c - '@');
        return (shifted & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is an ASCII punctuation character as defined in
    /// section "2.1 Characters and lines".
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> for ASCII punctuation; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsAsciiPunctuation(this char c)
    {
        // The accepted characters are:
        //   ! " # $ % & ' ( ) * + , - . /     (33..47)
        //   : ; < = > ? @                     (58..64)
        //   [ \ ] ^ _ `                       (91..96)
        //   { | } ~                           (123..126)

        // Characters 33..47 and 58..63.
        const ulong PunctuationBelow64 = 0xFC00FFFE00000000UL;
        // Relative to '@' (64): '@' itself, 91..96 and 123..126.
        const ulong PunctuationFrom64 = 0x78000001F8000001UL;

        if (c > '~')
        {
            return false;
        }

        ulong shifted = c < '@' ? PunctuationBelow64 >> c : PunctuationFrom64 >> (c - '@');
        return (shifted & 1UL) != 0;
    }

    /// <summary>
    /// Tests whether <paramref name="c"/> is one of the special characters accepted in the
    /// user-name part of an e-mail address.
    /// </summary>
    /// <param name="c">The character to test.</param>
    /// <returns><c>true</c> when the character is in the accepted set; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptionPortable.AggressiveInlining)]
    public static bool IsEmailUsernameSpecialChar(char c)
    {
        // The accepted characters are:  . ! # $ % & ' * + / = ? ^ _ ` { | } ~ -

        // Below '@': ! # $ % & ' * + - . / = ?
        const ulong SpecialBelow64 = 0xA000ECFA00000000UL;
        // Relative to '@' (64): ^ _ ` { | } ~
        const ulong SpecialFrom64 = 0x78000001C0000000UL;

        if (c > '~')
        {
            return false;
        }

        ulong shifted = c < '@' ? SpecialBelow64 >> c : SpecialFrom64 >> (c - '@');
        return (shifted & 1UL) != 0;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment