Created
February 8, 2015 22:38
-
-
Save RyuaNerin/a510cca0b3901903b7d9 to your computer and use it in GitHub Desktop.
트위터 공홈에 있는 방식 그대로 썼다가 망한거
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Same with Twitter | |
| // A URL string paired with a two-element index array { i1, i2 } describing where it was found. | |
| // Both members are public mutable fields; other code in this file rewrites them in place. | |
| class TwitterStr | |
| { | |
|     // The URL text. | |
|     public string url; | |
|     // Two-element array: indices[0] is the first index, indices[1] the second (as passed to the constructor). | |
|     public int[] indices; | |
|     // Stores the URL and packs the two indices into a fresh array. | |
|     public TwitterStr(string url, int i1, int i2) | |
|     { | |
|         indices = new[] { i1, i2 }; | |
|         this.url = url; | |
|     } | |
| } | |
| // Computes the length of text a with every detected URL counted at a fixed length | |
| // instead of its literal length. | |
| // Steps: take the code-point length of the text, find the URLs, convert their indices | |
| // from UTF-16 offsets to code-point offsets, then for each URL subtract the span it | |
| // occupies and add the fixed replacement length. | |
| private int GetLength(string a) | |
| { | |
|     // Fixed lengths charged per link (presumably the t.co lengths at the time of writing - confirm before relying on them). | |
|     const int shortUrlLength = 22; | |
|     const int shortUrlLengthHttps = 23; | |
|     int length = getUnicodeTextLength(a); | |
|     List<TwitterStr> urls = extractUrlsWithIndices(a); | |
|     modifyIndicesFromUTF16ToUnicode(a, urls); | |
|     foreach (TwitterStr found in urls) | |
|     { | |
|         // Remove the span occupied by the original URL (end index minus start index). | |
|         length -= found.indices[1] - found.indices[0]; | |
|         // Ordinal, case-insensitive scheme check: no culture-dependent lowering and no temporary string. | |
|         bool isHttps = found.url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase); | |
|         length += isHttps ? shortUrlLengthHttps : shortUrlLength; | |
|     } | |
|     return length; | |
| } | |
| // Matches one UTF-16 surrogate pair (a high surrogate immediately followed by a low surrogate). | |
| private Regex regUnicodeText = new Regex(@"[\uD800-\uDBFF][\uDC00-\uDFFF]", RegexOptions.Compiled | RegexOptions.Multiline); | |
| // Returns the length of text counted in code points rather than UTF-16 code units. | |
| // Each surrogate pair is collapsed to a single placeholder character before measuring, | |
| // so a character outside the BMP contributes 1 (not 0 and not 2) to the result. | |
| // Unpaired surrogates are not matched by the pattern and therefore count as 1 each. | |
| private int getUnicodeTextLength(string text) | |
| { | |
|     return regUnicodeText.Replace(text, " ").Length; | |
| } | |
| private Regex regExtractUrl = new Regex(@"(((?:[^A-Za-z0-9@@$##]|^))((https?:\/\/)?((?:(?:(?:[^](?:[_-]|[^])*)?[^]\.)*(?:(?:[^](?:-|[^])*)?[^]\.)(?:(?:(?:abogado|academy|accountants|active|actor|aero|agency|airforce|allfinanz|alsace|archi|army|arpa|asia|associates|attorney|auction|audio|autos|axa|band|bar|bargains|bayern|beer|berlin|best|bid|bike|bio|biz|black|blackfriday|blue|bmw|bnpparibas|boo|boutique|brussels|budapest|build|builders|business|buzz|bzh|cab|cal|camera|camp|cancerresearch|capetown|capital|caravan|cards|care|career|careers|casa|cash|cat|catering|center|ceo|cern|channel|cheap|christmas|chrome|church|citic|city|claims|cleaning|click|clinic|clothing|club|codes|coffee|college|cologne|com|community|company|computer|condos|construction|consulting|contractors|cooking|cool|coop|country|credit|creditcard|crs|cruises|cuisinella|cymru|dad|dance|dating|day|deals|degree|democrat|dental|dentist|desi|diamonds|diet|digital|direct|directory|discount|dnp|domains|durban|dvag|eat|edu|education|email|emerck|engineer|engineering|enterprises|equipment|esq|estate|eus|events|exchange|expert|exposed|fail|farm|feedback|finance|financial|fish|fishing|fitness|flights|florist|flsmidth|fly|foo|forsale|foundation|frl|frogans|fund|furniture|futbol|gal|gallery|gbiz|gent|gift|gifts|gives|glass|gle|global|globo|gmail|gmo|gmx|google|gop|gov|graphics|gratis|green|gripe|guide|guitars|guru|hamburg|haus|healthcare|help|here|hiphop|hiv|holdings|holiday|homes|horse|host|hosting|house|how|ibm|immo|immobilien|industries|info|ing|ink|institute|insure|int|international|investments|jetzt|jobs|joburg|juegos|kaufen|kim|kitchen|kiwi|koeln|krd|kred|lacaixa|land|lawyer|lease|lgbt|life|lighting|limited|limo|link|loans|london|lotto|ltda|luxe|luxury|maison|management|mango|market|marketing|media|meet|melbourne|meme|menu|miami|mil|mini|mobi|moda|moe|monash|mortgage|moscow|motorcycles|mov|museum|nagoya|name|navy|net|network|neustar|new|nexus|ngo|nhk|ninja|nra|nrw|nyc|okinawa|ong|onl|ooo|org|organic|otsuka
|ovh|paris|partners|parts|pharmacy|photo|photography|photos|physio|pics|pictures|pink|pizza|place|plumbing|pohl|poker|post|praxi|press|pro|prod|productions|prof|properties|property|pub|qpon|quebec|realtor|recipes|red|rehab|reise|reisen|ren|rentals|repair|report|republican|rest|restaurant|reviews|rich|rio|rip|rocks|rodeo|rsvp|ruhr|ryukyu|saarland|sarl|sca|scb|schmidt|schule|scot|services|sexy|shiksha|shoes|singles|social|software|sohu|solar|solutions|soy|space|spiegel|supplies|supply|support|surf|surgery|suzuki|systems|taipei|tatar|tattoo|tax|technology|tel|tienda|tips|tirol|today|tokyo|tools|top|town|toys|trade|training|travel|tui|university|uno|uol|vacations|vegas|ventures|vermögensberater|vermögensberatung|versicherung|vet|viajes|villas|vision|vlaanderen|vodka|vote|voting|voto|voyage|wales|wang|watch|webcam|website|wed|wedding|whoswho|wien|wiki|williamhill|wme|work|works|world|wtc|wtf|xxx|xyz|yachts|yandex|yoga|yokohama|youtube|zip|zone|дети|москва|онлайн|орг|рус|сайт|بازار|شبكة|موقع|संगठन|みんな|世界|中信|中文网|企业|佛山|公司|公益|商城|商标|在线|广东|我爱你|手机|政务|机构|游戏|移动|组织机构|网址|网络|集团|삼성)(?=[^0-9a-zA-Z@]|$))|(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|бел|мкд|мон|рф|срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عراق|عمان|فلسطين|قطر|مصر|مليسيا|پ
اکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국)(?=[^0-9a-zA-Z@]|$))|(?:xn--[0-9a-z]+))))(?::([0-9]+))?(\/(?:(?:[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*(?:\((?:[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+|(?:[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*\([a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+\)[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*))\)[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*)*[\+\-a-z0-9=_#\/À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]|(?:\((?:[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+|(?:[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*\([a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+\)[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]*))\)))|(?:@[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~@|&À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+\/))*)?(\?[a-z0-9!?\*'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]*[a-z0-9_&=#\/])?", RegexOptions.Compiled | RegexOptions.IgnoreCase); | |
| // Matches text that ends in '-', '_', '.' or '/'. extractUrlsWithIndices applies it to the text captured | |
| // just before a protocol-less URL candidate and skips the candidate when it matches. | |
| private Regex reInvalidUrlWithoutProtocolPrecedingChars = new Regex(@"[-_.\/]$", RegexOptions.Compiled); | |
| // Matches, case-insensitively, a leading "http://t.co/" or "https://t.co/" followed by one or more | |
| // ASCII letters/digits. extractUrlsWithIndices uses the matched prefix as the URL for t.co links. | |
| private Regex reValidTcoUrl = new Regex(@"^https?:\/\/t\.co\/[a-z0-9]+", RegexOptions.Compiled | RegexOptions.IgnoreCase); | |
| private Regex reValidAsciiDomain = new Regex(@"(?:(?:[\-a-z0-9À-ÖØ-öø-ÿĀ-ɏɓ-ɔɖ-ɗəɛɣɨɯɲʉʋʻ̀-ͯḀ-ỿ]+)\.)+(?:(?:(?:abogado|academy|accountants|active|actor|aero|agency|airforce|allfinanz|alsace|archi|army|arpa|asia|associates|attorney|auction|audio|autos|axa|band|bar|bargains|bayern|beer|berlin|best|bid|bike|bio|biz|black|blackfriday|blue|bmw|bnpparibas|boo|boutique|brussels|budapest|build|builders|business|buzz|bzh|cab|cal|camera|camp|cancerresearch|capetown|capital|caravan|cards|care|career|careers|casa|cash|cat|catering|center|ceo|cern|channel|cheap|christmas|chrome|church|citic|city|claims|cleaning|click|clinic|clothing|club|codes|coffee|college|cologne|com|community|company|computer|condos|construction|consulting|contractors|cooking|cool|coop|country|credit|creditcard|crs|cruises|cuisinella|cymru|dad|dance|dating|day|deals|degree|democrat|dental|dentist|desi|diamonds|diet|digital|direct|directory|discount|dnp|domains|durban|dvag|eat|edu|education|email|emerck|engineer|engineering|enterprises|equipment|esq|estate|eus|events|exchange|expert|exposed|fail|farm|feedback|finance|financial|fish|fishing|fitness|flights|florist|flsmidth|fly|foo|forsale|foundation|frl|frogans|fund|furniture|futbol|gal|gallery|gbiz|gent|gift|gifts|gives|glass|gle|global|globo|gmail|gmo|gmx|google|gop|gov|graphics|gratis|green|gripe|guide|guitars|guru|hamburg|haus|healthcare|help|here|hiphop|hiv|holdings|holiday|homes|horse|host|hosting|house|how|ibm|immo|immobilien|industries|info|ing|ink|institute|insure|int|international|investments|jetzt|jobs|joburg|juegos|kaufen|kim|kitchen|kiwi|koeln|krd|kred|lacaixa|land|lawyer|lease|lgbt|life|lighting|limited|limo|link|loans|london|lotto|ltda|luxe|luxury|maison|management|mango|market|marketing|media|meet|melbourne|meme|menu|miami|mil|mini|mobi|moda|moe|monash|mortgage|moscow|motorcycles|mov|museum|nagoya|name|navy|net|network|neustar|new|nexus|ngo|nhk|ninja|nra|nrw|nyc|okinawa|ong|onl|ooo|org|organic|otsuka|ovh|paris|partners|parts|pharmacy|photo|ph
otography|photos|physio|pics|pictures|pink|pizza|place|plumbing|pohl|poker|post|praxi|press|pro|prod|productions|prof|properties|property|pub|qpon|quebec|realtor|recipes|red|rehab|reise|reisen|ren|rentals|repair|report|republican|rest|restaurant|reviews|rich|rio|rip|rocks|rodeo|rsvp|ruhr|ryukyu|saarland|sarl|sca|scb|schmidt|schule|scot|services|sexy|shiksha|shoes|singles|social|software|sohu|solar|solutions|soy|space|spiegel|supplies|supply|support|surf|surgery|suzuki|systems|taipei|tatar|tattoo|tax|technology|tel|tienda|tips|tirol|today|tokyo|tools|top|town|toys|trade|training|travel|tui|university|uno|uol|vacations|vegas|ventures|vermögensberater|vermögensberatung|versicherung|vet|viajes|villas|vision|vlaanderen|vodka|vote|voting|voto|voyage|wales|wang|watch|webcam|website|wed|wedding|whoswho|wien|wiki|williamhill|wme|work|works|world|wtc|wtf|xxx|xyz|yachts|yandex|yoga|yokohama|youtube|zip|zone|дети|москва|онлайн|орг|рус|сайт|بازار|شبكة|موقع|संगठन|みんな|世界|中信|中文网|企业|佛山|公司|公益|商城|商标|在线|广东|我爱你|手机|政务|机构|游戏|移动|组织机构|网址|网络|集团|삼성)(?=[^0-9a-zA-Z@]|$))|(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|бел|мкд|мон|рф|срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عراق|عمان|فلسطين|قطر|مصر|مليسيا|پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இல
ங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국)(?=[^0-9a-zA-Z@]|$))|(?:xn--[0-9a-z]+))", RegexOptions.Compiled | RegexOptions.IgnoreCase); | |
| // Tested against each ASCII domain found inside a protocol-less URL candidate in extractUrlsWithIndices. | |
| // NOTE(review): presumably meant to recognise short domains ending in ".co" or ".tv" - confirm. | |
| // NOTE(review): the pattern looks truncated or garbled: it ends in an unclosed "(?=...|$" and contains | |
| // "[^]" sequences where a character class body appears to be missing. Verify against the intended pattern. | |
| private Regex reValidSpecialShortDomain = new Regex(@"^(?:(?:[^](?:-|[^])*)?[^]\.)(?:(?:co|tv)(?=[^0-9a-zA-Z@]|$", RegexOptions.Compiled); | |
| private Regex reInvalidShortDomain = new Regex(@"^(?:(?:[^](?:-|[^])*)?[^]\.)(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|бел|мкд|мон|рф|срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|عراق|عمان|فلسطين|قطر|مصر|مليسيا|پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국)(?=[^0-9a-zA-Z@]|$", RegexOptions.Compiled | RegexOptions.IgnoreCase); | |
| // Finds the URLs in text and returns them with their UTF-16 start/end offsets (end is exclusive). | |
| // text: the text to scan; null or empty yields an empty list. | |
| // extractUrlsWithoutProtocol: when true, bare domains such as "example.com" are extracted too. | |
| // Returns a new list; entries appear in the order they were found. | |
| private List<TwitterStr> extractUrlsWithIndices(string text, bool extractUrlsWithoutProtocol = true) | |
| { | |
|     List<TwitterStr> lst = new List<TwitterStr>(); | |
|     // Cheap pre-check: a protocol-less URL needs at least one '.', a URL with a protocol needs a ':'. | |
|     if (string.IsNullOrEmpty(text) || text.IndexOf(extractUrlsWithoutProtocol ? '.' : ':') < 0) | |
|     { | |
|         return lst; | |
|     } | |
|     // The for-header advances to the next match on every path, including the "continue" ones, | |
|     // so a skipped candidate can never stall the loop. | |
|     for (Match m1 = regExtractUrl.Match(text); m1.Success; m1 = m1.NextMatch()) | |
|     { | |
|         // Capture groups as used below: 2 = text before the URL, 3 = the URL, 4 = protocol, 5 = domain, 7 = path. | |
|         string before = m1.Groups[2].Value; | |
|         string url = m1.Groups[3].Value; | |
|         string protocol = m1.Groups[4].Value; | |
|         string domain = m1.Groups[5].Value; | |
|         string path = m1.Groups[7].Value; | |
|         int endPosition = m1.Index + m1.Length; | |
|         int startPosition = endPosition - url.Length; | |
|         if (!string.IsNullOrEmpty(protocol)) | |
|         { | |
|             // t.co links are cut back to the part matched by reValidTcoUrl; any other URL is kept whole. | |
|             Match mTco = reValidTcoUrl.Match(url); | |
|             if (mTco.Success) | |
|             { | |
|                 url = mTco.Value; | |
|                 endPosition = startPosition + url.Length; | |
|             } | |
|             lst.Add(new TwitterStr(url, startPosition, endPosition)); | |
|             continue; | |
|         } | |
|         // No protocol: skip when bare domains are not wanted or the preceding text rules the candidate out. | |
|         if (!extractUrlsWithoutProtocol || reInvalidUrlWithoutProtocolPrecedingChars.IsMatch(before)) | |
|         { | |
|             continue; | |
|         } | |
|         // The domain may contain non-ASCII characters; only its ASCII-only sub-domains are extracted. | |
|         TwitterStr lastUrl = null; | |
|         foreach (Match ascii in reValidAsciiDomain.Matches(domain)) | |
|         { | |
|             int asciiStart = startPosition + ascii.Index; | |
|             lastUrl = new TwitterStr(ascii.Value, asciiStart, asciiStart + ascii.Length); | |
|             // Keep the domain if a path follows, if it is a special short domain, | |
|             // or if it is not one of the short domains considered invalid. | |
|             if (!string.IsNullOrEmpty(path) | |
|                 || reValidSpecialShortDomain.IsMatch(ascii.Value) | |
|                 || !reInvalidShortDomain.IsMatch(ascii.Value)) | |
|             { | |
|                 lst.Add(lastUrl); | |
|             } | |
|         } | |
|         // No ASCII-only domain found: skip the whole candidate. | |
|         if (lastUrl == null) | |
|         { | |
|             continue; | |
|         } | |
|         // lastUrl holds only a domain so far; extend it over the path when there is one. | |
|         // (It is the same object that was added to lst, so the list entry is updated too.) | |
|         if (!string.IsNullOrEmpty(path)) | |
|         { | |
|             lastUrl.url = url.Replace(domain, lastUrl.url); | |
|             lastUrl.indices[1] = endPosition; | |
|         } | |
|     } | |
|     return lst; | |
| } | |
| // Converts the indices of every entry in c, which refer to text a, by calling | |
| // convertUnicodeIndices with its flag set to true (the incoming indices are treated as UTF-16 offsets). | |
| // The entries are modified in place; nothing is returned. | |
| private void modifyIndicesFromUTF16ToUnicode(string a, List<TwitterStr> c) | |
| { | |
|     const bool indicesAreUtf16 = true; | |
|     convertUnicodeIndices(a, c, indicesAreUtf16); | |
| } | |
| // Converts the start/end indices of the entries in b between UTF-16 offsets and code-point offsets of text a. | |
| // a: the text the indices refer to. | |
| // b: the entries to convert; sorted in place by start index and rewritten in place. | |
| // c: true when the incoming indices are UTF-16 offsets (converted to code-point offsets), | |
| //    false when they are code-point offsets (converted to UTF-16 offsets). | |
| // The distance between an entry's start and end is carried over unchanged; only the start is re-based. | |
| private void convertUnicodeIndices(string a, List<TwitterStr> b, bool c) | |
| { | |
|     if (b.Count == 0) | |
|     { | |
|         return; | |
|     } | |
|     int charIndex = 0;      // position in UTF-16 code units | |
|     int codePointIndex = 0; // position in code points | |
|     // Entries are consumed in ascending start order during the single pass below. | |
|     b.Sort((x, y) => x.indices[0].CompareTo(y.indices[0])); | |
|     int entityIndex = 0; | |
|     TwitterStr entity = b[0]; | |
|     while (charIndex < a.Length) | |
|     { | |
|         if (entity.indices[0] == (c ? charIndex : codePointIndex)) | |
|         { | |
|             // Remember the span (end - start) before the start index is overwritten. | |
|             int span = entity.indices[1] - entity.indices[0]; | |
|             entity.indices[0] = c ? codePointIndex : charIndex; | |
|             entity.indices[1] = entity.indices[0] + span; | |
|             entityIndex++; | |
|             if (entityIndex == b.Count) | |
|             { | |
|                 break; // every entry has been converted | |
|             } | |
|             entity = b[entityIndex]; | |
|         } | |
|         // A valid surrogate pair is two UTF-16 units but a single code point: skip the extra unit. | |
|         if (char.IsSurrogatePair(a, charIndex)) | |
|         { | |
|             charIndex++; | |
|         } | |
|         // Both counters advance once per code point, for every character. | |
|         codePointIndex++; | |
|         charIndex++; | |
|     } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment