Last active
October 10, 2018 18:17
-
-
Save bstaletic/f8ec3614361735dabbe2dea833dc9fac to your computer and use it in GitHub Desktop.
CodePointArray
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/cpp/ycm/CodePoint.cpp b/cpp/ycm/CodePoint.cpp | |
index 5e0c2673..651c0743 100644 | |
--- a/cpp/ycm/CodePoint.cpp | |
+++ b/cpp/ycm/CodePoint.cpp | |
@@ -52,15 +52,25 @@ const RawCodePoint FindCodePoint( const char *text ) { | |
// Do a binary search on the array of code points to find the raw code point | |
// corresponding to the text. If no code point is found, return the default | |
// raw code point for that text. | |
- auto first = code_points.begin(); | |
- size_t count = code_points.size(); | |
+ auto first = code_points.original.begin(); | |
+ size_t count = code_points.original.size(); | |
while ( count > 0 ) { | |
size_t step = count / 2; | |
auto it = first + step; | |
- int cmp = std::strcmp( it->original, text ); | |
+ int cmp = std::strcmp( *it, text ); | |
if ( cmp == 0 ) { | |
- return *it; | |
+ //return *it; | |
+ size_t index = std::distance( first, it ); | |
+ return { code_points.original[ index ], | |
+ code_points.normal[ index ], | |
+ code_points.folded_case[ index ], | |
+ code_points.swapped_case[ index ], | |
+ code_points.is_letter[ index ], | |
+ code_points.is_punctuation[ index ], | |
+ code_points.is_uppercase[ index ], | |
+ code_points.break_property[ index ], | |
+ code_points.combining_class[ index ] }; | |
} | |
if ( cmp < 0 ) { | |
first = ++it; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/update_unicode.py b/update_unicode.py | |
index 17abbb40..c13133ef 100755 | |
--- a/update_unicode.py | |
+++ b/update_unicode.py | |
@@ -42,9 +42,21 @@ DIR_OF_CPP_SOURCES = os.path.join( DIR_OF_THIS_SCRIPT, 'cpp', 'ycm' ) | |
UNICODE_TABLE_TEMPLATE = ( | |
"""// This file was automatically generated with the update_unicode.py script | |
// using version {unicode_version} of the Unicode Character Database. | |
-static const std::array< const RawCodePoint, {size} > code_points = {{ {{ | |
+#include <array> | |
+struct RawCodePointArray {{ | |
+std::array< const char*, {size} > original; | |
+std::array< const char*, {size} > normal; | |
+std::array< const char*, {size} > folded_case; | |
+std::array< const char*, {size} > swapped_case; | |
+std::array< bool , {size} > is_letter; | |
+std::array< bool , {size} > is_punctuation; | |
+std::array< bool , {size} > is_uppercase; | |
+std::array< uint8_t , {size} > break_property; | |
+std::array< uint8_t , {size} > combining_class; | |
+}}; | |
+static const RawCodePointArray code_points = {{ | |
{code_points} | |
-}} }};""" ) | |
+}};""" ) | |
UNICODE_VERSION_REGEX = re.compile( r'Version (?P<version>\d+(?:\.\d+){2})' ) | |
GRAPHEME_BREAK_PROPERTY_REGEX = re.compile( | |
r'^(?P<value>[A-F0-9.]+)\s+; (?P<property>\w+) # .*$' ) | |
@@ -489,17 +501,52 @@ def CppBool( statement ): | |
def GenerateUnicodeTable( header_path, code_points ): | |
unicode_version = GetUnicodeVersion() | |
size = len( code_points ) | |
- code_points = '\n'.join( [ | |
- ( '{' + CppChar( code_point[ 'original' ] ) + ',' + | |
- CppChar( code_point[ 'normal' ] ) + ',' + | |
- CppChar( code_point[ 'folded_case' ] ) + ',' + | |
- CppChar( code_point[ 'swapped_case' ] ) + ',' + | |
- CppBool( code_point[ 'is_letter' ] ) + ',' + | |
- CppBool( code_point[ 'is_punctuation' ] ) + ',' + | |
- CppBool( code_point[ 'is_uppercase' ] ) + ',' + | |
- str( code_point[ 'break_property' ] ) + ',' + | |
- str( code_point[ 'combining_class' ] ) + '},' ) | |
- for code_point in code_points ] ) | |
+ original = '{{' | |
+ normal = '{{' | |
+ folded_case = '{{' | |
+ swapped_case = '{{' | |
+ is_letter = '{{' | |
+ is_punctuation = '{{' | |
+ is_uppercase = '{{' | |
+ break_property = '{{' | |
+ combining_class = '{{' | |
+ for code_point in code_points: | |
+ original += CppChar( code_point[ 'original' ] ) + ',' | |
+ normal += CppChar( code_point[ 'normal' ] ) + ',' | |
+ folded_case += CppChar( code_point[ 'folded_case' ] ) + ',' | |
+ swapped_case += CppChar( code_point[ 'swapped_case' ] ) + ',' | |
+ is_letter += CppBool( code_point[ 'is_letter' ] ) + ',' | |
+ is_punctuation += CppBool( code_point[ 'is_punctuation' ] ) + ',' | |
+ is_uppercase += CppBool( code_point[ 'is_uppercase' ] ) + ',' | |
+ break_property += str( code_point[ 'break_property' ] ) + ',' | |
+ combining_class += str( code_point[ 'combining_class' ] ) + ',' | |
+ original = original.rstrip(',') | |
+ original += '}},' | |
+ normal = normal.rstrip(',') | |
+ normal += '}},' | |
+ folded_case = folded_case.rstrip(',') | |
+ folded_case += '}},' | |
+ swapped_case = swapped_case.rstrip(',') | |
+ swapped_case += '}},' | |
+ is_letter = is_letter.rstrip(',') | |
+ is_letter += '}},' | |
+ is_punctuation = is_punctuation.rstrip(',') | |
+ is_punctuation += '}},' | |
+ is_uppercase = is_uppercase.rstrip(',') | |
+ is_uppercase += '}},' | |
+ break_property = break_property.rstrip(',') | |
+ break_property += '}},' | |
+ combining_class = combining_class.rstrip(',') | |
+ combining_class += '}}' | |
+ code_points = '\n'.join( [ original, | |
+ normal, | |
+ folded_case, | |
+ swapped_case, | |
+ is_letter, | |
+ is_punctuation, | |
+ is_uppercase, | |
+ break_property, | |
+ combining_class ] ) | |
contents = UNICODE_TABLE_TEMPLATE.format( unicode_version = unicode_version, | |
size = size, | |
code_points = code_points ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment