bstaletic · October 10, 2018 18:17
diff --git a/CodePoint.diff b/CodePoint.diff
 diff --git a/cpp/ycm/CodePoint.cpp b/cpp/ycm/CodePoint.cpp
 index 5e0c2673..651c0743 100644
 --- a/cpp/ycm/CodePoint.cpp
 +++ b/cpp/ycm/CodePoint.cpp
 @@ -52,15 +52,25 @@ const RawCodePoint FindCodePoint( const char *text ) {
   // Do a binary search on the array of code points to find the raw code point
   // corresponding to the text. If no code point is found, return the default
   // raw code point for that text.
 -  auto first = code_points.begin();
 -  size_t count = code_points.size();
 +  auto first = code_points.original.begin();
 +  size_t count = code_points.original.size();
 
   while ( count > 0 ) {
     size_t step = count / 2;
     auto it = first + step;
 -    int cmp = std::strcmp( it->original, text );
 +    int cmp = std::strcmp( *it, text );
     if ( cmp == 0 ) {
 -      return *it;
 +      // |first| moves as the search narrows; index from the array start.
 +      size_t index = std::distance( code_points.original.begin(), it );
 +      return { code_points.original[ index ],
 +              code_points.normal[ index ],
 +              code_points.folded_case[ index ],
 +              code_points.swapped_case[ index ],
 +              code_points.is_letter[ index ],
 +              code_points.is_punctuation[ index ],
 +              code_points.is_uppercase[ index ],
 +              code_points.break_property[ index ],
 +              code_points.combining_class[ index ] };
     }
     if ( cmp < 0 ) {
       first = ++it;
diff --git a/update_unicode.diff b/update_unicode.diff
 diff --git a/update_unicode.py b/update_unicode.py
 index 17abbb40..c13133ef 100755
 --- a/update_unicode.py
 +++ b/update_unicode.py
 @@ -42,9 +42,21 @@ DIR_OF_CPP_SOURCES = os.path.join( DIR_OF_THIS_SCRIPT, 'cpp', 'ycm' )
 UNICODE_TABLE_TEMPLATE = (
   """// This file was automatically generated with the update_unicode.py script
 // using version {unicode_version} of the Unicode Character Database.
 -static const std::array< const RawCodePoint, {size} > code_points = {{ {{
 +#include <array>
 +struct RawCodePointArray {{
 +std::array< const char*, {size} > original;
 +std::array< const char*, {size} > normal;
 +std::array< const char*, {size} > folded_case;
 +std::array< const char*, {size} > swapped_case;
 +std::array< bool , {size} > is_letter;
 +std::array< bool , {size} > is_punctuation;
 +std::array< bool , {size} > is_uppercase;
 +std::array< uint8_t , {size} > break_property;
 +std::array< uint8_t , {size} > combining_class;
 +}};
 +static const RawCodePointArray code_points = {{
 {code_points}
 -}} }};""" )
 +}};""" )
 UNICODE_VERSION_REGEX = re.compile( r'Version (?P<version>\d+(?:\.\d+){2})' )
 GRAPHEME_BREAK_PROPERTY_REGEX = re.compile(
   r'^(?P<value>[A-F0-9.]+)\s+; (?P<property>\w+) # .*$' )
 @@ -489,17 +501,22 @@ def CppBool( statement ):
 def GenerateUnicodeTable( header_path, code_points ):
   unicode_version = GetUnicodeVersion()
   size = len( code_points )
 -  code_points = '\n'.join( [
 -    ( '{' + CppChar( code_point[ 'original' ] ) + ',' +
 -            CppChar( code_point[ 'normal' ] ) + ',' +
 -            CppChar( code_point[ 'folded_case' ] ) + ',' +
 -            CppChar( code_point[ 'swapped_case' ] ) + ',' +
 -            CppBool( code_point[ 'is_letter' ] ) + ',' +
 -            CppBool( code_point[ 'is_punctuation' ] ) + ',' +
 -            CppBool( code_point[ 'is_uppercase' ] ) + ',' +
 -            str( code_point[ 'break_property' ] ) + ',' +
 -            str( code_point[ 'combining_class' ] ) + '},' )
 -    for code_point in code_points ] )
 +  # One brace-wrapped initializer per field, in the member order of
 +  # RawCodePointArray. The doubled braces are emitted verbatim because this
 +  # text is substituted into the template as a value, not parsed as a format.
 +  columns = [ ( 'original', CppChar ),
 +              ( 'normal', CppChar ),
 +              ( 'folded_case', CppChar ),
 +              ( 'swapped_case', CppChar ),
 +              ( 'is_letter', CppBool ),
 +              ( 'is_punctuation', CppBool ),
 +              ( 'is_uppercase', CppBool ),
 +              ( 'break_property', str ),
 +              ( 'combining_class', str ) ]
 +  code_points = ',\n'.join(
 +    '{{' + ','.join( to_cpp( code_point[ name ] )
 +                     for code_point in code_points ) + '}}'
 +    for name, to_cpp in columns )
   contents = UNICODE_TABLE_TEMPLATE.format( unicode_version = unicode_version,
                                             size = size,
                                             code_points = code_points )
	diff --git a/cpp/ycm/CodePoint.cpp b/cpp/ycm/CodePoint.cpp
	index 5e0c2673..651c0743 100644
	--- a/cpp/ycm/CodePoint.cpp
	+++ b/cpp/ycm/CodePoint.cpp
	@@ -52,15 +52,25 @@ const RawCodePoint FindCodePoint( const char *text ) {
	// Do a binary search on the array of code points to find the raw code point
	// corresponding to the text. If no code point is found, return the default
	// raw code point for that text.
	- auto first = code_points.begin();
	- size_t count = code_points.size();
	+ auto first = code_points.original.begin();
	+ size_t count = code_points.original.size();

	while ( count > 0 ) {
	size_t step = count / 2;
	auto it = first + step;
	- int cmp = std::strcmp( it->original, text );
	+ int cmp = std::strcmp( *it, text );
	if ( cmp == 0 ) {
	- return *it;
	+ // |first| moves as the search narrows; index from the array start.
	+ size_t index = std::distance( code_points.original.begin(), it );
	+ return { code_points.original[ index ],
	+ code_points.normal[ index ],
	+ code_points.folded_case[ index ],
	+ code_points.swapped_case[ index ],
	+ code_points.is_letter[ index ],
	+ code_points.is_punctuation[ index ],
	+ code_points.is_uppercase[ index ],
	+ code_points.break_property[ index ],
	+ code_points.combining_class[ index ] };
	}
	if ( cmp < 0 ) {
	first = ++it;
	diff --git a/update_unicode.py b/update_unicode.py
	index 17abbb40..c13133ef 100755
	--- a/update_unicode.py
	+++ b/update_unicode.py
	@@ -42,9 +42,21 @@ DIR_OF_CPP_SOURCES = os.path.join( DIR_OF_THIS_SCRIPT, 'cpp', 'ycm' )
	UNICODE_TABLE_TEMPLATE = (
	"""// This file was automatically generated with the update_unicode.py script
	// using version {unicode_version} of the Unicode Character Database.
	-static const std::array< const RawCodePoint, {size} > code_points = {{ {{
	+#include <array>
	+struct RawCodePointArray {{
	+std::array< const char*, {size} > original;
	+std::array< const char*, {size} > normal;
	+std::array< const char*, {size} > folded_case;
	+std::array< const char*, {size} > swapped_case;
	+std::array< bool , {size} > is_letter;
	+std::array< bool , {size} > is_punctuation;
	+std::array< bool , {size} > is_uppercase;
	+std::array< uint8_t , {size} > break_property;
	+std::array< uint8_t , {size} > combining_class;
	+}};
	+static const RawCodePointArray code_points = {{
	{code_points}
	-}} }};""" )
	+}};""" )
	UNICODE_VERSION_REGEX = re.compile( r'Version (?P<version>\d+(?:\.\d+){2})' )
	GRAPHEME_BREAK_PROPERTY_REGEX = re.compile(
	r'^(?P<value>[A-F0-9.]+)\s+; (?P<property>\w+) # .*$' )
	@@ -489,17 +501,22 @@ def CppBool( statement ):
	def GenerateUnicodeTable( header_path, code_points ):
	unicode_version = GetUnicodeVersion()
	size = len( code_points )
	- code_points = '\n'.join( [
	- ( '{' + CppChar( code_point[ 'original' ] ) + ',' +
	- CppChar( code_point[ 'normal' ] ) + ',' +
	- CppChar( code_point[ 'folded_case' ] ) + ',' +
	- CppChar( code_point[ 'swapped_case' ] ) + ',' +
	- CppBool( code_point[ 'is_letter' ] ) + ',' +
	- CppBool( code_point[ 'is_punctuation' ] ) + ',' +
	- CppBool( code_point[ 'is_uppercase' ] ) + ',' +
	- str( code_point[ 'break_property' ] ) + ',' +
	- str( code_point[ 'combining_class' ] ) + '},' )
	- for code_point in code_points ] )
	+ # One brace-wrapped initializer per field, in the member order of
	+ # RawCodePointArray. The doubled braces are emitted verbatim because this
	+ # text is substituted into the template as a value, not parsed as a format.
	+ columns = [ ( 'original', CppChar ),
	+ ( 'normal', CppChar ),
	+ ( 'folded_case', CppChar ),
	+ ( 'swapped_case', CppChar ),
	+ ( 'is_letter', CppBool ),
	+ ( 'is_punctuation', CppBool ),
	+ ( 'is_uppercase', CppBool ),
	+ ( 'break_property', str ),
	+ ( 'combining_class', str ) ]
	+ code_points = ',\n'.join(
	+ '{{' + ','.join( to_cpp( code_point[ name ] )
	+ for code_point in code_points ) + '}}'
	+ for name, to_cpp in columns )
	contents = UNICODE_TABLE_TEMPLATE.format( unicode_version = unicode_version,
	size = size,
	code_points = code_points )