Created
July 28, 2012 22:29
-
-
Save camertron/3195022 to your computer and use it in GitHub Desktop.
Dumping composition exclusions into yml
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'open-uri'
require 'fileutils'
require 'yaml'

# Downloads DerivedNormalizationProps.txt from unicode.org, extracts the code
# point ranges listed in its "Full_Composition_Exclusion" section, and dumps
# them to OUTPUT_FILE as a YAML-serialized array of Ruby Range objects.
#
# Raises a RuntimeError if the section cannot be located, if a data line
# cannot be parsed, or if the number of code points found differs from
# EXPECTED_TOTAL_POINTS.

PROPS_URL = "http://www.unicode.org/Public/6.1.0/ucd/DerivedNormalizationProps.txt"
OUTPUT_FILE = "/tmp/cldr/composition_exclusions.yml"

# Sanity-check value: the script aborts unless exactly this many code points
# are parsed. It is tied to the data file version pinned in PROPS_URL, so it
# presumably needs updating whenever that URL changes.
EXPECTED_TOTAL_POINTS = 1120

# URI.open (not Kernel#open) is required to fetch URLs on Ruby 3.0+; the block
# form closes the underlying handle once the body has been read.
data = URI.open(PROPS_URL, &:read)

start_pos = data.index("# Derived Property: Full_Composition_Exclusion")
raise "Could not find the Full_Composition_Exclusion section in #{PROPS_URL}" unless start_pos

# The section is taken to end at the next "# ====..." separator line.
end_pos = data.index(/^#\s=*$/, start_pos)
raise "Could not find the end of the Full_Composition_Exclusion section in #{PROPS_URL}" unless end_pos

# The slice includes the leading "#" of the separator line; it is discarded
# below together with every other comment line.
section_lines = data[start_pos..end_pos].split("\n")
entries = section_lines.reject { |line| line.start_with?("#") || line.strip.empty? }

# Each remaining line starts with either a single hex code point ("0958") or
# an inclusive range ("0340..0341").
result = entries.map do |line|
  match = line.match(/\A\s*([0-9A-F]{4,6})(?:\.\.([0-9A-F]{4,6}))?/)
  raise "Unrecognized line in Full_Composition_Exclusion section: #{line.inspect}" unless match

  start = match[1].to_i(16)
  finish = match[2] ? match[2].to_i(16) : start
  start..finish
end

count = result.sum(&:size)

unless count == EXPECTED_TOTAL_POINTS
  raise "Unexpected number of composition exclusions! Got #{count}, expected #{EXPECTED_TOTAL_POINTS}"
end

FileUtils.mkdir_p(File.dirname(OUTPUT_FILE))
File.write(OUTPUT_FILE, YAML.dump(result))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment