Created
May 3, 2023 14:32
-
-
Save makenowjust/d190cf1caf7c08e2592ddf8d6558d08b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'fileutils'

# Unicode release whose data files are downloaded and parsed by this script.
UNICODE_VERSION = "15.0.0".freeze
# Local cache directory for downloaded files, located next to this script.
BASE_DIR = "#{__dir__}/#{UNICODE_VERSION}".freeze
# "ucd" subdirectory of BASE_DIR.
UCD_DIR = "#{BASE_DIR}/ucd".freeze
# Remote directory that data files are downloaded from.
BASE_URL = "https://www.unicode.org/Public/#{UNICODE_VERSION}/ucd".freeze
# Downloads +file+ from BASE_URL into the local cache under BASE_DIR, unless a
# cached copy already exists.
#
# @param file [String] path relative to BASE_URL (and to BASE_DIR), e.g.
#   "PropertyAliases.txt"
# @return [void]
# @raise [RuntimeError] if curl cannot be run or exits with a non-zero status
def download_file(file)
  path = "#{BASE_DIR}/#{file}"
  return if File.exist?(path)

  url = "#{BASE_URL}/#{file}"
  FileUtils.mkdir_p(File.dirname(path))

  # Arguments are passed as a list, so no shell is involved. --fail makes curl
  # exit non-zero on HTTP errors instead of saving the error page as the data
  # file, which would otherwise be treated as a valid cached copy on later runs.
  return if system("curl", "--fail", "--location", "--output", path, url)

  # Remove any partial output so that the next run retries the download.
  FileUtils.rm_f(path)
  raise "failed to download #{url}"
end
# Returns the lines of +file+ from the local cache under BASE_DIR, calling
# download_file first so the file is fetched when it is not cached yet.
#
# @param file [String] path relative to BASE_DIR, e.g. "PropertyAliases.txt"
# @return [Array<String>] the file's lines with line terminators removed
def load_file(file)
  download_file(file)
  # Read as UTF-8 explicitly so the result does not depend on the process
  # locale's default external encoding.
  File.readlines("#{BASE_DIR}/#{file}", chomp: true, encoding: "UTF-8")
end
# Collects the property aliases listed after the "# Binary Properties" heading
# of PropertyAliases.txt.
#
# Each data line is a ";"-separated list whose first field is the short name
# and whose second field is the long name; any further fields are kept as
# additional aliases. Text after "#" is treated as a comment and ignored.
#
# @return [Array<Array(String, Array<String>)>] one
#   [long_name, [short_name, *other_aliases]] pair per property, in file order
def load_binary_property
  # Everything before the heading belongs to other sections and is skipped.
  binary_section = load_file("PropertyAliases.txt").drop_while do |line|
    !line.match?(/# Binary Properties/)
  end

  binary_section.each_with_object([]) do |line, data|
    short, long, *others = line.sub(/#.*/, "").split(";").map(&:strip)
    # Blank lines and comment-only lines yield no usable fields.
    next if short.to_s.empty? || long.to_s.empty?

    data << [long, [short, *others.reject(&:empty?)]]
  end
end
# Collects the value aliases that PropertyValueAliases.txt lists for one
# property. Shared by the four property-specific loaders below.
#
# Each data line is a ";"-separated list: property tag, short value name, long
# value name, then any further aliases. Text after "#" is treated as a comment
# and ignored.
#
# @param property [String] property tag in the first field, e.g. "gc"
# @param short_pattern [Regexp, nil] when given, only lines whose short value
#   name matches this pattern are kept
# @return [Array<Array(String, Array<String>)>] one
#   [long_name, [short_name, *other_aliases]] pair per matching line, in file
#   order
def load_property_value_aliases(property, short_pattern = nil)
  load_file("PropertyValueAliases.txt").each_with_object([]) do |line, data|
    tag, short, long, *others = line.sub(/#.*/, "").split(";").map(&:strip)
    next unless tag == property
    next if short.to_s.empty? || long.to_s.empty?
    next if short_pattern && !short.match?(short_pattern)

    data << [long, [short, *others.reject(&:empty?)]]
  end
end

# Value aliases of the "gc" property.
#
# @return [Array<Array(String, Array<String>)>] see load_property_value_aliases
def load_general_category_value
  load_property_value_aliases("gc")
end

# Value aliases of the "sc" property.
#
# @return [Array<Array(String, Array<String>)>] see load_property_value_aliases
def load_script_value
  load_property_value_aliases("sc")
end

# Value aliases of the "age" property. Only values whose short name has the
# numeric "major.minor" form are returned; other values are skipped.
#
# @return [Array<Array(String, Array<String>)>] see load_property_value_aliases
def load_age_value
  load_property_value_aliases("age", /\A\d+\.\d+\z/)
end

# Value aliases of the "blk" property.
#
# @return [Array<Array(String, Array<String>)>] see load_property_value_aliases
def load_block_value
  load_property_value_aliases("blk")
end
# Prints a `val <val_name>: Map[String, String] = Map(...)` declaration (the
# syntax looks like Scala) to standard output, followed by a blank line.
# Every long name and each of its aliases becomes a key mapped to the long name.
#
# @param val_name [String] identifier to declare
# @param entries [Array<Array(String, Array<String>)>] [long_name, aliases] pairs
# @return [void]
def print_alias_map(val_name, entries)
  puts "val #{val_name}: Map[String, String] = Map("
  entries.each do |name, aliases|
    # uniq avoids emitting the same key twice when an alias equals the long name.
    [name, *aliases].uniq.each do |key|
      puts " #{key.inspect} -> #{name.inspect},"
    end
  end
  puts ")"
  puts
end

# Prints a `val <val_name>: Set[String] = Set(...)` declaration to standard
# output, followed by a blank line.
#
# @param val_name [String] identifier to declare
# @param values [Array<String>] set members, printed in the given order
# @return [void]
def print_value_set(val_name, values)
  puts "val #{val_name}: Set[String] = Set("
  values.each { |value| puts " #{value.inspect}," }
  puts ")"
  puts
end

# Load everything before printing, so a failing load produces no partial output.
binary_properties = load_binary_property
general_category_values = load_general_category_value
script_values = load_script_value
age_values = load_age_value
block_values = load_block_value

print_alias_map("BinaryPropertyNames", binary_properties)
print_alias_map("GeneralCategoryValues", general_category_values)
print_alias_map("ScriptValues", script_values)
# Age values are listed by their first alias (the short name of each entry).
print_value_set("AgeValues", age_values.map { |_long, aliases| aliases.first })
# Block values are listed by their long name.
print_value_set("BlockValues", block_values.map(&:first))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment