Skip to content

Instantly share code, notes, and snippets.

@hisui
Last active October 5, 2015 21:47
Show Gist options
  • Save hisui/2881606 to your computer and use it in GitHub Desktop.
Save hisui/2881606 to your computer and use it in GitHub Desktop.
IDリストから、与えられた文字列がIDリスト中あるかチェックする関数のコードを生成
# coding: utf-8
# ASCIIのIDのリストから、与えられた文字列がリストにあるかどうかをチェックする関数のコードを生成します。
#
# 例:
# ---- hoge.txt ----
# HOGE 1
# HIGE 2
# HOGA 3
#
# -------------------
# > ruby trie.rb hoge.txt
#
# enumの名前(詳しくはコードを)
ENUM_NAME = "Tag"
ENUM_NULL = "Tag_0"
# コードのインデント用
class PrittyWriter
def initialize(buf)
@level = 0
@buf = buf
end
def <<(str)
if @buf.size > 0 and @buf[-1, 1] == "\n"
@buf << "\t" * @level
end
@buf << str
@buf << "\n" if str =~/[{}:;,]\z/
self
end
def indent
@level += 1
begin
yield
ensure @level -= 1
end
end
end
# トライ
class Trie
attr_reader :list, :leaf
def initialize
@list = []
@leaf = nil
end
def add(str, i=0)
if i >= str.size
@leaf = true
return
end
# コドジェネ用途なので、効率は無視するよ
@list.each {|e|
if e[0] == str[i]
e[1].add(str, i+1)
return
end
}
trie = Trie.new
trie.add(str, i+1)
@list << [str[i], trie]
end
def dump(out, order, enums)
# 文字列の終端チェック
if @leaf
out << "if (pos == end) return #{enums[order += 1]};"
else
out << "if (pos == end) return #{ENUM_NULL};"
end
# 分岐がない場合はmemcmpにしちゃう
if @list.size == 1
cseq = ""
node = self
while node.list.size == 1
t = node
e = node.list[0]
cseq << e[0].chr
node = e[1]
break if node.leaf
end
if cseq.size == 1
out << "if (*pos++ != '%c') return #{ENUM_NULL};" % cseq[0]
else
out << "if (!is_prefix_of(\"#{cseq}\", pos, end)) return #{ENUM_NULL};"
out << "std::advance(pos, #{cseq.size});"
end
order = node.dump(out, order, enums)
# 分岐をswitch&caseで表現
elsif @list.size > 1
out << "switch (*pos++) {"
@list.each {|e|
out << "case '%c':" % e[0]
out.indent {
order = e[1].dump(out, order, enums)
}
}
out << "}"
out << "return #{ENUM_NULL};"
end
order
end
end
########################
### MAIN ROUTINE ###
########################
if ARGV.empty?
$stderr.puts "Usage:ruby trie.rb FILENAME or 'inline'"
exit -1
end
# 単語のリスト
list = nil
if ARGV[0] == "inline"
# inlineの場合はここで設定された値を使う
list = %w{
GET 1
POST 2
DELETE 3
PUT 4
}
else
# ファイルから行で区切られた単語リストを読み込む
# '#' で始まる行はコメントとみなす
# (てか、アルファベットじゃない文字が入ってる行は無視)
list = File.read(ARGV[0]).lines.map(&:chomp).flat_map {|e| e.scan(/^(\w+)\s+(\d+)$/) }
list.map! {|word, code|
[word, code, "#{ENUM_NAME.upcase}_#{word}"]
}
end
# 一旦、メモリ上にトライ木を構築
trie = Trie.new
list.sort_by! {|e| e[0] }
list.each {|word, _|
trie.add word
}
buf = ""
out = PrittyWriter.new buf
# 単語に対応したenumを作る
out << (<<EOF)
template <size_t N, typename InputIterator> bool is_prefix_of(const char (&src)[N], InputIterator pos, InputIterator end)
{
for (int i = 0; i < N-1; ++i) {
if (pos == end || *pos++ != src[i]) return false;
}
return true;
}
EOF
out << "\nenum #{ENUM_NAME}: uint32_t"
out << "\n"
out << "{"
out.indent {
out << "#{ENUM_NULL} = 0,"
list.each {|_, code, enum| out << "#{enum} = #{code}," }
}
out << "};"
out << "\ntemplate <typename InputIterator>"
out << "\n#{ENUM_NAME} ascii_to_#{ENUM_NAME}(InputIterator pos, InputIterator end)"
out << "\n"
out << "{"
out.indent { trie.dump out, 0, [ENUM_NULL] + list.map {|e| e[2]} }
out << "}"
puts buf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment