Last active
October 5, 2015 21:47
-
-
Save hisui/2881606 to your computer and use it in GitHub Desktop.
IDリストから、与えられた文字列がIDリスト中あるかチェックする関数のコードを生成
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# ASCIIのIDのリストから、与えられた文字列がリストにあるかどうかをチェックする関数のコードを生成します。 | |
# | |
# 例: | |
# ---- hoge.txt ---- | |
# HOGE 1 | |
# HIGE 2 | |
# HOGA 3 | |
# | |
# ------------------- | |
# > ruby trie.rb hoge.txt | |
# | |
# enumの名前(詳しくはコードを) | |
ENUM_NAME = "Tag" | |
ENUM_NULL = "Tag_0" | |
# コードのインデント用 | |
class PrittyWriter | |
def initialize(buf) | |
@level = 0 | |
@buf = buf | |
end | |
def <<(str) | |
if @buf.size > 0 and @buf[-1, 1] == "\n" | |
@buf << "\t" * @level | |
end | |
@buf << str | |
@buf << "\n" if str =~/[{}:;,]\z/ | |
self | |
end | |
def indent | |
@level += 1 | |
begin | |
yield | |
ensure @level -= 1 | |
end | |
end | |
end | |
# トライ | |
class Trie | |
attr_reader :list, :leaf | |
def initialize | |
@list = [] | |
@leaf = nil | |
end | |
def add(str, i=0) | |
if i >= str.size | |
@leaf = true | |
return | |
end | |
# コドジェネ用途なので、効率は無視するよ | |
@list.each {|e| | |
if e[0] == str[i] | |
e[1].add(str, i+1) | |
return | |
end | |
} | |
trie = Trie.new | |
trie.add(str, i+1) | |
@list << [str[i], trie] | |
end | |
def dump(out, order, enums) | |
# 文字列の終端チェック | |
if @leaf | |
out << "if (pos == end) return #{enums[order += 1]};" | |
else | |
out << "if (pos == end) return #{ENUM_NULL};" | |
end | |
# 分岐がない場合はmemcmpにしちゃう | |
if @list.size == 1 | |
cseq = "" | |
node = self | |
while node.list.size == 1 | |
t = node | |
e = node.list[0] | |
cseq << e[0].chr | |
node = e[1] | |
break if node.leaf | |
end | |
if cseq.size == 1 | |
out << "if (*pos++ != '%c') return #{ENUM_NULL};" % cseq[0] | |
else | |
out << "if (!is_prefix_of(\"#{cseq}\", pos, end)) return #{ENUM_NULL};" | |
out << "std::advance(pos, #{cseq.size});" | |
end | |
order = node.dump(out, order, enums) | |
# 分岐をswitch&caseで表現 | |
elsif @list.size > 1 | |
out << "switch (*pos++) {" | |
@list.each {|e| | |
out << "case '%c':" % e[0] | |
out.indent { | |
order = e[1].dump(out, order, enums) | |
} | |
} | |
out << "}" | |
out << "return #{ENUM_NULL};" | |
end | |
order | |
end | |
end | |
######################## | |
### MAIN ROUTINE ### | |
######################## | |
if ARGV.empty? | |
$stderr.puts "Usage:ruby trie.rb FILENAME or 'inline'" | |
exit -1 | |
end | |
# 単語のリスト | |
list = nil | |
if ARGV[0] == "inline" | |
# inlineの場合はここで設定された値を使う | |
list = %w{ | |
GET 1 | |
POST 2 | |
DELETE 3 | |
PUT 4 | |
} | |
else | |
# ファイルから行で区切られた単語リストを読み込む | |
# '#' で始まる行はコメントとみなす | |
# (てか、アルファベットじゃない文字が入ってる行は無視) | |
list = File.read(ARGV[0]).lines.map(&:chomp).flat_map {|e| e.scan(/^(\w+)\s+(\d+)$/) } | |
list.map! {|word, code| | |
[word, code, "#{ENUM_NAME.upcase}_#{word}"] | |
} | |
end | |
# 一旦、メモリ上にトライ木を構築 | |
trie = Trie.new | |
list.sort_by! {|e| e[0] } | |
list.each {|word, _| | |
trie.add word | |
} | |
buf = "" | |
out = PrittyWriter.new buf | |
# 単語に対応したenumを作る | |
out << (<<EOF) | |
template <size_t N, typename InputIterator> bool is_prefix_of(const char (&src)[N], InputIterator pos, InputIterator end) | |
{ | |
for (int i = 0; i < N-1; ++i) { | |
if (pos == end || *pos++ != src[i]) return false; | |
} | |
return true; | |
} | |
EOF | |
out << "\nenum #{ENUM_NAME}: uint32_t" | |
out << "\n" | |
out << "{" | |
out.indent { | |
out << "#{ENUM_NULL} = 0," | |
list.each {|_, code, enum| out << "#{enum} = #{code}," } | |
} | |
out << "};" | |
out << "\ntemplate <typename InputIterator>" | |
out << "\n#{ENUM_NAME} ascii_to_#{ENUM_NAME}(InputIterator pos, InputIterator end)" | |
out << "\n" | |
out << "{" | |
out.indent { trie.dump out, 0, [ENUM_NULL] + list.map {|e| e[2]} } | |
out << "}" | |
puts buf |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment