Last active
August 29, 2015 14:27
-
-
Save oprypin/ffd10e61fa92fc953aa0 to your computer and use it in GitHub Desktop.
A tokenizer feeding into parser+builder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Generator functions in Python give out results one by one, and you can iterate over them in a for-loop
def tokens_generator():
    """Yield, one at a time, the tokens of ``{Hash(Int32,Array(Int32)),Int32}``.

    Stands in for a real tokenizer: each call returns a fresh generator that
    produces the same 13 token strings in order.
    """
    yield "{"
    yield "Hash"
    yield "("
    yield "Int32"
    yield ","
    yield "Array"
    yield "("
    yield "Int32"
    for _ in range(2):  # 2 times
        yield ")"  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
    yield ","
    yield "Int32"
    yield "}"
for tok in tokens_generator():  # just a demonstration that it can be used in a for-loop
    tok  # do something with it
print(''.join(tokens_generator()))
# {Hash(Int32,Array(Int32)),Int32}
# However, all the for-loop does is call `next` on the generator until it is exhausted, handing over the value of each yield
# This function is a recursive generator based on the generator stream of tokens
def tokens_to_signature(g):
    """Yield the pieces of the DBus signature for one type read from ``g``.

    Args:
        g: iterable of token strings. It is consumed only as far as one
            complete type; nested types are parsed by recursing on the same
            underlying iterator.

    Yields:
        str: signature fragments ("i", "a", "{", "}", "(", ")") in order.

    Raises:
        ValueError: if the tokens run out in the middle of a type, or a token
            that cannot start a type is encountered.
    """
    # Accept any iterable. iter() of an iterator is the iterator itself, so
    # recursive calls keep sharing one stream position.
    g = iter(g)

    def advance():
        # A bare next(g) hitting the end would raise StopIteration inside this
        # generator, which surfaces as an opaque RuntimeError (PEP 479).
        try:
            return next(g)
        except StopIteration:
            raise ValueError("unexpected end of token stream") from None

    tok = advance()  # Get the next token
    if tok == "Int32":
        yield "i"
    elif tok == "Array":
        yield "a"
        advance()  # skip (
        yield from tokens_to_signature(g)  # Recursively parse whatever type is inside
        # equivalent code:
        # for t in tokens_to_signature(g):
        #     yield t
        advance()  # skip )
    elif tok == "Hash":
        yield "{"
        advance()  # skip (
        yield from tokens_to_signature(g)
        advance()  # skip ,
        yield from tokens_to_signature(g)
        advance()  # skip )
        yield "}"
    elif tok == "{":
        yield "("
        while True:
            yield from tokens_to_signature(g)
            if advance() == "}":  # , or }
                break
        yield ")"
    else:
        # Previously an unknown token produced no output at all.
        raise ValueError("unexpected token %r" % (tok,))
print(''.join(tokens_to_signature(tokens_generator())))
# ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def tokens():
    """Return a new list holding the tokens of ``{Hash(Int32,Array(Int32)),Int32}``."""
    result = ["{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"]
    # 2 times; this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
    result.extend(")" for _ in range(2))
    result.extend([",", "Int32", "}"])
    return result
print(''.join(tokens()))
# {Hash(Int32,Array(Int32)),Int32}
def tokens_to_signature(tokens):
    """Translate a token sequence describing one type into DBus signature pieces.

    Args:
        tokens: indexable sequence of token strings.

    Returns:
        list of str: signature fragments in order; join them for the signature.

    Raises:
        IndexError: if the tokens run out in the middle of a type.
        ValueError: if a token that cannot start a type is encountered.
    """
    index = 0
    result = []

    def token():
        # Return the token at the cursor and move the cursor one step forward.
        nonlocal index
        index += 1
        return tokens[index - 1]

    def parse():
        tok = token()  # Get the next token
        if tok == "Int32":
            result.append("i")
        elif tok == "Array":
            result.append("a")
            token()  # skip (
            parse()  # Recursively parse whatever type is inside
            token()  # skip )
        elif tok == "Hash":
            result.append("{")
            token()  # skip (
            parse()
            token()  # skip ,
            parse()
            token()  # skip )
            result.append("}")
        elif tok == "{":
            result.append("(")
            while True:
                parse()
                tok = token()  # , or }
                if tok == "}":
                    break
            result.append(")")
        else:
            # Previously an unknown token was skipped without any output.
            raise ValueError("unexpected token %r" % (tok,))

    parse()
    return result
print(''.join(tokens_to_signature(tokens())))
# ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Returns a new array holding the tokens of `{Hash(Int32,Array(Int32)),Int32}`.
def tokens
  result = ["{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"]
  2.times do
    result << ")" # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  end
  result << "," << "Int32" << "}"
end
puts tokens.join
# {Hash(Int32,Array(Int32)),Int32}
# Reads one type from the `tokens` iterator and appends its DBus signature to `io`.
#
# Raises ArgumentError when the token cannot start a type or the iterator is
# already exhausted.
private def tokens_to_signature(tokens, io)
  case tok = tokens.next
  when "Int32"
    io << "i"
  when "Array"
    io << "a"
    tokens.next                     # skip (
    tokens_to_signature(tokens, io) # Recursively parse whatever type is inside
    tokens.next                     # skip )
  when "Hash"
    io << "{"
    tokens.next # skip (
    tokens_to_signature(tokens, io)
    tokens.next # skip ,
    tokens_to_signature(tokens, io)
    tokens.next # skip )
    io << "}"
  when "{"
    io << "("
    loop do
      tokens_to_signature(tokens, io)
      break if tokens.next == "}" # , or }
    end
    io << ")"
  else
    # An exhausted iterator hands back a stop marker instead of raising (see
    # Iterator#next). Without this branch the "{" loop above would spin
    # forever on truncated input, and unknown tokens would be dropped silently.
    raise ArgumentError.new("unexpected token or end of input: #{tok.inspect}")
  end
end
# Returns the DBus signature string for `tokens`.
#
# `tokens.each` is passed to the two-argument overload as the token iterator,
# together with the builder that collects the output.
def tokens_to_signature(tokens)
  String.build do |io|
    tokens_to_signature(tokens.each, io)
  end
end
puts tokens_to_signature(tokens)
# ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Solution provided by jhass http://paste.mrzyx.de/pf18uww81
# Yields, one at a time, the tokens of `{Hash(Int32,Array(Int32)),Int32}` to the given block.
def tokens
  yield "{"
  yield "Hash"
  yield "("
  yield "Int32"
  yield ","
  yield "Array"
  yield "("
  yield "Int32"
  2.times do
    yield ")" # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  end
  yield ","
  yield "Int32"
  yield "}"
end
# {Hash(Int32,Array(Int32)),Int32}
# Receives the tokens of one type from the `tokens` channel and appends the
# type's DBus signature to `io`.
#
# Raises ArgumentError when a received token cannot start a type.
private def tokens_to_signature(tokens, io)
  case tok = tokens.receive
  when "Int32"
    io << "i"
  when "Array"
    io << "a"
    tokens.receive                  # skip (
    tokens_to_signature(tokens, io) # Recursively parse whatever type is inside
    tokens.receive                  # skip )
  when "Hash"
    io << "{"
    tokens.receive # skip (
    tokens_to_signature(tokens, io)
    tokens.receive # skip ,
    tokens_to_signature(tokens, io)
    tokens.receive # skip )
    io << "}"
  when "{"
    io << "("
    loop do
      tokens_to_signature(tokens, io)
      break if tokens.receive == "}" # , or }
    end
    io << ")"
  else
    # Previously an unknown token was consumed without producing any output.
    raise ArgumentError.new("unexpected token #{tok.inspect}")
  end
end
# Returns the DBus signature for the token stream produced by `tokens`.
#
# A spawned fiber sends every token into a channel; the two-argument overload
# receives from that channel while it parses.
def tokens_to_signature
  io = String::Builder.new
  channel = Channel(String).new
  spawn do
    tokens do |token|
      channel.send token
    end
    # Close once every token is sent, so that a parser still waiting for more
    # input gets an error from `receive` instead of blocking forever.
    channel.close
  end
  tokens_to_signature(channel, io)
  io.to_s
end
puts tokens_to_signature
# ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the token list for +{Hash(Int32,Array(Int32)),Int32}+.
#
# @return [Array<String>] a new 13-element array on every call
def tokens
  opening = ["{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"]
  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  closing = Array.new(2) { ")" }
  opening + closing + [",", "Int32", "}"]
end
puts tokens.join
# {Hash(Int32,Array(Int32)),Int32}
# Reads one type from +tokens+ and appends its DBus signature to +io+.
#
# @param tokens [Enumerator] external enumerator over token strings, advanced with +next+
# @param io [#<<] receives the signature fragments
# @raise [StopIteration] if the tokens run out in the middle of a type
# @raise [ArgumentError] if a token cannot start a type
def tts(tokens, io)
  case (tok = tokens.next)
  when "Int32"
    io << "i"
  when "Array"
    io << "a"
    tokens.next # skip (
    tts(tokens, io) # Recursively parse whatever type is inside
    tokens.next # skip )
  when "Hash"
    io << "{"
    tokens.next # skip (
    tts(tokens, io)
    tokens.next # skip ,
    tts(tokens, io)
    tokens.next # skip )
    io << "}"
  when "{"
    io << "("
    # Deliberately not Kernel#loop: it rescues StopIteration, so a token list
    # cut off inside the braces was silently closed with ")" and accepted.
    tts(tokens, io)
    tts(tokens, io) until tokens.next == "}" # , or }
    io << ")"
  else
    # Previously an unknown token produced no output at all.
    raise ArgumentError, "unexpected token #{tok.inspect}"
  end
end
# Builds the DBus signature string for a list of tokens.
#
# @param tokens [#each] token collection; +each+ without a block must return
#   an enumerator (as Array does), which is handed to +tts+
# @return [String] the signature accumulated by +tts+
def tokens_to_signature(tokens)
  io = +"" # unary plus keeps the buffer mutable even when string literals are frozen
  tts(tokens.each, io)
  io
end
puts tokens_to_signature(tokens)
# ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment