Skip to content

Instantly share code, notes, and snippets.

@oprypin
Last active August 29, 2015 14:27
Show Gist options
  • Save oprypin/ffd10e61fa92fc953aa0 to your computer and use it in GitHub Desktop.
Save oprypin/ffd10e61fa92fc953aa0 to your computer and use it in GitHub Desktop.
A tokenizer feeding into parser+builder
# Generator functions in Python give out results one by one, and you can iterate over them in a for-loop
def tokens_generator():
    """Yield, one at a time, the tokens of ``{Hash(Int32,Array(Int32)),Int32}``.

    Stands in for a real tokenizer: the tokens are produced lazily, so a
    consumer can pull them with ``next()`` or iterate over them in a for-loop.
    """
    yield "{"
    yield "Hash"
    yield "("
    yield "Int32"
    yield ","
    yield "Array"
    yield "("
    yield "Int32"
    for _ in range(2):  # 2 times
        yield ")"  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
    yield ","
    yield "Int32"
    yield "}"
for tok in tokens_generator():  # just a demonstration that it can be used in a for-loop
    tok  # do something with it
# A fresh generator is created here; the one consumed by the loop above is exhausted.
print(''.join(tokens_generator()))
# {Hash(Int32,Array(Int32)),Int32}
# However, a for-loop does nothing special: it repeatedly calls `next` on the generator, receiving the value of each `yield`, until the generator is exhausted.
# The function below is a recursive generator that consumes such a generator-based stream of tokens by calling `next` itself.
def tokens_to_signature(g):
    """Read exactly one type from the token iterator and yield its DBus signature pieces.

    g -- an iterator of token strings; it is advanced in place with ``next()``,
         so recursive calls continue where the caller left off.

    Mapping implemented below:
        Int32      -> "i"
        Array(T)   -> "a" followed by the signature of T
        Hash(K,V)  -> "{" K V "}"
        {T, ...}   -> "(" T ... ")"

    Punctuation tokens are skipped by position and are not checked. A first
    token that matches none of the branches yields nothing.
    """
    tok = next(g)  # Get the next token
    if tok == "Int32":
        yield "i"
    elif tok == "Array":
        yield "a"
        next(g)  # skip (
        yield from tokens_to_signature(g)  # Recursively parse whatever type is inside
        # equivalent code:
        # for t in tokens_to_signature(g):
        #     yield t
        next(g)  # skip )
    elif tok == "Hash":
        yield "{"
        next(g)  # skip (
        yield from tokens_to_signature(g)
        next(g)  # skip ,
        yield from tokens_to_signature(g)
        next(g)  # skip )
        yield "}"
    elif tok == "{":
        yield "("
        while True:
            yield from tokens_to_signature(g)
            tok = next(g)  # , or }
            if tok == "}":
                break
        yield ")"
# Feed the lazy token stream straight into the recursive parser and print the joined pieces.
print(''.join(tokens_to_signature(tokens_generator())))
# Output: ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
def tokens():
    """Return the tokens of ``{Hash(Int32,Array(Int32)),Int32}`` as a new list.

    List-building counterpart of a generator-based tokenizer: every token is
    collected up front and the whole list is returned.
    """
    result = []
    result.append("{")
    result.append("Hash")
    result.append("(")
    result.append("Int32")
    result.append(",")
    result.append("Array")
    result.append("(")
    result.append("Int32")
    for _ in range(2):  # 2 times
        result.append(")")  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
    result.append(",")
    result.append("Int32")
    result.append("}")
    return result
# Print the token list concatenated back into the original type expression.
print(''.join(tokens()))
# Output: {Hash(Int32,Array(Int32)),Int32}
def tokens_to_signature(tokens):
    """Translate a list of type tokens into a list of DBus signature pieces.

    tokens -- an indexable sequence of token strings describing one type.

    Returns a list of strings; joining them gives the signature. The mapping is
        Int32 -> "i", Array(T) -> "a" T, Hash(K,V) -> "{" K V "}", {T, ...} -> "(" T ... ")".

    Punctuation tokens are skipped by position and are not checked. A leading
    token that matches no branch contributes nothing to the result.
    """
    index = 0  # position of the next unread token, shared by the closures below
    result = []

    def token():
        # Return the token at the cursor and advance the cursor by one.
        nonlocal index
        index += 1
        return tokens[index - 1]

    def parse():
        # Consume exactly one type starting at the cursor and append its pieces to result.
        tok = token()  # Get the next token
        if tok == "Int32":
            result.append("i")
        elif tok == "Array":
            result.append("a")
            token()  # skip (
            parse()  # Recursively parse whatever type is inside
            token()  # skip )
        elif tok == "Hash":
            result.append("{")
            token()  # skip (
            parse()
            token()  # skip ,
            parse()
            token()  # skip )
            result.append("}")
        elif tok == "{":
            result.append("(")
            while True:
                parse()
                tok = token()  # , or }
                if tok == "}":
                    break
            result.append(")")

    parse()
    return result
# Run the list-based parser on the list-based tokenizer output and print the joined pieces.
print(''.join(tokens_to_signature(tokens())))
# Output: ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
# Returns a new array holding the tokens of `{Hash(Int32,Array(Int32)),Int32}`.
def tokens
  result = ["{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"]
  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  2.times { result << ")" }
  result << "," << "Int32" << "}"
end
# Print the token list concatenated back into the original type expression.
puts tokens.join
# Output: {Hash(Int32,Array(Int32)),Int32}
# Reads exactly one type from *tokens* and appends its DBus signature to *io*.
#
# *tokens* is pulled with `next` (an iterator over token strings); *io* is
# written with `<<`. The mapping is Int32 -> "i", Array(T) -> "a" T,
# Hash(K,V) -> "{" K V "}", {T, ...} -> "(" T ... ")".
#
# Punctuation is skipped by position and not checked. A leading token that
# matches no branch appends nothing.
#
# Raises `ArgumentError` when the stream ends inside a `{...}` tuple:
# `Iterator#next` signals exhaustion by returning `Iterator::Stop` instead of
# raising, so without the check the loop would never see "}" and never end.
private def tokens_to_signature(tokens, io)
  case tokens.next
  when "Int32"
    io << "i"
  when "Array"
    io << "a"
    tokens.next                     # skip (
    tokens_to_signature(tokens, io) # Recursively parse whatever type is inside
    tokens.next                     # skip )
  when "Hash"
    io << "{"
    tokens.next # skip (
    tokens_to_signature(tokens, io)
    tokens.next # skip ,
    tokens_to_signature(tokens, io)
    tokens.next # skip )
    io << "}"
  when "{"
    io << "("
    loop do
      tokens_to_signature(tokens, io)
      separator = tokens.next # , or }
      if separator.is_a?(Iterator::Stop)
        raise ArgumentError.new("unexpected end of tokens inside tuple")
      end
      break if separator == "}"
    end
    io << ")"
  end
end
# Returns the DBus signature string for the type described by *tokens*.
#
# Obtains an iterator with `tokens.each`, lets the two-argument overload write
# into a fresh `String::Builder`, and returns the built string.
def tokens_to_signature(tokens)
  builder = String::Builder.new
  token_iterator = tokens.each
  tokens_to_signature(token_iterator, builder)
  builder.to_s
end
# Convert the token array to its signature and print it.
puts tokens_to_signature(tokens)
# Output: ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
# Solution provided by jhass http://paste.mrzyx.de/pf18uww81
# Yields the tokens of `{Hash(Int32,Array(Int32)),Int32}` to the given block, one per call.
def tokens
  opening = {"{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"}
  opening.each { |token| yield token }
  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  2.times { yield ")" }
  yield ","
  yield "Int32"
  yield "}"
end
# {Hash(Int32,Array(Int32)),Int32}
# Reads exactly one type from *tokens* and appends its DBus signature to *io*.
#
# *tokens* is pulled with `receive` (the wrapper in this file passes a
# `Channel(String)`); *io* is written with `<<`. Punctuation is skipped by
# position and not checked; a leading token that matches no branch appends nothing.
private def tokens_to_signature(tokens, io)
  head = tokens.receive
  if head == "Int32"
    io << "i"
  elsif head == "Array"
    io << "a"
    tokens.receive                  # skip (
    tokens_to_signature(tokens, io) # Recursively parse whatever type is inside
    tokens.receive                  # skip )
  elsif head == "Hash"
    io << "{"
    tokens.receive # skip (
    tokens_to_signature(tokens, io)
    tokens.receive # skip ,
    tokens_to_signature(tokens, io)
    tokens.receive # skip )
    io << "}"
  elsif head == "{"
    io << "("
    loop do
      tokens_to_signature(tokens, io)
      separator = tokens.receive # , or }
      break if separator == "}"
    end
    io << ")"
  end
end
# Returns the DBus signature for the token stream produced by `tokens`.
#
# A spawned fiber pushes every yielded token into an unbuffered channel while
# the calling fiber parses from that channel, so the block-yielding tokenizer
# can be consumed one token at a time.
def tokens_to_signature
  io = String::Builder.new
  channel = Channel(String).new
  spawn do
    begin
      tokens do |token|
        channel.send token
      end
    ensure
      # Close once the tokenizer is done: a parser that asks for more tokens
      # than exist then gets Channel::ClosedError from `receive` instead of
      # blocking forever on a channel nobody will write to again.
      channel.close
    end
  end
  tokens_to_signature(channel, io)
  io.to_s
end
# Run the channel-based pipeline and print the resulting signature.
puts tokens_to_signature
# Output: ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
# Returns a new array holding the tokens of {Hash(Int32,Array(Int32)),Int32}.
def tokens
  result = ["{", "Hash", "(", "Int32", ",", "Array", "(", "Int32"]
  # this is just a demonstration to make this function nontrivial. this could be an actual tokenizer
  2.times { result << ")" }
  result << "," << "Int32" << "}"
end
# Print the token list concatenated back into the original type expression.
puts tokens.join
# Output: {Hash(Int32,Array(Int32)),Int32}
# Reads exactly one type from +tokens+ and appends its DBus signature to +io+.
#
# tokens - an external enumerator of token strings, advanced with #next.
# io     - a mutable sink written with #<< (e.g. an unfrozen String).
#
# Mapping: Int32 -> "i", Array(T) -> "a" T, Hash(K,V) -> "{" K V "}",
# {T, ...} -> "(" T ... ")". Punctuation is skipped by position and not
# checked; a leading token that matches no branch appends nothing.
#
# When +tokens+ is an Enumerator, running out of tokens raises StopIteration
# from #next, including when the stream ends inside a {...} tuple.
def tts(tokens, io)
  case tokens.next
  when "Int32"
    io << "i"
  when "Array"
    io << "a"
    tokens.next # skip (
    tts(tokens, io) # Recursively parse whatever type is inside
    tokens.next # skip )
  when "Hash"
    io << "{"
    tokens.next # skip (
    tts(tokens, io)
    tokens.next # skip ,
    tts(tokens, io)
    tokens.next # skip )
    io << "}"
  when "{"
    io << "("
    # Deliberately not Kernel#loop: it rescues StopIteration, so a stream that
    # ends before "}" would quietly end the loop and be emitted as a
    # well-formed tuple.
    tts(tokens, io)
    tts(tokens, io) until tokens.next == "}" # , or }
    io << ")"
  end
end
# Returns the DBus signature string for the type described by +tokens+.
#
# tokens - a collection of token strings; its #each enumerator is handed to tts.
def tokens_to_signature(tokens)
  # Unary plus guarantees a mutable buffer: a bare "" literal is frozen when
  # frozen string literals are enabled, and tts appends to it with <<.
  io = +""
  tts(tokens.each, io)
  io
end
# Convert the token array to its signature and print it.
puts tokens_to_signature(tokens)
# Output: ({iai}i)
# this is the DBus signature for
# {Hash(Int32,Array(Int32)),Int32}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment