ehaliewicz · May 24, 2012 03:04
diff --git a/fsm.py b/fsm.py
 # process() kind of parses Tadoku entries

 # Character Classes
 # 0. Space
 # 1. Hash
 # 2. Semicolon
 # 3. Number
 # 4. Other


 # states
 # 0 Start  (eats spaces until (number -> 1))
 # 1 Number (takes spaces until (#media -> 2))
 # 2 Media  (takes spaces until (#times -> 3) (; -> 0)) 
 # 3 Times (takes spaces until (; -> 0))
 # 4 After semicolon (takes everything but hashes and numbers) until (number -> 1)
 	
 	
 # [medium-dec/times-read/number-of/comment, start-pos, end-pos]

 # Scanner state shared by the helper functions below through module globals.
 # string: text being scanned; position: cursor index into it;
 # state: current row index into the `sm` transition table.
 string, position, state = "", 0, 0
 # Scratch record in the form [type, start-pos, end-pos].
 cur_collection = ["", 0, 0]
 # Records gathered so far, each [type, start-pos, end-pos]; process() resets this to [].
 collected = [["medium", 0, 0]]


 def current_char():
 	"""Return the character of the module-level `string` at index `position`."""
 	cursor = position
 	return string[cursor]
 	
 def next_char():
 	"""Return the character one past the cursor, or False when there is none.

 	Reads the module-level `string` and `position`; neither is modified.
 	"""
 	following = position + 1
 	# The bound is tested on the index that is actually read. Testing
 	# `position` alone raised IndexError when the cursor sat on the last
 	# character of the string.
 	return string[following] if following < len(string) else False

 def hash_p():
 	"""Return True when the module-level `string` has a '#' at index `position`."""
 	# str.startswith already yields a bool, so it is returned directly.
 	return string.startswith("#", position)

 def input_class(c):
 	"""Map a character to its character-class number.

 	Returns 0 for space, tab, newline or carriage return, 1 for '#',
 	2 for ';', 3 for '0' through '9', and 4 for anything else.
 	process() doubles this number to pick a column pair in the `sm` table.
 	"""
 	if c == ';':
 		return 2
 	if c == '#':
 		return 1
 	if c in (' ', '\t', '\n', '\r'):
 		return 0
 	return 3 if '0' <= c <= '9' else 4

 def skip():
 	"""Advance the module-level cursor `position` by one character."""
 	global position
 	position += 1
 	
 	
 def error():
 	"""Print a message naming the character under the cursor and the current state, then step past it."""
 	offender = string[position]
 	message = 'Error, skipping misplaced character: {} in state: {}'.format(offender, state)
 	print(message)
 	# Moving the cursor is delegated to skip() so both handlers advance the same way.
 	skip()
 	
 def declaration_p(type_container, prelude_func=True, prelude_lng=0):
 	"""Test whether the token at the cursor is a member of `type_container`.

 	type_container: container of accepted token strings.
 	prelude_func:   result of a prelude check (e.g. hash_p()); when it is
 	                falsy no token is examined and False is returned.
 	prelude_lng:    number of leading characters (the prelude) to drop
 	                from the token before the membership test.

 	Returns the index just past the token when it matches, otherwise False.
 	Reads the module-level `string` and `position`; neither is modified.
 	"""
 	if not prelude_func:
 		return False
 	# A token ends at the first whitespace character or ';', the same
 	# separators input_class() recognises. Searching for a literal space
 	# only made "#book;" or a tab-separated tag fail to match.
 	xpos = position
 	while xpos < len(string) and string[xpos] not in " \t\n\r;":
 		xpos += 1
 	if string[position + prelude_lng:xpos] in type_container:
 		return xpos
 	return False
 	
 def times_p():
 	"""Check for a '#'-prefixed entry of `times` at the cursor; returns what declaration_p() returns."""
 	has_hash = hash_p()
 	return declaration_p(times, prelude_func=has_hash, prelude_lng=1)
 	
 def media_p():
 	"""Check for a '#'-prefixed entry of `media` at the cursor; returns what declaration_p() returns."""
 	has_hash = hash_p()
 	return declaration_p(media, prelude_func=has_hash, prelude_lng=1)
 		
 def read_num():
 	"""Consume the space-delimited token at the cursor as a number.

 	When the token consists only of digits, ["number", start, end] is
 	appended to `collected` and `cur_collection` is reset to ["", 0, 0].
 	Otherwise a message is printed. In both cases the cursor ends up just
 	past the token.
 	"""
 	global position, cur_collection
 	start = position
 	endpos = string.find(" ", start)
 	if endpos == -1:
 		# No further space: the token runs to the end of the string.
 		endpos = len(string)
 	substring = string[start:endpos]
 	if not substring.isdigit():
 		print("Skipping malformed number {}".format(substring))
 	else:
 		collected.append(["number", start, endpos])
 		cur_collection = ["", 0, 0]
 	position = endpos

 def read_media():
 	"""Record the media declaration at the cursor, or skip past a malformed one.

 	On a match, ["media", start, end] is stored in `cur_collection`,
 	appended to `collected`, and the cursor moves to `end`.
 	On a mismatch a message is printed and the cursor moves to the next
 	';', else to the next space, else to the end of the string.
 	"""
 	global position, cur_collection
 	endpos = media_p()
 	# media_p() reports "no match" as False. A `> -1` comparison accepts
 	# False (False == 0), which recorded a bogus entry and sent the cursor
 	# back to index 0, so plain truthiness is tested as in read_times().
 	if endpos:
 		cur_collection = ["media", position, endpos]
 		collected.append(cur_collection)
 		position = endpos
 		return
 	print("Error, malformed media declaration in {}".format(string[position:]))
 	# str.find returns -1 for "not found"; it must be compared explicitly
 	# because -1 is truthy and would otherwise become the cursor.
 	for separator in (";", " "):
 		found = string.find(separator, position)
 		if found != -1:
 			position = found
 			return
 	# Nothing left to resynchronise on: finish the scan.
 	position = len(string)

 def read_times():
 	"""Record the times declaration at the cursor, or skip past a malformed one.

 	On a match, ["times", start, end] is stored in `cur_collection`,
 	appended to `collected`, and the cursor moves to `end`.
 	On a mismatch a message is printed and the cursor moves to the next
 	space, or to the end of the string when no space remains.
 	"""
 	global position, cur_collection
 	endpos = times_p()
 	if endpos:
 		cur_collection = ["times", position, endpos]
 		collected.append(cur_collection)
 		position = endpos
 		return
 	print("Error, malformed times declaration")
 	next_space = string.find(" ", position)
 	# str.find returns -1 when no space remains; storing that as the cursor
 	# made it negative, so the scan wrapped around to the end of the string.
 	position = next_space if next_space != -1 else len(string)
 	
 # Tags accepted by times_p() after the leading '#'.
 times = [
 	"first",
 	"second",
 	"third",
 	"fourth",
 	"fifth",
 ]
 # Tags accepted by media_p() after the leading '#'.
 media = [
 	"book", "dr", "manga", "fullgame", "game",
 	"lyric", "subs", "news", "nico", "sentences",
 ]
 # State names; not referenced by the code visible in this file.
 states = [
 	"Start",
 	"Number",
 	"Media",
 	"Times",
 	"Semicolon",
 	"End",
 ]

 # Transition table: one row per state. Each character class (see
 # input_class) owns two adjacent cells: the handler to call, then the
 # state to enter afterwards.
 #   Space     Hash            Semicolon  Number        Other
 sm = [
 	[skip, 0,  error, 0,       error, 0,  read_num, 1,  error, 0],  # 0 Start
 	[skip, 1,  read_media, 2,  error, 1,  error, 1,     error, 1],  # 1 Number
 	[skip, 2,  read_times, 3,  skip, 0,   error, 2,     error, 2],  # 2 Media
 	[skip, 3,  error, 3,       skip, 0,   error, 3,     error, 3],  # 3 Times
 	[skip, 4,  skip, 4,        skip, 4,   read_num, 1,  skip, 4],   # 4 After semicolon
 ]
 	
 def process(incoming):
 	"""Scan `incoming` with the `sm` table and return the collected records.

 	Resets the module-level `string`, `position`, `state` and `collected`,
 	then repeatedly classifies the character under the cursor, prints a
 	trace line, runs the handler for (state, class) and enters the next
 	state. The loop ends when the cursor reaches the end of `incoming` or
 	the state becomes greater than 3.

 	Returns `collected`, a list of [type, start-pos, end-pos] records.
 	"""
 	global position, string, collected, state
 	string, position, state = incoming, 0, 0
 	collected = []
 	while position < len(incoming):
 		ch = current_char()
 		char_class = input_class(ch)
 		column = 2 * char_class
 		statefunc = sm[state][column]
 		print("Character: {}, Class: {}, State: {}, State Function: {}".format(ch, char_class, state, statefunc))
 		statefunc()
 		# The next state sits in the cell right after the handler.
 		state = sm[state][column + 1]
 		if state > 3:
 			break
 	return collected
	# process() kind of parses Tadoku entries

	# Character Classes
	# 0. Space
	# 1. Hash
	# 2. Semicolon
	# 3. Number
	# 4. Other


	# states
	# 0 Start (eats spaces until (number -> 1))
	# 1 Number (takes spaces until (#media -> 2))
	# 2 Media (takes spaces until (#times -> 3) (; -> 0))
	# 3 Times (takes spaces until (; -> 0))
	# 4 After semicolon (takes everything but hashes and numbers) until (number -> 1)


	# [medium-dec/times-read/number-of/comment, start-pos, end-pos]

	# Scanner state shared by the helper functions below through module globals.
	# string: text being scanned; position: cursor index into it;
	# state: current row index into the `sm` transition table.
	string, position, state = "", 0, 0
	# Scratch record in the form [type, start-pos, end-pos].
	cur_collection = ["", 0, 0]
	# Records gathered so far, each [type, start-pos, end-pos]; process() resets this to [].
	collected = [["medium", 0, 0]]


	def current_char():
		"""Return the character of the module-level `string` at index `position`."""
		return string[position]

	def next_char():
		"""Return the character one past the cursor, or False when there is none.

		Reads the module-level `string` and `position`; neither is modified.
		"""
		following = position + 1
		# The bound is tested on the index that is actually read. Testing
		# `position` alone raised IndexError when the cursor sat on the last
		# character of the string.
		return string[following] if following < len(string) else False

	def hash_p():
		"""Return True when the module-level `string` has a '#' at index `position`."""
		# str.startswith already yields a bool, so it is returned directly.
		return string.startswith("#", position)

	def input_class(c):
		"""Map a character to its character-class number.

		Returns 0 for space, tab, newline or carriage return, 1 for '#',
		2 for ';', 3 for '0' through '9', and 4 for anything else.
		process() doubles this number to pick a column pair in the `sm` table.
		"""
		if c == ';':
			return 2
		if c == '#':
			return 1
		if c in (' ', '\t', '\n', '\r'):
			return 0
		return 3 if '0' <= c <= '9' else 4

	def skip():
		"""Advance the module-level cursor `position` by one character."""
		global position
		position += 1


	def error():
		"""Print a message naming the character under the cursor and the current state, then step past it."""
		print('Error, skipping misplaced character: {} in state: {}'.format(string[position], state))
		# Moving the cursor is delegated to skip() so both handlers advance the same way.
		skip()

	def declaration_p(type_container, prelude_func=True, prelude_lng=0):
		"""Test whether the token at the cursor is a member of `type_container`.

		type_container: container of accepted token strings.
		prelude_func:   result of a prelude check (e.g. hash_p()); when it is
		                falsy no token is examined and False is returned.
		prelude_lng:    number of leading characters (the prelude) to drop
		                from the token before the membership test.

		Returns the index just past the token when it matches, otherwise False.
		Reads the module-level `string` and `position`; neither is modified.
		"""
		if not prelude_func:
			return False
		# A token ends at the first whitespace character or ';', the same
		# separators input_class() recognises. Searching for a literal space
		# only made "#book;" or a tab-separated tag fail to match.
		xpos = position
		while xpos < len(string) and string[xpos] not in " \t\n\r;":
			xpos += 1
		if string[position + prelude_lng:xpos] in type_container:
			return xpos
		return False

	def times_p():
		"""Check for a '#'-prefixed entry of `times` at the cursor; returns what declaration_p() returns."""
		return declaration_p(times, hash_p(), 1)

	def media_p():
		"""Check for a '#'-prefixed entry of `media` at the cursor; returns what declaration_p() returns."""
		return declaration_p(media, hash_p(), 1)

	def read_num():
		"""Consume the space-delimited token at the cursor as a number.

		When the token consists only of digits, ["number", start, end] is
		appended to `collected` and `cur_collection` is reset to ["", 0, 0].
		Otherwise a message is printed. In both cases the cursor ends up just
		past the token.
		"""
		global position, cur_collection
		start = position
		endpos = string.find(" ", start)
		if endpos == -1:
			# No further space: the token runs to the end of the string.
			endpos = len(string)
		substring = string[start:endpos]
		if substring.isdigit():
			collected.append(["number", start, endpos])
			cur_collection = ["", 0, 0]
		else:
			print("Skipping malformed number {}".format(substring))
		position = endpos

	def read_media():
		"""Record the media declaration at the cursor, or skip past a malformed one.

		On a match, ["media", start, end] is stored in `cur_collection`,
		appended to `collected`, and the cursor moves to `end`.
		On a mismatch a message is printed and the cursor moves to the next
		';', else to the next space, else to the end of the string.
		"""
		global position, cur_collection
		endpos = media_p()
		# media_p() reports "no match" as False. A `> -1` comparison accepts
		# False (False == 0), which recorded a bogus entry and sent the cursor
		# back to index 0, so plain truthiness is tested as in read_times().
		if endpos:
			cur_collection = ["media", position, endpos]
			collected.append(cur_collection)
			position = endpos
			return
		print("Error, malformed media declaration in {}".format(string[position:]))
		# str.find returns -1 for "not found"; it must be compared explicitly
		# because -1 is truthy and would otherwise become the cursor.
		for separator in (";", " "):
			found = string.find(separator, position)
			if found != -1:
				position = found
				return
		# Nothing left to resynchronise on: finish the scan.
		position = len(string)

	def read_times():
		"""Record the times declaration at the cursor, or skip past a malformed one.

		On a match, ["times", start, end] is stored in `cur_collection`,
		appended to `collected`, and the cursor moves to `end`.
		On a mismatch a message is printed and the cursor moves to the next
		space, or to the end of the string when no space remains.
		"""
		global position, cur_collection
		endpos = times_p()
		if endpos:
			cur_collection = ["times", position, endpos]
			collected.append(cur_collection)
			position = endpos
			return
		print("Error, malformed times declaration")
		next_space = string.find(" ", position)
		# str.find returns -1 when no space remains; storing that as the cursor
		# made it negative, so the scan wrapped around to the end of the string.
		position = next_space if next_space != -1 else len(string)

	# Tags accepted by times_p() after the leading '#'.
	times = [
		"first",
		"second",
		"third",
		"fourth",
		"fifth",
	]
	# Tags accepted by media_p() after the leading '#'.
	media = [
		"book", "dr", "manga", "fullgame", "game",
		"lyric", "subs", "news", "nico", "sentences",
	]
	# State names; not referenced by the code visible in this file.
	states = [
		"Start",
		"Number",
		"Media",
		"Times",
		"Semicolon",
		"End",
	]

	# Transition table: one row per state. Each character class (see
	# input_class) owns two adjacent cells: the handler to call, then the
	# state to enter afterwards.
	#   Space     Hash            Semicolon  Number        Other
	sm = [
		[skip, 0,  error, 0,       error, 0,  read_num, 1,  error, 0],  # 0 Start
		[skip, 1,  read_media, 2,  error, 1,  error, 1,     error, 1],  # 1 Number
		[skip, 2,  read_times, 3,  skip, 0,   error, 2,     error, 2],  # 2 Media
		[skip, 3,  error, 3,       skip, 0,   error, 3,     error, 3],  # 3 Times
		[skip, 4,  skip, 4,        skip, 4,   read_num, 1,  skip, 4],   # 4 After semicolon
	]

	def process(incoming):
		"""Scan `incoming` with the `sm` table and return the collected records.

		Resets the module-level `string`, `position`, `state` and `collected`,
		then repeatedly classifies the character under the cursor, prints a
		trace line, runs the handler for (state, class) and enters the next
		state. The loop ends when the cursor reaches the end of `incoming` or
		the state becomes greater than 3.

		Returns `collected`, a list of [type, start-pos, end-pos] records.
		"""
		global position, string, collected, state
		string = incoming
		position = 0
		state = 0
		collected = []
		while position < len(incoming):
			char_class = input_class(current_char())
			statefunc = sm[state][2 * char_class]
			print("Character: {}, Class: {}, State: {}, State Function: {}".format(current_char(), char_class, state, statefunc))
			statefunc()
			# The next state sits in the cell right after the handler.
			state = sm[state][1 + 2 * char_class]
			if state > 3:
				break
		return collected
No results found