haeric · January 15, 2014 07:07
diff --git a/gistfile1.py b/gistfile1.py
 # Sample award record used as the parser's input. Header fields are written as
 # "Key : value"; some keys are spread over several lines (Latest / Amendment /
 # Date) and some values continue on following indented lines (Prgm Manager,
 # Sponsor, Abstract).
 data = """
 Title       : CAREER: Markov Chain Monte Carlo Methods
 Type: Award
 NSF Org     : CCR
 Latest
 Amendment
 Date        : May 5, 2003
 File        : a0237834

 Award Number: 0237834
 Award Instr.: Continuing grant
 Prgm Manager: Ding-Zhu Du
          CCR  DIV OF COMPUTER-COMMUNICATIONS RESEARCH
          CSE  DIRECT FOR COMPUTER & INFO SCIE & ENGINR
 Start Date  : August 1,  2003
 Expires     : May 31,  2008 (Estimated)
 Expected
 Total Amt.  : $400000             (Estimated)
 Investigator: Eric Vigoda [email protected]  (Principal Investigator current)
 Sponsor     : University of Chicago
          5801 South Ellis Avenue
          Chicago, IL  606371404    773/702-8602

 NSF Program : 2860      THEORY OF COMPUTING
 Fld Applictn:
 Program Ref : 1045,1187,9216,HPCC,
 Abstract    :

     Markov chain Monte Carlo (MCMC) methods are an important algorithmic
     device in a variety of fields.  This project studies techniques for rigorous
     analysis of the convergence properties of Markov chains.   The emphasis is on
     refining probabilistic, analytic and combinatorial tools (such as coupling,
     log-Sobolev, and canonical paths) to improve existing algorithms and develop
     efficient algorithms for important open problems.

     Problems arising in
     computer science, discrete mathematics, and physics are of particular interest,
     e.g., generating random colorings and independent sets of bounded-degree
     graphs, approximating the permanent, estimating the volume of a convex body,
     and sampling contingency tables.  The project also studies inherent connections
     between phase transitions in statistical physics models and convergence
     properties of associated Markov chains.

     The investigator is developing a
     new graduate course on MCMC methods."""

 # Parsed output: maps each joined key to its joined, stripped value.
 result = {}
 # Fragments of the key currently being assembled (one entry per key line).
 current_key = []
 # Fragments of the value currently being assembled (one entry per value piece).
 current_value = []

 def reset(result, current_key, current_value):
 	"""Store the accumulated key/value pair in ``result``.

 	The key fragments and the value fragments are each joined with single
 	spaces; the joined value is stripped of surrounding whitespace before
 	it is stored under the joined key.

 	Returns a pair of new empty lists that the caller rebinds as its next
 	key and value accumulators.
 	"""
 	joined_key = ' '.join(current_key)
 	joined_value = ' '.join(current_value)
 	result[joined_key] = joined_value.strip()
 	return [], []

 # Walk the record line by line, accumulating one key/value pair at a time in
 # current_key / current_value and flushing finished pairs into result.
 for line in data.split("\n"):

 	# Lines that start with a space, and blank lines, continue the value of
 	# the key currently being assembled.
 	if line.startswith(" ") or line == "":
 		current_value.append(line.strip())
 		continue

 	# A key line starts here. Flush the previous pair only when it is
 	# complete, i.e. it already received a value fragment. A pending key with
 	# no value yet is the leading line of a multi-line key ("Latest" /
 	# "Amendment" / "Date") and must keep growing instead of being stored as
 	# an empty entry of its own.
 	if current_key and current_value:
 		current_key, current_value = reset(result, current_key, current_value)

 	# Split on the first colon only, so a value that itself contains a colon
 	# ("CAREER: Markov Chain ...") is kept intact. A line without a colon
 	# contributes a key fragment and no value.
 	key_part, colon, value_part = line.partition(":")
 	current_key.append(key_part.strip())
 	if colon:
 		# Appended even when empty: it marks the key as complete.
 		current_value.append(value_part.strip())

 # Store the last pair; no later key line exists to trigger its flush.
 reset(result, current_key, current_value)

 import pprint

 # pprint.pprint() writes the formatted dictionary to stdout itself and returns
 # None, so wrapping it in a print statement emitted a stray "None" afterwards.
 pprint.pprint(result)
	# Sample award record used as the parser's input: "Key : value" header
	# fields followed by a free-text abstract.
	data = """
	Title : CAREER: Markov Chain Monte Carlo Methods
	Type: Award
	NSF Org : CCR
	Latest
	Amendment
	Date : May 5, 2003
	File : a0237834

	Award Number: 0237834
	Award Instr.: Continuing grant
	Prgm Manager: Ding-Zhu Du
	CCR DIV OF COMPUTER-COMMUNICATIONS RESEARCH
	CSE DIRECT FOR COMPUTER & INFO SCIE & ENGINR
	Start Date : August 1, 2003
	Expires : May 31, 2008 (Estimated)
	Expected
	Total Amt. : $400000 (Estimated)
	Investigator: Eric Vigoda [email protected] (Principal Investigator current)
	Sponsor : University of Chicago
	5801 South Ellis Avenue
	Chicago, IL 606371404 773/702-8602

	NSF Program : 2860 THEORY OF COMPUTING
	Fld Applictn:
	Program Ref : 1045,1187,9216,HPCC,
	Abstract :

	Markov chain Monte Carlo (MCMC) methods are an important algorithmic
	device in a variety of fields. This project studies techniques for rigorous
	analysis of the convergence properties of Markov chains. The emphasis is on
	refining probabilistic, analytic and combinatorial tools (such as coupling,
	log-Sobolev, and canonical paths) to improve existing algorithms and develop
	efficient algorithms for important open problems.

	Problems arising in
	computer science, discrete mathematics, and physics are of particular interest,
	e.g., generating random colorings and independent sets of bounded-degree
	graphs, approximating the permanent, estimating the volume of a convex body,
	and sampling contingency tables. The project also studies inherent connections
	between phase transitions in statistical physics models and convergence
	properties of associated Markov chains.

	The investigator is developing a
	new graduate course on MCMC methods."""

	# Parsed output: maps each joined key to its joined, stripped value.
	result = {}
	# Fragments of the key currently being assembled (one entry per key line).
	current_key = []
	# Fragments of the value currently being assembled (one entry per value piece).
	current_value = []

	def reset(result, current_key, current_value):
		"""Store the accumulated key/value pair in ``result``.

		The key fragments and the value fragments are each joined with single
		spaces; the joined value is stripped of surrounding whitespace before
		it is stored under the joined key.

		Returns a pair of new empty lists that the caller rebinds as its next
		key and value accumulators.
		"""
		# The body must be indented under the def; without that the function
		# does not parse.
		result[' '.join(current_key)] = (' '.join(current_value)).strip()
		return [], []

	# Walk the record line by line, accumulating one key/value pair at a time in
	# current_key / current_value and flushing finished pairs into result.
	for line in data.split("\n"):

		# Lines that start with a space, and blank lines, continue the value
		# of the key currently being assembled.
		if line.startswith(" ") or line == "":
			current_value.append(line.strip())
			continue

		# A key line starts here. Flush the previous pair only when it is
		# complete, i.e. it already received a value fragment. A pending key
		# with no value yet is the leading line of a multi-line key ("Latest"
		# / "Amendment" / "Date") and must keep growing instead of being
		# stored as an empty entry of its own.
		if current_key and current_value:
			current_key, current_value = reset(result, current_key, current_value)

		# Split on the first colon only, so a value that itself contains a
		# colon ("CAREER: Markov Chain ...") is kept intact. A line without a
		# colon contributes a key fragment and no value.
		key_part, colon, value_part = line.partition(":")
		current_key.append(key_part.strip())
		if colon:
			# Appended even when empty: it marks the key as complete.
			current_value.append(value_part.strip())

	# Store the last pair; no later key line exists to trigger its flush.
	reset(result, current_key, current_value)

	import pprint

	# pprint.pprint() writes the formatted dictionary to stdout itself and
	# returns None, so wrapping it in a print statement emitted a stray "None"
	# afterwards.
	pprint.pprint(result)