dumpmycode · March 29, 2016 08:06
diff --git a/names.py b/names.py
 #!/usr/bin/python
 # Copyright 2010 Google Inc.
 # Licensed under the Apache License, Version 2.0
 # http://www.apache.org/licenses/LICENSE-2.0

 # Google's Python Class
 # http://code.google.com/edu/languages/google-python-class/

 import sys
 import re

 """Baby Names exercise

 Define the extract_names() function below and change main()
 to call it.

 For writing regex, it's nice to include a copy of the target
 text for inspiration.

 Here's what the html looks like in the baby.html files:
 ...
 <h3 align="center">Popularity in 1990</h3>
 ....
 <tr align="right"><td>1</td><td>Michael</td><td>Jessica</td>
 <tr align="right"><td>2</td><td>Christopher</td><td>Ashley</td>
 <tr align="right"><td>3</td><td>Matthew</td><td>Brittany</td>
 ...

 Suggested milestones for incremental development:
  Extract all the text from the file and print it
  Find and extract the year and print it
  Extract the names and rank numbers and print them
  Get the names data into a dict and print it
  Build the [year, 'name rank', ... ] list and print it
  Fix main() to use the ExtractNames list
  
 def test():
    # this function uses 10k+ function calls. not a good way to iterate things.
    name_list = []
    with open(f) as fo:
        data = fo.readlines()
    for line in data:
        nrmatch = re.search(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', line)
        if nrmatch:
            name_list.append('{} {}'.format(nrmatch.group(2), nrmatch.group(1)))
            name_list.append('{} {}'.format(nrmatch.group(3), nrmatch.group(1)))
    name_list = sorted(name_list)
    name_list.insert(0, ''.join(re.findall(r'<h3 align="center">Popularity in (\w+)</h3>', ''.join(data))))
    print name_list[:10]

 def test():
     # this function uses 4k function calls.
     # fewer function calls, fewer resources wasted.
    name_list = []
    with open(f) as fo:
        data = fo.read()
    match = re.findall(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', data)
    for item in match:
        name_list.append('{} {}'.format(match[1], match[0]))
        name_list.append('{} {}'.format(match[3], match[0]))
    name_list.sort()
    name_list.insert(0, ''.join(re.findall(r'<h3 align="center">Popularity in (\w+)</h3>', data)))
 """

 def extract_names(filename):
     """Return [year, 'name rank', ...] parsed from a baby-names html file.

     The file is read in one go. The year is taken from the first
     'Popularity in <year>' occurrence, and every
     <td>rank</td><td>male</td><td>female</td> row contributes its two
     names.

     Each name appears at most once in the result. When a name occurs
     more than once (for example in both the male and the female column),
     the rank from its first occurrence in the file is kept.

     Args:
         filename: path of the html file to parse.

     Returns:
         A list whose first element is the year string, followed by the
         'name rank' strings in sorted order.

     Raises:
         ValueError: if no 'Popularity in <year>' text is found.
     """
     with open(filename) as fobj:
         data = fobj.read()

     year_match = re.search(r'Popularity in (\w+)', data)
     if year_match is None:
         # Fail with a clear message instead of AttributeError on None.
         raise ValueError(
             'no "Popularity in <year>" heading found in {}'.format(filename))
     year = year_match.group(1)

     # findall returns one (rank, male, female) tuple per table row.
     rows = re.findall(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', data)

     # Key on the bare name so duplicates are really detected; setdefault
     # keeps the rank of the first occurrence and ignores later ones.
     rank_by_name = {}
     for rank, male, female in rows:
         rank_by_name.setdefault(male, rank)
         rank_by_name.setdefault(female, rank)

     entries = sorted(
         '{} {}'.format(name, rank) for name, rank in rank_by_name.items())
     return [year] + entries
    
 def main():
     """Command-line entry point: process each baby-names file given.

     Usage: [--summaryfile] file [file ...]

     For every file the names list from extract_names() is joined with
     newlines. With --summaryfile the text is written to
     '<file>.summary'; without it the text is written to standard output.
     Prints the usage line and exits with status 1 when no files are given.
     """
     # Make a list of command line arguments, omitting the [0] element
     # which is the script itself.
     args = sys.argv[1:]

     # Notice the summary flag and remove it from args if it is present.
     summary = bool(args) and args[0] == '--summaryfile'
     if summary:
         del args[0]

     # Checked after the flag is removed so '--summaryfile' alone is
     # also reported as a usage error.
     if not args:
         print('usage: [--summaryfile] file [file ...]')
         sys.exit(1)

     # The loop runs in both modes; only the destination differs.
     for filename in args:
         text = '\n'.join(extract_names(filename)) + '\n'
         if summary:
             with open(filename + '.summary', 'w') as fobj:
                 fobj.write(text)
         else:
             sys.stdout.write(text)

 # Run main() only when this file is executed as a script, not on import.
 if __name__ == '__main__':
   main()
	#!/usr/bin/python
	# Copyright 2010 Google Inc.
	# Licensed under the Apache License, Version 2.0
	# http://www.apache.org/licenses/LICENSE-2.0

	# Google's Python Class
	# http://code.google.com/edu/languages/google-python-class/

	import sys
	import re

	"""Baby Names exercise

	Define the extract_names() function below and change main()
	to call it.

	For writing regex, it's nice to include a copy of the target
	text for inspiration.

	Here's what the html looks like in the baby.html files:
	...
	<h3 align="center">Popularity in 1990</h3>
	....
	<tr align="right"><td>1</td><td>Michael</td><td>Jessica</td>
	<tr align="right"><td>2</td><td>Christopher</td><td>Ashley</td>
	<tr align="right"><td>3</td><td>Matthew</td><td>Brittany</td>
	...

	Suggested milestones for incremental development:
	Extract all the text from the file and print it
	Find and extract the year and print it
	Extract the names and rank numbers and print them
	Get the names data into a dict and print it
	Build the [year, 'name rank', ... ] list and print it
	Fix main() to use the ExtractNames list

	def test():
	# this function uses 10k+ function calls. not a good way to iterate things.
	name_list = []
	with open(f) as fo:
	data = fo.readlines()
	for line in data:
	nrmatch = re.search(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', line)
	if nrmatch:
	name_list.append('{} {}'.format(nrmatch.group(2), nrmatch.group(1)))
	name_list.append('{} {}'.format(nrmatch.group(3), nrmatch.group(1)))
	name_list = sorted(name_list)
	name_list.insert(0, ''.join(re.findall(r'<h3 align="center">Popularity in (\w+)</h3>', ''.join(data))))
	print name_list[:10]

	def test():
	# this function uses 4k function calls.
	# fewer function calls, fewer resources wasted.
	name_list = []
	with open(f) as fo:
	data = fo.read()
	match = re.findall(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', data)
	for item in match:
	name_list.append('{} {}'.format(match[1], match[0]))
	name_list.append('{} {}'.format(match[3], match[0]))
	name_list.sort()
	name_list.insert(0, ''.join(re.findall(r'<h3 align="center">Popularity in (\w+)</h3>', data)))
	"""

	def extract_names(filename):
	    """Return [year, 'name rank', ...] parsed from a baby-names html file.

	    The file is read in one go. The year is taken from the first
	    'Popularity in <year>' occurrence, and every
	    <td>rank</td><td>male</td><td>female</td> row contributes its two
	    names.

	    Each name appears at most once in the result. When a name occurs
	    more than once (for example in both the male and the female column),
	    the rank from its first occurrence in the file is kept.

	    Args:
	        filename: path of the html file to parse.

	    Returns:
	        A list whose first element is the year string, followed by the
	        'name rank' strings in sorted order.

	    Raises:
	        ValueError: if no 'Popularity in <year>' text is found.
	    """
	    with open(filename) as fobj:
	        data = fobj.read()

	    year_match = re.search(r'Popularity in (\w+)', data)
	    if year_match is None:
	        # Fail with a clear message instead of AttributeError on None.
	        raise ValueError(
	            'no "Popularity in <year>" heading found in {}'.format(filename))
	    year = year_match.group(1)

	    # findall returns one (rank, male, female) tuple per table row.
	    rows = re.findall(r'<td>(\w+)</td><td>(\w+)</td><td>(\w+)</td>', data)

	    # Key on the bare name so duplicates are really detected; setdefault
	    # keeps the rank of the first occurrence and ignores later ones.
	    rank_by_name = {}
	    for rank, male, female in rows:
	        rank_by_name.setdefault(male, rank)
	        rank_by_name.setdefault(female, rank)

	    entries = sorted(
	        '{} {}'.format(name, rank) for name, rank in rank_by_name.items())
	    return [year] + entries

	def main():
	    """Command-line entry point: process each baby-names file given.

	    Usage: [--summaryfile] file [file ...]

	    For every file the names list from extract_names() is joined with
	    newlines. With --summaryfile the text is written to
	    '<file>.summary'; without it the text is written to standard output.
	    Prints the usage line and exits with status 1 when no files are given.
	    """
	    # Make a list of command line arguments, omitting the [0] element
	    # which is the script itself.
	    args = sys.argv[1:]

	    # Notice the summary flag and remove it from args if it is present.
	    summary = bool(args) and args[0] == '--summaryfile'
	    if summary:
	        del args[0]

	    # Checked after the flag is removed so '--summaryfile' alone is
	    # also reported as a usage error.
	    if not args:
	        print('usage: [--summaryfile] file [file ...]')
	        sys.exit(1)

	    # The loop runs in both modes; only the destination differs.
	    for filename in args:
	        text = '\n'.join(extract_names(filename)) + '\n'
	        if summary:
	            with open(filename + '.summary', 'w') as fobj:
	                fobj.write(text)
	        else:
	            sys.stdout.write(text)

	# Run main() only when this file is executed as a script, not on import.
	if __name__ == '__main__':
	    main()