leonardreidy · June 25, 2013 23:14
diff --git a/extract-contacts b/extract-contacts
 from bs4 import BeautifulSoup

 def preproc(infile, outfile):
     """Flatten one HTML page into a single comma-terminated run of fields.

     Parses ``infile`` with BeautifulSoup, then writes to ``outfile`` the
     string of the first ``<h2>`` (the school title) followed by the string
     of every direct child of every ``<tr>`` that has one. Each value is
     followed by a comma; no newline is written.

     Args:
         infile: Path of the HTML file to read.
         outfile: Path of the file to write. It is opened in 'w' mode, so
             existing content is replaced. Output is encoded as UTF-8.

     Raises:
         IndexError: If the document contains no ``<h2>``.
         TypeError: If the first ``<h2>`` has no single string
             (its ``.string`` is None).
     """
     # Imported locally so this function stays self-contained; io.open
     # accepts an encoding on both Python 2.6+ and Python 3.
     import io

     # Parse inside a 'with' so the input handle is closed as soon as the
     # soup has been built, instead of being left open.
     # NOTE(review): no parser is named, so bs4 picks whichever one is
     # installed; consider pinning one for reproducible output.
     with open(infile, 'r') as source:
         soup = BeautifulSoup(source)

     # Write text and let the file object do the UTF-8 encoding. The old
     # ``x.encode('utf-8') + ","`` mixed bytes with str, which fails on
     # Python 3.
     with io.open(outfile, 'w', encoding='utf-8') as sink:

         # find the h2 with the school title and write it
         sink.write(soup('h2')[0].string + u",")

         # iterate through the soup of <tr> tags
         for row in soup('tr'):

             # drill down to the direct children of each row
             for cell in row:

                 # .string is None for empty tags and for tags with more
                 # than one child; skip those rather than fail
                 if cell.string is not None:
                     sink.write(cell.string + u",")
	from bs4 import BeautifulSoup

	def preproc(infile, outfile):
	    """Flatten one HTML page into a single comma-terminated run of fields.

	    Parses ``infile`` with BeautifulSoup, then writes to ``outfile`` the
	    string of the first ``<h2>`` (the school title) followed by the string
	    of every direct child of every ``<tr>`` that has one. Each value is
	    followed by a comma; no newline is written.

	    Args:
	        infile: Path of the HTML file to read.
	        outfile: Path of the file to write. It is opened in 'w' mode, so
	            existing content is replaced. Output is encoded as UTF-8.

	    Raises:
	        IndexError: If the document contains no ``<h2>``.
	        TypeError: If the first ``<h2>`` has no single string
	            (its ``.string`` is None).
	    """
	    # Imported locally so this function stays self-contained; io.open
	    # accepts an encoding on both Python 2.6+ and Python 3.
	    import io

	    # Parse inside a 'with' so the input handle is closed as soon as the
	    # soup has been built, instead of being left open.
	    # NOTE(review): no parser is named, so bs4 picks whichever one is
	    # installed; consider pinning one for reproducible output.
	    with open(infile, 'r') as source:
	        soup = BeautifulSoup(source)

	    # Write text and let the file object do the UTF-8 encoding. The old
	    # ``x.encode('utf-8') + ","`` mixed bytes with str, which fails on
	    # Python 3.
	    with io.open(outfile, 'w', encoding='utf-8') as sink:

	        # find the h2 with the school title and write it
	        sink.write(soup('h2')[0].string + u",")

	        # iterate through the soup of <tr> tags
	        for row in soup('tr'):

	            # drill down to the direct children of each row
	            for cell in row:

	                # .string is None for empty tags and for tags with more
	                # than one child; skip those rather than fail
	                if cell.string is not None:
	                    sink.write(cell.string + u",")
No results found