chesster · January 3, 2014 22:27
diff --git a/filmweb.py b/filmweb.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import urllib
 import sys
 import os
 import re

 import unicodecsv as csv
 from BeautifulSoup import BeautifulSoup

 try:
 	from cStringIO import StringIO
 except:
 	from StringIO import StringIO


 class FilmwebTitile(object):
 	"""Scraper that walks numeric filmweb.pl film ids and records basic metadata.

 	For every id from ``start`` to ``stop`` (inclusive) the film page is
 	fetched and, when the title, original title, year and country list are
 	all present, one CSV row is appended to the output file.
 	"""

 	# Pattern applied to the ``href`` of every <a> tag to pick out country
 	# links. Compiled once here instead of on every page fetch.
 	_COUNTRY_HREF = re.compile(r'.*countryIds*')

 	def __init__(self, start=1, stop=700000):
 		"""Configure the id range to crawl.

 		:param start: first film id to request.
 		:param stop: last film id to request (inclusive).
 		"""
 		self.url   = "http://www.filmweb.pl/Film?id=%s"
 		self.start = start
 		self.stop  = stop


 	def get_info(self, id):
 		"""Fetch one film page and extract its metadata.

 		:param id: film id substituted into ``self.url``.
 		:returns: ``(title, original_title, year, countries)`` where
 			``countries`` is a ``|``-joined string, or ``None`` when any
 			field is missing/empty or fetching/parsing raised an error
 			(the error is printed, never propagated).
 		"""
 		try:
 			response = urllib.urlopen(self.url % id)
 			try:
 				html = response.read()
 			finally:
 				# Release the connection even when reading fails; go() issues
 				# one request per id, so leaked handles would pile up.
 				response.close()

 			soup   = BeautifulSoup(html)
 			titles = soup.find('div', attrs = {'class': 'filmTitle'})

 			# Title (missing container or <h1> leaves it as None)
 			try:
 				title = titles.h1.text
 			except Exception:
 				title = None

 			# Original Title
 			try:
 				original_title = titles.h2.text
 			except Exception:
 				original_title = None

 			# Year: the <span> text with its first and last character dropped
 			try:
 				year = titles.span.text.strip()[1:-1]
 			except Exception:
 				year = ''

 			# Country
 			try:
 				links     = soup.findAll('a', href = self._COUNTRY_HREF)
 				countries = '|'.join([e.text for e in links])
 			except Exception:
 				# Must bind the same name the completeness check reads.
 				countries = None

 			# Only complete records are reported.
 			if title and original_title and year and countries:
 				return (title, original_title, year, countries,)
 			return None

 		except Exception as e:
 			print("[ERROR]: %s" % e)
 			return None


 	def go(self, db_file):
 		"""Crawl every id from ``self.start`` to ``self.stop`` inclusive.

 		Each film for which :meth:`get_info` returns data is printed and
 		appended to ``db_file`` as one CSV row.

 		:param db_file: path of the output file, opened in append mode.
 		"""
 		with open(db_file, "a") as myfile:
 			for id in range(self.start, self.stop+1):
 				csv_list = self.get_info(id)
 				if csv_list:
 					print("[%s] %s" % (id, csv_list[0]))
 					myfile.write(self.csv_row(csv_list))


 	def csv_row(self, row):
 		"""Serialise ``row`` as a single CSV line.

 		:param row: sequence of field values.
 		:returns: the text ``csv.writer`` emits for that row.
 		"""
 		csvfile   = StringIO()
 		csvwriter = csv.writer(csvfile)
 		csvwriter.writerow(row)
 		return csvfile.getvalue()

 if __name__ == '__main__':
 	# Script entry point: crawl with the default settings and append the
 	# resulting rows to filmweb.txt in the current directory.
 	FilmwebTitile().go('filmweb.txt')
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import urllib
	import sys
	import os
	import re

	import unicodecsv as csv
	from BeautifulSoup import BeautifulSoup

	try:
	from cStringIO import StringIO
	except:
	from StringIO import StringIO


	class FilmwebTitile(object):
		"""Scraper that walks numeric filmweb.pl film ids and records basic metadata.

		For every id from ``start`` to ``stop`` (inclusive) the film page is
		fetched and, when the title, original title, year and country list are
		all present, one CSV row is appended to the output file.
		"""

		# Pattern applied to the ``href`` of every <a> tag to pick out country
		# links. Compiled once here instead of on every page fetch.
		_COUNTRY_HREF = re.compile(r'.*countryIds*')

		def __init__(self, start=1, stop=700000):
			"""Configure the id range to crawl.

			:param start: first film id to request.
			:param stop: last film id to request (inclusive).
			"""
			self.url = "http://www.filmweb.pl/Film?id=%s"
			self.start = start
			self.stop = stop


		def get_info(self, id):
			"""Fetch one film page and extract its metadata.

			:param id: film id substituted into ``self.url``.
			:returns: ``(title, original_title, year, countries)`` where
				``countries`` is a ``|``-joined string, or ``None`` when any
				field is missing/empty or fetching/parsing raised an error
				(the error is printed, never propagated).
			"""
			try:
				response = urllib.urlopen(self.url % id)
				try:
					html = response.read()
				finally:
					# Release the connection even when reading fails; go()
					# issues one request per id, so leaks would pile up.
					response.close()

				soup = BeautifulSoup(html)
				titles = soup.find('div', attrs = {'class': 'filmTitle'})

				# Title (missing container or <h1> leaves it as None)
				try:
					title = titles.h1.text
				except Exception:
					title = None

				# Original Title
				try:
					original_title = titles.h2.text
				except Exception:
					original_title = None

				# Year: the <span> text with its first and last character dropped
				try:
					year = titles.span.text.strip()[1:-1]
				except Exception:
					year = ''

				# Country
				try:
					links = soup.findAll('a', href = self._COUNTRY_HREF)
					countries = '|'.join([e.text for e in links])
				except Exception:
					# Must bind the same name the completeness check reads.
					countries = None

				# Only complete records are reported.
				if title and original_title and year and countries:
					return (title, original_title, year, countries,)
				return None

			except Exception as e:
				print("[ERROR]: %s" % e)
				return None


		def go(self, db_file):
			"""Crawl every id from ``self.start`` to ``self.stop`` inclusive.

			Each film for which :meth:`get_info` returns data is printed and
			appended to ``db_file`` as one CSV row.

			:param db_file: path of the output file, opened in append mode.
			"""
			with open(db_file, "a") as myfile:
				for id in range(self.start, self.stop+1):
					csv_list = self.get_info(id)
					if csv_list:
						print("[%s] %s" % (id, csv_list[0]))
						myfile.write(self.csv_row(csv_list))


		def csv_row(self, row):
			"""Serialise ``row`` as a single CSV line.

			:param row: sequence of field values.
			:returns: the text ``csv.writer`` emits for that row.
			"""
			csvfile = StringIO()
			csvwriter = csv.writer(csvfile)
			csvwriter.writerow(row)
			return csvfile.getvalue()

	if __name__ == '__main__':
		# Script entry point: crawl with the default settings and append the
		# resulting rows to filmweb.txt in the current directory.
		filmweb = FilmwebTitile()
		filmweb.go('filmweb.txt')