NaPs · December 29, 2008 11:34
diff --git a/allocine.py b/allocine.py
 #!/usr/bin/env python
 #coding=utf8

 import urllib2
 import re

 # URL templates for allocine.fr pages. The __SEARCH__ / __ID__ placeholders
 # are meant to be filled in with str.replace() before the URL is opened.
 MOVIE_SEARCH_URL = 'http://www.allocine.fr/recherche/?motcle=__SEARCH__&rub=1'
 MOVIE_DETAIL_URL = 'http://www.allocine.fr/film/fichefilm_gen_cfilm=__ID__.html'
 SHOW_SEARCH_URL = 'http://www.allocine.fr/recherche/?motcle=__SEARCH__&rub=6'
 SHOW_DETAIL_URL = 'http://www.allocine.fr/series/ficheserie_gen_cserie=__ID__.html'

 # Patterns for the film / series links of a search-result page. Every match
 # has three groups: the digits of the id in the href, the digits that follow
 # "link" in the class attribute, and the inner HTML of the anchor.
 # NOTE(review): the '.' before "html" is not escaped, so it matches any
 # character -- confirm whether that is intended.
 REGEX_LINKTOFILM = re.compile(r'(?i)<a href="\/film\/fichefilm_gen_cfilm=(\d+).html" class="link(\d+)">(.*?)<\/a>')
 REGEX_LINKTOSERIE = re.compile(r'(?i)<a href="\/series\/ficheserie_gen_cserie=(\d+).html" class="link(\d+)">(.*?)<\/a>')

 class Allocine:
 	'''Search helpers for the Allocine web site.

 	Both finders download a search-result page and scrape the result links
 	out of it. They return a dict mapping the id found in each link (a
 	string of digits) to the link text with tags removed and surrounding
 	whitespace stripped.
 	'''

 	@staticmethod
 	def _search(url_template, link_regex, search):
 		'''Fetch one search page and return ``{id: name}`` for its links.

 		url_template -- URL containing the ``__SEARCH__`` placeholder.
 		link_regex   -- compiled pattern whose matches are 3-tuples of
 		                (id, class number, inner HTML of the link).
 		search       -- query text; spaces are replaced by ``+``.

 		Exceptions raised by urllib2 while opening or reading the page are
 		not caught here.
 		'''
 		query = search.replace(' ', '+')
 		response = urllib2.urlopen(url_template.replace('__SEARCH__', query))
 		try:
 			raw = response.read()
 		finally:
 			# Release the connection even when read() fails.
 			response.close()
 		data = raw.decode('latin1')
 		results = {}
 		for item_id, _klass, name in link_regex.findall(data):
 			# Drop any tags nested inside the link text.
 			results[item_id] = re.sub(r'<(.+?)>', '', name).strip()
 		return results

 	@staticmethod
 	def find_movie(search, debug=False):
 		'''Return ``{movie id: name}`` for the movie links of a search.

 		search -- query text.
 		debug  -- accepted for interface compatibility; currently unused.
 		'''
 		return Allocine._search(MOVIE_SEARCH_URL, REGEX_LINKTOFILM, search)

 	@staticmethod
 	def find_show(search, debug=False):
 		'''Return ``{show id: name}`` for the series links of a search.

 		search -- query text.
 		debug  -- accepted for interface compatibility; currently unused.
 		'''
 		return Allocine._search(SHOW_SEARCH_URL, REGEX_LINKTOSERIE, search)

 class AllocineMovie:
 	'''Details of one movie, scraped from its Allocine page.

 	Every key of REGEXPS becomes an instance attribute. Its value is the
 	first captured group of the matching pattern with tags removed and
 	surrounding whitespace stripped, or None when the pattern does not
 	match the downloaded page.
 	'''

 	# Attribute name -> pattern whose first group captures the raw value.
 	# (The '.' inside labels such as 'Dur.e' presumably stands in for an
 	# accented letter.)
 	REGEXPS = {
 		'title': re.compile(r'(?m)<title>(.*?)<\/title>'),
 		'directors': re.compile(r'(?m)<h4>R.alis. par <a .*?>(.*?)<\/a><\/h4>'),
 		'nat': re.compile(r'(?m)<h4>Film (.*?).&nbsp;</h4>'),
 		'genres': re.compile(r'(?m)<h4>Genre : (.*?)</h4>'),
 		'out_date': re.compile(r'(?m)<h4>Date de sortie : <b>(.*?)</b>'),
 		'duree': re.compile(r'(?m)<h4>Dur.e : (.*?).&nbsp;</h4>'),
 		'production_date': re.compile(r'(?m)<h4>Ann.e de production : (.*?)</h4>'),
 		'original_title': re.compile(r'(?m)<h4>Titre original : <i>(.*?)</i></h4>'),
 		'actors': re.compile(r'(?m)<h4>Avec (.*?) &nbsp;&nbsp;'),
 		'synopsis': re.compile(r'(?m)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4>(.*?)</h4>'),
 		'image': re.compile(r'(?m)<td valign="top" width="120".*?img src="(.*?)" border="0" alt="" class="affichette" />'),
 		'interdit': re.compile(r'(?m)<h4 style="color: #D20000;">Interdit(.*?)</h4>'),
 	}

 	def __init__(self, id, debug=False):
 		'''Download the page of movie ``id`` and populate the attributes.

 		id    -- movie id as a string; it is substituted into the URL.
 		debug -- when true, print the URL and every extracted value.

 		Exceptions raised by urllib2 are not caught here.
 		'''
 		url = MOVIE_DETAIL_URL.replace('__ID__', id)
 		if debug:
 			print('Getting %s' % url)
 		response = urllib2.urlopen(url)
 		try:
 			raw = response.read()
 		finally:
 			# Release the connection even when read() fails.
 			response.close()
 		data = raw.decode('latin1')
 		for regex_name, regex in self.REGEXPS.items():
 			match = regex.search(data)
 			value = None
 			if match:
 				# Keep only the text of the captured fragment.
 				value = re.sub(r'<.*?>', '', match.group(1)).strip()
 			setattr(self, regex_name, value)
 			if debug:
 				print('%s:  %s' % (regex_name, value))

 class AllocineShow:
 	'''Details of one TV show, scraped from its Allocine page.

 	Every key of REGEXPS becomes an instance attribute. Its value is the
 	first captured group of the matching pattern with tags removed and
 	surrounding whitespace stripped, or None when the pattern does not
 	match the downloaded page.
 	'''

 	# Attribute name -> pattern whose first group captures the raw value.
 	# Accented letters of the labels are written as '.' so that the patterns
 	# do not depend on the encoding of this source file.
 	REGEXPS = {
 		'title': re.compile(r'(?m)<title>(.*?)<\/title>'),
 		'producters': re.compile(r'(?m)<h4>Producteurs : (.*?)</h4>'),
 		'created_by': re.compile(r'(?m)<h4>S.rie cr..e par <a .*?>(.*?)</a>'),
 		'nat': re.compile(r'(?m)<span style=\'font-weight:bold\'>Nationalit.</span> : (.*?)</h5>'),
 		'genres': re.compile(r'(?m)<span style=\'font-weight:bold\'>Genre</span> : (.*?)&nbsp;&nbsp;'),
 		'duree': re.compile(r'(?m)<span style=\'font-weight:bold\'>Format</span> : (.+?).&nbsp;'),
 		'original_title': re.compile(r'(?m)<h4><b>Titre original : </b></h4><h4 style="color:#D20000"><b>(.*?)</b></h4>'),
 		'actors': re.compile(r'(?m)<h4>Avec : (.*?)&nbsp;&nbsp;'),
 		'synopsis': re.compile(r'(?m)<h5><span style=\'font-weight:bold\'>Synopsis</span>&nbsp;&nbsp;&nbsp;.*?<br />(.*?)</h5>'),
 		'image': re.compile(r'(?m)<td><div id=\'divM\' .*?><img src=\'(.*?)\' style=\'border:1px solid black;.*?>'),
 	}

 	def __init__(self, id, debug=False):
 		'''Download the page of show ``id`` and populate the attributes.

 		id    -- show id as a string; it is substituted into the URL.
 		debug -- when true, print the URL and every extracted value.

 		Exceptions raised by urllib2 are not caught here.
 		'''
 		url = SHOW_DETAIL_URL.replace('__ID__', id)
 		if debug:
 			print('Getting %s' % url)
 		response = urllib2.urlopen(url)
 		try:
 			raw = response.read()
 		finally:
 			# Release the connection even when read() fails.
 			response.close()
 		data = raw.decode('latin1')
 		for regex_name, regex in self.REGEXPS.items():
 			match = regex.search(data)
 			value = None
 			if match:
 				# Keep only the text of the captured fragment.
 				value = re.sub(r'<.*?>', '', match.group(1)).strip()
 			setattr(self, regex_name, value)
 			if debug:
 				print('%s:  %s' % (regex_name, value))
	#!/usr/bin/env python
	#coding=utf8

	import urllib2
	import re

	# URL templates for allocine.fr pages. The __SEARCH__ / __ID__ placeholders
	# are meant to be filled in with str.replace() before the URL is opened.
	MOVIE_SEARCH_URL = 'http://www.allocine.fr/recherche/?motcle=__SEARCH__&rub=1'
	MOVIE_DETAIL_URL = 'http://www.allocine.fr/film/fichefilm_gen_cfilm=__ID__.html'
	SHOW_SEARCH_URL = 'http://www.allocine.fr/recherche/?motcle=__SEARCH__&rub=6'
	SHOW_DETAIL_URL = 'http://www.allocine.fr/series/ficheserie_gen_cserie=__ID__.html'

	# Patterns for the film / series links of a search-result page. Every match
	# has three groups: the digits of the id in the href, the digits that follow
	# "link" in the class attribute, and the inner HTML of the anchor.
	# NOTE(review): the '.' before "html" is not escaped, so it matches any
	# character -- confirm whether that is intended.
	REGEX_LINKTOFILM = re.compile(r'(?i)<a href="\/film\/fichefilm_gen_cfilm=(\d+).html" class="link(\d+)">(.*?)<\/a>')
	REGEX_LINKTOSERIE = re.compile(r'(?i)<a href="\/series\/ficheserie_gen_cserie=(\d+).html" class="link(\d+)">(.*?)<\/a>')

	class Allocine:
		'''Search helpers for the Allocine web site.

		Both finders download a search-result page and scrape the result links
		out of it. They return a dict mapping the id found in each link (a
		string of digits) to the link text with tags removed and surrounding
		whitespace stripped.
		'''

		@staticmethod
		def _search(url_template, link_regex, search):
			'''Fetch one search page and return ``{id: name}`` for its links.

			url_template -- URL containing the ``__SEARCH__`` placeholder.
			link_regex   -- compiled pattern whose matches are 3-tuples of
			                (id, class number, inner HTML of the link).
			search       -- query text; spaces are replaced by ``+``.

			Exceptions raised by urllib2 while opening or reading the page are
			not caught here.
			'''
			query = search.replace(' ', '+')
			response = urllib2.urlopen(url_template.replace('__SEARCH__', query))
			try:
				raw = response.read()
			finally:
				# Release the connection even when read() fails.
				response.close()
			data = raw.decode('latin1')
			results = {}
			for item_id, _klass, name in link_regex.findall(data):
				# Drop any tags nested inside the link text.
				results[item_id] = re.sub(r'<(.+?)>', '', name).strip()
			return results

		@staticmethod
		def find_movie(search, debug=False):
			'''Return ``{movie id: name}`` for the movie links of a search.

			search -- query text.
			debug  -- accepted for interface compatibility; currently unused.
			'''
			return Allocine._search(MOVIE_SEARCH_URL, REGEX_LINKTOFILM, search)

		@staticmethod
		def find_show(search, debug=False):
			'''Return ``{show id: name}`` for the series links of a search.

			search -- query text.
			debug  -- accepted for interface compatibility; currently unused.
			'''
			return Allocine._search(SHOW_SEARCH_URL, REGEX_LINKTOSERIE, search)

	class AllocineMovie:
		'''Details of one movie, scraped from its Allocine page.

		Every key of REGEXPS becomes an instance attribute. Its value is the
		first captured group of the matching pattern with tags removed and
		surrounding whitespace stripped, or None when the pattern does not
		match the downloaded page.
		'''

		# Attribute name -> pattern whose first group captures the raw value.
		# (The '.' inside labels such as 'Dur.e' presumably stands in for an
		# accented letter.)
		REGEXPS = {
			'title': re.compile(r'(?m)<title>(.*?)<\/title>'),
			'directors': re.compile(r'(?m)<h4>R.alis. par <a .*?>(.*?)<\/a><\/h4>'),
			'nat': re.compile(r'(?m)<h4>Film (.*?).&nbsp;</h4>'),
			'genres': re.compile(r'(?m)<h4>Genre : (.*?)</h4>'),
			'out_date': re.compile(r'(?m)<h4>Date de sortie : <b>(.*?)</b>'),
			'duree': re.compile(r'(?m)<h4>Dur.e : (.*?).&nbsp;</h4>'),
			'production_date': re.compile(r'(?m)<h4>Ann.e de production : (.*?)</h4>'),
			'original_title': re.compile(r'(?m)<h4>Titre original : <i>(.*?)</i></h4>'),
			'actors': re.compile(r'(?m)<h4>Avec (.*?) &nbsp;&nbsp;'),
			'synopsis': re.compile(r'(?m)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4>(.*?)</h4>'),
			'image': re.compile(r'(?m)<td valign="top" width="120".*?img src="(.*?)" border="0" alt="" class="affichette" />'),
			'interdit': re.compile(r'(?m)<h4 style="color: #D20000;">Interdit(.*?)</h4>'),
		}

		def __init__(self, id, debug=False):
			'''Download the page of movie ``id`` and populate the attributes.

			id    -- movie id as a string; it is substituted into the URL.
			debug -- when true, print the URL and every extracted value.

			Exceptions raised by urllib2 are not caught here.
			'''
			url = MOVIE_DETAIL_URL.replace('__ID__', id)
			if debug:
				print('Getting %s' % url)
			response = urllib2.urlopen(url)
			try:
				raw = response.read()
			finally:
				# Release the connection even when read() fails.
				response.close()
			data = raw.decode('latin1')
			for regex_name, regex in self.REGEXPS.items():
				match = regex.search(data)
				value = None
				if match:
					# Keep only the text of the captured fragment.
					value = re.sub(r'<.*?>', '', match.group(1)).strip()
				setattr(self, regex_name, value)
				if debug:
					print('%s:  %s' % (regex_name, value))

	class AllocineShow:
		'''Details of one TV show, scraped from its Allocine page.

		Every key of REGEXPS becomes an instance attribute. Its value is the
		first captured group of the matching pattern with tags removed and
		surrounding whitespace stripped, or None when the pattern does not
		match the downloaded page.
		'''

		# Attribute name -> pattern whose first group captures the raw value.
		# Accented letters of the labels are written as '.' so that the patterns
		# do not depend on the encoding of this source file.
		REGEXPS = {
			'title': re.compile(r'(?m)<title>(.*?)<\/title>'),
			'producters': re.compile(r'(?m)<h4>Producteurs : (.*?)</h4>'),
			'created_by': re.compile(r'(?m)<h4>S.rie cr..e par <a .*?>(.*?)</a>'),
			'nat': re.compile(r'(?m)<span style=\'font-weight:bold\'>Nationalit.</span> : (.*?)</h5>'),
			'genres': re.compile(r'(?m)<span style=\'font-weight:bold\'>Genre</span> : (.*?)&nbsp;&nbsp;'),
			'duree': re.compile(r'(?m)<span style=\'font-weight:bold\'>Format</span> : (.+?).&nbsp;'),
			'original_title': re.compile(r'(?m)<h4><b>Titre original : </b></h4><h4 style="color:#D20000"><b>(.*?)</b></h4>'),
			'actors': re.compile(r'(?m)<h4>Avec : (.*?)&nbsp;&nbsp;'),
			'synopsis': re.compile(r'(?m)<h5><span style=\'font-weight:bold\'>Synopsis</span>&nbsp;&nbsp;&nbsp;.*?<br />(.*?)</h5>'),
			'image': re.compile(r'(?m)<td><div id=\'divM\' .*?><img src=\'(.*?)\' style=\'border:1px solid black;.*?>'),
		}

		def __init__(self, id, debug=False):
			'''Download the page of show ``id`` and populate the attributes.

			id    -- show id as a string; it is substituted into the URL.
			debug -- when true, print the URL and every extracted value.

			Exceptions raised by urllib2 are not caught here.
			'''
			url = SHOW_DETAIL_URL.replace('__ID__', id)
			if debug:
				print('Getting %s' % url)
			response = urllib2.urlopen(url)
			try:
				raw = response.read()
			finally:
				# Release the connection even when read() fails.
				response.close()
			data = raw.decode('latin1')
			for regex_name, regex in self.REGEXPS.items():
				match = regex.search(data)
				value = None
				if match:
					# Keep only the text of the captured fragment.
					value = re.sub(r'<.*?>', '', match.group(1)).strip()
				setattr(self, regex_name, value)
				if debug:
					print('%s:  %s' % (regex_name, value))
No results found