Skip to content

Instantly share code, notes, and snippets.

@bussiere
Created October 26, 2010 20:24
Show Gist options
  • Save bussiere/647715 to your computer and use it in GitHub Desktop.
Save bussiere/647715 to your computer and use it in GitHub Desktop.
question ?
# -*- coding: utf-8 -*-
import urllib
import re
def _append_new_questions(page, question):
    """Return *question* extended with every question of *page* not yet in it.

    A "question" is the text that sits between a ``>`` and a ``?<`` on one
    line of the page (``.`` does not cross newlines), i.e. tag content that
    ends with a question mark.
    """
    for fragment in re.findall(r">.*?\?<", page):
        # Only the text after the last <br /> is the question itself; the
        # surrounding angle brackets come from the regex anchors.
        text = fragment.split("<br />")[-1].replace(">", "").replace("<", "")
        # Substring test against everything collected so far: skips text
        # that already occurs in the accumulated string.
        if text not in question:
            question += text
    return question


def ouvrirpage(num,question,opener,url):
    """Walk the paginated pages starting at *url* and collect their questions.

    Each page is fetched through *opener*, its questions are appended to
    *question*, and the ``navPages`` link labelled *num* is followed to reach
    the next page (then *num* + 1, and so on) until no such link exists.

    Args:
        num: Label of the pagination link to follow from the first page.
        question: Text accumulated so far; new questions are appended to it.
        opener: Object whose ``open(url)`` returns a file-like object with
            ``read()`` and ``close()``.
        url: Address of the first page to fetch.

    Returns:
        The accumulated question text (never ``None``).
    """
    # Iterate instead of recursing so long topics cannot hit the recursion
    # limit, and so the result is always returned to the caller.
    while True:
        f = opener.open(url)
        try:
            page = f.read()
        finally:
            f.close()

        # Scan every fetched page, including the one without a next link.
        question = _append_new_questions(page, question)

        # Capture only the href of the anchor labelled `num`; [^"]* keeps the
        # match inside a single anchor even when several share one line.
        link = re.search(
            r'<a class="navPages" href="([^"]*)">%i</a>' % num, page)
        if link is None:
            return question

        print(num)
        url = link.group(1)
        num += 1
# Crawl the forum topic, following the pagination link labelled 132 from the
# first page, and save the collected questions to question.txt.
opener = urllib.FancyURLopener({})
question = ouvrirpage(132,"",opener,"http://suzumiya.haruhi.fr/index.php/topic,817.0.html")
# `with` closes the output file even if the write raises.
with open('question.txt', 'w') as questiontxt:
    questiontxt.write(question)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment