Skip to content

Instantly share code, notes, and snippets.

@v2e4lisp
Created January 9, 2013 09:29
Show Gist options
  • Save v2e4lisp/4491856 to your computer and use it in GitHub Desktop.
Save v2e4lisp/4491856 to your computer and use it in GitHub Desktop.
meibutu.name
from bs4 import BeautifulSoup as Magic
import urllib
# Example: download a single image to a local file with urlretrieve.
# urllib.urlretrieve("http://www.gunnerkrigg.com/comics/00000001.jpg",
# "1.jpg")
# Index page of the site section to scrape.
base = "http://www.gojapan.jp/toku3/"

# Download the raw page bytes. The response is closed in a ``finally`` so the
# connection is released even if ``read()`` fails part-way through.
response = urllib.urlopen(base)
try:
    html = response.read()
finally:
    response.close()

# Name the parser explicitly: without it bs4 picks whichever parser happens to
# be installed (and warns), so the parse tree can differ between machines.
soup = Magic(html, "html.parser")

# Every element carrying the CSS class "top02".
# NOTE(review): presumably these wrap the listing links -- confirm on the page.
divs = soup.find_all(class_="top02")

# Accumulates the UTF-8 encoded link texts that are written to disk at the end.
content = []
def fix_encode(text):
    """Return *text* as a UTF-8 encoded byte string.

    Args:
        text: A unicode string, or any object convertible with ``unicode()``.
            Byte strings are returned unchanged.

    Returns:
        The UTF-8 encoded bytes.
    """
    if isinstance(text, bytes):
        # ``unicode(text)`` would implicitly decode a byte string as ASCII and
        # raise UnicodeDecodeError on any non-ASCII byte, so already-encoded
        # input is passed through untouched.
        # NOTE(review): this assumes such byte strings are already UTF-8.
        return text
    return unicode(text).encode('utf-8')
def get_image(url, path):
    """Download the resource at *url* and store it in the local file *path*.

    Args:
        url: Address of the resource to fetch.
        path: Destination filename on the local filesystem.
    """
    urllib.urlretrieve(url, filename=path)
# Gather the text of every anchor inside each matched section, encoded as
# UTF-8, in the order ``find_all`` returns them.
for section in divs:
    content.extend(
        fix_encode(anchor.get_text()) for anchor in section.find_all('a')
    )
# Write one link text per line. The ``with`` block guarantees the file is
# flushed and closed even if the write raises.
with open("meibutu", "w") as f:
    f.write('\n'.join(content))
print('done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment