Created
January 9, 2013 09:29
-
-
Save v2e4lisp/4491856 to your computer and use it in GitHub Desktop.
meibutu.name
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from bs4 import BeautifulSoup as Magic | |
import urllib | |
# Example of downloading a single image directly with urlretrieve:
# urllib.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg",
#                    "1.jpg")
# Page to scrape; the elements carrying class "top02" are searched for links.
base = "http://www.gojapan.jp/toku3/"

# Fetch the raw page body. The response is closed explicitly so the
# underlying connection is released even if read() raises.
response = urllib.urlopen(base)
try:
    html = response.read()
finally:
    response.close()

# NOTE(review): no parser is named here, so Beautiful Soup chooses one from
# whatever is installed; results may differ between machines -- consider
# pinning a parser once the intended one is confirmed.
soup = Magic(html)
divs = soup.find_all(class_="top02")

# Filled further down with the UTF-8 encoded text of every link found.
content = []
def fix_encode(text):
    """Return *text* converted to a text string and encoded as UTF-8.

    Args:
        text: Any object; it is first coerced to the interpreter's text
            type (``unicode`` on Python 2, ``str`` on Python 3).

    Returns:
        The UTF-8 encoded byte string of the coerced text.
    """
    try:
        text_type = unicode  # Python 2 text type
    except NameError:
        # Python 3 has no ``unicode`` builtin; ``str`` is the text type.
        text_type = str
    return text_type(text).encode('utf-8')
def get_image(url, path):
    """Download the resource at *url* and store it in the local file *path*."""
    urllib.urlretrieve(url, filename=path)
# Collect the UTF-8 encoded text of every <a> inside each matched element.
for div in divs:
    content.extend(fix_encode(link.get_text()) for link in div.find_all('a'))

# Write one link text per line. The ``with`` block guarantees the file is
# flushed and closed even if the write fails.
with open("meibutu", "w") as f:
    f.write('\n'.join(content))

# Parenthesised form prints the same text under Python 2's print statement.
print('done')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment