Skip to content

Instantly share code, notes, and snippets.

@v2e4lisp
Created January 9, 2013 09:30
Show Gist options
  • Save v2e4lisp/4491861 to your computer and use it in GitHub Desktop.
Save v2e4lisp/4491861 to your computer and use it in GitHub Desktop.
find place info
import sys
import urllib

from bs4 import BeautifulSoup as Magic
# urllib.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg",
# "1.jpg")
# Index page whose links are scraped, and the directory holding the
# corresponding images.
base = "http://www.gojapan.jp/toku3/"
image_base = "http://www.gojapan.jp/toku3/aaimage/ken/"

# ``urlopen`` lives in ``urllib`` on Python 2 and in ``urllib.request`` on
# Python 3; resolve whichever one is available.
try:
    from urllib import urlopen
except ImportError:
    from urllib.request import urlopen

# Fetch the index page, making sure the connection is released even if the
# read fails (the Python 2 response object is not a context manager).
response = urlopen(base)
try:
    html = response.read()
finally:
    response.close()

# Name the parser explicitly: otherwise bs4 picks whichever parser happens to
# be installed, so results can differ between machines.
soup = Magic(html, "html.parser")

# Elements carrying the CSS class "right03"; their <a> tags are walked below.
divs = soup.find_all(class_="right03")

# UTF-8 encoded link texts, collected by the main loop and written to "place".
content = []
def fix_encode(text):
    """Return *text* as a UTF-8 encoded byte string.

    Args:
        text: Any object. Text is encoded directly; other objects are first
            converted to text (``unicode`` on Python 2, ``str`` on Python 3).
            A byte string is treated as UTF-8 data.

    Returns:
        The UTF-8 encoded bytes (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        UnicodeDecodeError: If *text* is a byte string that is not valid
            UTF-8.
    """
    if isinstance(text, bytes):
        # Round-trip through UTF-8 instead of the implicit ASCII decode that
        # ``unicode(text)`` would apply; this also avoids ``str(b"...")``
        # producing a "b'...'" repr on Python 3.
        return text.decode('utf-8').encode('utf-8')
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: ``str`` is already the text type
    return text_type(text).encode('utf-8')
def get_image(url, path):
    """Download the resource at *url* and store it in the local file *path*.

    Args:
        url: Address of the resource to fetch.
        path: Destination filename; passed straight to ``urlretrieve``.

    Returns:
        None. Any error raised by ``urlretrieve`` propagates to the caller.
    """
    try:
        retrieve = urllib.urlretrieve  # Python 2
    except AttributeError:
        # Python 3 moved the function into ``urllib.request``.
        from urllib.request import urlretrieve as retrieve
    retrieve(url, path)
# Walk every link inside the selected elements: record its text and download
# the image whose name matches the link target.
for div in divs:
    for link in div.find_all('a'):
        href = link.get("href")
        if not href:
            # An anchor without a target cannot be mapped to an image file.
            continue

        content.append(fix_encode(link.get_text()))

        # Image name = last path component of the link with its extension
        # replaced by ".jpg". The original sliced off five characters
        # (presumably ".html" -- TODO confirm); stripping the final extension
        # gives the same result for ".html" targets without hard-coding its
        # length.
        page_name = href.split('/')[-1]
        filename = page_name.rsplit('.', 1)[0] + ".jpg"

        # Progress marker: ">" before the download, " ." after it. Flush so
        # the marker is visible while the download is still running.
        sys.stdout.write(">")
        sys.stdout.flush()
        get_image(image_base + filename, filename)
        sys.stdout.write(" .\n")

# The collected names are already UTF-8 bytes, so write them in binary mode;
# the ``with`` block guarantees the file is flushed and closed.
with open("place", "wb") as f:
    f.write(b'\n'.join(content))

print('done')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment