v2e4lisp · January 9, 2013 09:30
diff --git a/place.py b/place.py
 from bs4 import BeautifulSoup as Magic
 import urllib


 # urllib.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg",
 # "1.jpg")



 # Index page that is downloaded and scanned for links.
 base = "http://www.gojapan.jp/toku3/"
 # URL prefix that each derived image filename is appended to.
 image_base = "http://www.gojapan.jp/toku3/aaimage/ken/"
 # Fetch the raw index page (module-level network I/O: runs on import).
 html = urllib.urlopen(base).read()
 # NOTE(review): no parser is passed, so bs4 picks one from whatever is
 # installed -- confirm the result does not depend on the machine.
 soup = Magic(html)
 # Every tag whose CSS class is "right03"; their <a> children are processed
 # by the loop further down.
 divs = soup.find_all(class_="right03")
 # Collects the UTF-8 encoded link texts, later written to the "place" file.
 content = []

 def fix_encode(text):
    """Return *text* as a UTF-8 encoded byte string.

    The value is passed through ``unicode()`` first, then encoded.
    """
    as_unicode = unicode(text)
    return as_unicode.encode('utf-8')

 def get_image(url, path):
    """Download the resource at *url* and store it in the local file *path*."""
    urllib.urlretrieve(url, filename=path)


 for div in divs:
    links = div.find_all('a')
    for link in links:
        content.append( fix_encode(link.get_text()) )
        filename = link.get("href").split('/')[-1][:-5] + ".jpg"
        print ">",
        get_image( image_base + filename, filename )

 print '.'

 f = open("place", "w")
 f.write('\n'.join(content))
 print 'done'
	from bs4 import BeautifulSoup as Magic
	import urllib


	# urllib.urlretrieve("http://www.gunnerkrigg.com//comics/00000001.jpg",
	# "1.jpg")



	# Index page that is downloaded and scanned for links.
	base = "http://www.gojapan.jp/toku3/"
	# URL prefix that each derived image filename is appended to.
	image_base = "http://www.gojapan.jp/toku3/aaimage/ken/"
	# Fetch the raw index page (module-level network I/O: runs on import).
	html = urllib.urlopen(base).read()
	# NOTE(review): no parser is passed, so bs4 picks one from whatever is
	# installed -- confirm the result does not depend on the machine.
	soup = Magic(html)
	# Every tag whose CSS class is "right03"; their <a> children are processed
	# by the loop further down.
	divs = soup.find_all(class_="right03")
	# Collects the UTF-8 encoded link texts, later written to the "place" file.
	content = []

	def fix_encode(text):
	    """Return *text* as a UTF-8 encoded byte string.

	    The value is passed through ``unicode()`` first, then encoded.
	    """
	    return unicode(text).encode('utf-8')

	def get_image(url, path):
	    """Download the resource at *url* and store it in the local file *path*."""
	    urllib.urlretrieve(url, path)


	for div in divs:
	links = div.find_all('a')
	for link in links:
	content.append( fix_encode(link.get_text()) )
	filename = link.get("href").split('/')[-1][:-5] + ".jpg"
	print ">",
	get_image( image_base + filename, filename )

	print '.'

	f = open("place", "w")
	f.write('\n'.join(content))
	print 'done'