Skip to content

Instantly share code, notes, and snippets.

@HeartSaVioR
Last active December 21, 2015 01:38
Show Gist options
  • Save HeartSaVioR/6228652 to your computer and use it in GitHub Desktop.
Save HeartSaVioR/6228652 to your computer and use it in GitHub Desktop.
import urllib
from time import sleep
# Sticker image crawler: walks a range of numeric sticker ids, builds each
# image URL from the template below, and saves every PNG that is served
# successfully into the current working directory.
#
# NOTE: this script targets Python 2 (urllib.urlopen).  print is called with a
# single parenthesised argument so the output is the same under the Python 2
# print statement.
url_template = "https://m.mypeople.daum.net/img/i1.daumcdn.net/air21.sticker/pc/still_img/sticker_%03d.png"
start_range = 0
end_range = 1500  # 2013/08/14 last id : 1457
# Delay (in seconds) between requests; failures back off a little longer.
sleep_for_crawl_success = 1
sleep_for_crawl_fail = 2


def _download_sticker(url, filename):
    """Fetch *url* and store the body in *filename* when it is a valid PNG.

    The response is accepted only if the status code is 200 and the
    content-type header is exactly "image/png".

    Returns True when the file was written, False when the response was
    rejected.  Network and file-system errors propagate to the caller.
    """
    http = urllib.urlopen(url)
    try:
        if http.getcode() != 200:
            print("status code is not 200")
            return False
        # .get() yields None for a missing header, so one comparison covers
        # both the "missing" and the "wrong type" cases.
        if http.headers.get('content-type') != "image/png":
            print("content-type not found or not image/png")
            return False
        # Binary mode: PNG data must not go through newline translation.
        with open(filename, 'wb') as fw:
            fw.write(http.read())
        return True
    finally:
        # Always release the connection, whatever the outcome.
        http.close()


for sticker_id in range(start_range, end_range + 1):
    url = url_template % sticker_id
    # The last path component of the URL doubles as the local file name.
    filename = url.split('/')[-1]
    print("about to download %s -> %s" % (url, filename))
    try:
        success = _download_sticker(url, filename)
    except Exception as exc:
        # Best-effort crawl: report the failure and move on to the next id.
        # KeyboardInterrupt / SystemExit are deliberately not caught so the
        # crawl can still be aborted with Ctrl-C.
        print("cannot download %s: %s" % (url, exc))
        success = False
    # Throttle outside of any finally clause so an interrupt exits at once.
    sleep(sleep_for_crawl_success if success else sleep_for_crawl_fail)
print("end process...")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment