Created
January 19, 2015 19:33
-
-
Save jackyyf/0c74113d11508bf0b02a to your computer and use it in GitHub Desktop.
Gist by paste.py @ 2015-01-20 03:33:37.188661
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- encoding: utf-8 -*- | |
# Author: Yifu Yu <[email protected]> | |
from tornado.httpclient import * | |
from tornado.ioloop import * | |
import re | |
import os.path | |
import sys | |
# Matches a double-quoted absolute http:// URL ending in .jpg, .gif or .png and
# captures the URL without the quotes. The URL body is restricted to non-quote
# characters so a match can never run from one quoted string into the next one
# on the same line (a greedy ``.*`` would swallow everything in between).
img_patt = re.compile(r'"(http://[^"]*\.(?:jpg|gif|png))"')
class FileHandler(object):
    """Sink that streams one download into a file on disk.

    ``write`` is meant to be used as a streaming callback (called once per
    received chunk) and ``close`` as the completion callback of the same
    request. ``close`` also maintains the module-level ``req`` counter of
    outstanding downloads and stops the current IOLoop when it hits zero.
    """

    def __init__(self, filename, dirname='.'):
        """Open ``dirname/filename`` for binary writing.

        The target directory is created if it does not exist yet.
        """
        self.filename = filename
        try:
            os.makedirs(dirname)
        except OSError:
            # Already existing is fine; anything else (permissions, a file in
            # the way, ...) is a real error and must surface.
            if not os.path.isdir(dirname):
                raise
        self.f = open(os.path.join(dirname, filename), 'wb')

    def write(self, chunked):
        """Append one received chunk to the file and log its size to stderr."""
        sys.stderr.write(
            'Writed %d bytes to %s' % (len(chunked), self.filename) + '\n')
        self.f.write(chunked)

    def close(self, *args, **kwargs):
        """Close the file and account for the finished download.

        Any arguments passed by the caller (e.g. the response object handed
        to a completion callback) are accepted and ignored.
        """
        sys.stderr.write('Finished file %s' % self.filename + '\n')
        self.f.close()
        # ``req`` is the module-level count of downloads still in flight; the
        # last handler to finish shuts the event loop down.
        global req
        req -= 1
        if req == 0:
            IOLoop.current().stop()
# Use the libcurl-backed implementation for all asynchronous fetches.
AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")

# Fetch the front page synchronously; it is scanned line by line for image URLs.
data = HTTPClient().fetch('http://www.bilibili.com/').body.split('\n')
fetcher = AsyncHTTPClient()

# Number of downloads still in flight. FileHandler.close() decrements it and
# stops the IOLoop once it reaches zero.
req = 0
# Base names already scheduled: two URLs with the same base name would open
# the same output file twice and interleave their chunks.
seen = set()
for line in data:
    # finditer picks up every image on the line, not just the first one.
    for res in img_patt.finditer(line):
        url = res.group(1)
        name = os.path.basename(url)
        if name in seen:
            continue
        seen.add(name)
        obj = FileHandler(name, './storage')
        req += 1
        fetcher.fetch(HTTPRequest(url, streaming_callback=obj.write), obj.close)

# With nothing scheduled no callback would ever stop the loop, so starting it
# would block forever.
if req:
    IOLoop.current().start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment