Created
February 25, 2016 20:36
-
-
Save pawelmhm/4b0184e0cb97f2473af0 to your computer and use it in GitHub Desktop.
basic crawler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3.5 | |
import asyncio | |
from aiohttp import ClientSession | |
class Crawler(object):
    """Fetch a fixed range of numbered pages concurrently, then stop the loop.

    Pages ``url.format(i)`` are requested for every ``i`` from the initial
    value of ``i`` up to, but not including, ``limit``.  Each page is
    requested exactly once; every response body is printed as it arrives.
    """

    # Number of the next page to schedule; substituted into ``url``.
    i = 1
    # Exclusive upper bound for ``i``: scheduling stops once ``i`` reaches it.
    limit = 5
    # URL template; ``{}`` is replaced by the page number.
    url = "http://localhost:8080/{}"

    def __init__(self, loop):
        """Remember the event loop and start with no in-flight requests.

        Args:
            loop: the asyncio event loop that fetch tasks are created on and
                that ``run()`` stops once everything has finished.
        """
        self.loop = loop
        # Fetch tasks that have been scheduled but have not completed yet.
        self.tasks = set()

    def busy(self):
        """Return True while there are still pages left to schedule."""
        return self.i < self.limit

    def tasks_remain(self):
        """Print the number of in-flight fetches; return True if there are any."""
        print("tasks remaining {}".format(len(self.tasks)))
        return len(self.tasks) != 0

    async def run(self):
        """Schedule every page, wait for all fetches, then stop the loop."""
        while self.busy():
            # start() claims the page number before returning, so each page
            # is scheduled exactly once no matter how slowly responses arrive.
            await self.start()
            # Yield so that fetches scheduled so far can make progress.
            await asyncio.sleep(0)
        print("outside run")
        while self.tasks_remain():
            # Sleep until at least one fetch finishes instead of polling the
            # set on every loop iteration.  The removal callbacks registered
            # in start() run before this coroutine is resumed.
            await asyncio.wait(self.tasks, return_when=asyncio.FIRST_COMPLETED)
        self.loop.stop()

    async def start(self):
        """Schedule the fetch of the next page and track it in ``self.tasks``."""
        page_url = self.url.format(self.i)
        # Advance at scheduling time (not on completion) so that the next
        # call never requests the same page again.
        self.i += 1

        task = self.loop.create_task(self.fetch(page_url))
        task.add_done_callback(self.print_response)

        def remove_task(finished):
            print("about to remove task")
            # discard(): tolerate a task that is already gone from the set.
            self.tasks.discard(finished)

        task.add_done_callback(remove_task)
        self.tasks.add(task)

    async def fetch(self, url):
        """Download ``url`` and return the raw response body.

        Args:
            url: absolute URL to request with HTTP GET.

        Returns:
            The response body as returned by ``response.read()``.
        """
        # The asynchronous context managers close the session and release
        # the response even when the request fails part-way through.
        async with ClientSession() as session:
            print("about to fetch... {}".format(url))
            async with session.get(url) as response:
                return await response.read()

    def print_response(self, response):
        """Done-callback: print the outcome of a finished fetch task.

        Args:
            response: the completed task wrapping a ``fetch()`` call.
        """
        if response.cancelled():
            print("request was cancelled")
            return
        error = response.exception()
        if error is not None:
            # Report the failure here; calling result() would re-raise it
            # inside the event loop's callback machinery.
            print("request failed: {!r}".format(error))
            return
        print("got response here {}".format(response.result()))
# Script entry: drive a single Crawler to completion on a dedicated loop.
loop = asyncio.new_event_loop()
# Install it as the current loop so code that looks the loop up implicitly
# ends up on the same one.
asyncio.set_event_loop(loop)
c = Crawler(loop)
try:
    loop.create_task(c.run())
    # Crawler.run() calls loop.stop() when it is done, which makes
    # run_forever() return.
    loop.run_forever()
finally:
    # Release the loop's resources even if the crawl is interrupted.
    loop.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment