Created October 16, 2012 12:02
Tentative Python 3.4 async example
from concurrent.futures import Future

# This first part is a helper function to wait for the first Future in a group to complete
def _wait_first(futures):
    # futures must be a set, as items will be removed as they complete
    # We create a signalling future to return to our caller. We will copy
    # the result of the first future to complete over to this signalling future
    signal = Future()
    signal.completed = None
    def copy_result(completed):
        # We ignore every callback after the first one
        if signal.done():
            return
        # Keep track of which ones have been processed across multiple calls
        futures.remove(completed)
        # And report the underlying future which completed
        signal.completed = completed
        # It would be nice if we could also remove our callback from all the
        # other futures at this point, but the Future API doesn't currently allow that
        # Now we pass the result of this future through to our signalling future
        if completed.cancelled():
            signal.cancel()
            signal.set_running_or_notify_cancel()
        else:
            try:
                result = completed.result()
            except Exception as exc:
                signal.set_exception(exc)
            else:
                signal.set_result(result)
    # Here we hook our signalling future up to all our actual operations
    # If any of them are already complete, then the callback will fire immediately
    # and we're OK with that
    for f in futures:
        f.add_done_callback(copy_result)
    # And, for our signalling future to be useful, the caller must be able to access it
    return signal

# This is just a public version of the above helper that works with arbitrary iterables:
def wait_first(futures):
    # The helper needs a real set, so we give it one
    # This also makes sure all operations start immediately when passed a generator
    return _wait_first(set(futures))

# This is the API I'm most interested in, as it's the async equivalent of
# http://docs.python.org/py3k/library/concurrent.futures#concurrent.futures.as_completed,
# which powers the URL retrieval example in the docs:
# http://docs.python.org/py3k/library/concurrent.futures#threadpoolexecutor-example
# Note that this is an *ordinary iterator* that produces futures, not a tasklet
def as_completed(futures):
    # We ensure all the operations have started, and get ourselves a set to work with
    remaining = set(futures)
    while remaining:
        # The trick here is that we *don't yield the original futures directly*
        # Instead, we yield the signalling Futures created by _wait_first
        # We do end up with a bunch of useless callbacks registered, since there's
        # no way to say "we're not interested any more" :(
        yield _wait_first(remaining)
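
To get a feel for how these helpers behave on their own, here is a minimal sketch that exercises wait_first and as_completed with ordinary concurrent.futures Futures whose results are set by hand; no event loop or executor is involved, and the names a and b are purely illustrative:

from concurrent.futures import Future

# Two hand-made futures standing in for pending async operations
a = Future()
b = Future()

# Complete one of them up front
b.set_result("b finished")

# wait_first resolves as soon as *any* of the futures passed in is done
signal = wait_first([a, b])
print(signal.completed is b)   # True
print(signal.result())         # "b finished"

# Once the other future completes as well, as_completed hands back one
# signalling future per underlying operation, in completion order
a.set_result("a finished")
for sig in as_completed([a, b]):
    print(sig.result(), "<-", sig.completed)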

# Now for the classic "load multiple pages in parallel" async IO example
# First, a tasklet for loading a single page
@task
def load_url_async(url):
    # The async URL open operation does three things:
    # 1. kicks off the connection process
    # 2. registers a callback with the event handler that will signal a Future object when IO is complete
    # 3. returns the Future object
    # We then *yield* the Future object, at which point the task decorator takes over and registers a callback
    # with the *Future* object to resume this generator with the *result* that was passed to the Future object
    # (one possible shape for that decorator is sketched after this function)
    conn = yield urllib.urlopen_async(url)
    # We assume "conn.read()" is defined in such a way that it allows both "read everything at once" usage *and*
    # a usage where you read the individual bits of data as they arrive, like this:
    #     for wait_for_chunk in conn.read():
    #         chunk = yield wait_for_chunk
    # The secret is that conn.read() would be an *ordinary generator* in that case rather than a tasklet.
    # You could also do a version that *only* supported complete reads, in which case the "from" wouldn't be needed
    return (yield from conn.read())
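
# The @task decorator used in this file is assumed rather than defined. To make the
# "resume the generator when the yielded Future completes" protocol concrete, one
# *possible* minimal implementation (error handling deliberately simplified) could be:
from concurrent.futures import Future, CancelledError

def task(gen_func):
    def wrapper(*args, **kwds):
        # Calling a tasklet returns a Future for its eventual return value
        outcome = Future()
        gen = gen_func(*args, **kwds)
        def step(value=None, exc=None):
            # Resume the generator, either with a value or by throwing in an exception
            try:
                if exc is not None:
                    yielded = gen.throw(exc)
                else:
                    yielded = gen.send(value)
            except StopIteration as stop:
                outcome.set_result(stop.value)   # the tasklet's "return" value
            except Exception as error:
                outcome.set_exception(error)
            else:
                # The tasklet yielded another Future: resume again once it completes
                def resume(completed):
                    if completed.cancelled():
                        step(exc=CancelledError())
                    elif completed.exception() is not None:
                        step(exc=completed.exception())
                    else:
                        step(value=completed.result())
                yielded.add_done_callback(resume)
        step()
        return outcome
    return wrapper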

# And now the payoff: defining a tasklet to read a bunch of URLs in parallel,
# processing them in the order of loading rather than the order of requesting them,
# or having to wait until the slowest load completes before doing anything
@task
def example(urls):
    # We define the tasks we want to run based on the given URLs
    # This results in an iterable of Future objects that will fire when
    # the associated page has been read completely
    future_to_url = {load_url_async(url): url for url in urls}
    # And now we use our helper iterable to run things in parallel
    # and get access to the results as they complete
    for wait_for_page in as_completed(future_to_url):
        # At this point, wait_for_page.completed is still None
        # It will be set once we resume after the yield
        try:
            data = yield wait_for_page
        except Exception as exc:
            url = future_to_url[wait_for_page.completed]
            print("Something broke for {!r} ({}: {})".format(url, type(exc), exc))
        else:
            url = future_to_url[wait_for_page.completed]
            print("Loaded {} bytes from {!r}".format(len(data), url))

# The real kicker here? Replace "yield wait_for_page" with "wait_for_page.result()"
# and "future_to_url[wait_for_page.completed]" with "future_to_url[wait_for_page]"
# and you have the equivalent concurrent.futures code. The difference is that with
# the concurrent.futures Executor model, as_completed is a *blocking* iterator, so
# it knows which underlying future to pass back on each iteration. For async
# operation, we instead pass back a *new* Future on each iteration, which then
# needs to be yielded in order to say "tell me when the next operation is complete".
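
For comparison, here is roughly what that equivalent blocking concurrent.futures code looks like, following the ThreadPoolExecutor example from the docs linked above (the load_url helper and the worker count are just illustrative choices):

from concurrent.futures import ThreadPoolExecutor, as_completed
import urllib.request

def load_url(url, timeout=60):
    # Blocking read of the whole page, run in a worker thread
    with urllib.request.urlopen(url, timeout=timeout) as conn:
        return conn.read()

def example(urls):
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_url = {executor.submit(load_url, url): url for url in urls}
        # Here as_completed blocks until the next future is done, and hands
        # back the underlying future itself rather than a new signalling one
        for future in as_completed(future_to_url):
            url = future_to_url[future]
            try:
                data = future.result()
            except Exception as exc:
                print("Something broke for {!r} ({}: {})".format(url, type(exc), exc))
            else:
                print("Loaded {} bytes from {!r}".format(len(data), url))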