Skip to content

Instantly share code, notes, and snippets.

@fahadysf
Created February 26, 2017 22:42
Show Gist options
  • Save fahadysf/0814db1a183e637a249308fa8021fcd8 to your computer and use it in GitHub Desktop.
Save fahadysf/0814db1a183e637a249308fa8021fcd8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from multiprocessing import Pool, Manager
from http.server import BaseHTTPRequestHandler, HTTPServer
import urllib.request
import json
import cgi
import random, time
# Resources to read
#
# http://stackoverflow.com/a/1239252/603280
# http://stackoverflow.com/questions/13689927/how-to-get-the-amount-of-work-left-to-be-done-by-a-python-multiprocessing-pool
#
# This is your task which will run in its own process. You can modify it to have your desired args and kwargs and
# report back to the manager dictionary 'd' or any other manager resource as necessary.
# Read the documentation for multiprocessing to find the facilities provided by multiprocessing.Manager
def task(d, taskname, url_to_test):
    """Fetch *url_to_test* and report progress into the shared dict *d*.

    Parameters
    ----------
    d : dict-like
        Shared status dictionary (a ``multiprocessing.Manager().dict()``
        in this script), keyed by task name.
    taskname : str
        Key under which this task publishes its status.
    url_to_test : str
        URL to fetch.

    The entry for *taskname* moves from ``"running"`` to either
    ``"finished"`` (request completed) or ``"failed"`` (any error).
    """
    # Publish "running" BEFORE the blocking network call, so the monitor
    # loop can observe the task while the request is still in flight
    # (the original wrote it only after urlopen() had already returned).
    d[taskname] = {
        'status': "running",
        'url': url_to_test,
    }
    try:
        # Context manager guarantees the response/connection is closed.
        with urllib.request.urlopen(url_to_test) as response:
            data = response.read()
            status_code = response.status
    except Exception as exc:
        # Report the failure instead of leaving the task stuck in
        # "running" forever, which would hang the monitor loop.
        # http_status is included (as None) so status printers that
        # read it unconditionally do not KeyError.
        d[taskname] = {
            'status': "failed",
            'url': url_to_test,
            'http_status': None,
            'error': str(exc),
        }
        return
    # Mark finished even for an empty body: an empty-but-successful
    # response is still a terminal state (the original left it "running").
    d[taskname] = {
        'status': "finished",
        'url': url_to_test,
        'http_status': status_code,
        'data': data,
    }
    return
if __name__ == '__main__':
    # Initializing our global resources.
    # Run at most 2 tasks at a time.
    MAX_CONCURRENT_TASKS = 2
    p = Pool(processes=MAX_CONCURRENT_TASKS)
    m = Manager()
    d = m.dict()            # shared status dict, written by the workers
    finished_tasks = dict() # tasks that have reached a terminal state
    urls = ['http://ip4.me', 'http://bbc.com', 'http://onedrive.com', 'http://monopoly.com', 'http://teamliquid.net']

    # Launch the tasks; keep the AsyncResult handles so the work is not
    # silently discarded.
    results = []
    for i, url in enumerate(urls):
        results.append(p.apply_async(task, (d, "task-" + str(i), url)))
    p.close()  # no more work will be submitted to the pool

    # Poll the shared dict and print the status of every known task.
    pending_work = True
    while pending_work:
        for key in d.keys():
            info = d[key]
            if info['status'] == "running":
                print("%s has status %s - URL: %s" % (key, info['status'], info['url']))
            else:
                # .get() instead of [..] so an entry without http_status
                # (e.g. a "failed" one) cannot raise KeyError here.
                print("%s has status %s - URL: %s - Response Code: %s"
                      % (key, info['status'], info['url'], info.get('http_status')))
                # Any non-"running" state is terminal; counting only
                # "finished" entries would loop forever if a single
                # URL failed or returned an empty body.
                finished_tasks[key] = info
        # Sleep once per polling pass, not once per key.
        time.sleep(0.1)
        # Exit once every launched task has reported a terminal state.
        if len(finished_tasks) == len(urls):
            pending_work = False
    p.join()  # reap the worker processes before exiting
    print("Finished! Exiting now.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment