Created
August 11, 2011 18:43
-
-
Save capttwinky/1140398 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
from Queue import Queue | |
from threading import Thread | |
from urllib2 import urlopen | |
from time import time | |
from random import shuffle | |
# Hosts to fetch. Entries are bare host names; the worker threads prepend
# "http://" before opening them.
# NOTE(review): "bogus.url" looks like a deliberately unresolvable entry used
# to exercise the error path -- confirm before removing it.
hosts = [
    "yahoo.com", "google.com", "amazon.com", "ibm.com", "apple.com",
    "bbc.co.uk", "npr.org", "cnn.com", "ubuntu.com", "soundcloud.com",
    "bogus.url", "ebay.com", "linux.org", "osuosl.org", "renren.com",
    "alibaba.com", "dell.com", "senate.gov", "microsoft.com", "bing.com",
    "ideamerge.com", "nasa.gov", "archive.org", "wikipedia.com",
    "python.org", "aol.com", "trimet.org", "arstechnica.com", "redit.com",
    "steinbarts.com", "hulu.com", "itunes.com", "att.com", "usbank.com",
    "gmail.com", "etsy.com", "facebook.com", "opb.org", "mtv.com",
    "wwf.org", "sony.com", "parthenonsoftware.com", "co-op.org",
]
# Column width for the per-host report: the longest host name plus one space.
totalLen = max(len(host) for host in hosts) + 1
# Randomise the fetch order in place so every run hits the hosts differently.
shuffle(hosts)
class ThreadUrl(Thread):
    """Worker thread that fetches hosts from a queue and reports timings.

    Each host name taken from ``queue`` is opened as ``http://<host>`` and
    the first 1024 bytes are read.  One result per host is pushed onto the
    module-level ``q_sum`` queue in the form
    ``(elapsed, (host, elapsed, error))`` where ``elapsed`` is the number of
    seconds since the module-level ``start`` timestamp and ``error`` is
    ``False`` on success or the first 35 characters of the exception text.
    """

    def __init__(self, queue):
        """Store ``queue``, the queue of host names this worker consumes."""
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        """Process hosts forever; the loop has no exit condition."""
        while True:
            host = self.queue.get()
            errorOut = False
            try:
                response = urlopen("http://%s" % host)
                try:
                    response.read(1024)
                finally:
                    # Always release the connection, even if the read fails.
                    response.close()
            except Exception as e:
                # Best effort: a failed fetch is reported, not raised, so one
                # bad host cannot kill the worker.
                errorOut = str(e)[:35]
            # Take the timestamp once so both copies in the result agree.
            elapsed = time() - start
            # Publish the result BEFORE marking the task done.  Otherwise a
            # caller that joins the host queue and then the result queue can
            # see both as finished while this result is still unpublished.
            q_sum.put((elapsed, (host, elapsed, errorOut)))
            self.queue.task_done()
class mySummer(Thread):
    """Collector thread that accumulates results arriving on a queue.

    Every queue item must be a pair ``(number, payload)``.  The numbers are
    appended to ``lstTotal`` and the payloads to ``lstOut``, both in arrival
    order.
    """

    def __init__(self, queue):
        """Store ``queue``, the queue of ``(number, payload)`` pairs to drain."""
        Thread.__init__(self)
        # Use the queue that was passed in rather than a module-level global.
        self.queue = queue
        self.lstOut = []    # payloads, in arrival order
        self.lstTotal = []  # numbers, in arrival order

    def run(self):
        """Drain the queue forever; the loop has no exit condition."""
        while True:
            myNum, tplOut = self.queue.get()
            self.lstTotal.append(myNum)
            self.lstOut.append(tplOut)
            # Mark done only after both lists are updated so that a join()
            # on the queue guarantees the lists are complete.
            self.queue.task_done()

    def total(self, intEnd=False):
        """Return the sum of the most recent ``intEnd`` recorded numbers.

        A falsy ``intEnd`` (the default, or ``0``) sums everything recorded
        so far.  An ``intEnd`` larger than the number of recorded values
        also sums everything.
        """
        if not intEnd:
            return sum(self.lstTotal)
        # Clamp at 0: a negative start index would wrap around and silently
        # drop leading entries when intEnd exceeds the list length.
        first = max(0, len(self.lstTotal) - intEnd)
        return sum(self.lstTotal[first:])
def doIt(intWorkers):
    """Fetch every entry of ``hosts`` using ``intWorkers`` worker threads.

    Starts ``intWorkers`` ``ThreadUrl`` workers on the module-level ``q_url``
    queue, enqueues all hosts, starts one ``mySummer`` collector on the
    module-level ``q_sum`` queue, and blocks until both queues are drained.

    Returns a pair ``(total, results)``: ``total`` is the collector's
    ``total(intWorkers)`` and ``results`` is its ``lstOut`` list.

    Raises ``ValueError`` if ``intWorkers`` is less than 1: with no workers
    nothing would ever drain ``q_url`` and the join below would block forever.
    """
    if intWorkers < 1:
        raise ValueError("intWorkers must be at least 1, got %r" % (intWorkers,))
    #spawning pools of threads, and passing them the q_url instance
    for _ in range(intWorkers):
        worker = ThreadUrl(q_url)
        # Daemon threads: the workers loop forever, so they must not keep
        # the interpreter alive once the main thread finishes.
        worker.daemon = True
        worker.start()
    #populate queue with data
    for host in hosts:
        q_url.put(host)
    #now that is going, we only need one thread for summing
    mySum = mySummer(q_sum)
    mySum.daemon = True
    mySum.start()
    #wait on the queues until everything has been processed
    q_url.join()
    q_sum.join()
    return mySum.total(intWorkers), mySum.lstOut
#build the Queues
q_url = Queue()  # host names waiting to be fetched by the ThreadUrl workers
q_sum = Queue()  # per-host results waiting to be collected by mySummer
# Worker count: one thread per host by default; the commented-out lines are
# alternative settings kept for quick experiments.
#myWorkers = 1 #single threaded!
myWorkers = len(hosts)
#myWorkers = 5
print("Fetching %i URLS with %i workers:"%(len(hosts),myWorkers))
# Reference timestamp; the workers report elapsed times relative to it.
start = time()
lTime, lstOut = doIt(myWorkers)
# Total wall-clock time of the whole run.
eTime = time() - start
# The loop variable is named hostTime so it does not overwrite eTime, which
# the summary line below needs as the total run time.
for (host, hostTime, error) in lstOut:
    print("%s:%.02f%s"%(host.ljust(totalLen),hostTime,":E:%s"%str(error) if error else ""))
# Call form of print with a single argument behaves the same on Python 2
# and Python 3, matching the other print calls in this script.
print("eTime: %.02f, sum:%.02f: %.02f times faster, %.02f%% deltaTime/thread"%(eTime, lTime, abs(eTime-lTime)/eTime, (eTime-lTime)*100/(eTime*myWorkers)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment