Created
December 5, 2017 14:02
-
-
Save wil3/75483d2594df2f85439ae6920ef18a78 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Purpose:
This Python script is meant to introduce you to parallel processing with
threads and processes. The example uses a simple worker that randomly selects
a letter and repeatedly hashes this value. It demonstrates two methods for
creating a thread: (1) by specifying a target function, or (2) by inheriting
from threading.Thread.
This lesson also shows that the Thread and Process APIs are (for the most part)
interchangeable, allowing us to simply replace threading.Thread with
multiprocessing.Process to switch from threads to processes.
""" | |
__author__ = "William Koch" | |
__email__ = "[email protected]" | |
# STEP 1) Import libraries we will be using | |
# This library is for multi-threading | |
import threading | |
# While this is for multi processing, the API is almost identical | |
import multiprocessing | |
#Lets log stuff | |
import logging | |
# The remaining libraries will be used to create a simple worker | |
# This library is for creating hashes, https://docs.python.org/2/library/hashlib.html | |
import hashlib | |
# Get times | |
import time | |
# Random sampling | |
import random | |
# Helper for accessing letters | |
import string | |
# STEP 2) Init our logger
# More info: https://docs.python.org/2/howto/logging.html#logging-basic-tutorial
# Configure the root logger so messages at DEBUG level and above are emitted.
logging.basicConfig(level=logging.DEBUG)
# Named logger used by the workers and the main script below.
logger = logging.getLogger("disscussion-wk8")
# There are two ways to init a worker: (1) by function, (2) by inheriting threading.Thread
"""
Method 2 for creating a thread: inherit from threading.Thread.
To use multiple processes instead, just replace threading.Thread with
multiprocessing.Process.
"""
class HashPerformance(threading.Thread):
    """Thread that repeatedly hashes a message and logs its throughput.

    A random uppercase ASCII letter is picked as the seed message. When the
    thread runs, the message is hashed ``count`` times, the hex digest of
    each round becoming the input of the next, and the achieved rate in
    hashes/second is logged.

    Args:
        hashfn: hashlib-style constructor (e.g. ``hashlib.sha256``). It is
            called with ``bytes`` and must return an object that provides
            ``hexdigest()``.
        count: number of hashing rounds to perform in ``run``.
    """

    def __init__(self, hashfn, count):
        # Call the parent's constructor first so the Thread state exists
        # before we attach our own attributes.
        super(HashPerformance, self).__init__()
        self.hashfn = hashfn
        self.count = count
        self.message = random.choice(string.ascii_uppercase)

    def run(self):
        """Hash ``self.message`` ``self.count`` times and log the throughput.

        ``Thread.start()`` invokes this method in the new thread. Its
        signature takes no parameters, which is why the inputs are passed
        through the constructor and read from ``self`` here.
        """
        start_time = time.time()
        for _ in range(self.count):
            # hexdigest() returns text but the hash function needs bytes,
            # so re-encode on every round. Use the hash function given to
            # the constructor, not a module-level global.
            self.message = self.hashfn(self.message.encode("utf-8")).hexdigest()
        lapse_time = time.time() - start_time
        # Compute throughput. The clock may not advance during a very short
        # run; report an infinite rate instead of dividing by zero.
        tput = self.count / lapse_time if lapse_time > 0 else float("inf")
        logger.info(" {} Tput = {} hashes/second".format(threading.current_thread().name, tput))
# STEP 3) Create a worker
"""
An example of a worker function; see below for thread initialization.
"""
def compute_hash_tput(count, hashfn):
    """Repeatedly hash a message and log the achieved throughput.

    A random uppercase ASCII letter is used as the seed so the output is not
    deterministic. The hex digest of each round is the input of the next.

    Args:
        count: number of hashing rounds to perform.
        hashfn: hashlib-style constructor (e.g. ``hashlib.sha256``). It is
            called with ``bytes`` and must return an object that provides
            ``hexdigest()``.

    Returns:
        None. The rate in hashes/second is written to the module logger,
        tagged with the name of the thread that did the work.
    """
    message = random.choice(string.ascii_uppercase)
    start_time = time.time()
    for _ in range(count):
        # hashlib requires bytes; hexdigest() returns text, so encode to
        # UTF-8 on every round (some systems default to unicode strings).
        message = hashfn(message.encode("utf-8")).hexdigest()
    lapse_time = time.time() - start_time
    # Compute throughput. The clock may not advance during a very short run;
    # report an infinite rate instead of raising ZeroDivisionError.
    tput = count / lapse_time if lapse_time > 0 else float("inf")
    logger.info(" {} Tput = {} hashes/second".format(threading.current_thread().name, tput))
# STEP 4) Create main and initialize variables
if __name__ == "__main__":
    # Define the hash we will use, refer to the hashlib API doc for options.
    # Every worker below receives this value, so changing it here switches
    # the algorithm for the whole demo.
    hashfn = hashlib.sha256
    # Number of hashes each worker computes
    num_hashes = 100000
    # Number of concurrent workers to launch
    num_workers = 10

    logger.info("Main thread name = {}".format(threading.current_thread().name))

    # STEP 5) Demonstrate the function is working
    #compute_hash_tput(100, hashlib.sha256)

    # Demonstrate multiple threads doing work
    # Create a bunch of threads
    workers = []
    for _ in range(num_workers):
        # Method 1 for defining a thread: specify the worker as the target.
        # Note args is passed in as a tuple!
        t = threading.Thread(target=compute_hash_tput, args=(num_hashes, hashfn))
        # Start the execution of the thread
        t.start()
        # Save the thread so we can join later
        workers.append(t)

    # Wait for each worker to finish and join back with the main thread
    for w in workers:
        w.join()

    # Uncomment to demonstrate Method 2: creating workers from a class that
    # inherits threading.Thread
    """
    class_workers = []
    for i in range(num_workers):
        t = HashPerformance(hashfn, num_hashes)
        t.start()
        class_workers.append(t)
    for w in class_workers:
        w.join()
    """
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment