Easy Python script to run selenium web workers/browsers in parallel
""" | |
This is an adaptable example script for using selenium across multiple webbrowsers simultaneously. This makes use of | |
two queues - one to store idle webworkers and another to store data to pass to any idle webworkers in a selenium function | |
""" | |
from multiprocessing import Queue, cpu_count
from threading import Thread
from selenium import webdriver
from time import sleep
from numpy.random import randint
import logging

logger = logging.getLogger(__name__)
# Some example data to pass to the selenium processes; each item i will just cause a sleep of i seconds
# This data can be a list of any datatype that can be pickled
selenium_data = [4, 2, 3, 3, 4, 3, 4, 3, 1, 2, 3, 2, 'STOP']
# Create the two queues to hold the data and the IDs for the selenium workers
selenium_data_queue = Queue()
worker_queue = Queue()
# Create Selenium workers and assign each one a worker ID
# This ID is what gets put on the queue, as Selenium workers cannot be pickled
# By default, make one selenium worker per cpu core with cpu_count
# TODO: Change the worker creation code to be your webworker of choice e.g. PhantomJS
worker_ids = list(range(cpu_count()))
selenium_workers = {i: webdriver.Chrome() for i in worker_ids}
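# A hedged alternative to the line above (assumes the Selenium 4+ options API):
# run Chrome headless so the pool works on machines without a display.
# Uncomment to use; not part of the original gist.
# from selenium.webdriver.chrome.options import Options
# _chrome_opts = Options()
# _chrome_opts.add_argument("--headless=new")
# selenium_workers = {i: webdriver.Chrome(options=_chrome_opts) for i in worker_ids}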
for worker_id in worker_ids:
    worker_queue.put(worker_id)
def selenium_task(worker, data):
    """
    This is a demonstration selenium function that takes a worker and data and then does something with the worker and
    data.
    TODO: change the below code to be whatever it is you want your worker to do e.g. scrape webpages or run browser tests
    :param worker: A selenium web worker NOT a worker ID
    :type worker: webdriver.XXX
    :param data: Any data for your selenium function (must be pickleable)
    :rtype: None
    """
    worker.set_window_size(randint(100, 200), randint(200, 400))
    logger.info("Getting page")
    worker.get('https://ytroulette.com')
    logger.info("Sleeping")
    sleep(data)
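# A hedged sketch of a more realistic task (assumes each queue item would be a
# URL string rather than a sleep time); not part of the original gist:
# def selenium_scrape_task(worker, url):
#     worker.get(url)
#     logger.info("Page title for %s: %s", url, worker.title)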
def selenium_queue_listener(data_queue, worker_queue):
    """
    Monitor a data queue and assign new pieces of data to any available web worker
    :param data_queue: The python FIFO queue containing the data to run on the web worker
    :type data_queue: Queue
    :param worker_queue: The queue that holds the IDs of any idle workers
    :type worker_queue: Queue
    :rtype: None
    """
    logger.info("Selenium func worker started")
    while True:
        current_data = data_queue.get()
        if current_data == 'STOP':
            # If a stop is encountered then kill the current worker thread and put the stop back onto the queue
            # to poison other workers listening on the queue
            logger.warning("STOP encountered, killing worker thread")
            data_queue.put(current_data)
            break
        else:
            logger.info(f"Got the item {current_data} on the data queue")
            # Get the ID of a currently free worker from the worker queue
            worker_id = worker_queue.get()
            worker = selenium_workers[worker_id]
            # Pass the current worker and current data to your selenium function
            selenium_task(worker, current_data)
            # Put the worker back into the worker queue as it has completed its task
            worker_queue.put(worker_id)
    return
# Create one new queue listener thread per selenium worker and start them
logger.info("Starting selenium background threads")
selenium_processes = [Thread(target=selenium_queue_listener,
                             args=(selenium_data_queue, worker_queue)) for _ in worker_ids]
for p in selenium_processes:
    p.daemon = True
    p.start()
# Add each item of data to the data queue; this could be done over time so long as the selenium queue listening
# threads are still running
logger.info("Adding data to data queue")
for d in selenium_data:
    selenium_data_queue.put(d)
# Wait for all selenium queue listening threads to complete; this happens when the queue listener returns
logger.info("Waiting for Queue listener threads to complete")
for p in selenium_processes:
    p.join()
# Quit all the web workers elegantly in the background
logger.info("Tearing down web workers")
for b in selenium_workers.values():
    b.quit()
Great job! Thanks!
Thanks, this is such a helpful tool!
Wow wow woW
Thanks for this script.
How should I collect the data from each thread and write it to a CSV file?
I can't get threading.Lock() to work... :(
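One pattern that might help (a minimal sketch, not from the gist; the file name, helper name, and row format are all assumptions): share a single threading.Lock and csv.writer across the listener threads, and take the lock around each write so only one thread touches the file at a time.

import csv
import threading

csv_lock = threading.Lock()
csv_file = open('results.csv', 'w', newline='')
csv_writer = csv.writer(csv_file)

def write_row_threadsafe(row):
    # Only one thread may use the shared writer at a time
    with csv_lock:
        csv_writer.writerow(row)

selenium_task could then call write_row_threadsafe([...]) with whatever it scraped, and you would close csv_file after the join loop at the end of the script.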
This is great, thanks.
I'm also returning the results with
class ThreadWithReturnValue(Thread):
    def __init__(self, group=None, target=None, name=None,
                 args=(), kwargs={}, Verbose=None):
        Thread.__init__(self, group, target, name, args, kwargs)
        self._return = None

    def run(self):
        if self._target is not None:
            self._return = self._target(*self._args, **self._kwargs)

    def join(self, *args):
        Thread.join(self, *args)
        return self._return
Got it from this post: StackOverflow
For this to work you need line 81 to return a value; I'm returning a list that I extend with the return value of line 78.
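A minimal usage sketch (assuming selenium_queue_listener is changed to collect its selenium_task results into a list and return it; the variable names here are illustrative, not from the gist):

listener_threads = [ThreadWithReturnValue(target=selenium_queue_listener,
                                          args=(selenium_data_queue, worker_queue))
                    for _ in worker_ids]
for t in listener_threads:
    t.start()

all_results = []
for t in listener_threads:
    # join() now hands back whatever selenium_queue_listener returned
    all_results.extend(t.join())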
This is awesome man :)
Thanks for this useful piece