Skip to content

Instantly share code, notes, and snippets.

@bdqnghi
Created July 29, 2021 12:46
Show Gist options
  • Save bdqnghi/f8156ccdff9f0de7444868828d0a8ab0 to your computer and use it in GitHub Desktop.
Save bdqnghi/f8156ccdff9f0de7444868828d0a8ab0 to your computer and use it in GitHub Desktop.
Read multiple files in parallel threads, process each one, and write the results into a single file
import codecs
import logging
import os
import queue  # this module was named "Queue" in Python 2
import threading
class PrintThread(threading.Thread):
    """Consumer thread that echoes queued results and appends them to a file.

    Every item taken from the queue is printed to stdout and then appended
    to ``output_path``. ``run`` loops forever, so callers are expected to
    mark the thread as a daemon and wait on the queue's ``join()`` instead
    of joining the thread itself.
    """

    def __init__(self, queue, output_path="test.csv"):
        """Remember the queue to consume and the file to append to.

        Args:
            queue: Queue supplying the items to write. Items are passed
                unchanged to a text-mode ``file.write``.
            output_path: File that results are appended to. Defaults to
                ``"test.csv"`` (relative to the current working directory).
        """
        super().__init__()
        self.queue = queue
        self.output_path = output_path

    def printfiles(self, p):
        """Append ``p`` to the output file exactly as given (no newline added)."""
        # Opened in append mode per call so earlier content is never truncated.
        with open(self.output_path, "a") as f:
            f.write(p)

    def run(self):
        """Consume items forever, printing and persisting each one.

        ``task_done()`` is called for every item, including items that could
        not be written. Without that, a single failure would end this thread
        with the item unacknowledged and any ``queue.join()`` waiting on it
        would block forever. Failures are logged with their traceback and the
        loop moves on to the next item.
        """
        while True:
            result = self.queue.get()
            try:
                print(result)
                self.printfiles(result)
            except Exception:
                # Thread boundary: report the failure and keep the consumer alive.
                logging.getLogger(__name__).exception(
                    "Failed to write result to %s", self.output_path
                )
            finally:
                self.queue.task_done()
class ProcessThread(threading.Thread):
    """Worker thread that turns queued file paths into queued results.

    Paths are taken from ``in_queue``, passed to :meth:`process`, and the
    value it returns is put on ``out_queue``. ``run`` loops forever, so
    callers are expected to mark the thread as a daemon and wait on the
    queues' ``join()`` instead of joining the thread itself.
    """

    def __init__(self, in_queue, out_queue):
        """Remember the source and destination queues.

        Args:
            in_queue: Queue supplying the paths to process.
            out_queue: Queue that receives one result per successfully
                processed path.
        """
        super().__init__()
        self.in_queue = in_queue
        self.out_queue = out_queue

    def run(self):
        """Process paths forever, forwarding each result to ``out_queue``.

        ``task_done()`` is called for every path, including paths whose
        processing raised (e.g. a missing file). Without that, a single
        failure would end this thread with the path unacknowledged and any
        ``in_queue.join()`` waiting on it would block forever. Failed paths
        are logged with their traceback and produce no result.
        """
        while True:
            path = self.in_queue.get()
            try:
                # Publish the result *before* acknowledging the path, so that
                # once in_queue.join() returns every result is already queued.
                self.out_queue.put(self.process(path))
            except Exception:
                # Thread boundary: report the failure and keep the worker alive.
                logging.getLogger(__name__).exception(
                    "Failed to process %s", path
                )
            finally:
                self.in_queue.task_done()

    def process(self, path):
        """Return the full text content of the file at ``path``.

        This is the hook for the real per-file processing job; as written it
        only reads the file.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(path, "r") as f:
            return f.read()
# Pipeline wiring: paths flow through `pathqueue` to the reader threads, and
# what they produce flows through `resultqueue` to the single writer thread.
pathqueue = queue.Queue()
resultqueue = queue.Queue()
paths = ["file_1.txt", "file_2.txt", "file_3.txt"]

# Spawn the reader threads. They are daemons because their run loops never
# return; the process exits once both queues below have been drained.
for _ in range(5):
    t = ProcessThread(pathqueue, resultqueue)
    t.daemon = True
    t.start()

# Spawn the single writer thread, so only one thread appends to the output.
t = PrintThread(resultqueue)
t.daemon = True
t.start()

# Queue the work.
for path in paths:
    pathqueue.put(path)

# Wait for every path to be processed first, then for every result to be
# written. The order matters: readers enqueue their result before marking a
# path done, so after the first join all results are already on `resultqueue`.
pathqueue.join()
resultqueue.join()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment