Skip to content

Instantly share code, notes, and snippets.

@huseinzol05
Last active February 21, 2025 01:47
Show Gist options
  • Save huseinzol05/2e26de4f3b29d99e993b349864ab6c10 to your computer and use it in GitHub Desktop.
Save huseinzol05/2e26de4f3b29d99e993b349864ab6c10 to your computer and use it in GitHub Desktop.
distributed unzip
from glob import glob
from tqdm import tqdm
from multiprocess import Pool
import itertools
import zipfile
import os
def chunks(l, n):
for i in range(0, len(l), n):
yield (l[i: i + n], i // n)
def multiprocessing(strings, function, cores=6, returned=True):
df_split = chunks(strings, len(strings) // cores)
pool = Pool(cores)
pooled = pool.map(function, df_split)
pool.close()
pool.join()
if returned:
return list(itertools.chain(*pooled))
def main():
files = glob('*.zip')
def loop(files):
files, _ = files
for zip_file_path in tqdm(files):
destination_folder = './'
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
zip_ref.extractall(destination_folder)
os.remove(zip_file_path)
multiprocessing(files, loop, cores = min(len(files), 20), returned = False)
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment