Last active
February 3, 2021 06:05
-
-
Save ryerh/702ef7739386aed993d03f9c5fc217a2 to your computer and use it in GitHub Desktop.
Python3 多进程并行
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random
import time
from concurrent import futures
def run_parallel_processes(worker_func, job_args, max_workers=None):
    """Run ``worker_func`` once per item of ``job_args`` in a process pool.

    Each call to ``worker_func`` receives exactly one item taken from
    ``job_args`` and is executed in a worker process.

    Args:
        worker_func: Callable taking a single job argument. It is sent to
            worker processes, so it should be a module-level function.
        job_args: Iterable of job arguments. Keep the items simple (numbers,
            strings): they have to be transferred to another process.
            Good: ``job_args=[1, 2, 3]``
            Bad:  ``job_args=[ComplexObject()]``
        max_workers: Maximum number of worker processes. ``None`` lets
            ``ProcessPoolExecutor`` choose its default, which is based on
            the number of processors of the machine.

    Returns:
        The iterator produced by ``Executor.map``; it yields the results in
        the same order as ``job_args``. An exception raised by a worker is
        re-raised when its result is retrieved from the iterator.
    """
    # Leaving the ``with`` block shuts the pool down and waits for every
    # submitted job, so this function only returns once all work is done.
    with futures.ProcessPoolExecutor(max_workers) as pool:
        return pool.map(worker_func, job_args)
def download_single_file_worker(file_url, *, min_seconds=1.5, max_seconds=3.0):
    """Simulate downloading one file and return a simple result string.

    No real download happens: the function sleeps for a random duration
    drawn uniformly between ``min_seconds`` and ``max_seconds`` and prints a
    message before and after the sleep.

    Note: the return value should stay simple (a number or a string) because
    it is passed back from a worker process.
        Good: ``return "simple string"``
        Bad:  ``return ComplexObject()``
    If a result is too complex, write it to a log file and return the path
    of that file instead.

    Args:
        file_url: Identifier of the file to "download"; only used in the
            printed messages and in the returned string.
        min_seconds: Lower bound of the simulated duration (default 1.5).
        max_seconds: Upper bound of the simulated duration (default 3.0).

    Returns:
        The string ``"<file_url>-<cost_time>"``.
    """
    print(f"Start downloading {file_url}")
    cost_time = random.uniform(min_seconds, max_seconds)
    time.sleep(cost_time)
    print(f"Finish {file_url} cost {cost_time} seconds")
    return f"{file_url}-{cost_time}"
# The guard keeps the demo from running when this module is imported, which
# is what worker processes may do when they are started.
if __name__ == "__main__":
    file_urls = [
        "https://a.zip",
        "https://b.zip",
        "https://c.zip",
    ]
    rets = run_parallel_processes(download_single_file_worker, file_urls)
    # ``rets`` is an iterator; materialize it to print all results at once.
    print("Rets from parallel workers", list(rets))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment