Python3 parallel multiprocessing
import random
import time
from concurrent import futures


def run_parallel_processes(worker_func, job_args, max_workers=None):
    """
    Run worker_func over job_args in a process pool:
    each worker_func call handles exactly one item from job_args,
    and max_workers defaults to the number of CPU cores.

    Note:
        job_args is a list of simple, picklable values (numbers, strings),
        because arguments are pickled when sent to the worker processes.
        Good: job_args=[1, 2, 3]
        Bad:  job_args=[ComplexObject()]
    """
    with futures.ProcessPoolExecutor(max_workers) as pool:
        # pool.map returns a lazy iterator, but leaving the `with` block
        # waits for all tasks to finish, so the iterator is safe to
        # consume after this function returns.
        return pool.map(worker_func, job_args)


def download_single_file_worker(file_url):
    """
    "Download" one file: sleep for a random 1.5-3.0 s to simulate the
    work, then return the task result.

    Note:
        The return value must be a simple number or string, because
        results are pickled on their way back to the parent process.
        Good: return "simple string"
        Bad:  return ComplexObject()
        If the result is too complex, write it to a log file and
        return the path of that log file instead.
    """
    print(f"Start downloading {file_url}")
    cost_time = random.uniform(1.5, 3.0)
    time.sleep(cost_time)
    print(f"Finish {file_url} cost {cost_time} seconds")
    return f"{file_url}-{cost_time}"


if __name__ == "__main__":
    file_urls = [
        "https://a.zip",
        "https://b.zip",
        "https://c.zip",
    ]
    rets = run_parallel_processes(download_single_file_worker, file_urls)
    print("Rets from parallel workers", list(rets))