Created
July 21, 2019 22:06
Read and concatenate several pandas DataFrames in parallel. All credit to @zemekeneng on Stack Overflow.
import pandas as pd
from multiprocessing import Pool  # for reading the CSVs faster


def my_read_csv(filename):
    # Helper function for the parallel load_csvs
    return pd.read_csv(filename)


def load_csvs(prefix):
    """Reads and joins all our CSV files into one big dataframe.

    We do it in parallel to make it faster, since otherwise it takes some time.
    Idea from: https://stackoverflow.com/questions/36587211/easiest-way-to-read-csv-files-with-multiprocessing-in-pandas
    """
    # Build the list of files to read (DATA_PATH must be defined elsewhere)
    file_list = [f"{DATA_PATH}/{prefix}{idx}.csv" for idx in range(1, 21)]
    # Set up the worker pool; the context manager closes it when done
    with Pool() as pool:
        df_list = pool.map(my_read_csv, file_list)
    # Reduce the list of dataframes to a single dataframe
    return pd.concat(df_list, ignore_index=True)
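A minimal, self-contained sketch of the same Pool.map + pd.concat pattern: it writes a few small CSVs to a temporary directory (the `part{idx}.csv` names and the `parallel_concat` helper are hypothetical, just for illustration), reads them back in parallel, and concatenates the results.

```python
import os
import tempfile
from multiprocessing import Pool

import pandas as pd


def read_one(path):
    # Worker function; must be defined at module level so it can be pickled
    return pd.read_csv(path)


def parallel_concat(paths):
    # Read every CSV in a worker process, then merge into one dataframe
    with Pool() as pool:
        frames = pool.map(read_one, paths)
    return pd.concat(frames, ignore_index=True)


if __name__ == "__main__":
    with tempfile.TemporaryDirectory() as tmp:
        paths = []
        for idx in range(1, 4):
            path = os.path.join(tmp, f"part{idx}.csv")
            pd.DataFrame({"x": [idx, idx * 10]}).to_csv(path, index=False)
            paths.append(path)
        combined = parallel_concat(paths)
        print(len(combined))  # 3 files x 2 rows each = 6 rows
```

Note the `if __name__ == "__main__"` guard: on platforms that spawn rather than fork worker processes, multiprocessing re-imports the main module, and the guard prevents the pool setup from running recursively.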