Last active
May 7, 2023 16:53
-
-
Save rsalaza4/57e17a8146ff672390993180a8fe178a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Remove warning messages | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
| # Import libraries and dependencies | |
| import os | |
| import pandas as pd | |
| # Define 'merge_files' function | |
def merge_files(folder_path, excel_output=True, csv_output=True):
    """
    Merge the same-named sheets of every workbook found in a folder.

    Every entry of ``folder_path`` is opened as an Excel workbook and its
    sheets 'Sheet 1' ... 'Sheet 5' are read. The rows of each sheet are
    stacked across all workbooks, producing five merged tables. Table
    number ``i`` is written to ``excel_<i>.xlsx`` and/or ``csv_<i>.csv``.
    The output names are relative, so the files are created in the
    current working directory of the process, not in ``folder_path``.

    Parameters
    ----------
    folder_path : string
        Path of the folder containing the files to be merged. It is
        resolved to an absolute path before use.
    excel_output : boolean
        Boolean value to generate Excel files.
    csv_output : boolean
        Boolean value to generate csv files.

    Returns
    -------
    None

    Notes
    -----
    Any error raised by ``os.listdir`` or ``pandas.read_excel`` (for
    example for an entry that is not a workbook, or a workbook that lacks
    one of the five sheets) is propagated to the caller unchanged.
    """
    cwd = os.path.abspath(folder_path)
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the merged output reproducible between runs.
    files = sorted(os.listdir(cwd))

    # Print list of files inside folder for validation only
    print(files)

    sheet_names = ['Sheet 1', 'Sheet 2', 'Sheet 3', 'Sheet 4', 'Sheet 5']

    # Collect the pieces per sheet and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every call.
    frames = {name: [] for name in sheet_names}
    for file in files:
        # Passing a list of sheet names parses the workbook a single time
        # and returns a dict mapping sheet name -> DataFrame.
        sheets = pd.read_excel(os.path.join(cwd, file), sheet_name=sheet_names)
        for name in sheet_names:
            frames[name].append(sheets[name])

    # pd.concat rejects an empty list, so an empty folder falls back to an
    # empty DataFrame per sheet.
    merged = [
        pd.concat(frames[name]) if frames[name] else pd.DataFrame()
        for name in sheet_names
    ]

    # Generate Excel files
    if excel_output:
        for number, df in enumerate(merged, start=1):
            df.to_excel(f'excel_{number}.xlsx', index=False)

    # Generate csv files
    if csv_output:
        for number, df in enumerate(merged, start=1):
            df.to_csv(f'csv_{number}.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment