Last active
April 30, 2021 15:02
-
-
Save xlbruce/1394b96e6985ff3e5247d82ee6ea77e7 to your computer and use it in GitHub Desktop.
Simple CSV to Excel converter.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import asyncio | |
import glob | |
import os | |
import pandas as pd | |
# Command-line interface: two positional paths plus paired on/off flags.
description = """Convert CSV files or an entire directory to XLSX.\n
Examples:
- Convert a single file to a directory
$ python main.py /path/to/file.csv /path/to/output/
- Convert a single file with custom name
$ python main.py /path/to/file.csv /path/to/output/custom.xlsx
- Convert an entire directory and sub-directories to a directory
$ python main.py /path/to/dir/ /path/to/output --recurse
- The --index option indexes data
$ python main.py /path/to/file.csv /path/to/output --index
"""

# RawDescriptionHelpFormatter keeps the line breaks of the examples above
# when the description is shown by --help.
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Positional arguments.
parser.add_argument(
    'input_path',
    type=str,
    help='Path to a CSV file or a directory containing CSV files',
)
parser.add_argument(
    'output_path',
    type=str,
    help='Path to save the XLSX files. Can be a directory or a path ending with .xlsx',
)

# Each feature has an enabling and a disabling flag writing to the same dest.
parser.add_argument(
    '--index',
    dest='index',
    action='store_true',
    help="First column will be the index",
)
parser.add_argument(
    '--no-index',
    dest='index',
    action='store_false',
    help="No index output file",
)
parser.add_argument(
    '--recurse',
    dest='recurse',
    action='store_true',
    help="If input_path is a DIR, all CSV files in sub-directories will be converted",
)
parser.add_argument(
    '--no-recurse',
    dest='recurse',
    action='store_false',
    help="No recurse input_path",
)

# Both features are off unless requested.
parser.set_defaults(index=False, recurse=False)

# Parsed at module level; the entry point at the bottom of the file reads `args`.
args = parser.parse_args()
########
async def csv_to_excel(csv_path: str, output_path: str, index=False):
    """Convert a single CSV file to an XLSX workbook.

    Args:
        csv_path: Path of the CSV file to read.
        output_path: Either an existing directory, in which case the
            workbook is named after the CSV file with its extension
            replaced by ``.xlsx``, or the full path of the workbook to write.
        index: Forwarded to ``DataFrame.to_excel`` as its ``index`` argument.
    """
    if os.path.isdir(output_path):
        # splitext removes only the final extension, so "report.2021.csv"
        # becomes "report.2021.xlsx" instead of being cut at the first dot.
        stem, _ = os.path.splitext(os.path.basename(csv_path))
        output_path = os.path.join(output_path, f"{stem}.xlsx")
        print(f'after conversion, output_path is {output_path}')

    frame = pd.read_csv(csv_path)
    frame.to_excel(output_path, index=index)
async def convert_dir(input_dir: str, output_dir: str, recurse: bool=False, index: bool=False):
    """Convert every CSV file found in a directory to XLSX.

    Args:
        input_dir: Directory searched for ``*.csv`` files.
        output_dir: Passed to ``csv_to_excel`` as the output path of each file.
        recurse: When true, sub-directories of ``input_dir`` are searched too.
        index: Passed through to ``csv_to_excel`` for every file.
    """
    # Escape glob metacharacters so a directory whose name contains "[", "*"
    # or "?" is matched literally instead of being read as a pattern.
    search_root = glob.escape(input_dir)
    if recurse:
        # With recursive=True, "**" matches zero or more nested directories.
        search_root = os.path.join(search_root, "**")
    pattern = os.path.join(search_root, "*.csv")

    filenames = glob.glob(pattern, recursive=recurse)
    tasks = [csv_to_excel(name, output_dir, index) for name in filenames]
    print('disparando tasks de conversao...')
    await asyncio.gather(*tasks)
async def main(args: argparse.Namespace):
    """Validate the parsed arguments and run the requested conversion.

    Args:
        args: Namespace providing ``input_path``, ``output_path``,
            ``recurse`` and ``index``.

    Raises:
        ValueError: If the input is a directory but the output is not, if the
            input is neither a directory nor a file, or if the output file
            already exists.
    """
    if os.path.isdir(args.input_path):
        if not os.path.isdir(args.output_path):
            raise ValueError("When input path is a directory, output path MUST be a directory")
        # Forward the index flag too, so --index also applies to directory input.
        await convert_dir(args.input_path, args.output_path, args.recurse, args.index)
        return

    if not os.path.isfile(args.input_path):
        raise ValueError(f"Input file [{args.input_path}] is not valid")

    # Refuse to overwrite an existing file.
    if os.path.isfile(args.output_path):
        raise ValueError(f"Output file [{args.output_path}] already exists")

    await csv_to_excel(args.input_path, args.output_path, args.index)
# Start the conversion only when the file is executed as a script.
if __name__ == "__main__":
    conversion = main(args)
    asyncio.run(conversion)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
et-xmlfile==1.1.0
numpy==1.20.2
openpyxl==3.0.7
pandas==1.2.4
python-dateutil==2.8.1
pytz==2021.1
six==1.15.0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment