Last active
March 15, 2022 14:25
-
-
Save bitnulleins/5de5aa3adde3670430d94c49e1a9648e to your computer and use it in GitHub Desktop.
CSV to Parquet Converter (Python)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import getopt
import sys
from pathlib import Path

import pandas as pd
def main(argv):
    """Convert a CSV file to a Parquet file using pandas.

    The output is written next to the input file, with the input's final
    extension replaced by ``.parquet`` (``data.csv`` -> ``data.parquet``).

    Requirements:
        * pandas, plus a Parquet engine pandas can use (e.g. pyarrow)

    Install:
        pip install pandas pyarrow

    Usage:
        python csv2parquet.py -i <inputfile>
        python csv2parquet.py --ifile=<inputfile>
        python csv2parquet.py -h

    Args:
        argv: Command-line arguments without the program name
            (typically ``sys.argv[1:]``).

    Raises:
        SystemExit: With status 2 on an invalid option, a missing ``-i``
            option, or a failed conversion; with the default status
            after printing the usage for ``-h``.
    """
    usage = 'csv2parquet.py -i <inputfile>'
    inputfile = ''

    # Only the option parsing can raise GetoptError, so keep this try minimal.
    try:
        opts, _ = getopt.getopt(argv, "hi:", ["ifile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg

    # Report a missing input explicitly instead of letting read_csv('') fail.
    if not inputfile:
        print('Error: Did you miss the -i option?')
        sys.exit(2)

    try:
        # Swap only the final extension. A plain str.replace would also touch
        # '.csv' inside directory names and would leave names without a
        # lowercase '.csv' unchanged, overwriting the input file.
        outputfile = Path(inputfile).with_suffix('.parquet')
        csv_data = pd.read_csv(inputfile)
        csv_data.to_parquet(outputfile)
    except pd.errors.ParserError:
        print('Inputfile is not valid csv.')
        sys.exit(2)
    except Exception as err:
        # Top-level CLI boundary: surface the real cause (missing file,
        # missing Parquet engine, permissions, ...) and keep exit status 2.
        print('Error: %s' % err)
        sys.exit(2)

    print("Sucessfuly convert %s to %s" % (inputfile, outputfile))
# Run the converter only when this file is executed as a script, handing
# main() the command-line arguments without the program name.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment