Created
March 11, 2024 10:53
-
-
Save mehd-io/31f74387f03e3589a6875d7e0572b9f0 to your computer and use it in GitHub Desktop.
Convert csv<->parquet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# A simple script for converting files between CSV and Parquet formats using DuckDB. Requires DuckDB to be installed.
#######################################
# Convert a data file to another format by running a DuckDB COPY statement.
# The result is written to the current directory as
# <input name without its last extension>.<output_extension>.
# Arguments:
#   $1 - input file path, used as the source of the select
#   $2 - extension for the output file (e.g. csv or parquet)
# Outputs:
#   "Conversion complete: <output_file>" on stdout when duckdb succeeds;
#   a failure message on stderr otherwise.
# Returns:
#   0 on success, non-zero if basename or duckdb fails.
#######################################
convert_file() {
  local input_file="$1"
  local output_extension="$2"
  local sq="'"

  # Declared separately from the assignment so a basename failure is not
  # hidden behind the (always successful) exit status of 'local'.
  local base_name
  base_name=$(basename -- "$input_file") || return

  # Drop the directory part and the last extension, then attach the new one.
  local name="${base_name%.*}"
  local output_file="${name}.${output_extension}"

  # Both paths end up inside single-quoted SQL string literals; double any
  # embedded single quote so a name like it's.csv cannot terminate the literal.
  local sql_input="${input_file//$sq/$sq$sq}"
  local sql_output="${output_file//$sq/$sq$sq}"

  # Only report success when duckdb actually succeeded; otherwise propagate
  # the failure to the caller instead of printing a misleading message.
  if ! duckdb -c "copy (select * from '${sql_input}') to '${sql_output}'"; then
    printf 'Conversion failed: %s\n' "$input_file" >&2
    return 1
  fi

  echo "Conversion complete: ${output_file}"
}
# Both an input path and a target extension are required; without them,
# print the usage text and stop with status 1.
if (( $# < 2 )); then
  printf '%s\n' \
    "Usage: $0 <input_file> <output_extension>" \
    "Example: $0 example.parquet csv"
  exit 1
fi

# Hand the first two arguments to the converter; any extra arguments are ignored.
convert_file "$1" "$2"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment