Last active
August 29, 2015 14:07
-
-
Save Willshaw/6626ac014a291dfd1478 to your computer and use it in GitHub Desktop.
Shell script to create trimmed versions of a CSV. Trims the file to the number of lines supplied in the second input.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# AUTHOR: Pete Williamson
# DATE:   2014-10-21
#
# USAGE:
#   ./trim.sh foo.csv 5,50
#
#   Creates smaller versions of the supplied CSV file, one per line count
#   given in the comma-separated list passed as the second parameter.
#
#   The original CSV and the trimmed CSVs are zipped up (for Windows
#   compatibility) into an archive named after the original file.
#
#   The generated (trimmed) CSV files are then deleted. The original input
#   file and any other CSV files in the directory are left untouched.
#
#   The example above creates foo.zip, containing 3 files:
#     - foo.csv    (original file)
#     - foo.5.csv  (first 5 lines)
#     - foo.50.csv (first 50 lines)
#
# EXIT STATUS:
#   0 on success, non-zero if the arguments are invalid, a trimmed file
#   cannot be written, or zip fails.
#

# Print a message to stderr and abort with a failure status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# localize script params
input=${1:-}
sizes=${2:-}

# check input file is supplied
[[ -n "$input" ]] || die 'No input file supplied'

# The output names are derived by replacing the trailing ".csv"; without that
# suffix the "trimmed" name would equal the input and overwrite it.
[[ "$input" == *.csv ]] || die "Input file must end in .csv: $input"
[[ -f "$input" ]] || die "Input file not found: $input"

# check there is a valid csv of sizes (digits separated by single commas)
[[ "$sizes" =~ ^[0-9]+(,[0-9]+)*$ ]] || die 'No line counts specified'

command -v zip > /dev/null || die 'zip is required but was not found'

# convert sizes csv into array; IFS is changed for this one command only
IFS=, read -r -a arr_sizes <<< "$sizes"

# trim the file to each size specified
# save as new file, X = LINE COUNT > ORIGINAL.X.CSV
base=${input%.csv}
generated=()
for cnt in "${arr_sizes[@]}"; do
  new_file="${base}.${cnt}.csv"

  # A size listed twice (e.g. "5,5") maps to the same file; produce it once.
  for seen in "${generated[@]}"; do
    [[ "$seen" == "$new_file" ]] && continue 2
  done

  # Redirecting the input keeps odd file names from being read as options.
  if ! head -n "$cnt" < "$input" > "$new_file"; then
    rm -f -- "$new_file" "${generated[@]}"
    die "Could not write $new_file"
  fi
  generated+=("$new_file")
done

# build a zipped archive based on original name
# Only the original and the files created above are archived (not every
# *.csv in the directory). -j stores them without directory components, and
# the explicit .zip suffix avoids relying on zip to append one.
zip -j "${base}.zip" "$input" "${generated[@]}"
zip_status=$?

# remove only the csvs this script generated
rm -f -- "${generated[@]}"

exit "$zip_status"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment