Last active
December 18, 2020 21:53
-
-
Save brunerd/e2a129bbdf31a2ef44aef3e7d101a5e4 to your computer and use it in GitHub Desktop.
Minified, functionalized version of Lorance Stinson's csv2csv shell utility
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# csv2csv.function.min | |
# csv2csv by Lorance Stinson, 2007 | |
# This file is in the public domain. | |
# For more information email [email protected]. | |
# Or see http://lorance.freeshell.org/csvutils/ | |
# Additions by Joel Bruner, 2020 (https://github.com/brunerd) | |
#csv2csv function minified | |
function csv2csv { IFS=$' \t\n'; local NL=$'\n'; local TAB=$'\t';local CSV_ESCAPE=${CSV_ESCAPE:='"'}; local CSV_SEPARATOR=${CSV_SEPARATOR:=","}; local CSV_QUOTE=${CSV_QUOTE:='"'}; local OCSV_ESCAPE=${CSV_ESCAPE:='"'}; local OCSV_SEPARATOR=${CSV_SEPARATOR:=","}; local OCSV_QUOTE=${CSV_QUOTE:='"'}; local FILE="-"; local NL_TEXT=""; local NUM_COLS=""; local OUTPUT=""; local QUOTE_LEVEL="0"; local SKIP_ROWS="0"; local TAB_TEXT=""; while [ "$#" -gt 0 ] ; do case "$1" in -c) [ "$2" ] && local COL_LIST="$2" || usage="$1 requires an argument."; shift 2;; -e) [ "$2" ] && CSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -E) [ "$2" ] && OCSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -h) usage=" "; shift 1;; -l) [ "$2" ] && QUOTE_LEVEL="$2" || usage="$1 requires an argument."; shift 2;; -N) [ "$2" ] && NL_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -n) [ "$2" ] && NUM_COLS="$2" || usage="$1 requires an argument."; shift 2;; -o) [ "$2" ] && OUTPUT="$2" || usage="$1 requires an argument."; shift 2;; -q) [ "$2" ] && CSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -Q) [ "$2" ] && OCSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -r) [ "$2" ] && SKIP_ROWS="$2" || usage="$1 requires an argument."; shift 2;; -s) [ "$2" ] && CSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -S) [ "$2" ] && OCSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -T) [ "$2" ] && TAB_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -z) local CSV_MAWK_BUG="Y"; shift;; --) [ "$2" ] && FILE="$2"; break;; -) FILE="-"; shift;; -*) usage="Invalid option $1";; *) FILE="$1"; shift;; esac; done; [ "X$FILE" != "X-" -a ! -f "$FILE" -a ! -r "$FILE" ] && usage="Unable to read the file ($FILE)."; if [ -n "${usage}" ] ; then cat<<EOHELP | |
${usage}${NL}Usage: `basename $0` [OPTIONS] [INPUT]${NL}Converts a CSV file to a CSV file. This is useful for changing the CSV${NL}format or replacing newlines.${NL}Options:${NL}${TAB}-c <LIST> Output the columns specified by LIST. The list should${NL}${TAB}${TAB}${TAB}${TAB}contain the column numbers to output, starting from 1,${NL}${TAB}${TAB}${TAB}${TAB}separated by commas. Ranges are not allowed. Overrides -n.${NL}${TAB}-e <ESCAPE> The input CSV escape character. Defaults to $CSV_ESCAPE.${NL}${TAB}-E <ESCAPE> The output CSV escape character. Defaults to $OCSV_ESCAPE.${NL}${TAB}-h This text.${NL}${TAB}-l <LEVEL> Quote level for output. Defaults to 0.${NL}${TAB}${TAB}${TAB} -1 = Do not quote any fields. Use with caution.${NL}${TAB}${TAB}${TAB}${TAB}0 = Only strings that require it are quoted.${NL}${TAB}${TAB}${TAB}${TAB}1 = All non-number fields are quoted. -?[0-9.]${NL}${TAB}${TAB}${TAB}${TAB}2 = All fields are quoted except empty ones.${NL}${TAB}${TAB}${TAB}${TAB}3 = All fields are quoted including empty ones.${NL}${TAB}-N <TEXT> Replace embedded newlines with TEXT. Defaults to a newline.${NL}${TAB}-n <NUMBER> Output exactly NUMBER of columns. If a row has more columns${NL}${TAB}${TAB}${TAB}${TAB}the extra will be removed and if it has fewer extra blank${NL}${TAB}${TAB}${TAB}${TAB}columns will be added. Ignored when -c is used.${NL}${TAB}-o <FILE> Write the results to FILE.${NL}${TAB}-q <QUOTE> The input CSV quote character. Defaults to $CSV_ESCAPE.${NL}${TAB}-Q <QUOTE> The output CSV quote character. Defaults to $OCSV_ESCAPE.${NL}${TAB}-r <NUM> Skip NUM rows of the CSV file or input.${NL}${TAB}-s <SEP> The input CSV field separator. Defaults to $CSV_SEPARATOR.${NL}${TAB}-S <SEP> The output CSV field separator. Defaults to $OCSV_SEPARATOR.${NL}${TAB}-T <TEXT> Replace embedded tabs with TEXT. Defaults to a tab.${NL}${NL}Replacement for -N and -T are not checked for the output escape character.${NL} | |
EOHELP | |
exit 1; fi; if [ "$OUTPUT" ] ; then exec 1>$OUTPUT; fi; awk -v csv_sep="$CSV_SEPARATOR" -v csv_quote="$CSV_QUOTE" -v csv_escape="$CSV_ESCAPE" -v ocsv_sep="$OCSV_SEPARATOR" -v ocsv_quote="$OCSV_QUOTE" -v ocsv_escape="$OCSV_ESCAPE" -v col_list="$COL_LIST" -v quotelevel="$QUOTE_LEVEL" -v nltext="$NL_TEXT" -v numcols="$NUM_COLS" -v row_attr="$ROW_ATTR" -v skip="$SKIP_ROWS" -v tabtext="$TAB_TEXT" \-v csv_mawk_bug="$CSV_MAWK_BUG" 'function parse_csv(string,csv,sep,quote,escape,newline,fields,instr,pos,strtrim) { if (length(string) == 0) return 0;string = sep string; fields = 0; while (length(string) > 0) { strtrim = 0; if (substr(string,2,1) == quote) {instr = 1; pos = 2; strtrim = 1; do {pos++;if (pos != length(string) && substr(string,pos,1) == escape && (substr(string,pos + 1,1) == quote || substr(string,pos + 1,1) == escape)) { string = substr(string,1,pos - 1) substr(string,pos + 1); } else if (substr(string,pos,1) == quote) { instr = 0;} else if (newline && pos >= length(string)) { if (getline == -1) { csverr = "Unable to read the next line."; return -1; }; string = string newline $0;}} while (pos < length(string) && instr); if (instr) {csverr = "Missing end quote.";return -1;}} else {if (length(string) == 1 || substr(string,2,1) == sep) { csv[fields] = ""; fields++;if (length(string) == 1){ return fields; string = substr(string,2); continue;}}; pos = index(substr(string,2),sep);if (pos == 0) { csv[fields] = substr(string,2); fields++; return fields; }};if ((pos != length(string) && substr(string,pos + 1,1) != sep)) { csverr = "Missing separator.";return -1;}; csv[fields] = substr(string,2 + strtrim,pos - (1 + strtrim * 2)); fields++; string = substr(string,pos + 1); } return fields; }; BEGIN { if (ocsv_escape == "\\") escapere = "\\\\"; else escapere = ocsv_escape;if (col_list) cols_count = split(col_list,cols,/,/)}{ line++; fields = parse_csv($0,csv,csv_sep,csv_quote,csv_escape,"\n");if (fields < 0 ) { print "ERROR: " csverr " at line # "line | "cat 1>&2";next;} if (line <= skip) next; if (numcols){ if (fields < numcols) { for (col = fields; col < numcols; col++) csv[col] = "";}fields = numcols;}if (col_list) { for (col = 0; col < fields; col++) old[col] = csv[col]; for (col = 0; col < cols_count; col++) { field = cols[col + 1] - 1; if (field > fields - 1) old[field] = ""; csv[col] = old[field]}fields = cols_count} for (col = 0; col < fields; col++) { if (length(csv[col]) != 0) { if (((csv[col] ~ ocsv_sep) || (csv[col] ~ escapere) || (csv[col] ~ ocsv_quote) || (csv[col] ~ /\n/) || (csv[col] ~ /\t/) || (quotelevel >= 2) || ((quotelevel == 1) && (csv[col] !~ /^-*[0-9.][0-9.]*$/))) && (quotelevel != -1)) { if (csv_mawk_bug && ocsv_escape == "\\") { gsub (escapere,"\\\\\\",csv[col]) } else {gsub (escapere,ocsv_escape ocsv_escape,csv[col])}; if (ocsv_escape != ocsv_quote) {gsub (ocsv_quote,ocsv_escape ocsv_quote,csv[col]);}; if (nltext) gsub (/\n/,nltext,csv[col]); if (tabtext) gsub (/\t/,tabtext,csv[col]); printf "%s%s%s", ocsv_quote, csv[col], ocsv_quote; } else printf "%s", csv[col]; } else if (quotelevel == 3) printf "%s%s", ocsv_quote, ocsv_quote; if (col < fields - 1) printf "%s", ocsv_sep} printf "\n" }' "$FILE" | |
} | |
#for standalone use, invoke the function | |
IFS=$'\t\n'; csv2csv ${@} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment