Last active
December 18, 2020 21:53
-
-
Save brunerd/a90e60b0d596474e59599e23c1f19bed to your computer and use it in GitHub Desktop.
Minified, functionalized version of Lorance Stinson's csv2csv shell utility minus the help, truly a one-liner
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
# csv2csv.function.min.no_help | |
# csv2csv by Lorance Stinson, 2007 | |
# This file is in the public domain. | |
# For more information email [email protected]. | |
# Or see http://lorance.freeshell.org/csvutils/ | |
# Additions by Joel Bruner, 2020 (https://github.com/brunerd) | |
#csv2csv function minified | |
function csv2csv { IFS=$' \t\n'; local NL=$'\n'; local TAB=$'\t';local CSV_ESCAPE=${CSV_ESCAPE:='"'}; local CSV_SEPARATOR=${CSV_SEPARATOR:=","}; local CSV_QUOTE=${CSV_QUOTE:='"'}; local OCSV_ESCAPE=${CSV_ESCAPE:='"'}; local OCSV_SEPARATOR=${CSV_SEPARATOR:=","}; local OCSV_QUOTE=${CSV_QUOTE:='"'}; local FILE="-"; local NL_TEXT=""; local NUM_COLS=""; local OUTPUT=""; local QUOTE_LEVEL="0"; local SKIP_ROWS="0"; local TAB_TEXT=""; while [ "$#" -gt 0 ] ; do case "$1" in -c) [ "$2" ] && local COL_LIST="$2" || usage="$1 requires an argument."; shift 2;; -e) [ "$2" ] && CSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -E) [ "$2" ] && OCSV_ESCAPE="$2" || usage="$1 requires an argument."; shift 2;; -h) usage=" "; shift 1;; -l) [ "$2" ] && QUOTE_LEVEL="$2" || usage="$1 requires an argument."; shift 2;; -N) [ "$2" ] && NL_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -n) [ "$2" ] && NUM_COLS="$2" || usage="$1 requires an argument."; shift 2;; -o) [ "$2" ] && OUTPUT="$2" || usage="$1 requires an argument."; shift 2;; -q) [ "$2" ] && CSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -Q) [ "$2" ] && OCSV_QUOTE="$2" || usage="$1 requires an argument."; shift 2;; -r) [ "$2" ] && SKIP_ROWS="$2" || usage="$1 requires an argument."; shift 2;; -s) [ "$2" ] && CSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -S) [ "$2" ] && OCSV_SEPARATOR="$2" || usage="$1 requires an argument."; shift 2;; -T) [ "$2" ] && TAB_TEXT="$2" || usage="$1 requires an argument."; shift 2;; -z) local CSV_MAWK_BUG="Y"; shift;; --) [ "$2" ] && FILE="$2"; break;; -) FILE="-"; shift;; -*) usage="Invalid option $1";; *) FILE="$1"; shift;; esac; done; [ "X$FILE" != "X-" -a ! -f "$FILE" -a ! -r "$FILE" ] && usage="Unable to read the file ($FILE)."; if [ -n "${usage}" ] ; then exit 1; fi; if [ "$OUTPUT" ] ; then exec 1>$OUTPUT; fi; awk -v csv_sep="$CSV_SEPARATOR" -v csv_quote="$CSV_QUOTE" -v csv_escape="$CSV_ESCAPE" -v ocsv_sep="$OCSV_SEPARATOR" -v ocsv_quote="$OCSV_QUOTE" -v ocsv_escape="$OCSV_ESCAPE" -v col_list="$COL_LIST" -v quotelevel="$QUOTE_LEVEL" -v nltext="$NL_TEXT" -v numcols="$NUM_COLS" -v row_attr="$ROW_ATTR" -v skip="$SKIP_ROWS" -v tabtext="$TAB_TEXT" \-v csv_mawk_bug="$CSV_MAWK_BUG" 'function parse_csv(string,csv,sep,quote,escape,newline,fields,instr,pos,strtrim) { if (length(string) == 0) return 0;string = sep string; fields = 0; while (length(string) > 0) { strtrim = 0; if (substr(string,2,1) == quote) {instr = 1; pos = 2; strtrim = 1; do {pos++;if (pos != length(string) && substr(string,pos,1) == escape && (substr(string,pos + 1,1) == quote || substr(string,pos + 1,1) == escape)) { string = substr(string,1,pos - 1) substr(string,pos + 1); } else if (substr(string,pos,1) == quote) { instr = 0;} else if (newline && pos >= length(string)) { if (getline == -1) { csverr = "Unable to read the next line."; return -1; }; string = string newline $0;}} while (pos < length(string) && instr); if (instr) {csverr = "Missing end quote.";return -1;}} else {if (length(string) == 1 || substr(string,2,1) == sep) { csv[fields] = ""; fields++;if (length(string) == 1){ return fields; string = substr(string,2); continue;}}; pos = index(substr(string,2),sep);if (pos == 0) { csv[fields] = substr(string,2); fields++; return fields; }};if ((pos != length(string) && substr(string,pos + 1,1) != sep)) { csverr = "Missing separator.";return -1;}; csv[fields] = substr(string,2 + strtrim,pos - (1 + strtrim * 2)); fields++; string = substr(string,pos + 1); } return fields; }; BEGIN { if (ocsv_escape == "\\") escapere = "\\\\"; else escapere = ocsv_escape;if (col_list) cols_count = split(col_list,cols,/,/)}{ line++; fields = parse_csv($0,csv,csv_sep,csv_quote,csv_escape,"\n");if (fields < 0 ) { print "ERROR: " csverr " at line # "line | "cat 1>&2";next;} if (line <= skip) next; if (numcols){ if (fields < numcols) { for (col = fields; col < numcols; col++) csv[col] = "";}fields = numcols;}if (col_list) { for (col = 0; col < fields; col++) old[col] = csv[col]; for (col = 0; col < cols_count; col++) { field = cols[col + 1] - 1; if (field > fields - 1) old[field] = ""; csv[col] = old[field]}fields = cols_count} for (col = 0; col < fields; col++) { if (length(csv[col]) != 0) { if (((csv[col] ~ ocsv_sep) || (csv[col] ~ escapere) || (csv[col] ~ ocsv_quote) || (csv[col] ~ /\n/) || (csv[col] ~ /\t/) || (quotelevel >= 2) || ((quotelevel == 1) && (csv[col] !~ /^-*[0-9.][0-9.]*$/))) && (quotelevel != -1)) { if (csv_mawk_bug && ocsv_escape == "\\") { gsub (escapere,"\\\\\\",csv[col]) } else {gsub (escapere,ocsv_escape ocsv_escape,csv[col])}; if (ocsv_escape != ocsv_quote) {gsub (ocsv_quote,ocsv_escape ocsv_quote,csv[col]);}; if (nltext) gsub (/\n/,nltext,csv[col]); if (tabtext) gsub (/\t/,tabtext,csv[col]); printf "%s%s%s", ocsv_quote, csv[col], ocsv_quote; } else printf "%s", csv[col]; } else if (quotelevel == 3) printf "%s%s", ocsv_quote, ocsv_quote; if (col < fields - 1) printf "%s", ocsv_sep} printf "\n" }' "$FILE"; } | |
#for standalone use, invoke the function | |
IFS=$'\t\n'; csv2csv ${@} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment