Created
October 9, 2018 06:28
-
-
Save efrecon/07f3b616205bfde15b9c58de35b66c8e to your computer and use it in GitHub Desktop.
Execute a sed script on one or several CSV files, in place or concatenating the output into a single CSV result file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Execute a sed script on one or several CSV files, either in place or
# concatenating the output into a single CSV result file.

# usage [message]
#   Print the optional message, followed by the help text, on stderr and
#   terminate the script with exit status 1.
#   $1 - optional error message shown before the help text
usage() {
  # printf with a quoted argument keeps the message verbatim (no word
  # splitting, no glob expansion, no echo option parsing).
  if [ -n "$1" ]; then printf '%s\n' "$1" >&2; fi
  cat << USAGE >&2
Usage:
  $0 options.. [--] files...
  -v | --verbose            Be more verbose when waiting
  --no-header               Do not consider CSV file with header
  --dry-run                 Do not modify files in place, show result instead
  -f | --file script        Execute sed script
  -n | --quiet | --silent   Disable automatic printing of pattern space in sed
  -h | --help               Print this help and quit
  -o | --output file        Concatenate all files to this one (should share same header!)
  -- FILES                  Execute sed script (in place) on each file, keep first CSV header line.
USAGE
  exit 1
}
# Defaults for the command-line options.
VERBOSE=0   # 1: print progress messages on stderr
HEADER=1    # 1: first line of each CSV file is a header and is kept as is
DRYRUN=0    # 1: print the result instead of modifying files in place
SCRIPT=""   # path to the sed script to execute (mandatory)
SEDOPTS=""  # extra options passed to sed
OUTPUT=""   # when set, all results are concatenated into this file

# Consume options from the positional parameters; whatever remains after
# the loop is the list of CSV files to process.
while [ $# -gt 0 ]; do
  case "$1" in
    -v | --verbose)
      VERBOSE=1
      shift 1
      ;;
    -f | --file)
      # Guard against a missing value: "shift 2" with a single remaining
      # parameter fails (aborts in dash, loops forever in bash).
      [ $# -ge 2 ] || usage "Option $1 requires an argument"
      SCRIPT=$2
      shift 2
      ;;
    -o | --output)
      [ $# -ge 2 ] || usage "Option $1 requires an argument"
      OUTPUT=$2
      shift 2
      ;;
    -n | --quiet | --silent)
      SEDOPTS="-n"
      # This flag takes no value: shift only the flag itself so that the
      # next option is not swallowed.
      shift 1
      ;;
    --dry-run)
      DRYRUN=1
      shift 1
      ;;
    --no-header)
      HEADER=0
      shift 1
      ;;
    -h | --help)
      usage
      ;;
    --)
      # Explicit end of options.
      shift
      break
      ;;
    -*)
      usage "Unknown argument: $1"
      ;;
    *)
      # First non-option argument: start of the file list.
      break
      ;;
  esac
done
# log message
#   Print the message on stderr when verbose mode is on (VERBOSE != 0);
#   otherwise do nothing.
#   $1 - message to print
log() {
  # Quoted printf keeps the message verbatim: file names containing
  # several spaces or glob characters are logged exactly as given.
  if [ "$VERBOSE" -ne 0 ]; then printf '%s\n' "$1" >&2; fi
}
# A sed script is mandatory: without one, show the help text and quit.
[ -n "$SCRIPT" ] || usage "You need to specify a sed script!"
# Main processing: run the sed script over every file given on the command
# line. When HEADER is 1 the first line of a file is copied verbatim and
# only the remaining lines are fed to sed.
#
# NOTE: $SEDOPTS is deliberately left unquoted below so that an empty value
# expands to no argument at all.
if [ -z "$OUTPUT" ]; then
  # In-place mode: build the result in a temporary file, then either show
  # it (dry run) or copy it over the original.
  for fname in "$@"; do
    TMP=$(mktemp "${TMPDIR:-/tmp}/csvsed.XXXXXX") || exit 1
    log "Executing $SCRIPT on $fname through tempfile: $TMP"
    if [ "$HEADER" -eq 1 ]; then
      head -n 1 -- "$fname" > "$TMP" &&
        tail -n +2 -- "$fname" | sed -f "$SCRIPT" $SEDOPTS -E >> "$TMP"
    else
      sed -f "$SCRIPT" $SEDOPTS -E -- "$fname" > "$TMP"
    fi
    rc=$?
    if [ "$rc" -ne 0 ]; then
      # Never replace the original with a partial result.
      printf '%s\n' "Failed to execute $SCRIPT on $fname, file left untouched" >&2
      rm -f "$TMP"
      exit 1
    fi
    if [ "$DRYRUN" -eq 1 ]; then
      cat "$TMP"
    else
      # Copy the content instead of moving the temporary file so that the
      # target keeps its permissions, ownership and links.
      cat "$TMP" > "$fname" || { rm -f "$TMP"; exit 1; }
    fi
    rm -f "$TMP"
  done
else
  # Concatenation mode: start the output with the header of the first file,
  # or as a truly empty file when there is no header (or no input file).
  if [ "$HEADER" -eq 1 ] && [ $# -gt 0 ]; then
    head -n 1 -- "$1" > "$OUTPUT" || exit 1
  else
    : > "$OUTPUT" || exit 1
  fi
  # Then take the tail or entirety of each file into the output.
  for fname in "$@"; do
    log "Executing $SCRIPT on $fname directly into: $OUTPUT"
    if [ "$HEADER" -eq 1 ]; then
      tail -n +2 -- "$fname" | sed -f "$SCRIPT" $SEDOPTS -E >> "$OUTPUT"
    else
      sed -f "$SCRIPT" $SEDOPTS -E -- "$fname" >> "$OUTPUT"
    fi
    rc=$?
    if [ "$rc" -ne 0 ]; then
      printf '%s\n' "Failed to execute $SCRIPT on $fname" >&2
      exit 1
    fi
  done
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment