Created
April 1, 2012 23:49
-
-
Save rsvp/2279533 to your computer and use it in GitHub Desktop.
lincol.sh : convert regex delimited lines into single column. E.g. linecol foo.csv ', *' #-- so each item on a CSV line will be on its own line.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#              bash 4.1.5(1)     Linux Ubuntu 10.04           Date : 2012-04-06
#
# _______________|  linecol : convert regex delimited lines into single column.
#
#           Usage:  linecol [filename] [delimiter regex]
#
#        Examples:  % linecol foo.csv ', *'
#                   #                 ^use single quotes for regex.
#                   #  Each item on a CSV line will be on its own line.
#
#    Dependencies:  sed, awk
#  CHANGE LOG  LATEST version available:   https://bitbucket.org/rsvp/gists/src
#
#  2012-04-06  Reverse argument order, consistent with colline.
#  2012-03-26  First version of an idiom. Leading and trailing blank
#                 characters are removed.
#           _____ PREAMBLE_v2: settings, variables, and error handling.
#
#  Assigned without `export`: child processes (sed, awk) see this value only
#  if LC_ALL was already exported in the calling environment.
LC_ALL=POSIX
#      ^locale means "ASCII, US English, no special rules,
#       output per ISO and RFC standards."
#       Esp. use ASCII encoding for glob and sorting characters.
shopt -s extglob
#      ^set extended glob for pattern matching.
set -e
#    ^errors checked: immediate exit if a command has non-zero status.
set -u
#    ^unassigned variables shall be errors.

#  Example of default VARIABLE ASSIGNMENT:  arg1=${1:-'foo'}
arg1=${1:-'-'}
#          ^default: standard input.
arg2=${2:-', *'}
#          ^default: CSV format.
#  arg2 is the FS Field Separator passed to awk.

program=${0##*/}   #  similar to using basename

#  Scratch file for the trimmed input.  /dev/shm is preferred (typically a
#  RAM-backed tmpfs on Linux); where it is missing or not writable, fall back
#  to $TMPDIR, then /tmp.  The template is quoted so that a program name
#  containing whitespace cannot split it into several arguments.
memdir=/dev/shm
[[ -d $memdir && -w $memdir ]] || memdir=${TMPDIR:-/tmp}
memf=$( mktemp "${memdir}/88_${program}_tmp.XXXXXXXXXX" )
cleanup () {
     #  Delete temporary files, then optionally exit given status.
     #  Usage:  cleanup [status]
     #     status  exit code to terminate with (default 0);
     #             the special value -1 returns to the caller instead.
     local status=${1:-0}
     #  ${memf:-} tolerates a call made while memf is unset (set -u);
     #  quoting keeps a path containing whitespace in one piece.
     if [[ -n ${memf:-} ]]; then
          rm -f -- "$memf"
     fi
     [[ $status == '-1' ]] || exit "$status"     #  thus -1 prevents exit.
} #--------------------------------------------------------------------
warn () {
     #  Message with basename to stderr.          Usage:  warn "message"
     #  printf with a fixed format prints the message verbatim: backslash
     #  sequences inside it are not interpreted.  Layout: a blank line,
     #  then " !! <program>: <message> ".
     printf '\n !! %s: %s \n' "${program}" "$1" >&2
} #--------------------------------------------------------------------
die () {
     #  Exit with status of most recent command or custom status, after
     #  cleanup and warn.      Usage:  command || die "message" [status]
     #     $1  message handed to warn
     #     $2  exit status (default: $? as it stood when die was called)
     local status=${2:-$?}
     #  Neither helper may prevent the exit: a failure in cleanup or warn
     #  is ignored so that the requested status is always delivered.
     cleanup -1 || :
     warn "$1"  || :
     exit "$status"
} #--------------------------------------------------------------------
#  Interrupting signals are routed through die (exit status 114), which runs
#  cleanup before exiting.
#  Signal numbers: 1=SIGHUP, 2=SIGINT, 3=SIGQUIT, 15=SIGTERM
trap "die 'SIG disruption, but cleanup finished.' 114" HUP INT QUIT TERM
#
# _______________ ::  BEGIN  Script ::::::::::::::::::::::::::::::::::::::::
#  _____ MASSAGE text
#        Remove leading and trailing blanks from every line of the input and
#        store the result in the temp file $memf.
#
#  The two branches differ only in where sed reads from: "-" is this script's
#  marker for standard input, anything else is passed as a file operand
#  ("--" keeps a name that starts with a dash from being parsed as an option).
#  On failure die is called without a status, so the script removes the temp
#  file and exits with sed's own status instead of leaving the file behind.
sed_trim=( -e 's/^[[:blank:]]*//' -e 's/[[:blank:]]*$//' )
if [[ $arg1 == '-' ]]; then
     sed "${sed_trim[@]}"
else
     sed "${sed_trim[@]}" -- "$arg1"
fi > "$memf" || die "bad sed: cannot process '$arg1'."
#    -e '/^[[:blank:]]*$/d'   (deliberately not applied)
#        ^decided: NO REMOVAL of BLANK LINES
#         because they might be record separators!
#         Due to the previous substitution, each
#         blank line will be a "pure" newline.
#  __________ AWK within a bash script
#             The delimiter regex is handed to awk with -v instead of being
#             pasted into the program text, so a double quote inside it can
#             no longer break or alter the awk program.  awk applies the same
#             string-literal escape processing to a -v value as it does to a
#             quoted string in the program.
#             The program itself is single-quoted: the shell expands nothing
#             in it, so awk field references need no escaping.
#
#  NOTE(review): an empty record has NF == 0, so the loop prints nothing for
#  a blank input line -- confirm whether blank lines were meant to survive.
awk -v FS="$arg2" '
     { for ( i = 1; i <= NF; ++i )
            print $i }
     #      ^each field gets printed on its own line.
' "$memf" || die "bad awk." 113

#  Normal end: remove the temp file and exit 0.
cleanup
# _______________ EOS ::  END of Script ::::::::::::::::::::::::::::::::::::::::
#  vim: set fileencoding=utf-8 ff=unix tw=78 ai syn=sh :
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment