Created
April 24, 2014 07:38
-
-
Save dbro/11245203 to your computer and use it in GitHub Desktop.
partition lines of incoming data into separate files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# write the incoming data on stdin to separate files depending on their contents | |
# for example, take a file that has different dates in it: | |
# 2014-02-15+12334567 hello there this is the first line | |
# 2014-02-16+23345678 hello there this is the second line | |
# this script can be used to send the first line to a file called /tmp/session.log-20140215-randomnumber
# and the second line to another file called /tmp/session.log-20140216-randomnumber
# it takes the first N characters from the line for use in the output filename | |
# Help text shown by display_usage. The \t sequences are left as literal
# backslash escapes here and are expanded when the text is printed.
USAGE="usage: $0 -p \"/tmp/session-logs-ready-to-merge-\" [-s \"-ready-for-merge\" -r -c 10 -d'-'] [input_filename] [another_input_filename]
\tp\tprefix path
\ts\tsuffix of the path
\tc\tthe count of characters from the start of each row to use in the filename
\td\tdelete these characters from the extracted prefix
\tr\tappend a random string to the end of the file name
\t\tif input_filename is omitted, read from stdin
example: $0 -p \"/tmp/session.log-\" -s \"-merge-\" -c 10 -d'-' -r"

#######################################
# Print the usage text and terminate the script.
# Globals:   USAGE (read)
# Arguments: $1 - optional exit status, defaults to 0
# Outputs:   usage text on stdout when the status is 0, on stderr otherwise
# Returns:   never returns; exits with the given status
#######################################
function display_usage() {
  local status=${1:-0}
  if [[ "$status" == 0 ]]; then
    printf '%b\n' "$USAGE"
  else
    # A non-zero status means the usage is shown because of an error.
    printf '%b\n' "$USAGE" >&2
  fi
  exit "$status"
}
# Default values for the command-line options.
prefix=""
suffix=""
random=false
charcount=0
delchars=""

# Use mawk when it is installed, otherwise fall back to awk. Each candidate is
# assigned explicitly: a bare "|| $(...)" would execute the awk path as a
# command instead of storing it in AWK.
AWK=$(command -v mawk) || AWK=$(command -v awk) || {
  echo "neither mawk nor awk found in PATH" >&2
  exit 1
}
# TODO: validate the remaining parameters (only -c is checked so far)

#######################################
# Parse the command-line options into the option variables.
# Globals:   prefix, suffix, charcount, delchars, random (written)
#            OPTIND (written; deliberately not local so the caller can
#            shift the parsed options away afterwards)
# Arguments: the script's command-line arguments
# Outputs:   an error message and the usage text on stderr for a bad -c value
# Returns:   0 on success; exits the script for -h, -?, unknown options
#            (via display_usage) and for an invalid -c value (status 2)
#######################################
function parse_options() {
  local arg
  while getopts "p:s:c:d:r?h" arg; do
    case "$arg" in
      h | \?)
        display_usage
        ;;
      p)
        prefix=$OPTARG
        ;;
      s)
        suffix=$OPTARG
        ;;
      c)
        # The count is used as a substring length, so it has to be a
        # non-negative integer.
        if [[ ! "$OPTARG" =~ ^[0-9]+$ ]]; then
          echo "invalid character count: $OPTARG" >&2
          # display_usage calls exit itself; run it in a subshell so the
          # exit status below is the one the script ends with.
          (display_usage) >&2
          exit 2
        fi
        charcount=$OPTARG
        ;;
      d)
        delchars=$OPTARG
        ;;
      r)
        random=true
        ;;
    esac
  done
}

parse_options "$@"
# A prefix is mandatory: it is the leading part of every output file name.
if [[ -z "$prefix" ]]; then
  echo "no prefix specified" >&2
  # display_usage calls exit itself; run it in a subshell so the script can
  # report the failure with a non-zero exit status.
  (display_usage) >&2
  exit 1
fi

# Stays empty unless -r was given; the per-input-file loop fills it in.
randomstring=""

# Turn the characters given with -d into a bracket expression for awk's gsub.
# NOTE(review): delchars is inserted verbatim, so characters that are special
# inside a bracket expression (a leading ^, a - between two characters, ...)
# keep their regex meaning - confirm this is acceptable for the -d values used.
if [[ -n "$delchars" ]]; then
  delregex="[${delchars}]"
else
  delregex=""
fi

# Drop the parsed options so that "$@" holds only the input file names.
shift "$((OPTIND - 1))"

if (($# == 0)); then
  # no input filenames given, so we should read from stdin
  # reset the array of arguments to contain one empty string
  set -- ""
fi
#######################################
# Split the lines of one input into output files named after each line's
# leading characters.
# Globals:   prefix, suffix, random, charcount, delregex, AWK (read)
# Arguments: $1 - input file name; an empty string means read stdin, and a
#                 name ending in .gz is decompressed with zcat
# Outputs:   writes each line to "<prefix><key><suffix>[-<random>]", where
#            <key> is the first charcount characters of the line with the
#            characters matching delregex removed
# Returns:   the exit status of the awk stage of the pipeline
#######################################
function partition_input() {
  local inputfile=$1
  local -a reader
  local randomstring=""

  # Build the reader as an array so file names with spaces or glob
  # characters are passed through intact.
  if [[ -z "$inputfile" ]]; then
    #echo "reading input from stdin"
    reader=(cat)
  elif [[ "$inputfile" == *.gz ]]; then
    #echo "reading from input file = $inputfile"
    reader=(zcat -- "$inputfile")
  else
    #echo "reading from input file = $inputfile"
    reader=(cat -- "$inputfile")
  fi

  # A fresh random tag is generated for every input file.
  if [[ "$random" == true ]]; then
    randomstring="-$(uuidgen | tr -d '-' | cut -c1-10)"
  fi
  local suffixplusrandom="${suffix}${randomstring}"

  # Fall back to plain awk if AWK was left empty.
  "${reader[@]}" | "${AWK:-awk}" \
    -v "prefix=$prefix" \
    -v "suffix=$suffixplusrandom" \
    -v "charcount=$charcount" \
    -v "delregex=$delregex" '
    {
      snippet = substr($0, 1, charcount)
      if (delregex != "") gsub(delregex, "", snippet)
      outputfilename = prefix snippet suffix

      # Keep only one output file open at a time so that an input with many
      # distinct keys cannot run out of file descriptors. A file is truncated
      # the first time it is opened in this run and appended to whenever it
      # is reopened later. The mode is fixed per run of consecutive lines so
      # the same redirection is used while the file stays open.
      if (outputfilename != current) {
        if (current != "") close(current)
        append = (outputfilename in opened)
        opened[outputfilename] = 1
        current = outputfilename
      }
      if (append) {
        print $0 >> outputfilename
      } else {
        print $0 > outputfilename
      }
    }'
}

for inputfile in "$@"; do
  partition_input "$inputfile"
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment