dbro · April 24, 2014 07:38
diff --git a/partition b/partition
 #!/bin/bash
 # write the incoming data on stdin to separate files depending on their contents
 # for example, take a file that has different dates in it:
 # 2014-02-15+12334567   hello there   this is the first line
 # 2014-02-16+23345678   hello there   this is the second line
 # this script can be used to send the first line to a file called /tmp/session.log-20140215-randomnumber
 # and the second line to another file called /tmp/session.log-20140216-randomnumber
 # it takes the first N characters from the line for use in the output filename
 # Help text shown by display_usage. It is printed with "echo -e", so every \t
 # below is rendered as a tab; $0 is expanded here, when the string is assigned.
 USAGE="usage: $0 -p \"/tmp/session-logs-ready-to-merge-\" [-s \"-ready-for-merge\" -r -c 10 -d'-'] [input_filename] [another_input_filename]
 \tp\tprefix path
 \ts\tsuffix of the path
 \tc\tthe count of characters from the start of each row to use in the filename
 \td\tdelete these characters from the extracted prefix
 \tr\tappend a random string to the end of the file name
 \t\tif input_filename is omitted, read from stdin
 example: $0 -p \"/tmp/session.log-\" -s \"-merge-\" -c 10 -d'-' -r"
 # Print the usage text and terminate the script.
 # Arguments: $1 - optional exit status (default 0). With a non-zero status the
 #                 text is written to stderr instead of stdout, so misuse can
 #                 be told apart from an explicit request for help.
 # Globals:   USAGE (read)
 function display_usage() {
    local status=${1:-0}
    if [ "$status" -ne 0 ]; then
        echo -e "$USAGE" >&2
    else
        echo -e "$USAGE"
    fi
    exit "$status"
 }
 # default values for the options; the option parsing below overrides them
 prefix=""
 suffix=""
 random=false
 charcount=0
 delchars=""
 # Prefer mawk and fall back to awk. The fallback has to be an assignment:
 # "AWK=$(which mawk) || $(which awk)" would *execute* the path of awk as a
 # command when mawk is missing and leave AWK empty.
 AWK=$(command -v mawk) || AWK=$(command -v awk) || {
    echo "neither mawk nor awk found in PATH" >&2
    exit 1
 }
 # Parse the command-line options into the global option variables.
 # Arguments: the script's own arguments ("$@")
 # Globals:   prefix, suffix, charcount, delchars, random (written);
 #            OPTIND is advanced by getopts and deliberately left global so
 #            the caller can still shift the parsed options away.
 # Exits through display_usage for -h, -? or an unknown option, and with
 # status 1 when the value given to -c is not a non-negative integer.
 # TODO: check the remaining parameters for errors
 function parse_options() {
    local arg
    while getopts "p:s:c:d:r?h" arg; do
        case "$arg" in
            h | \?)
                display_usage
                ;;
            p)
                prefix=$OPTARG
                ;;
            s)
                suffix=$OPTARG
                ;;
            c)
                # awk would silently treat a non-numeric count as 0 and send
                # every row to the same file, so reject it up front
                case "$OPTARG" in
                    '' | *[!0-9]*)
                        echo "invalid character count for -c: $OPTARG" >&2
                        exit 1
                        ;;
                esac
                charcount=$OPTARG
                ;;
            d)
                delchars=$OPTARG
                ;;
            r)
                random=true
                ;;
        esac
    done
 }
 parse_options "$@"
 # A prefix is mandatory: report the problem on stderr and exit non-zero so
 # that callers can detect the misuse.
 if [ -z "$prefix" ]; then
    echo "no prefix specified" >&2
    # display_usage ends the shell that runs it, so run it in a subshell: the
    # usage text goes to stderr and this script still exits with status 1.
    ( display_usage ) >&2
    exit 1
 fi
 # no random part yet; the input loop sets it when -r was given
 randomstring=""
 # turn the characters to delete into a bracket expression, or leave the
 # regex empty when no characters were given
 delregex=""
 [ -z "$delchars" ] || delregex="[${delchars}]"
 # drop the parsed options so that only the input filenames remain
 shift "$((OPTIND - 1))"
 # without any filename, continue with one empty argument, which the input
 # loop treats as "read from stdin"
 (( $# > 0 )) || set -- ""
 # Split one input into output files keyed by the start of each row.
 # Arguments: $1 - input filename; empty means stdin, a name ending in .gz is
 #                 read through zcat, anything else through cat
 # Globals:   AWK, prefix, suffix, charcount, delregex, random (read)
 # Each row is written to <prefix><key><suffix>[-random], where <key> is the
 # first $charcount characters of the row with all $delregex matches removed.
 # NOTE(review): awk truncates an output file the first time a run writes to
 # it, so without -r a later input replaces the output of an earlier input
 # that produced the same key - confirm that this is intended.
 function partition_input() {
    local inputfile=$1
    local randomstring=""
    # a new random tag is generated for every input
    if [ "$random" == true ]; then
        randomstring="-$(uuidgen | tr -d '-' | cut -c1-10)"
    fi
    local suffixplusrandom="${suffix}${randomstring}"
    # The reader is called with a quoted filename so that names containing
    # whitespace (or shorter than the ".gz" suffix) are handled correctly.
    # AWK falls back to plain awk in case it was left empty.
    if [ -z "$inputfile" ]; then
        cat
    elif [[ "$inputfile" == *.gz ]]; then
        zcat -- "$inputfile"
    else
        cat -- "$inputfile"
    fi | "${AWK:-awk}" -v "prefix=$prefix" -v "suffix=$suffixplusrandom" -v "charcount=$charcount" -v "delregex=$delregex" 'BEGIN {FS="\t"; OFS=FS} {snippet = substr($0, 1, charcount); gsub(delregex, "", snippet); outputfilename = prefix snippet suffix;  print $0 > outputfilename}'
 }
 for inputfile in "$@"; do
    partition_input "$inputfile"
 done
	#!/bin/bash
	# write the incoming data on stdin to separate files depending on their contents
	# for example, take a file that has different dates in it:
	# 2014-02-15+12334567 hello there this is the first line
	# 2014-02-16+23345678 hello there this is the second line
	# this script can be used to send the first line to a file called /tmp/session.log-20140215-randomnumber
	# and the second line to another file called /tmp/session.log-20140216-randomnumber
	# it takes the first N characters from the line for use in the output filename
	# Help text shown by display_usage. It is printed with "echo -e", so every \t
	# below is rendered as a tab; $0 is expanded here, when the string is assigned.
	USAGE="usage: $0 -p \"/tmp/session-logs-ready-to-merge-\" [-s \"-ready-for-merge\" -r -c 10 -d'-'] [input_filename] [another_input_filename]
	\tp\tprefix path
	\ts\tsuffix of the path
	\tc\tthe count of characters from the start of each row to use in the filename
	\td\tdelete these characters from the extracted prefix
	\tr\tappend a random string to the end of the file name
	\t\tif input_filename is omitted, read from stdin
	example: $0 -p \"/tmp/session.log-\" -s \"-merge-\" -c 10 -d'-' -r"
	# Print the usage text and terminate the script.
	# Arguments: $1 - optional exit status (default 0). With a non-zero status the
	#                 text is written to stderr instead of stdout, so misuse can
	#                 be told apart from an explicit request for help.
	# Globals:   USAGE (read)
	function display_usage() {
	    local status=${1:-0}
	    if [ "$status" -ne 0 ]; then
	        echo -e "$USAGE" >&2
	    else
	        echo -e "$USAGE"
	    fi
	    exit "$status"
	}
	# default values for the options; the option parsing below overrides them
	prefix=""
	suffix=""
	random=false
	charcount=0
	delchars=""
	# Prefer mawk and fall back to awk. The fallback has to be a real "||"
	# followed by an assignment: an escaped "\|\|" is passed along as a literal
	# word, and a bare "$(which awk)" would execute the path of awk as a command
	# and leave AWK empty.
	AWK=$(command -v mawk) || AWK=$(command -v awk) || {
	    echo "neither mawk nor awk found in PATH" >&2
	    exit 1
	}
	# Parse the command-line options into the global option variables.
	# Arguments: the script's own arguments ("$@")
	# Globals:   prefix, suffix, charcount, delchars, random (written);
	#            OPTIND is advanced by getopts and deliberately left global so
	#            the caller can still shift the parsed options away.
	# Exits through display_usage for -h, -? or an unknown option, and with
	# status 1 when the value given to -c is not a non-negative integer.
	# TODO: check the remaining parameters for errors
	function parse_options() {
	    local arg
	    while getopts "p:s:c:d:r?h" arg; do
	        case "$arg" in
	            h | \?)
	                display_usage
	                ;;
	            p)
	                prefix=$OPTARG
	                ;;
	            s)
	                suffix=$OPTARG
	                ;;
	            c)
	                # awk would silently treat a non-numeric count as 0 and send
	                # every row to the same file, so reject it up front
	                case "$OPTARG" in
	                    '' | *[!0-9]*)
	                        echo "invalid character count for -c: $OPTARG" >&2
	                        exit 1
	                        ;;
	                esac
	                charcount=$OPTARG
	                ;;
	            d)
	                delchars=$OPTARG
	                ;;
	            r)
	                random=true
	                ;;
	        esac
	    done
	}
	parse_options "$@"
	# A prefix is mandatory: report the problem on stderr and exit non-zero so
	# that callers can detect the misuse.
	if [ -z "$prefix" ]; then
	    echo "no prefix specified" >&2
	    # display_usage ends the shell that runs it, so run it in a subshell: the
	    # usage text goes to stderr and this script still exits with status 1.
	    ( display_usage ) >&2
	    exit 1
	fi
	# no random part yet; the input loop sets it when -r was given
	randomstring=""
	# turn the characters to delete into a bracket expression, or leave the
	# regex empty when no characters were given
	delregex=""
	[ -z "$delchars" ] || delregex="[${delchars}]"
	# drop the parsed options so that only the input filenames remain
	shift "$((OPTIND - 1))"
	# without any filename, continue with one empty argument, which the input
	# loop treats as "read from stdin"
	(( $# > 0 )) || set -- ""
	# Split one input into output files keyed by the start of each row.
	# Arguments: $1 - input filename; empty means stdin, a name ending in .gz is
	#                 read through zcat, anything else through cat
	# Globals:   AWK, prefix, suffix, charcount, delregex, random (read)
	# Each row is written to <prefix><key><suffix>[-random], where <key> is the
	# first $charcount characters of the row with all $delregex matches removed.
	# The pipes are real "|" operators here; an escaped "\|" is handed to the
	# command as a literal argument and no pipeline is built.
	# NOTE(review): awk truncates an output file the first time a run writes to
	# it, so without -r a later input replaces the output of an earlier input
	# that produced the same key - confirm that this is intended.
	function partition_input() {
	    local inputfile=$1
	    local randomstring=""
	    # a new random tag is generated for every input
	    if [ "$random" == true ]; then
	        randomstring="-$(uuidgen | tr -d '-' | cut -c1-10)"
	    fi
	    local suffixplusrandom="${suffix}${randomstring}"
	    # The reader is called with a quoted filename so that names containing
	    # whitespace (or shorter than the ".gz" suffix) are handled correctly.
	    # AWK falls back to plain awk in case it was left empty.
	    if [ -z "$inputfile" ]; then
	        cat
	    elif [[ "$inputfile" == *.gz ]]; then
	        zcat -- "$inputfile"
	    else
	        cat -- "$inputfile"
	    fi | "${AWK:-awk}" -v "prefix=$prefix" -v "suffix=$suffixplusrandom" -v "charcount=$charcount" -v "delregex=$delregex" 'BEGIN {FS="\t"; OFS=FS} {snippet = substr($0, 1, charcount); gsub(delregex, "", snippet); outputfilename = prefix snippet suffix; print $0 > outputfilename}'
	}
	for inputfile in "$@"; do
	    partition_input "$inputfile"
	done