Last active
December 16, 2015 08:58
-
-
Save infotroph/5409286 to your computer and use it in GitHub Desktop.
Sed example for concatenating multiple space-delimited files, each with a different kind of messy header line, into single CSV files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Sed example: concatenate several space-delimited files, each with a
# differently messy header, into one CSV per input name, prefixing every row
# with a treatment label.
#
# Everything is read and written in the current directory:
#   tocsv.files   manifest, one "<flag><whitespace><filename>" line per
#                 input; only lines whose flag is exactly 1 are processed
#   foo.out, bar.out, baz.out
#                 sample inputs, regenerated for every treatment
#   <name>.csv    output; <name> is the input filename up to its first "."
#
# Header handling (done by the sed program stored in $headermunge):
#   * first treatment: a leading "extrajunk" line is dropped, a "col1 ..."
#     header is rewritten to "treatment,col1,...", and a placeholder header
#     is inserted when the file has no header at all;
#   * later treatments: junk and header lines are removed so only data rows
#     are appended.
set -euo pipefail

readonly MANIFEST='tocsv.files'

# Treatment labels. Each label is pasted verbatim into sed patterns, so it
# must not contain "/" or regex metacharacters.
trtary=(a b c)

# Writes the three sample inputs: one with a plain header, one with a junk
# line above the header, and one with no header.
write_sample_data() {
  printf 'col1 col2 col3\n1 2 3\n4 5 6\n7 8 9\n' > foo.out
  printf 'extrajunk\ncol1 col2 col3 col4\n1 2 3 4\n5 6 7 8\n9 10 11 12\n' > bar.out
  printf '1 2 3 4 5\n6 7 8 9 10\n11 12 13 14 15\n' > baz.out
}

# Builds the manifest and sample data, then appends every flagged input to
# its CSV once per treatment.
main() {
  local t first_trt headermunge exists infile

  printf '1\tfoo.out\n0\tunwanted.out\n1\tbar.out\n1\tbaz.out\n' > "$MANIFEST"

  # Start every output empty: rows are appended below, so without this a
  # second run would add a duplicate header and duplicate rows.
  while read -r exists infile; do
    [[ "$exists" == 1 ]] || continue
    : > "${infile%%.*}.csv"
  done < "$MANIFEST"

  first_trt=${trtary[0]}
  for t in "${trtary[@]}"; do
    # Generate sample data. Each treatment overwrites the previous .out files.
    write_sample_data

    # The sed programs are deliberately left-justified: the line after "i\"
    # is emitted as text, and sed implementations disagree on whether leading
    # whitespace in that text is kept. Inside double quotes "\\" yields the
    # single backslash sed needs.
    #
    # Both programs hold line 1, then on line 2 join the two lines
    # ("line1\nline2") in the pattern space so the header can be inspected
    # together with whatever follows it.
    if [[ "$t" == "$first_trt" ]]; then
      # First treatment: set up the header.
      headermunge="1 {h; d;}
2 {H; x;
s/^${t},extrajunk.*\\n//
/^${t},col1,col2/!i\\
INSERTEDtreatment,col1,col2,col3,col4
s/^${t},col1/treatment,col1/1
p; d;}"
    else
      # Later treatments: drop junk/header so only data rows remain.
      headermunge="1 {h; d;}
2 {H; x;
/^${t},extrajunk/d
s/^${t},col1.*\\n//; }"
    fi

    # Each manifest line: [0|1] <whitespace> filename.
    while read -r exists infile; do
      [[ "$exists" == 1 ]] || continue # flag 0: file doesn't exist
      # 1st expression: prefix the row with the treatment label;
      # 2nd: turn runs of spaces into commas; 3rd: fix up the header.
      sed -E \
        -e "s/^ */${t},/" \
        -e 's/ +/,/g' \
        -e "$headermunge" \
        < "$infile" >> "${infile%%.*}.csv"
    done < "$MANIFEST"
  done
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment