Created
July 11, 2017 07:57
-
-
Save laughedelic/31758a9e9adc488107bfa4bd662acfc9 to your computer and use it in GitHub Desktop.
A script for merging MG7 output tables using Metaphlan utility
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# project specific constants
# Project identifier; used in the S3 paths and in the merged table file names.
readonly name="m16s6mangel"
#######################################
# Prints the sample prefix for a given sample index.
# Arguments: $1 - sample index (the script calls this with 1..12)
# Outputs:   "MAGEL<index>_S<index>" followed by a newline, on stdout
#######################################
sample_prefix() {
  printf 'MAGEL%s_S%s\n' "$1" "$1"
}
# arguments
#   $1 - path to metaphlan (required)
#   $2 - input data dir (optional, default: data/in/counts)
#   $3 - output data dir (optional, default: data/out)

# preliminary checks
# Validate the argument count before reading $1, and fail with a non-zero
# status so callers can detect the error.
if (( $# < 1 )); then
  echo "Need at least 1 argument: metaphlan path" >&2
  exit 1
fi

metaphlan=$1
input=${2:-"data/in/counts"}
output=${3:-"data/out"}
# the merge utility is expected under utils/ inside the metaphlan path
metaphlan_merge="$metaphlan/utils/merge_metaphlan_tables.py"

if [[ ! -e "$metaphlan_merge" ]]; then
  echo "File [$metaphlan_merge] doesn't exist" >&2
  exit 1
fi

# mkdir -p is a no-op for existing directories, so no separate -d test is needed
mkdir -p -- "$output/tsv" "$output/merged" || exit 1
# download the data
# aws s3 cp --recursive s3://era7p/$name/data/out/mg7/count/ $input

# iterate over all kinds: every combination x.y.z names one table type
for x in "bbh" "lca"; do
  for y in "accumulated" "direct"; do
    for z in "absolute.counts" "frequency.percentage"; do
      kind=$x.$y.$z

      for i in {1..12}; do
        sample=$(sample_prefix "$i")
        # input file
        file=$input/$sample/$sample.$kind.csv
        # output file
        tsv=$output/tsv/$sample.$kind.tsv

        # drop any stale result so a missing input never leaves an old table behind
        rm -f -- "$tsv"
        if [[ ! -f "$file" ]]; then
          echo "Input file [$file] doesn't exist, skipping" >&2
          continue
        fi

        # replace spaces with _, merge 1-4 columns, make 5th separate (for TSV):
        # everything before the last comma becomes the first TSV field,
        # everything after it becomes the second one.
        # Rows are read line by line (no word splitting, no glob expansion)
        # and printed with printf so backslashes in the data stay literal.
        while IFS= read -r row || [[ -n "$row" ]]; do
          [[ -n "$row" ]] || continue   # skip empty lines
          printf '%s\t%s\n' "${row%,*}" "${row##*,}"
        done < <(sed 's/ /_/g' -- "$file") > "$tsv"
      done

      # once all samples of one x.y.z type are ready, run metaphlan
      tables=("$output"/tsv/*."$kind".tsv)
      # an unmatched glob stays literal; don't hand a non-existent path to the merger
      if [[ ! -e "${tables[0]}" ]]; then
        echo "No tables of kind [$kind] to merge, skipping" >&2
        continue
      fi
      "$metaphlan_merge" "${tables[@]}" \
        > "$output/merged/$name-all.$kind.merged-table.tsv"
    done
  done
done
# cleanup
# ${output:?} aborts if output is empty/unset instead of removing /tsv/
rm -rf -- "${output:?}/tsv/"
# upload data: only the *.tsv files from the merged dir are copied
aws s3 cp --recursive "$output/merged/" "s3://era7p/$name/data/out/mg7/merged-tables/" \
  --exclude '*' --include '*.tsv'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment