Created
September 20, 2017 15:21
-
-
Save wbazant/7abfe8f1f01904e8a0d8e284bfe4f91d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################
# Run a GO gene-set enrichment analysis with irap_GSE_piano on the
# E-GEOD-59831 analytics file, using a caller-supplied gene set file.
# Globals:
#   ATLAS_EXPS (read) - directory that contains E-GEOD-59831/
#   ATLAS_PROD (read) - directory that contains bioentity_properties/go/
# Arguments:
#   $1 - path to the gene set (gene id -> GO term) file, passed as --go
#   $2 - output path, passed as --out
# Returns:
#   2 if an argument is missing, 1 if ATLAS_EXPS or ATLAS_PROD is unset or
#   empty, otherwise the exit status of irap_GSE_piano.
#######################################
analyse() {
  if (( $# < 2 )); then
    printf 'usage: analyse <gene-set-file> <output-file>\n' >&2
    return 2
  fi
  if [[ -z "${ATLAS_EXPS:-}" || -z "${ATLAS_PROD:-}" ]]; then
    printf 'analyse: ATLAS_EXPS and ATLAS_PROD must be set\n' >&2
    return 1
  fi

  # Keep the working variables out of the caller's namespace.
  local geneSetFilePath=$1
  local outputFile=$2
  local analyticsFile="$ATLAS_EXPS/E-GEOD-59831/E-GEOD-59831-analytics.tsv"

  # Every expansion is quoted so that paths containing whitespace reach
  # irap_GSE_piano as a single argument.
  irap_GSE_piano \
    --tsv="$analyticsFile" \
    --pvalue-col=3 \
    --foldchange-col=4 \
    --title="title" \
    --pvalue=0.05 \
    --gs_fdr=0.1 \
    --method=fisher-exact \
    --dup-use-best \
    --plot-annot-only \
    --top=10 \
    --minsize 5 \
    --maxsize 100 \
    --descr "$ATLAS_PROD/bioentity_properties/go/goIDToTerm.tsv.decorate.aux" \
    --go="$geneSetFilePath" \
    --out="$outputFile"
}
# Experiment: how do "gene id -> empty GO term" rows in the annotation file
# affect the enrichment results produced by analyse?
# Each run is given an --out path without extension; the comparisons below
# read the corresponding <out>.tsv files.

# Ensembl 88 mouse gene -> GO term annotations, and the experiment's analytics.
go_annotations=/nfs/production3/ma/home/atlas3-production/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv
analytics_tsv="$ATLAS_EXPS/E-GEOD-59831/E-GEOD-59831-analytics.tsv"

# Print the first-column IDs that occur in the annotation file but not in the
# analytics file (comm -23 keeps lines unique to its first input).
# NOTE(review): the original comment described these as "IDs with no go
# terms" - confirm that -23 (rather than -13) is the intended direction.
annotation_only_ids() {
  comm -23 \
    <(cut -f 1 "$go_annotations" | sort -u) \
    <(cut -f 1 "$analytics_tsv" | sort -u)
}

# a "normal" analysis
analyse "$go_annotations" /var/tmp/out1

# How many such IDs are there?
annotation_only_ids | wc -l
# 289

# Make a version of the annotations file with these IDs appended at the end,
# each followed by a tab and an empty GO term column.
# $'...' yields a literal tab, so this does not rely on GNU sed's \t escape.
cat "$go_annotations" <(annotation_only_ids | sed $'s/$/\t/') \
  > /var/tmp/mus_musculus.ensgene.go.tsv-ids-at-the-end
analyse /var/tmp/mus_musculus.ensgene.go.tsv-ids-at-the-end /var/tmp/out2

# No differences, as predicted!
diff /var/tmp/out1.tsv /var/tmp/out2.tsv | wc -l
# 0

# Filter out all the "gene id -> empty" rows (lines ending in whitespace).
grep -v -e '[[:space:]]$' "$go_annotations" \
  > /var/tmp/mus_musculus.ensgene.go.tsv-no-empty-goterms
analyse /var/tmp/mus_musculus.ensgene.go.tsv-no-empty-goterms /var/tmp/out3

# Lots of differences!
diff /var/tmp/out1.tsv /var/tmp/out3.tsv | wc -l
# 9724
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Much of the change between Ensembl 88 and 89 can be accounted for by "gene id to empty" entries: