Created
September 19, 2017 10:59
-
-
Save wbazant/d1e194e7651015f4333d5e48d2696412 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Annotations changed a lot between ensembl_88_35 and ensembl_89_36 - mostly many new gene -> GO term mappings (lines common to both / only in 89_36 / only in 88_35): | |
[fg_atlas@ebi-cli-002 ~]$ comm -12 <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv) | wc -l | |
304880 | |
[fg_atlas@ebi-cli-002 ~]$ comm -13 <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv) | wc -l | |
41488 | |
[fg_atlas@ebi-cli-002 ~]$ comm -23 <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <( sort $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv) | wc -l | |
3566 | |
# But that is not because of new GO terms - the set of distinct GO terms is nearly the same in both releases: | |
[fg_atlas@ebi-cli-002 ~]$ comm -12 <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv | sort -u ) <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv | sort -u ) | wc -l | |
16734 | |
[fg_atlas@ebi-cli-002 ~]$ comm -13 <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv | sort -u ) <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv | sort -u ) | wc -l | |
257 | |
[fg_atlas@ebi-cli-002 ~]$ comm -23 <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv | sort -u ) <( cut -f2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv | sort -u ) | wc -l | |
45 | |
# Also, when we take the top 500 GO terms (by effect size) from the ensembl_88_35 GSEA result for E-GEOD-59831 g2_g1, the genes mapped to those terms don't change much between releases: | |
head -n 500 /nfs/production3/ma/home/atlas3-production/go-unstable/ensembl_88_35/E-GEOD-59831/E-GEOD-59831.g2_g1.go.gsea.tsv.by_effect_size | cut -f 2 > /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv | |
[fg_atlas@ebi-cli-002 ~]$ comm -12 <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) | wc -l | |
6632 | |
[fg_atlas@ebi-cli-002 ~]$ comm -13 <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) | wc -l | |
110 | |
[fg_atlas@ebi-cli-002 ~]$ comm -23 <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_88_35/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) <( join -1 2 -2 1 <(sort -k2 $ATLAS_PROD/bioentity_properties/archive/ensembl_89_36/mus_musculus.ensgene.go.tsv ) <(sort /var/tmp/top_500_go_terms_ensembl_88_35_E-GEOD-59831.g2_g1.go.gsea.tsv)) | wc -l | |
26 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment