Skip to content

Instantly share code, notes, and snippets.

@wbazant
Created October 3, 2017 12:38
Show Gist options
  • Save wbazant/a52fcd9c7f5bcc9ffd7eae833b2c37fc to your computer and use it in GitHub Desktop.
Save wbazant/a52fcd9c7f5bcc9ffd7eae833b2c37fc to your computer and use it in GitHub Desktop.
# configuration.xml lists 16 assays, and the file dates from February 2016
[fg_atlas@ebi-cli-001 analysis_archive]$ ls -latorh /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-configuration.xml
-rw-r--r-- 1 fg_atlas 1.8K Feb 8 2016 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-configuration.xml
[fg_atlas@ebi-cli-001 analysis_archive]$ grep '<assay>' /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-configuration.xml | wc -l
16
# Data files in the folder match configuration.xml: 17 columns, one of which is the gene name (16 assays + 1)
find /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871 -type f -name *tsv* | while read -r file ; do ls -latorh $file; head -n1 $file | tr $'\t' $'\n' | wc -l ; done
-rw-r--r-- 1 fg_atlas 2.7K Feb 2 2016 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/qc/E-GEOD-42871-findCRAMFiles-report.tsv
5
-rw-r--r-- 1 fg_atlas 1.1K Feb 2 2016 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-analysis-methods.tsv
2
-rw-r--r-- 1 fg_atlas 3.4M Feb 2 2016 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-raw-counts.tsv.undecorated
17
-rw-r--r-- 1 fg_atlas 4.5M May 19 12:58 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-tpms.tsv.undecorated
17
-rw-r--r-- 1 fg_atlas 6.9M May 19 12:59 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-tpms.tsv.undecorated.aggregated
10
-rw-r--r-- 1 fg_atlas 6.8M Sep 27 04:12 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-fpkms.tsv
11
-rw-r--r-- 1 fg_atlas 62M Jul 11 12:06 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-coexpressions.tsv.gz
1
-rw-r--r-- 1 fg_atlas 6.8M Feb 2 2016 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-fpkms.tsv.undecorated.aggregated
10
-rw-r--r-- 1 fg_atlas 6.9M Sep 27 04:14 /nfs/production3/ma/home/atlas3-production/analysis/baseline/rna-seq/experiments/E-GEOD-42871/E-GEOD-42871-tpms.tsv
11
# ISL has files that are too narrow - not enough columns: only 14 instead of the expected 17.
# Missing these runs:
#   SRR639163
#   SRR639165
find /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max -type f -name '*.tsv' | while read -r file ; do ls -latorh $file; head -n1 $file | tr $'\t' $'\n' | wc -l ; done
-rw-r--r-- 1 fg_atlas 26M Jun 30 2016 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/exons.tpm.dexseq.tsv
17
-rw-r--r-- 1 fg_atlas 4.9M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/transcripts.fpkm.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 3.7M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.fpkm.htseq2.tsv
14
-rw-r--r-- 1 fg_atlas 27M Jun 30 2016 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/exons.fpkm.dexseq.tsv
17
-rw-r--r-- 1 fg_atlas 1.2K Sep 4 15:12 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/irap.versions.tsv
4
-rw-r--r-- 1 fg_atlas 19M Jun 30 2016 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/exons.raw.dexseq.tsv
17
-rw-r--r-- 1 fg_atlas 3.7M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.tpm.htseq2.tsv
14
-rw-r--r-- 1 fg_atlas 2.9M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.raw.htseq2.tsv
14
-rw-r--r-- 1 fg_atlas 5.0M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/transcripts.tpm.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 3.7M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.tpm.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 3.7M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.fpkm.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 3.6M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/transcripts.riu.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 6.0M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/transcripts.raw.kallisto.tsv
14
-rw-r--r-- 1 fg_atlas 4.4M Sep 4 15:14 /nfs/production3/ma/home/irap_prod/single_lib/studies/E-GEOD-42871/glycine_max/genes.raw.kallisto.tsv
14
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment