Created
October 15, 2012 19:04
-
-
Save sashaphanes/3894433 to your computer and use it in GitHub Desktop.
Streamline getting expression data for a particular array of genes in a particular brain tissue
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl
# gen.CSV.perTissue.pl
#
# Streamlines the process of checking an initial array of genes against
#   1) a file storing relevant column numbers for a particular neural tissue,
#   2) a file storing expression data regarding that tissue and those genes.
#
# Status: only the command-line handling below is executable Perl.  The shell
# pipeline that performs the actual extraction is kept at the bottom of the
# file as commented reference commands; it was pasted from a terminal session
# and still has open problems (see the NOTE(review) markers) that must be
# resolved before it can be ported.
use strict;
use warnings;
use Getopt::Std;    # core module; the perl4-era "getopts.pl" is no longer shipped with perl

my $executable = "gen.CSV.perTissue.pl";
$| = 1;             # unbuffered STDOUT

my $usage = qq{
###################################################################
usage: $executable -f fileName -t tissue
extracts expression data for an array of genes in one brain tissue
# -f the file name of the array of genes of interest; will use stdin if specified as "stdin".
#
# -t the tissue of interest, e.g. "hippocampus" or "dorsal prefrontal cortex"
#
# -s column separator
# [default: tab]
# C: comma
# S: space
#
# -hH print this
example: >$executable -f testInputFile -t hippocampus
####################################################################
\n};

# Parse the switches into a lexical hash.  -h and -H are both plain flags, as
# the usage text documents; an unknown switch prints the usage and aborts.
my %opt;
getopts('f:t:s:hH', \%opt) or die($usage);

# $expressionData is declared for the pipeline below but nothing assigns it yet.
my ($genesOfInterest, $tissueOfInterest, $expressionData);

# options:
if ($opt{h} || $opt{H}) {
    die($usage);
}

# -f: either an existing, non-empty file or the literal word "stdin".
my $geneFile = defined $opt{f} ? $opt{f} : '';
if ($geneFile ne '' && -s $geneFile) {
    $genesOfInterest = $geneFile;
}
elsif ($geneFile =~ /\Astdin\z/i) {
    $genesOfInterest = "stdin";
}
else {
    die("Error: must have a input file for -f $geneFile !!!\n$usage");
}

# -t: a tissue *name*, not a path, so it is checked for presence rather than
# with a file test.  Every space becomes an underscore so that the value can
# be used inside output file names.
if (defined $opt{t} && length $opt{t}) {
    $tissueOfInterest = $opt{t};
    $tissueOfInterest =~ tr/ /_/;
}
else {
    die("Error: must name a tissue of interest with -t !!!\n$usage");
}

# -s is accepted but not used yet; the original (disabled) handling was:
#if ($opt_s =~ /^C/i ) {$separator = "comma"; }elsif ($opt_s =~ /^S/i) {$separator = "space"; } else {$separator = "tab"; }

###########################################################################
# Reference shell commands (not executed).
# They were run at the prompt rosseraa@localhost:~/work.dir> and refer to the
# Perl variables above by name.
###########################################################################

######This command merges two tables together on the basis of common genes found in one table, then stores these in a new file
# NOTE(review): -q is hard-coded to arrayTable.txt; presumably it should be
# the -f gene file -- confirm.
#   /mnt/xgen.programs/qyScripts/merge.basedOn.column.pl -f BrainSPAN/RNAseq.genes.RPKM/rows_metadata.csv.gz -l 4 -F 0 -L '"' -q arrayTable.txt -m 1 -Q 0 -s C -t T > "$genesOfInterest.arrayTable.rowNum"

######These commands added 1 to all the values returned for columns that contained the string "hippocampus"
# NOTE(review): "z" is not a standard command; presumably a local alias for
# zcat -- confirm.  The output of this pipeline is not redirected anywhere.
# NOTE(review): $tissueOfInterest now carries underscores instead of spaces;
# verify that grep still matches the names used in columns_metadata.
#   z BrainSPAN/RNAseq.genes.RPKM/columns_metadata.csv.gz
#   | grep -i "$tissueOfInterest" |cut -d"," -f1
#   | ../Documents/practice/transposer.pl -f stdin
#   | perl -ne '{chomp; @a =split (/\t/, $_); foreach $a (@a) {$b = $a + 1; print "$b,"; } print "\n" if (eof); }'

# NOTE(review): cut -f expects a field list, but is handed the file name
# "$genesOfInterest.arrayTable.rowNum"; presumably the comma-separated column
# list printed by the previous pipeline was meant -- confirm.
#   z BrainSPAN/RNAseq.genes.RPKM/expression_matrix.csv.gz
#   |cut -d"," -f"$genesOfInterest.arrayTable.rowNum" > "$expressionData.data.txt"

########Last step: This command uses the merge script to concatenate table data and store that output in a new file
# NOTE(review): this reads "$expressionData", while the step above writes
# "$expressionData.data.txt" -- the two names do not agree.
#   /mnt/xgen.programs/qyScripts/merge.basedOn.column.pl -f "$expressionData" -l 1 -F 0 -q "$genesOfInterest.arrayTable.rowNum" -m 1 -Q 0 -s C -t C |cut -f1 > "$tissueOfInterest.data.4arrayGenes.csv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment