Created
October 26, 2012 21:25
-
-
Save ypandit/3961651 to your computer and use it in GitHub Desktop.
Script to check if GO ID exists in association with Gene ID
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
database: | |
dsn: "dbi:Oracle:" | |
user: '' | |
passwd: '' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package GAFLoader; | |
use strict; | |
use warnings; | |
use Bio::Chado::Schema; | |
use IO::String; | |
use LWP::UserAgent; | |
use Moose; | |
use MooseX::Attribute::Dependent; | |
use YAML::Tiny; | |
with 'MooseX::Getopt'; | |
has '_ebi_base_url' => ( | |
is => 'ro', | |
isa => 'Str', | |
default => 'http://www.ebi.ac.uk/QuickGO/GAnnotation?format=gaf&protein=' | |
); | |
has '_ua' => ( | |
is => 'ro', | |
isa => 'LWP::UserAgent', | |
default => sub { LWP::UserAgent->new }, | |
lazy => 1 | |
); | |
has 'schema' => ( | |
is => 'rw', | |
isa => 'Bio::Chado::Schema', | |
dependency => All [ '_dsn', '_user', '_passwd' ] | |
); | |
has 'config_file' => ( | |
is => 'rw', | |
isa => 'Str', | |
trigger => sub { | |
my ($self) = @_; | |
my $conf = YAML::Tiny->read( $self->config_file ); | |
$self->_dsn( $conf->[0]->{database}->{dsn} ); | |
$self->_user( $conf->[0]->{database}->{user} ); | |
$self->_passwd( $conf->[0]->{database}->{passwd} ); | |
$self->schema( | |
Bio::Chado::Schema->connect( | |
$self->_dsn, $self->_user, | |
$self->_passwd, { LongReadLen => 2**25 } | |
) | |
); | |
}, | |
required => 1 | |
); | |
has [qw/_dsn _user _passwd/] => ( | |
is => 'rw', | |
isa => 'Str', | |
default => "", | |
lazy => 1, | |
dependency => All ['config_file'] | |
); | |
sub parse_go_from_gaf { | |
my ( $self, $gaf ) = @_; | |
my @go_new; | |
my $io = IO::String->new(); | |
$io->open($gaf); | |
while ( my $line = $io->getline ) { | |
chomp($line); | |
next if $line =~ /^!/; | |
my @row_vals = split( "\t", $line ); | |
print $row_vals[4] . "\n"; | |
push( @go_new, $row_vals[4] ); | |
} | |
return @go_new; | |
} | |
sub get_go_for_gene { | |
my ( $self, $gene_id ) = @_; | |
my $go_rs = $self->schema->resultset('Sequence::Feature')->search( | |
); | |
} | |
sub get_gaf_from_ebi { | |
my ( $self, $gene_id ) = @_; | |
my $response | |
= $self->_ua->get( $self->_ebi_base_url . $gene_id )->decoded_content; | |
return $response; | |
} | |
sub compare_go { | |
} | |
sub run { | |
my ($self) = @_; | |
my $gene_rs = $self->schema->resultset('Sequence::Feature')->search( | |
{ 'type.name' => 'gene', | |
'organism.common_name' => 'dicty' | |
}, | |
{ join => [qw/type organism/], | |
select => [qw/feature_id uniquename/], | |
prefetch => 'dbxref' | |
} | |
); | |
while ( my $gene = $gene_rs->next ) { | |
my $gaf = $self->get_gaf_from_ebi( $gene->dbxref->accession ); # Get GAF from EBI for given Gene ID | |
my @go_new = $self->parse_go_from_gaf($gaf); # Get GO IDs in an array for a given GAF of a Gene ID | |
my @go_exist = $self->get_go_for_gene( $gene->dbxref->accession ); # Query using BCS to check what GO IDs are associated with given Gene ID | |
$self->compare_go( @go_new, @go_exist ); | |
} | |
} | |
1; | |
package main; | |
GAFLoader->new_with_options->run; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT GENE_ID.accession gene_id, GENE.uniquename gene, 'GO' || ':' || GO_ID.accession go_id | |
FROM feature_cvterm fcvt | |
JOIN feature GENE ON GENE.feature_id = fcvt.feature_id | |
JOIN dbxref GENE_ID ON GENE_ID.dbxref_id = GENE.dbxref_id | |
JOIN cvterm type ON type.cvterm_id = GENE.type_id | |
JOIN cvterm GO ON GO.cvterm_id = fcvt.cvterm_id | |
JOIN cv ON cv.cv_id = GO.cv_id | |
JOIN dbxref GO_ID ON GO_ID.dbxref_id = GO.dbxref_id | |
JOIN db ON db.db_id = GO_ID.db_id | |
JOIN organism ON organism.organism_id = GENE.organism_id | |
WHERE cv.name IN('molecular_function', 'biological_process', 'cellular_component') | |
AND organism.common_name = 'dicty' | |
AND db.name = 'GO' | |
AND GO.is_obsolete = 0 | |
AND GENE.uniquename = 'mhcA' | |
/* GENE_ID.accession = '' */ | |
ORDER BY GO_ID.accession, GENE_ID.accession ASC | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
To run the script, fill up the required valies in
config.yaml
; then