Created
January 22, 2014 09:37
-
-
Save dbolser-ebi/8555984 to your computer and use it in GitHub Desktop.
Script for mapping features via the Ensembl API. Actually performing the database update via this script *is* possible, but it's painfully slow. Instead I dump the results and do the update in MySQL. To adapt this script to features not currently in the database, first build a (minimal) new feature from the input file and 'transform' that.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use Data::Dumper; | |
use Bio::EnsEMBL::Registry; | |
my $verbose = 0; | |
my $feature = "VariationFeature"; | |
my $type = "variation"; | |
my $species = "zea_mays"; | |
warn "Loading the registry\n"; | |
Bio::EnsEMBL::Registry-> | |
load_registry_from_db( | |
-host => 'mysql-eg-staging-2.ebi.ac.uk', | |
-port => '4275', | |
-user => 'ensro', | |
#-db_version => 72, | |
#-verbose => 1, | |
) | |
or die "FAIL!!\n"; | |
warn "Getting a datbase adaptor\n"; | |
my $db_adaptor = Bio::EnsEMBL::Registry-> | |
get_DBAdaptor($species, $type) or die; | |
warn "Getting a feature adaptor ($feature)\n"; | |
my $f_adaptor = $db_adaptor-> | |
get_adaptor($feature) or die; | |
warn "Counting features ($feature)\n"; | |
warn "There are ", $f_adaptor->generic_count, " ", $feature, "s\n"; | |
warn "Getting a feature itterator ($feature)\n"; | |
my $f_iterator = $f_adaptor-> | |
fetch_Iterator or die; | |
## Do it... | |
warn "Itterating over features ($feature)\n"; | |
while(my $f = $f_iterator->next){ | |
## debugging | |
#next unless $f->name eq "..."; | |
warn "Working on ", $f->name, "\n" | |
if $verbose; | |
if($verbose){ | |
warn | |
join("\t", | |
$f->dbID, | |
$f->coord_system_name, | |
$f->seq_region_name, | |
$f->slice->get_seq_region_id, | |
$f->start, | |
$f->end, | |
$f->strand, | |
), "\n"; | |
} | |
warn "Projecting to alternative assembly\n" | |
if $verbose; | |
my $fp = $f->transform('chromosome', 'v3'); | |
unless(defined($fp)){ | |
## Typically due to a variation not lying in a mapped region | |
warn "FAILED TO PROJECT ", $f->dbID, "!\n"; | |
next; | |
} | |
if($verbose){ | |
warn | |
join("\t", | |
$fp->dbID, | |
$fp->coord_system_name, | |
$fp->seq_region_name, | |
$fp->slice->get_seq_region_id, | |
$fp->start, | |
$fp->end, | |
$fp->strand, | |
), "\n"; | |
} | |
join("\t", | |
$f->dbID, | |
$f->name, | |
## We don't strictly need the old coords, but they are | |
## good for checking the result. | |
$f->slice->get_seq_region_id, | |
$f->start, | |
$f->end, | |
$f->strand, | |
## The new coords | |
$fp->slice->get_seq_region_id, | |
$fp->start, | |
$fp->end, | |
$fp->strand, | |
), "\n"; | |
#exit; | |
} | |
warn "OK\n"; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment