Skip to content

Instantly share code, notes, and snippets.

@andrewyatz
Created April 5, 2011 09:22
Show Gist options
  • Save andrewyatz/903310 to your computer and use it in GitHub Desktop.
Save andrewyatz/903310 to your computer and use it in GitHub Desktop.
An example script for downloading & storing introns from an Ensembl database (Dictyostelium discoideum)
use strict;
use warnings;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::DBSQL::DBConnection;
use Bio::EnsEMBL::Utils::SqlHelper;
#Load all DBs from Ensembl Genomes
Bio::EnsEMBL::Registry->load_registry_from_db(
-HOST => 'mysql.ebi.ac.uk',
-PORT => 4157,
-USER => 'anonymous'
);
my $dbc = Bio::EnsEMBL::DBSQL::DBConnection->new(
-HOST => '127.0.0.1',
-PORT => 3306,
-USER => 'USER',
-PASS => 'PASS',
-DBNAME => 'DBNAME'
);
#Holds a soft reference to the DBConnection so we cannot combine the two news
my $h = Bio::EnsEMBL::Utils::SqlHelper->new(-DB_CONNECTION => $dbc);
#Cleanup schema using do()
$dbc->do('DROP TABLE IF EXISTS introns');
$dbc->do(<<'SQL');
CREATE TABLE introns (
idgld int(10) NOT NULL auto_increment,
stbl_id varchar(12),
seq_start varchar(12),
seq_stop varchar(12),
sequence text,
PRIMARY KEY(idgld)
)
SQL
#File management
my $file = '/home/glduncan/data/introns.txt';
if(-f $file) {
unlink $file or die "Cannot remove file '${file}': $!";
}
# open my $out, '>', $file or die "Cannot open ${file} for writing: $!";
my $ids_file = '/home/glduncan/data/id.txt';
open my $fh, '<', $ids_file or die "Failed. Cannot open '${ids_file}' for reading: $!";
my $i = 1;
#Get the adaptors once
my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor('Dictyostelium discoideum', 'core');
my $gene_adaptor = $dba->get_GeneAdaptor();
my @data;
while(my $stable_id = <$fh>){
chomp $stable_id;
my $gene = $gene_adaptor->fetch_by_stable_id($stable_id);
foreach my $transcript (@{$gene->get_all_Transcripts()}) {
# print $out $transcript->stable_id(), "\n";
foreach my $intron (@{$transcript->get_all_Introns}) {
#print $out $intron->seq_region_start.' - '.$intron->seq_region_end.' - '.$intron->seq. "\n";
#Store into a temporary array
push(@data, [
$transcript->stable_id(),
$intron->seq_region_start(),
$intron->seq_region_end(),
$intron->seq()
]);
}
}
}
close($fh) or die "Cannot close input file handle: $!";
#Use the batch command from the helper to insert the data using only one prepared statement
my $dml = <<'SQL';
INSERT INTO introns (stbl_id, seq_start, seq_stop, sequence)
VALUES (?,?,?,?)
SQL
my $affected_rows = $h->batch(-DATA => \@data, -SQL => $dml);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment