Created
March 1, 2013 14:32
-
-
Save avrilcoghlan/5065021 to your computer and use it in GitHub Desktop.
Perl script that retrieves a list of all the fully sequenced species that are in the TreeFam database, and stores them in a Perl pickle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl
#
# Perl script store_treefam_species.pl
# Written by Avril Coghlan ([email protected])
# 3-Apr-09.
#
# For the TreeFam project.
#
# This perl script connects to the TreeFam database and stores
# a list of fully sequenced species in a pickle.
#
# The command-line format is:
# % perl store_treefam_species.pl <version>
# where <version> is the version of the TreeFam database to use.
#
#------------------------------------------------------------------#
# PRAGMAS AND MODULES (loaded at compile time, before any statement below runs):
use strict;
use warnings;
use Avril_modules;
use Treefam::DBConnection;
use DBI;
use Storable;

# Set $VERBOSE to 1 to print each species to STDOUT as it is read.
my $VERBOSE = 0;

#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
my $num_args = scalar @ARGV;
if ($num_args != 1)
{
    print "Usage of store_treefam_species.pl\n\n";
    print "perl store_treefam_species.pl <version>\n";
    print "where <version> is the version of the TreeFam database to use.\n";
    print "For example, >perl -w store_treefam_species.pl 7\n";
    exit;
}

# FIND THE RELEASE OF TREEFAM TO USE:
my $version = $ARGV[0];

#------------------------------------------------------------------#
# GET A LIST OF ALL FULLY SEQUENCED TREEFAM SPECIES:
my $database  = 'treefam_'.$version;
my %SEQUENCED = ();   # key: species code (SWCODE column), value: 1
my %TAXID     = ();   # key: species code (SWCODE column), value: TAX_ID column
my $num       = 0;    # running count of rows read, only shown in verbose output

# DBI->connect returns undef on failure; the reason is in $DBI::errstr.
my $dbh = DBI->connect("dbi:mysql:$database:db.treefam.org:3308", 'anonymous', '')
    or die "Cannot connect to database $database: $DBI::errstr\n";

my $table_w = 'species';
my $st      = "SELECT TAXNAME, SWCODE, TAX_ID from $table_w WHERE FLAG=\"1\"";

# Method calls do not interpolate inside double quotes, so the error text
# is concatenated explicitly.
my $sth = $dbh->prepare($st)
    or die "Cannot prepare $st: " . $dbh->errstr . "\n";
$sth->execute
    or die "Cannot execute the query: " . $sth->errstr;

# The return value of execute is not used as a row count: the fetch loop
# simply runs zero times if the query matched no rows.
while (my ($taxname, $swcode, $taxid) = $sth->fetchrow_array)
{
    $num++;
    # $swcode is a short species code, eg. CAEEL
    if ($VERBOSE == 1) { print "$num: species $taxname ($swcode), taxid $taxid\n"; }
    $SEQUENCED{$swcode} = 1;
    $TAXID{$swcode}     = $taxid;
}

# fetchrow_array returns an empty list both at end of data and on error,
# so check the handle's error state before trusting the tables.
die "Error while fetching rows: " . $sth->errstr . "\n" if $sth->err;

$dbh->disconnect();
print STDERR "Read in fully sequenced species...\n";

# STORE THE HASH TABLES %SEQUENCED AND %TAXID IN A PICKLE:
# (Storable's store returns undef on I/O errors.)
my $output1 = "treefam.".$version."_sequenced";
store(\%SEQUENCED, $output1)
    or die "Cannot store the sequenced-species table in $output1: $!\n";
my $output2 = "treefam.".$version."_taxid";
store(\%TAXID, $output2)
    or die "Cannot store the taxid table in $output2: $!\n";

#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment