Created
March 1, 2013 14:35
-
-
Save avrilcoghlan/5065041 to your computer and use it in GitHub Desktop.
Perl script that retrieves all trees from the TreeFam database, and stores them in a Perl pickle
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
# | |
# Perl script store_treefam_trees.pl | |
# Written by Avril Coghlan ([email protected]) | |
# 3-Apr-09. | |
# | |
# For the TreeFam project. | |
# | |
# This perl script connects to the TreeFam database and stores | |
# the trees in a pickle. | |
# | |
# The command-line format is: | |
# % perl <store_treefam_trees.pl> version | |
# where version is the version of the TreeFam database to use. | |
# | |
#------------------------------------------------------------------# | |
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS: | |
$num_args = $#ARGV + 1; | |
if ($num_args != 1) | |
{ | |
print "Usage of store_treefam_trees.pl\n\n"; | |
print "perl store_treefam_trees.pl <version>\n"; | |
print "where <version> is the version of the TreeFam database to use.\n"; | |
print "For example, >perl -w store_treefam_trees.pl 7\n"; | |
exit; | |
} | |
# FIND THE RELEASE OF TREEFAM TO USE: | |
$version = $ARGV[0]; | |
# READ IN MY PERL MODULES: | |
use Avril_modules; | |
use Treefam::DBConnection; | |
use DBI; | |
use Storable; | |
$VERBOSE = 0; | |
#------------------------------------------------------------------# | |
# GET A LIST OF ALL FAMILIES: | |
$cnt = 0; | |
$database = "dbi:mysql:treefam_".$version.":db.treefam.org:3308"; | |
$dbh = DBI->connect("$database", 'anonymous', '') || return; | |
$table_w = 'familyA'; | |
$st = "SELECT AC from $table_w"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$cnt++; | |
$AC = $array[0]; | |
@families = (@families,$AC); | |
print "$cnt: Got $AC\n"; | |
} | |
} | |
print "Read in list of TreeFam-A families...\n"; | |
# GET A LIST OF ALL TREEFAM-B FAMILIES: | |
$dbh = DBI->connect("$database", 'anonymous', '') || return; | |
$table_w = 'familyB'; | |
$st = "SELECT AC from $table_w"; | |
$sth = $dbh->prepare($st) or die "Cannot prepare $st: $dbh->errstr\n"; | |
$rv = $sth->execute or die "Cannot execute the query: $sth->errstr"; | |
if ($rv >= 1) | |
{ | |
while ((@array) = $sth->fetchrow_array) | |
{ | |
$cnt++; | |
$AC = $array[0]; | |
@families = (@families,$AC); | |
print "$cnt: Got $AC\n"; | |
} | |
} | |
$rc = $dbh->disconnect(); | |
$rc = ""; | |
print "Read in list of TreeFam-B families...\n"; | |
# STORE THE TREES IN A PICKLE FILE: | |
%CLEAN = (); | |
%FULL = (); | |
$dbc = Treefam::DBConnection->new(); | |
for ($i = 0; $i <= $#families; $i++) | |
{ | |
$treefam_family = $families[$i]; | |
print "$i: looking at $treefam_family\n"; | |
if ($treefam_family eq '') { print STDERR "ERROR: treefam_family $treefam_family\n"; exit;} | |
$famh = $dbc->get_FamilyHandle(); | |
if (!(defined($famh->get_by_id($treefam_family)))) | |
{ | |
print "WARNING: there is no family $treefam_family...\n"; | |
goto NEXT_FAMILY; | |
} | |
$family = $famh->get_by_id($treefam_family); | |
if (!(defined($family->ID()))) | |
{ | |
print "WARNING: do not have ID stored for family $treefam_family...\n"; | |
goto NEXT_FAMILY; | |
} | |
$AC = $family->ID(); # GET THE FAMILY ID. | |
if ($AC ne $treefam_family) { print STDERR "ERROR: AC $AC treefam_family $treefam_family\n"; exit;} | |
# GET THE CLEAN TREE FOR THE FAMILY: | |
if (!(defined($family->get_tree('clean')))) | |
{ | |
print "WARNING: AC $AC: there is no clean tree!\n"; | |
} | |
else | |
{ | |
$cleantree = $family->get_tree('clean'); # GET THE TREEFAM CLEAN TREE. | |
if (!(defined($cleantree->nhx()))) { print STDERR "ERROR: AC $AC: there is no clean tree\n"; } | |
else | |
{ | |
$cleantree = $cleantree->nhx(); | |
if ($CLEAN{$treefam_family}) { print STDERR "ERROR: already have clean tree for $treefam_family\n"; exit;} | |
$CLEAN{$treefam_family} = $cleantree; | |
} | |
} | |
# GET THE FULL TREE FOR THE FAMILY: | |
if (!(defined($family->get_tree('full')))) | |
{ | |
print "WARNING: AC $AC: there is no full tree!\n"; | |
} | |
else | |
{ | |
$fulltree = $family->get_tree('full'); | |
if (!(defined($fulltree->nhx()))) { print STDERR "ERROR: AC $AC: there is no full tree\n"; } | |
else | |
{ | |
$fulltree = $fulltree->nhx(); | |
if ($FULL{$treefam_family}) { print STDERR "ERROR: already have full tree for $treefam_family\n"; exit;} | |
$FULL{$treefam_family} = $fulltree; | |
} | |
} | |
NEXT_FAMILY: | |
} | |
print STDERR "Read in trees...\n"; | |
# STORE THE HASH TABLES %CLEAN AND %FULL IN A PICKLE: | |
$output1 = "treefam.".$version."_cleantrees"; | |
store \%CLEAN,$output1; | |
$output2 = "treefam.".$version."_fulltrees"; | |
store \%FULL,$output2; | |
#------------------------------------------------------------------# | |
print STDERR "FINISHED.\n"; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment