Created
March 1, 2013 14:58
-
-
Save avrilcoghlan/5065181 to your computer and use it in GitHub Desktop.
Perl script that checks for cases where a TreeFam family seems to have disappeared from a particular version of TreeFam, even though it was present in the previous version of TreeFam and has not been curated since.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
# | |
# Perl script treefam_QC12.pl | |
# Written by Avril Coghlan ([email protected]) | |
# 6-Feb-09. | |
# | |
# This perl script checks for cases where a TreeFam family seems to have | |
# disappeared from a particular version of TreeFam, even though it was | |
# present in the previous version of TreeFam and has not been curated since. | |
# | |
# The command-line format is: | |
# % perl <treefam_QC12.pl> <release> | |
# where <release> is the release of the TreeFam database to use. | |
# | |
# | |
#------------------------------------------------------------------# | |
# MAIN SCRIPT: COMPARE THE FAMILIES IN TREEFAM RELEASE <release> WITH THOSE IN
# RELEASE <release> - 1, AND PRINT A WARNING FOR EVERY FAMILY THAT HAS GONE
# MISSING WITHOUT BEING CURATED.
use strict;
use warnings;
use DBI;

# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
if (@ARGV != 1)
{
   print "Usage of treefam_QC12.pl\n\n";
   print "perl treefam_QC12.pl <release>\n";
   print "where <release> is the release of the TreeFam database to use.\n";
   print "For example, >perl -w treefam_QC12.pl 7\n";
   exit;
}
#------------------------------------------------------------------#
# FIND WHICH RELEASE OF THE TREEFAM DATABASE TO USE.
# NOTE: $release AND $prev_release ARE DELIBERATELY DECLARED AT FILE SCOPE,
# BECAUSE THE SUBROUTINES BELOW READ THEM DIRECTLY WHEN PRINTING MESSAGES.
my $release = $ARGV[0];
# THE RELEASE IS PASTED INTO THE DATABASE NAME AND HAS 1 SUBTRACTED FROM IT
# TO FIND THE PREVIOUS RELEASE, SO IT MUST BE A WHOLE NUMBER:
if ($release !~ /\A\d+\z/)
{
   die "ERROR: <release> must be a whole number (for example 7), but got '$release'\n";
}
my $prev_release = $release - 1;
#------------------------------------------------------------------#
# FIND ALL FAMILIES IN THE FAMILYA/FAMILYB/FAMILYC TABLES, IN
# THE CURRENT TREEFAM RELEASE:
my $database = "dbi:mysql:treefam_".$release.":db.treefam.org:3308";
# DBI->connect RETURNS UNDEF ON FAILURE, SO STOP WITH THE DATABASE'S OWN
# ERROR MESSAGE RATHER THAN CARRYING ON WITH NO HANDLE:
my $dbh = DBI->connect($database, 'anonymous', '')
   or die "Cannot connect to $database: $DBI::errstr\n";
my $RELEASE = read_all_families1($dbh);
# READ IN THE FAMILYB FAMILIES WHICH WERE TURNED INTO FAMILYA FAMILIES
# SINCE THE PREVIOUS RELEASE:
my $CURATED = read_curated_families($dbh);
# NOW DISCONNECT FROM THE DATABASE:
$dbh->disconnect();
# FIND ALL FAMILIES IN THE FAMILYA/FAMILYB/FAMILYC TABLES, IN
# THE PREVIOUS TREEFAM RELEASE, AND REPORT THOSE THAT HAVE DISAPPEARED:
$database = "dbi:mysql:treefam_".$prev_release.":db.treefam.org:3308";
$dbh = DBI->connect($database, 'anonymous', '')
   or die "Cannot connect to $database: $DBI::errstr\n";
read_all_families2($dbh,$RELEASE,$CURATED);
# NOW DISCONNECT FROM THE DATABASE:
$dbh->disconnect();
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
print "FINISHED\n";
#------------------------------------------------------------------# | |
# READ IN THE FAMILYB FAMILIES WHICH WERE TURNED INTO FAMILYA FAMILIES
# SINCE THE PREVIOUS RELEASE.
# ARGUMENT: $dbh, AN OPEN DATABASE HANDLE.
# RETURNS:  A REFERENCE TO A HASH IN WHICH THE KEY IS THE famB COLUMN AND THE
#           VALUE IS THE famA COLUMN OF EACH ROW OF TABLE famB2A
#           (eg. TF300003 => TF101534).
# DIES IF THE QUERY CANNOT BE PREPARED OR EXECUTED.
sub read_curated_families
{
   my $dbh = $_[0];
   my %CURATED = ();
   my $table_w = "famB2A";
   my $st = "SELECT famB, famA from $table_w";
   # METHOD CALLS ARE NOT INTERPOLATED INSIDE DOUBLE QUOTES, SO THE ERROR
   # TEXT HAS TO BE CONCATENATED ON TO THE MESSAGE:
   my $sth = $dbh->prepare($st)
      or die "Cannot prepare $st: " . $dbh->errstr . "\n";
   $sth->execute
      or die "Cannot execute the query: " . $sth->errstr;
   # THE RETURN VALUE OF execute IS NOT A RELIABLE ROW COUNT FOR A SELECT,
   # SO JUST FETCH UNTIL THERE ARE NO MORE ROWS (ZERO ROWS IS FINE):
   while (my ($AC1, $AC2) = $sth->fetchrow_array)
   {
      # $AC1 IS eg. TF300003, $AC2 IS eg. TF101534
      $CURATED{$AC1} = $AC2;
   }
   return(\%CURATED);
}
#------------------------------------------------------------------# | |
# READ ALL FAMILIES IN THE CURRENT RELEASE OF TREEFAM.
# ARGUMENT: $dbh, AN OPEN DATABASE HANDLE.
# RETURNS:  A REFERENCE TO A HASH WITH ONE KEY (SET TO 1) FOR EVERY VALUE OF
#           THE AC COLUMN FOUND IN TABLES familyA, familyB AND familyC.
# ALSO PRINTS A PROGRESS MESSAGE TO BOTH STDOUT AND STDERR. THE MESSAGE USES
# $release, WHICH IS NOT AN ARGUMENT BUT THE FILE-LEVEL VARIABLE SET BY THE
# MAIN SCRIPT.
# DIES IF A QUERY CANNOT BE PREPARED OR EXECUTED.
sub read_all_families1
{
   my $dbh = $_[0];
   my %SEEN = ();
   foreach my $table_w ('familyA', 'familyB', 'familyC')
   {
      my $st = "SELECT AC from $table_w";
      # METHOD CALLS ARE NOT INTERPOLATED INSIDE DOUBLE QUOTES, SO THE ERROR
      # TEXT HAS TO BE CONCATENATED ON TO THE MESSAGE:
      my $sth = $dbh->prepare($st)
         or die "Cannot prepare $st: " . $dbh->errstr . "\n";
      $sth->execute
         or die "Cannot execute the query: " . $sth->errstr;
      # THE RETURN VALUE OF execute IS NOT A RELIABLE ROW COUNT FOR A SELECT,
      # SO JUST FETCH UNTIL THERE ARE NO MORE ROWS (ZERO ROWS IS FINE):
      while (my ($AC) = $sth->fetchrow_array)
      {
         $SEEN{$AC} = 1;
      }
   }
   print "Read all families in the TreeFam version $release\n";
   print STDERR "Read all families in the TreeFam version $release\n";
   return(\%SEEN);
}
#------------------------------------------------------------------# | |
# READ ALL FAMILIES IN THE PREVIOUS RELEASE OF TREEFAM, AND PRINT A WARNING
# FOR EACH ONE THAT IS MISSING FROM THE CURRENT RELEASE.
# ARGUMENTS: $dbh,     AN OPEN DATABASE HANDLE (THE MAIN SCRIPT PASSES THE
#                      HANDLE FOR THE PREVIOUS RELEASE).
#            $RELEASE, REFERENCE TO A HASH WHOSE KEYS ARE THE FAMILIES IN THE
#                      CURRENT RELEASE.
#            $CURATED, REFERENCE TO A HASH MAPPING A FAMILY TO THE FAMILY IT
#                      WAS CURATED INTO.
# RETURNS NOTHING USEFUL; THE RESULT IS THE WARNINGS PRINTED TO STDOUT, PLUS
# A PROGRESS MESSAGE ON STDOUT AND STDERR. THE MESSAGES USE $release AND
# $prev_release, WHICH ARE NOT ARGUMENTS BUT FILE-LEVEL VARIABLES SET BY THE
# MAIN SCRIPT.
# DIES IF A QUERY CANNOT BE PREPARED OR EXECUTED.
sub read_all_families2
{
   my $dbh = $_[0];
   my $RELEASE = $_[1];
   my $CURATED = $_[2];
   foreach my $table_w ('familyA', 'familyB', 'familyC')
   {
      my $st = "SELECT AC from $table_w";
      # METHOD CALLS ARE NOT INTERPOLATED INSIDE DOUBLE QUOTES, SO THE ERROR
      # TEXT HAS TO BE CONCATENATED ON TO THE MESSAGE:
      my $sth = $dbh->prepare($st)
         or die "Cannot prepare $st: " . $dbh->errstr . "\n";
      $sth->execute
         or die "Cannot execute the query: " . $sth->errstr;
      # THE RETURN VALUE OF execute IS NOT A RELIABLE ROW COUNT FOR A SELECT,
      # SO JUST FETCH UNTIL THERE ARE NO MORE ROWS (ZERO ROWS IS FINE):
      while (my ($AC) = $sth->fetchrow_array)
      {
         # NOTHING TO REPORT IF THIS FAMILY IS STILL IN THE CURRENT RELEASE:
         next if ($RELEASE->{$AC});
         # CHECK IF THE FAMILY $AC WAS CURATED:
         if ($CURATED->{$AC})
         {
            # IT WAS CURATED, SO THE FAMILY IT TURNED INTO SHOULD BE PRESENT:
            my $curated = $CURATED->{$AC};
            if (!($RELEASE->{$curated}))
            {
               print "WARNING: family $AC was curated to $curated which is not in TreeFam release $release, even though $AC was in version $prev_release\n";
            }
         }
         else
         {
            print "WARNING: family $AC is not in TreeFam version $release, but was in version $prev_release (and was not curated since)\n";
         }
      }
   }
   print STDERR "Read all families in TreeFam version $prev_release\n";
   print "Read all families in TreeFam version $prev_release\n";
}
#------------------------------------------------------------------# | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment