Created
March 1, 2013 16:45
-
-
Save avrilcoghlan/5065951 to your computer and use it in GitHub Desktop.
Perl script that connects to the TreeFam mysql database, and retrieves within-species Schistosoma mansoni paralogs from the 'ortholog' table of the database.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl
#
# Perl script treefam_flatworm3.pl
# Written by Avril Coghlan ([email protected]).
# 30-Jun-06.
#
# This perl script connects to the MYSQL database of
# TreeFam families and finds flatworm paralogous genes.
#
# The command-line format is:
# % perl <treefam_flatworm3.pl>
#
#------------------------------------------------------------------#
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS:
# This script takes no arguments, so anything on the command line is a
# usage error.
$num_args = scalar @ARGV;
if ($num_args != 0)
{
    # The usage text goes to STDERR so it cannot be mistaken for the
    # paralog pairs this script prints on STDOUT, and the non-zero exit
    # status lets a calling shell or pipeline detect the misuse.
    print STDERR "Usage of treefam_flatworm3.pl\n\n";
    print STDERR "perl -w treefam_flatworm3.pl\n";
    print STDERR "For example, >perl -w treefam_flatworm3.pl\n";
    exit 1;
}
# READ IN MY PERL MODULES:
# 'use lib' prepends this private directory to @INC at compile time, so it
# is searched by the 'use' statements below (presumably this is where
# Avril_modules lives -- confirm on the target machine).
use lib '/nfs/team54/alc/perl/modules';
# LOAD MY OWN HELPER MODULE:
use Avril_modules;
# LOAD THE DBI DATABASE INTERFACE, USED FOR THE MYSQL CONNECTIONS BELOW:
use DBI;
#------------------------------------------------------------------#
# FIND THE SPECIES OF EACH TREEFAM GENE:
# HASH TABLE TO REMEMBER THE GENES THAT BELONG TO SCHISTOSOMA MANSONI.
# KEY = IDX COLUMN OF THE 'genes' TABLE, VALUE = ID (TRANSCRIPT NAME).
# It is deliberately a package variable: the paralog section later in
# this script looks genes up in it.
%ID = ();
$database = 'treefam_3';
{
    # All working variables are lexicals confined to this bare block, so
    # nothing but %ID and $database is visible to the rest of the script.
    my $dsn = "dbi:mysql:$database:db.treefam.org:3308";

    # 'return' is not allowed outside a subroutine, so a failed connection
    # must be reported with die, including DBI's own error text.
    my $dbh = DBI->connect($dsn, 'anonymous', '')
        or die "Cannot connect to $dsn: $DBI::errstr\n";

    # SPECIFY THE TABLE:
    my $table_w = 'genes';
    # THE FIRST COLUMN IN THIS TABLE IS AN IDENTIFIER (IDX), AND
    # THE SECOND COLUMN IS THE TRANSCRIPT NAME, AND THE LAST COLUMN
    # IS THE TAXONOMY ID, eg. 1 ENSANGT00000032162.1 7165
    my $st = "SELECT IDX, ID, TAX_ID from $table_w";

    # Method calls are not interpolated inside double quotes, so errstr
    # has to be concatenated to appear in the message.
    my $sth = $dbh->prepare($st)
        or die "Cannot prepare $st: " . $dbh->errstr . "\n";
    $sth->execute
        or die "Cannot execute the query: " . $sth->errstr;

    # The return value of execute is not a dependable row count for a
    # SELECT, so simply fetch until the statement handle is exhausted;
    # an empty result set just means the loop body never runs.
    while (my ($idx, $id, $tax_id) = $sth->fetchrow_array) {
        # eg. $idx = 1, $id = ENST00000356572.1, $tax_id = 7165
        next unless defined $tax_id && $tax_id eq '6183'; # SCHISTOSOMA MANSONI.
        $ID{$idx} = $id;
    }
    # fetchrow_array returns an empty list both at end-of-data and on
    # error, so check the handle to tell the two apart.
    die "Error while fetching from $table_w: " . $sth->errstr . "\n"
        if $sth->err;

    $dbh->disconnect();
}
#------------------------------------------------------------------#
# NOW FIND ALL SCHISTOSOMA MANSONI WITHIN-SPECIES PARALOGS:
# Reads the 'ortholog' table and prints one line "ID1 ID2 BOOTSTRAP" on
# STDOUT for every row whose taxon is Schistosoma mansoni (6183) and
# whose two gene indices are both present in %ID (filled in earlier in
# this script from the 'genes' table).
$database = 'treefam_3';
{
    # Working variables are lexicals confined to this bare block.
    my $dsn = "dbi:mysql:$database:db.treefam.org:3308";

    # 'return' is not allowed outside a subroutine, so a failed connection
    # must be reported with die, including DBI's own error text.
    my $dbh = DBI->connect($dsn, 'anonymous', '')
        or die "Cannot connect to $dsn: $DBI::errstr\n";

    my $table_w = 'ortholog';
    # THE COLUMNS IN THE TABLE ARE IDX1, IDX2, TAXON_ID, BOOTSTRAP:
    my $st = "SELECT idx1, idx2, taxon_id, bootstrap from $table_w";

    # Method calls are not interpolated inside double quotes, so errstr
    # has to be concatenated to appear in the message.
    my $sth = $dbh->prepare($st)
        or die "Cannot prepare $st: " . $dbh->errstr . "\n";
    $sth->execute
        or die "Cannot execute the query: " . $sth->errstr;

    # The return value of execute is not a dependable row count for a
    # SELECT, so simply fetch until the statement handle is exhausted.
    while (my ($idx1, $idx2, $taxon_id, $bootstrap) = $sth->fetchrow_array) {
        next unless defined $taxon_id && $taxon_id eq '6183'; # SCHISTOSOMA MANSONI.

        # Keep the pair only when BOTH genes were recorded in %ID.
        my $id1 = $ID{$idx1};
        my $id2 = $ID{$idx2};
        next unless $id1 && $id2;

        print "$id1 $id2 $bootstrap\n";
    }
    # fetchrow_array returns an empty list both at end-of-data and on
    # error, so check the handle to tell the two apart.
    die "Error while fetching from $table_w: " . $sth->errstr . "\n"
        if $sth->err;

    $dbh->disconnect();
}
#------------------------------------------------------------------#
print STDERR "FINISHED.\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment