Created
March 1, 2013 16:31
-
-
Save avrilcoghlan/5065840 to your computer and use it in GitHub Desktop.
Perl script that prints out all the genes in a particular TreeFam family.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/perl | |
# | |
# Perl script treefam_4_genes.pl | |
# Written by Avril Coghlan ([email protected]) | |
# 31-Oct-07. | |
# | |
# For the TreeFam project. | |
# | |
# This perl script prints out all the genes in a TreeFam-4 family. | |
# | |
# The command-line format is: | |
# % perl <treefam_4_genes.pl> family | |
# where family is the family identifier. | |
# | |
#------------------------------------------------------------------# | |
# CHECK IF THERE ARE THE CORRECT NUMBER OF COMMAND-LINE ARGUMENTS: | |
$num_args = $#ARGV + 1; | |
if ($num_args != 1) | |
{ | |
print "Usage of treefam_4_genes.pl\n\n"; | |
print "perl treefam_4_genes.pl <family>\n"; | |
print "where <family> is the family identifier.\n"; | |
print "For example, >perl -w treefam_4_genes.pl TF101073\n"; | |
exit; | |
} | |
# READ IN MY PERL MODULES: | |
BEGIN { | |
unshift (@INC, '/nfs/team71/phd/alc/perl/modules'); | |
} | |
use lib "/nfs/team71/phd/alc/perl/treefam/Treefam_api_3may07/"; | |
use DBI; | |
use Treefam::DBConnection; | |
# FIND THE NAME OF THE INPUT FAMILY: | |
$family_id = $ARGV[0]; | |
#------------------------------------------------------------------# | |
# GET ALL THE GENES IN THIS FAMILY: | |
$dbc = Treefam::DBConnection->new(); | |
$famh = $dbc->get_FamilyHandle(); | |
$family = $famh->get_by_id($family_id); | |
if (!(defined($family))) | |
{ | |
print "Do not have $family_id in the database...\n"; | |
exit; | |
} | |
my $tree = $family->get_tree('clean'); | |
# FIRST PRINT ALL THE GENES IN THE FAMILY: | |
my @leaves = $tree->get_leaves; | |
foreach my $leaf(@leaves) | |
{ | |
if (defined($leaf->sequence_id())) # FOR SOME LEAVES, WE DON'T SEE TO HAVE | |
# A SEQUENCE ID, eg. SEE RICE GENE IN http://www.treefam.org/cgi-bin/TFinfo.pl?ac=TF101025 | |
{ | |
my $id = $leaf->sequence_id(); | |
# GET THE ENSEMBL/ORIGINAL GENE NAME: | |
my $gene_handle = $dbc->get_GeneHandle(); | |
if (defined($gene_handle->get_by_sequence_id($id))) { $gene = $gene_handle->get_by_sequence_id($id);} | |
else { print STDERR "ERROR: cannot get gene for id $id\n"; exit; } | |
if (defined($gene->ID)) { $gene_id = $gene->ID;} | |
else { $gene_id = "unknown";} | |
print "HAVE GENE $gene_id\n"; | |
} | |
} | |
#------------------------------------------------------------------# | |
print STDERR "FINISHED.\n"; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment