-
-
Save andrewyatz/831981 to your computer and use it in GitHub Desktop.
| #!/bin/env perl | |
| use strict; | |
| use warnings; | |
| use Getopt::Long; | |
| use Pod::Usage; | |
| use Bio::EnsEMBL::Registry; | |
| use Bio::EnsEMBL::Utils::Scalar qw(wrap_array); | |
| use IO::String; | |
| my $O = options(); | |
| my $DBA = database(); | |
| my $MLSS = mlss(); | |
| my $SYNTENY = synteny(); | |
| my $FH = fh(); | |
| print_synteny(); | |
| if(!$O->{file}) { | |
| print ${$FH->string_ref}; | |
| } | |
| close($FH); | |
| exit 0; | |
| sub options { | |
| my $opts = {}; | |
| my @flags = qw( | |
| ensembl ensemblgenomes database=s species=s@ version=i file=s help man | |
| ); | |
| GetOptions($opts, @flags) or pod2usage(1); | |
| _exit( undef, 0, 1) if $opts->{help}; | |
| _exit( undef, 0, 2) if $opts->{man}; | |
| if($opts->{ensemblgenomes} && $opts->{ensembl}) { | |
| _exit( 'Can only specify -ensembl or -ensemblgenomes', 1, 2); | |
| } | |
| my $species = wrap_array($opts->{species}); | |
| if(scalar(@{$species}) != 2) { | |
| _exit( 'Script requires two -species attributes', 1, 2); | |
| } | |
| $opts->{database} = 'multi' unless $opts->{database}; | |
| return $opts; | |
| } | |
| sub database { | |
| my %args; | |
| if($O->{ensemblgenomes}) { | |
| %args = ( | |
| -HOST => 'mysql.ebi.ac.uk', -PORT => 4157, -USER => 'anonymous' | |
| ); | |
| } | |
| elsif($O->{ensembl}) { | |
| %args = ( | |
| -HOST => 'ensembldb.ensembl.org', -PORT => 5306, -USER => 'anonymous' | |
| ); | |
| } | |
| else { | |
| _exit('Not aware of which server I should connect to. Specify -ensembl or -ensemblgenomes', 2, 1); | |
| } | |
| if($O->{version}) { | |
| $args{-DB_VERSION} = $O->{version}; | |
| } | |
| Bio::EnsEMBL::Registry->load_registry_from_db(%args); | |
| my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($O->{database}, 'compara'); | |
| if(! defined $dba) { | |
| _exit($O->{database}.' did not result in a valid DB. Try again specifying a different -database parameter. Also check your -version switch and API version', 2, 1); | |
| } | |
| return $dba; | |
| } | |
| sub mlss { | |
| my $species = $O->{species}; | |
| my $gdba = $DBA->get_GenomeDBAdaptor(); | |
| my $mlssa = $DBA->get_MethodLinkSpeciesSetAdaptor(); | |
| my @gdbs = map { $gdba->fetch_by_registry_name($_) } @{$species}; | |
| my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs('SYNTENY', \@gdbs); | |
| return $mlss; | |
| } | |
| sub synteny { | |
| my $sra = $DBA->get_SyntenyRegionAdaptor(); | |
| my $regions = $sra->fetch_all_by_MethodLinkSpeciesSet($MLSS); | |
| return $regions; | |
| } | |
| sub fh { | |
| if($O->{file}) { | |
| return IO::File->new($O->{file}, 'w'); | |
| } | |
| return IO::String->new(); | |
| } | |
| sub print_synteny { | |
| my ($source_gdb, $target_gdb) = @{$MLSS->species_set()->genome_dbs()}; | |
| my $sname = $source_gdb->name(); | |
| my $tname = $target_gdb->name(); | |
| print $FH sprintf( | |
| 'track name=synteny_%1$s description="%1$s %2$s synteny projections"', | |
| $sname, $tname); | |
| print $FH "\n"; | |
| foreach my $s (@{$SYNTENY}) { | |
| my $regions = $s->get_all_DnaFragRegions(); | |
| my ($source) = grep { $_->genome_db()->dbID() eq $source_gdb->dbID() } @{$regions}; | |
| my ($target) = grep { $_->genome_db()->dbID() eq $target_gdb->dbID() } @{$regions}; | |
| my $create_elements = sub { | |
| my ($frag) = @_; | |
| return ( | |
| $frag->dnafrag()->name(), #name of region | |
| $frag->dnafrag()->length(), #length of region e.g. chr1 in human | |
| $frag->dnafrag_start(), #start of alignment | |
| $frag->dnafrag_end() #end of alignment | |
| ); | |
| }; | |
| my @data = ( | |
| $source->length(), | |
| 0, #misMatches | |
| 0, #repMatches | |
| 0, #nCount | |
| 0, #qNumInsert | |
| 0, #qBaseInsert | |
| 0, #tNumInsert | |
| 0, #tBaseInsert | |
| ( $source->dnafrag_strand() ) ? '+' : '-', #strand, | |
| $create_elements->($source), #query info | |
| $create_elements->($target), #target info | |
| 1, # number of blocks | |
| $source->length(), #block sizes | |
| $source->dnafrag_start(), #query start blocks | |
| $target->dnafrag_start() #target start blocks | |
| ); | |
| print $FH join(q{ }, @data), "\n"; | |
| } | |
| return; | |
| } | |
| sub _exit { | |
| my ($msg, $status, $verbose) = @_; | |
| print STDERR $msg, "\n" if defined $msg; | |
| pod2usage( -exitstatus => $status, -verbose => $verbose ); | |
| } | |
| __END__ | |
| =pod | |
| =head1 NAME | |
| list_synteny.pl | |
| =head1 SYNOPSIS | |
| ./list_synteny.pl -species human -species mouse -ensembl -database multi [see opts] [ --help | --man ] | |
| =head1 DESCRIPTION | |
| Returns a PSL to screen or to a specified file location | |
| =head1 OPTIONS | |
| =over 8 | |
| =item B<--species> | |
| Accepts 2 invocations to specify the species to look for synteny between | |
| =item B<--ensembl> | |
| Specify to connect to the public ensembl database | |
| =item B<--ensemblgenomes> | |
| Specify to connect to the public ensembl genomes database | |
| =item B<--database> | |
| Specify the name of the database to connect to. If going to ensembl | |
| use multi otherwise if connecting to ensembl genomes use one like | |
| plants, metazoa, protists, bacteria or fungi (not all DBs have synteny | |
| projections though). | |
| =item B<--version> | |
| Optional integer which can be used to force the databases we load. Useful if | |
| your API does not match the DB you are connecting to. | |
| =item B<--file> | |
| Location of the file to write to. If not specified will write to STDOUT | |
| =back | |
| =head1 VERSION | |
| 1 | |
| =head1 LICENSE | |
| Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
| Genome Research Limited. All rights reserved. | |
| This software is distributed under a modified Apache license. | |
| For license details, please see | |
| http://www.ensembl.org/info/about/code_licence.html | |
| =cut |
I've just updated this gist. The error was because I believe lots of years ago ->species_set() would return an array, where as now it returns an object. Changing line 105's dereferencing to @{$MLSS->species_set()->genome_dbs()} has fixed the issue. Verified by trying to extract human/mouse synteny.
Thank you so much for doing this!!
Last question: What version of perl are you using? I still get an issue in the line 109 saying "Can't use an undefined value as a symbol reference at list_synteny.pl"
Mine is following: This is perl 5, version 32, subversion 0 (v5.32.0) built for darwin-2level
I'm running This is perl 5, version 34, subversion 1 (v5.34.1) built for darwin-2level and do not see any issue on my side sorry. The command I ran was: perl list_synteny.pl -species human -species mouse -ensembl.
With respect to the error, this happens when a filehandle becomes undefined. You can see from the code that we initialise $FH on line 16 before trying to print to it in print_synteny. Are you working with a clean version of the script or a modified version? This certainly should work. Also try updating IO::String and IO::File modules in-case there is an issue with them
Okay, so I had to specify use IO::File and get rid of line 23 close($FH);, and now it works!! Thanks a lot, Andrew!
Unfortunately, it doesn't work for me. I get an error 'Not an ARRAY reference at list_synteny.pl line 105.'. When I change the array reference to hash (changing @ to %), I get further errors 'Can't locate object method "name" via package "adaptor" (perhaps you forgot to load "adaptor"'
I'm also aware that there are getsynteny.pl script under this link https://github.com/Ensembl/ensembl-compara/tree/release/107/scripts/synteny, but I'm struggling on how to use it correctly as well.
Do you mind helping me out with those? I really need this information for my undergrad thesis.