Skip to content

Instantly share code, notes, and snippets.

@epaule
epaule / genbankCDS2fasta.rb
Last active April 16, 2018 10:18
extract CDSes from a GenBank file and print it as FASTA
#!/usr/bin/env ruby
require 'bio'
# Read GenBank records from the files named on the command line (or from
# STDIN, via ARGF) and print the spliced nucleotide sequence of every CDS
# feature as a FASTA record with 60 residues per line.
ff = Bio::FlatFile.new(Bio::GenBank, ARGF)
ff.each_entry do |gb|
  gb.each_cds do |cds|
    position = cds.position
    qualifiers = cds.to_hash
    # A CDS feature is not guaranteed to carry a /protein_id qualifier (for
    # example pseudogenes); indexing the missing entry would raise
    # NoMethodError on nil. Fall back to other identifying qualifiers and,
    # as a last resort, to the feature location itself.
    ids = qualifiers['protein_id'] ||
          qualifiers['locus_tag'] ||
          qualifiers['gene'] ||
          [position]
    puts gb.naseq.splicing(position).to_fasta(ids[0], 60)
  end
end
@epaule
epaule / fix_geneace_orthologs.pl
Created November 21, 2017 14:23
search for missing tags in orthologs and assign them based on the reverse edge
#!/usr/bin/env perl
use Ace;
# Connect to the ACeDB database at the path given as the first argument.
# On failure, abort with the message returned by Ace->error; the previous
# bareword Ace::Error is not AcePerl's error accessor and hid the real cause.
my $db = Ace->connect(-path => shift) || die(Ace->error);
# Iterator over the genes selected by the query; consumed with ->next below.
my $genes = $db->fetch_many(-query => 'find Gene Species="Pristionchus pacificus"; Ortholog');
while (my $gene = $genes->next){
foreach my $o ($gene->Ortholog){
@epaule
epaule / dump_species_functional_descriptions.pl
Last active April 19, 2017 14:34
script to dump gene descriptions
#!/usr/bin/perl
#
# dumps gene descriptions
#
# Options:
# -format <record || tab> (defaults to record)
# -species <name> WormBase species name
# -store <storable file> pass a stored config
# -debug <user> send log mails to user
# -test use the test database
@epaule
epaule / dump_species_gene_interactions.pl
Last active April 18, 2017 10:55
cleanup interaction dumping
#!/usr/bin/perl
#
# dumps all interactions into a flatfile, except the no_interaction ones
#
use Getopt::Long;
use Time::localtime;
use IO::File;
use Storable;
@epaule
epaule / gff3ace.pl
Created March 2, 2017 11:03
trimmed down version of the gff3ace
#!/software/bin/perl -w
#
# Small script to convert GFF3 gene predictions to ace
#
# Last updated by: $Author: gw3 $
# Last updated on: $Date: 2008/02/14 11:02:17 $
use Getopt::Long;
use Carp;
@epaule
epaule / random_sample_chooser.py
Created February 27, 2017 12:23
scriptlet to create sample meta table entries for an ensembl core database
#!/usr/bin/env python
# random_sample_chooser.py <GFF3>
import re
import sys
import random
# Open the GFF3 file named by the first command-line argument for reading.
gff=open(sys.argv[1],"r")
# Starts out empty; presumably filled with mRNA IDs further down -- TODO confirm.
mRNAids = {}
@epaule
epaule / dump_gpi.pl
Last active February 7, 2017 13:53
GPI prototype
#!/usr/bin/env perl
#
# exporter to dump gene / transcript / protein info as GPI file
# specs: http://www.geneontology.org/page/gene-product-information-gpi-format
#
# usage:
# perl dump_gpi.pl -species elegans
use strict;
@epaule
epaule / dump_protein_domains.pl
Last active July 15, 2016 09:37
dump protein domains of C.elegans from ACeDB
#!/usr/bin/env perl
# dump the protein domains into a flatfile
use feature qw(say);
use Ace;
use strict;
# Open the ACeDB database whose path is the first command-line argument and
# stop with AcePerl's last error message when no connection is returned.
my $db = Ace->connect(-path => shift);
die(Ace->error) unless $db;
#!/usr/bin/env ruby
require 'net/https'
require 'uri'
require 'rubygems'
require 'json'
# Base URL of the Track Hub Registry service.
server = 'https://www.trackhubregistry.org'
# Location of the hub.txt file under the WormBase COMPARATIVE_ANALYSIS release directory.
hub_url = 'http://ftp.ebi.ac.uk/pub/databases/wormbase/releases/current-development-release/COMPARATIVE_ANALYSIS/hub/hub.txt'
# Account name; presumably used to log in to the registry -- confirm against the code that follows.
user = 'michael.paulini'
@epaule
epaule / ovo_descriptions.pl
Created November 12, 2015 09:38
create a brief_identification for all CDSes of a gene
#!/usr/bin/perl
# script.pl database file
use Ace;
# Connect to the ACeDB database named by the first argument. Stop with
# AcePerl's own error message if the connection fails, rather than carrying
# on with an undefined handle that only breaks at the first method call.
my $db = Ace->connect(-path => shift) || die(Ace->error);
while (<>){
next unless /WBGene/; # skip the header line
s/\r\n//; # get rid of the mac line breaks