@danielecook
Created August 29, 2013 16:14
The script I used to set up a Chado database and load in some basic human reference data.
#!/usr/bin/env bash
# Requires wget - can be installed with Homebrew if you are on a Mac.
## Installation Variables ##
CHADO_DB_USERNAME=""
CHADO_DB_PASS=""
CHADO_DB_NAME="chado"
PATH_TO_PSQL="/Applications/Postgres.app/Contents/MacOS/bin/psql" # I use Postgres.app on the Mac, for convenience.
$PATH_TO_PSQL <<EOF
CREATE database $CHADO_DB_NAME;
EOF
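# Optionally confirm the database was created; -lqt prints the database
# list in a script-friendly form (this assumes the same psql binary and
# default connection as above).
$PATH_TO_PSQL -lqt | grep -w "$CHADO_DB_NAME"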
#############################
# Download the Chado Schema #
#############################
# Download chado schema
wget --timestamping http://sourceforge.net/projects/gmod/files/gmod/chado-1.23/chado-1.23.tar.gz/download -O chado-1.23.tar.gz
# Unzip
gunzip chado-1.23.tar.gz
tar -xf chado-1.23.tar
# Remove unnecessary files.
rm chado-1.23.tar
cd chado-1.23
##########################################
# Install Perl Dependencies if necessary #
##########################################
# Install Perl Dependencies
# Some of these are used later on.
sudo perl -MCPAN -e shell <<EOF
install GO::Parser
install Template
install version
install SQL::Translator
force install DBD::Pg
install Bio::Chado::Schema
install DBIx::DBSchema
install XML::Parser::PerlSAX
force install DBIx::DBStag
install Config::Std
install Getopt::Long
EOF
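# Optionally verify the key modules load before building; each one-liner
# prints the installed version, or fails loudly if the CPAN install did
# not finish.
perl -MDBD::Pg -e 'print "DBD::Pg $DBD::Pg::VERSION\n"'
perl -MBio::Chado::Schema -e 'print "Bio::Chado::Schema $Bio::Chado::Schema::VERSION\n"'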
########
# MAKE #
########
perl Makefile.PL CHADO_DB_NAME=$CHADO_DB_NAME CHADO_DB_USERNAME=$CHADO_DB_USERNAME CHADO_DB_PASS=$CHADO_DB_PASS GO_ROOT=$HOME
# For Reference:
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*WARNING-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# STEP 3 WILL DELETE ANY DATA IN A DATABASE WITH THE
# DATABASE NAME YOU PROVIDED!
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*WARNING-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
# 3a. make load_schema (loads SQL schema into database)
#
# or
#
# 3b. make update (update an old Chado schema and stop here)
# 4. make prepdb (loads basic data)
# 5. make ontologies (loads data for various ontologies)
#
# Optional Targets:
# make rm_locks (removes ontology lock files, allowing installation
# of ontologies on successive builds of the database
# without removing the ontology files altogether)
# make clean (remove build related files and ontology tmp dir)
# make instructions (at any moment display these instructions)
#
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
#################
# Install Chado #
#################
# Build, install, load the schema, prep the database, and load ontologies.
# Makefile.PL was already configured with the variables above.
make
sudo make install # Sets up install files.
make load_schema  # Loads a blank schema.
make prepdb       # Prepares the database; inserts a few fundamentals.
make ontologies   # Installs ontologies; enter 1,2,3,4 at the prompt
                  # (Relationship, Sequence, Gene, and Feature).
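# Optionally confirm the ontologies loaded by listing the controlled
# vocabularies now in the database (cv is a standard Chado table; this
# assumes the same default connection used above).
$PATH_TO_PSQL -d $CHADO_DB_NAME -c "SELECT cv_id, name FROM cv ORDER BY name;"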
cd ..
##################################
# Download additional ontologies #
##################################
mkdir obo
# Human Phenotype
wget --timestamping --directory-prefix=obo "http://compbio.charite.de/hudson/job/hpo/lastSuccessfulBuild/artifact/ontology/release/hp.obo"
# Disease Ontology
wget --timestamping --directory-prefix=obo "http://purl.obolibrary.org/obo/doid.obo"
# Create a directory for storing the xml files.
mkdir obo_xml
# Create XML Files of each ontology.
go2fmt.pl -p obo_text -w xml obo/hp.obo | \
go-apply-xslt oboxml_to_chadoxml - > obo_xml/hp_obo.xml
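# An empty output file is the usual sign that the conversion failed, so a
# quick check is worthwhile (a generic test, not specific to go2fmt.pl).
[ -s obo_xml/hp_obo.xml ] || echo "WARNING: obo_xml/hp_obo.xml is empty" >&2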
# Within the disease ontology, some URLs need to be truncated because they are too long for the Chado schema.
python <<EOF
import re
f = open("obo/doid.obo", "r").read()
fixed = open("obo/doid_fixed.obo", "w")
def match_repl(m):
    return m.group(0)[0:239]  # Truncate long urls
# Rewrite every url:... value (terminated by ',' or ']') with a truncated copy.
m = re.subn(r'url:(.*)[,|\]]', match_repl, f)
fixed.write(m[0])
fixed.close()
EOF
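# The 239-character cutoff above assumes the offending column allows about
# 255 characters; optionally report any lines that still look too long.
awk 'length > 255 { print "line " NR " is " length($0) " chars" }' obo/doid_fixed.obo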
##################################
# Load Additional Ontologies     #
##################################
# Use the fixed disease ontology file to create the XML file.
go2fmt.pl -p obo_text -w xml obo/doid_fixed.obo | \
go-apply-xslt oboxml_to_chadoxml - > obo_xml/doid_obo.xml
# Finally - load the remaining OBO files.
stag-storenode.pl \
-d 'dbi:Pg:dbname=chado;host=localhost;port=5432' \
--user $CHADO_DB_USERNAME obo_xml/hp_obo.xml
stag-storenode.pl \
-d 'dbi:Pg:dbname=chado;host=localhost;port=5432' \
--user $CHADO_DB_USERNAME obo_xml/doid_obo.xml
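# Optionally spot-check that the new terms arrived; in a standard Chado
# database every loaded ontology term lands in cvterm.
$PATH_TO_PSQL -d $CHADO_DB_NAME -c "SELECT count(*) FROM cvterm;"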
####################################
# Download Human Features and Load #
####################################
mkdir -p add_db && cd add_db # Directory used to store files of databases being added.
mkdir gff3
cd gff3
wget 'https://biotoolbox.googlecode.com/svn-history/r600/trunk/scripts/ucsc_table2gff3.pl'
perl ucsc_table2gff3.pl --ftp refgene --db hg19 --table refGene --nocds
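# The load commands below assume the script wrote hg19_refGene.gff3 and
# hg19_chromInfo.gff3 to the current directory; list them to be sure.
ls -lh hg19_*.gff3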
# Load Chromosomes
gmod_gff3_preprocessor.pl --gfffile 'hg19_chromInfo.gff3'
gmod_bulk_load_gff3.pl --gfffile 'hg19_chromInfo.gff3.sorted' --recreate_cache
# Load Data
# gmod_gff3_preprocessor.pl --gfffile 'hg19_refGene.gff3' # Apparently, you do not need to pre-process this file.
gmod_bulk_load_gff3.pl --gfffile 'hg19_refGene.gff3' --dbxref
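# Optionally count what was just loaded, grouped by feature type; the
# feature-to-cvterm join is standard Chado, though the exact counts depend
# on the refGene snapshot you downloaded.
$PATH_TO_PSQL -d $CHADO_DB_NAME -c "SELECT t.name, count(*) FROM feature f JOIN cvterm t ON f.type_id = t.cvterm_id GROUP BY t.name ORDER BY count(*) DESC;"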