Created
August 29, 2013 16:14
-
-
Save danielecook/6380172 to your computer and use it in GitHub Desktop.
The script I used to setup a chado database and load in some basic human reference data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env | |
# Requires wget - can be installed using homebrew if you are on a mac. | |
## Installation Variables ## | |
CHADO_DB_USERNAME="" | |
CHADO_DB_PASS="" | |
CHADO_DB_NAME="chado" | |
PATH_TO_PSQL="/Applications/Postgres.app/Contents/MacOS/bin/psql" # I use a special application for mac, for convenience. | |
$PATH_TO_PSQL <<EOF | |
CREATE database $CHADO_DB_NAME; | |
EOF | |
############################# | |
# Download the Chado Schema # | |
############################# | |
# Download chado schema | |
wget --timestamping http://sourceforge.net/projects/gmod/files/gmod/chado-1.23/chado-1.23.tar.gz/download -O chado-1.23.tar.gz | |
# Unzip | |
gunzip chado-1.23.tar.gz | |
tar -xf chado-1.23.tar | |
# Remove unnecessary files. | |
rm chado-1.23.tar | |
cd chado-1.23 | |
########################################## | |
# Install Perl Dependencies if necessary # | |
########################################## | |
# Install Perl Dependencies | |
# Some of these are used later on. | |
sudo perl -MCPAN -e shell <<EOF | |
install GO::Parser | |
install Template | |
install version | |
install SQL::Translator | |
force install DBD::Pg | |
install Bio::Chado::Schema | |
install DBIx::DBSchema | |
install XML::Parser::PerlSAX | |
force install DBIx::DBStag | |
install Config::Std | |
install Getopt::Long | |
EOF | |
######## | |
# MAKE # | |
######## | |
perl Makefile.PL CHADO_DB_NAME=$CHADO_DB_NAME CHADO_DB_USERNAME=$CHADO_DB_USERNAME CHADO_DB_PASS=$CHADO_DB_PASS GO_ROOT=$HOME | |
# For Reference: | |
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*WARNING-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* | |
# STEP 3 WILL DELETE ANY DATA IN A DATABASE WITH THE | |
# DATABASE NAME YOU PROVIDED! | |
# -*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*WARNING-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-* | |
# 3a. make load_schema (loads SQL schema into database) | |
# | |
# or | |
# | |
# 3b. make update (update an old Chado schema and stop here) | |
# 4. make prepdb (loads basic data) | |
# 5. make ontologies (loads data for various ontologies) | |
# | |
# Optional Targets: | |
# make rm_locks (removes ontology lock files, allowing installation | |
# of ontologies on successive builds of the database | |
# without removing the ontology files altogether) | |
# make clean (remove build related files and ontology tmp dir) | |
# make instructions (at any moment display these instructions) | |
# | |
# -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | |
################# | |
# Install Chado # | |
################# | |
perl Makefile.PL | |
make | |
sudo make install | |
make load_schema | |
make prepdb | |
make ontologies #selected 1,2,3,4 | |
# Install, load scheme, prep db, and load ontologies. | |
sudo make install # Sets up install files. | |
make load_schema # Load a blank Schema | |
make prepdb # Prepare the database; Inserts a few funamentals. | |
make ontologies # Install ontologies. | |
1,2,3,4 # Relationship, Sequence, Gene, and feature | |
cd .. | |
################################## | |
# Download additional ontologies # | |
################################## | |
mkdir obo | |
# Human Phenotype | |
wget --timestamping --directory-prefix=obo "http://compbio.charite.de/hudson/job/hpo/lastSuccessfulBuild/artifact/ontology/release/hp.obo" | |
# Disease Ontology | |
wget --timestamping --directory-prefix=obo "http://purl.obolibrary.org/obo/doid.obo" | |
# Create a directory for storing the xml files. | |
mkdir obo_xml | |
# Create XML Files of each ontology. | |
go2fmt.pl -p obo_text -w xml obo/hp.obo | \ | |
go-apply-xslt oboxml_to_chadoxml - > obo_xml/hp_obo.xml | |
# Within the disease ontology; some URLS need to be truncated b/c they were violating the chado schema. | |
python <<EOF | |
import re | |
f = file("obo/doid.obo",'r').read() | |
fixed = file("obo/doid_fixed.obo",'w') | |
def match_repl(m): | |
return m.group(0)[0:239] # Truncate long urls | |
m = re.subn( r'url:(.*)[,|\]]',match_repl,f) | |
fixed.write(m[0]) | |
EOF | |
################################## | |
# Load Additional Ontologies # | |
################################## | |
# Use the fixed disease ontolgoy file to create the xml file. | |
go2fmt.pl -p obo_text -w xml obo/doid_fixed.obo | \ | |
go-apply-xslt oboxml_to_chadoxml - > obo_xml/doid_obo.xml | |
# Finally - load the remaining obo's | |
stag-storenode.pl \ | |
-d 'dbi:Pg:dbname=chado;host=localhost;port=5432' \ | |
--user $CHADO_DB_USERNAME obo_xml/hp_obo.xml | |
stag-storenode.pl \ | |
-d 'dbi:Pg:dbname=chado;host=localhost;port=5432' \ | |
--user $CHADO_DB_USERNAME obo_xml/doid_obo.xml | |
#################################### | |
# Download Human Features and Load # | |
#################################### | |
cd add_db # Directory used to store files of databases being added. | |
mkdir gff3 | |
cd gff3 | |
wget 'https://biotoolbox.googlecode.com/svn-history/r600/trunk/scripts/ucsc_table2gff3.pl' | |
perl ucsc_table2gff3.pl --ftp refgene --db hg19 --table refGene --nocds | |
# Load Chromosomes | |
gmod_gff3_preprocessor.pl --gfffile 'hg19_chromInfo.gff3' | |
gmod_bulk_load_gff3.pl --gfffile 'hg19_chromInfo.gff3.sorted' --recreate_cache | |
# Load Data | |
# gmod_gff3_preprocessor.pl --gfffile 'hg19_refGene.gff3' # Apparently, you do not need to pre-process this file. | |
gmod_bulk_load_gff3.pl --gfffile 'hg19_refGene.gff3' --dbxref | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment