I-DCC targ_rep wiki (example) scripts - Perl, Python and Ruby examples for loading and updating data in the IKMC Targeting Repository
#!/usr/bin/env perl
#
# Helper functions for interacting with the IKMC Targeting Repository
#
# Authors:: Darren Oakley (mailto:[email protected])
# Nelo Onyiah (mailto:[email protected])
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
#
# Create a data object of the alleles and products we need to load
# - in your script this should come from your database.
#
# For NorCOMM products, use pipeline_id = 3.
#
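# (Illustrative only.) If this data were pulled from a database rather than
# hard-coded as below, a minimal sketch - assuming a hypothetical DBI handle
# and a hypothetical "alleles_to_load" table - might look like:
#
#   use DBI;
#   my $dbh = DBI->connect( 'dbi:Pg:dbname=my_pipeline_db', 'db_user', 'db_pass' );
#   my $alleles_and_products = $dbh->selectall_arrayref(
#       'select * from alleles_to_load',
#       { Slice => {} }    # return an array of hash references
#   );
#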
my $DOMAIN = 'localhost:3000';
my $USER = 'user';
my $PASS = 'pass';
my $PIPELINE = 3;
my $alleles_and_products = [
{
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
cassette_type => "Promotorless",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => 1000,
loxp_end => 1500,
targeting_vectors => [
{
pipeline_id => $PIPELINE,
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00001', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00002', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00003', allele_symbol_superscript => 'tm1a' },
],
},
{
pipeline_id => $PIPELINE,
name => 'PRPGD002',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00004', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00005', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00006', allele_symbol_superscript => 'tm1a' },
],
}
],
},
{
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
cassette_type => "Promotorless",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => undef,
loxp_end => undef,
targeting_vectors => [
{
pipeline_id => $PIPELINE,
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00007', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00008', allele_symbol_superscript => 'tm1a' },
],
},
],
}
];
#
# Now iterate over the alleles/products and load them
#
process_alleles_and_products( $alleles_and_products );
exit 0;
#
# Wrapper function to handle the whole data loading process
#
sub process_alleles_and_products {
my ($alleles_and_products) = @_;
foreach my $allele_data ( @{$alleles_and_products} ) {
# extract and remove the genbank file and product data
my $genbank_file_data = $allele_data->{genbank_file};
my $targeting_vectors_data = $allele_data->{targeting_vectors};
delete $allele_data->{genbank_file} if $allele_data->{genbank_file};
delete $allele_data->{targeting_vectors} if $allele_data->{targeting_vectors};
# Find, Update or Create the allele
my $allele = find_create_update_allele($allele_data);
if ( $genbank_file_data ) {
# Find, Update or Create the genbank_file entry
$genbank_file_data->{allele_id} = $allele->{id};
my $genbank_file = find_create_update_genbank($genbank_file_data);
}
foreach my $vector_data ( @{$targeting_vectors_data} ) {
# extract and remove the es cell data
my $es_cell_clones_data = $vector_data->{es_cells};
delete $vector_data->{es_cells} if $vector_data->{es_cells};
# Find, Update or Create the vector
my $vector =
find_create_update_vector( $vector_data, $allele->{id} );
foreach my $clone_data ( @{$es_cell_clones_data} ) {
# Find, Update or Create the clone
$clone_data->{allele_id} = $allele->{id};
$clone_data->{targeting_vector_id} = $vector->{id};
my $clone = find_create_update_clone($clone_data);
}
}
sleep(1);
}
}
#
# Generic helper functions
#
sub compare {
my ( $original, $new ) = @_;
# Checks every key in $original to see if it's the same in $new.
# Returns 1 if the same, 0 if not.
foreach my $field ( keys %{$original} ) {
if ( defined $original->{$field} ) {
return 0 unless defined $new->{$field};
return 0 unless $original->{$field} eq $new->{$field};
}
else {
return 0 if defined $new->{$field};
}
}
return 1;
}
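# For example, compare( { a => 1 }, { a => 1, b => 2 } ) returns 1 - only the
# keys present in the first (original) hash are checked, so extra fields
# returned by the repository are ignored.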
sub request {
my ( $method, $url, $data ) = @_;
die "Method $method unknown when requesting URL $url"
unless $method =~ m/DELETE|GET|POST|PUT/;
my @args = $data ? ( $data ) : ();
my $client = REST::Client->new( { host => "http://$USER:$PASS\@$DOMAIN" } );
# Set the Content-Type and call the method with @args
$client->addHeader( content_type => "application/json" );
$client->$method( $url, @args );
# A small update message
warn join( " ", $method, $url, '-', $client->responseCode ), "\n";
# Handle failures here -- only code 200 | 201 are OK
die "Bad HTTP response " . $client->responseCode . " " . $client->responseContent
unless $client->responseCode =~ m/20[01]/;
return $client->responseContent;
}
sub find {
my ( $search_url, $error_string ) = @_;
my $response = from_json( request( "GET", $search_url ) );
if ( scalar( @{$response} ) > 1 ) { die $error_string . "\n"; }
elsif ( scalar( @{$response} ) == 1 ) { return $response->[0]; }
else { return undef; }
}
sub create {
my ( $data, $type, $controller ) = @_;
my $json = to_json( { $type => $data } );
my $return = from_json( request( "POST", "/$controller.json", $json ) );
return $return;
}
sub update {
my ( $data, $type, $controller ) = @_;
my $json = to_json( { $type => $data } );
my $return = from_json( request( "PUT", "/$controller/$data->{id}.json", $json ) );
return $return;
}
sub delete_entry {
my ( $data, $type, $controller ) = @_;
my $return = from_json( request( "DELETE", "/$controller/$data->{id}.json", undef ) );
return $return;
}
#
# Allele Methods
#
sub find_allele {
my ($allele_data) = @_;
my $loxp_start = $allele_data->{loxp_start} ? $allele_data->{loxp_start} : 'null';
my $loxp_end = $allele_data->{loxp_end} ? $allele_data->{loxp_end} : 'null';
my $search_url =
"/alleles.json"
. "?mgi_accession_id=" . $allele_data->{mgi_accession_id}
. "&assembly=" . $allele_data->{assembly}
. "&chromosome=" . $allele_data->{chromosome}
. "&strand=" . $allele_data->{strand}
. "&cassette=" . $allele_data->{cassette}
. "&backbone=" . $allele_data->{backbone}
. "&homology_arm_start=" . $allele_data->{homology_arm_start}
. "&homology_arm_end=" . $allele_data->{homology_arm_end}
. "&cassette_start=" . $allele_data->{cassette_start}
. "&cassette_end=" . $allele_data->{cassette_end}
. "&loxp_start=" . $loxp_start
. "&loxp_end=" . $loxp_end;
my $error_string =
"Error: found more than one allele for:"
. "\n - mgi_accession_id: " . $allele_data->{mgi_accession_id}
. "\n - assembly: " . $allele_data->{assembly}
. "\n - chromosome: " . $allele_data->{chromosome}
. "\n - strand: " . $allele_data->{strand}
. "\n - cassette: " . $allele_data->{cassette}
. "\n - backbone: " . $allele_data->{backbone}
. "\n - homology_arm_start: " . $allele_data->{homology_arm_start}
. "\n - homology_arm_end: " . $allele_data->{homology_arm_end}
. "\n - cassette_start: " . $allele_data->{cassette_start}
. "\n - cassette_end: " . $allele_data->{cassette_end}
. "\n - loxp_start: " . $loxp_start
. "\n - loxp_end: " . $loxp_end
. "\n";
return find( $search_url, $error_string );
}
sub create_allele {
my ($allele_data) = @_;
return create( $allele_data, 'allele', 'alleles' );
}
sub update_allele {
my ($allele_data) = @_;
return update( $allele_data, 'allele', 'alleles' );
}
sub delete_allele {
my ($allele_data) = @_;
return delete_entry( $allele_data, 'allele', 'alleles' );
}
sub find_create_update_allele {
my ($allele_data) = @_;
my $allele = find_allele($allele_data);
if ( defined $allele ) {
# We already have an allele entry, see if an update is required
my $records_match = compare( $allele_data, $allele );
unless ($records_match) {
$allele_data->{id} = $allele->{id};
$allele = update_allele($allele_data);
}
}
else {
$allele = create_allele($allele_data);
}
return $allele;
}
#
# Genbank File Methods
#
sub find_genbank {
my ($genbank_data) = @_;
my $search_url
= "/genbank_files.json" . "?allele_id=" . $genbank_data->{allele_id};
my $error_string = "Error: found more than one genbank_file for "
. $genbank_data->{allele_id};
return find( $search_url, $error_string );
}
sub create_genbank {
my ($genbank_data) = @_;
return create( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub update_genbank {
my ($genbank_data) = @_;
return update( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub delete_genbank {
my ($genbank_data) = @_;
return delete_entry( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub find_create_update_genbank {
my ($genbank_data) = @_;
my $genbank = find_genbank($genbank_data);
if ( defined $genbank ) {
# We already have a genbank entry, see if an update is required
my $records_match = compare( $genbank_data, $genbank );
unless ($records_match) {
$genbank_data->{id} = $genbank->{id};
$genbank = update_genbank($genbank_data);
}
}
else {
$genbank = create_genbank($genbank_data);
}
return $genbank;
}
#
# Targeting Vector Methods
#
sub find_vector {
my ($vector_data) = @_;
my $search_url
= "/targeting_vectors.json" . "?name=" . $vector_data->{name};
my $error_string
= "Error: found more than one vector called " . $vector_data->{name};
return find( $search_url, $error_string );
}
sub create_vector {
my ($vector_data) = @_;
return create( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub update_vector {
my ($vector_data) = @_;
return update( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub delete_vector {
my ($vector_data) = @_;
return delete_entry( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub find_create_update_vector {
my ( $vector_data, $allele_id ) = @_;
my $vector = find_vector($vector_data);
if ( defined $vector ) {
# We already have a vector entry, see if an update is required
my $records_match = compare( $vector_data, $vector );
unless ($records_match) {
$vector_data->{id} = $vector->{id};
$vector_data->{allele_id} = $vector->{allele_id};
$vector = update_vector($vector_data);
}
}
else {
$vector_data->{allele_id} = $allele_id;
$vector = create_vector($vector_data);
}
return $vector;
}
#
# ES Cell Clone Methods
#
sub find_clone {
my ($clone_data) = @_;
my $search_url = "/es_cells.json" . "?name=" . $clone_data->{name};
my $error_string
= "Error: found more than one es cell called " . $clone_data->{name};
return find( $search_url, $error_string );
}
sub create_clone {
my ($clone_data) = @_;
return create( $clone_data, 'es_cell', 'es_cells' );
}
sub update_clone {
my ($clone_data) = @_;
return update( $clone_data, 'es_cell', 'es_cells' );
}
sub delete_clone {
my ($clone_data) = @_;
return delete_entry( $clone_data, 'es_cell', 'es_cells' );
}
sub find_create_update_clone {
my ($clone_data) = @_;
my $clone = find_clone($clone_data);
if ( defined $clone ) {
# We already have a clone entry, see if an update is required
my $records_match = compare( $clone_data, $clone );
unless ($records_match) {
$clone_data->{id} = $clone->{id};
$clone = update_clone($clone_data);
}
}
else {
$clone = create_clone($clone_data);
}
return $clone;
}
#!/usr/bin/env perl
# Author:: Nelo Onyiah (mailto:[email protected])
#
# In this example we update the alleles for the
# KOMP-Regeneron pipeline, fixing the
# "allele_symbol_superscript" of each ES Cell that
# needs it.
#
# This example demonstrates that data is retrieved from
# the repository one page at a time. It also makes use
# of a generic "request" function (see earlier examples).
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
my $domain = 'localhost:3000';
my $user = 'user';
my $pass = 'pass';
my $page = 0;
#
# Retrieve the available pipelines
my $response = request( "GET", "pipelines.json" );
my $pipelines = from_json($response);
#
# We are only interested in the KOMP-Regeneron pipeline, so let's get that
my ($regeneron) = grep { $_->{name} eq 'KOMP-Regeneron' } @{$pipelines};
#
# Due to size limits, data is returned from the repository in pages.
# Therefore, we need to process the data as such ... one page at a time.
while (1) {
my $alleles = update_es_cells_on_page( $regeneron, ++$page );
last unless @{$alleles};
}
exit 0;
#
# Generic helper function for handling the web requests to the repository.
sub request {
my ( $method, $url, $data ) = @_;
die "Method $method unknown when requesting URL $url"
unless $method =~ m/DELETE|GET|POST|PUT/;
my @args = $data ? ( $data ) : ();
my $client = REST::Client->new( { host => "http://$user:$pass\@$domain" } );
# Set the Content-Type and call the method with @args
$client->addHeader( content_type => "application/json" );
$client->$method( $url, @args );
# A small update message
warn join( " ", $method, $url, '-', $client->responseCode ), "\n";
# Handle failures here -- only code 200 | 201 are OK
die "Bad HTTP response ", $client->responseCode
unless $client->responseCode =~ m/20[01]/;
return $client->responseContent;
}
#
# Generic function to process the data retrieved from a specified page
sub update_es_cells_on_page {
my ( $pipeline, $page ) = @_;
#
# Now let's fetch all the alleles from $page (this may take a while)
my $search_params = "es_cells_pipeline_id_is=$pipeline->{id}&page=$page";
my $response = request( "GET", "alleles.json?$search_params" );
my $alleles = from_json($response);
#
# Loop through the alleles ...
for my $allele (@{$alleles}) {
for my $es_cell ( @{ $allele->{es_cells} } ) {
# ... updating the es_cells that need fixing along the way
if ( $es_cell->{allele_symbol_superscript} =~ m/^.+\<(.+)\>$/ ) {
$es_cell->{allele_symbol_superscript} = $1;
my $es_cell_json = to_json( { es_cell => $es_cell } );
request( "PUT", "es_cells/$es_cell->{id}.json", $es_cell_json );
}
}
}
#
# When there is no data, we are on the last page
warn "Found 0 KOMP-Regeneron alleles on page $page\n" unless scalar @{$alleles};
# Return the list of alleles
return $alleles;
}
#! /usr/bin/python
# Author:: Sébastien Briois (mailto:[email protected])
import httplib2 # http://httplib2.googlecode.com/files/httplib2-0.6.0.zip
import urllib
import base64
from urlparse import urljoin
try:
import json # Python 2.6
except ImportError:
import simplejson as json # Python 2.4+ - http://pypi.python.org/pypi/simplejson/2.0.9
DOMAIN = 'localhost:3000'
BASE_URL = 'http://%s/' % DOMAIN
USERNAME = 'htgt'
PASSWORD = 'htgt'
# Generic helper class for handling the web requests to the repository.
class UserAgent(object):
def __init__(self):
self.http = httplib2.Http()
self.http.add_credentials(USERNAME, PASSWORD)
self.base_url = BASE_URL
def uri_for(self, rel_url, params = None):
if params:
params = urllib.urlencode(params)
return urljoin( self.base_url, "%s?%s" % (rel_url, params) )
return urljoin( self.base_url, rel_url )
def request(self, method, rel_url, data = {}):
if method in ['GET', 'DELETE']:
uri = self.uri_for( rel_url, data )
resp, content = self.http.request( uri, method, headers = { 'Content-Type': 'application/json' } )
elif method in ['POST', 'PUT']:
uri = self.uri_for( rel_url )
data = json.dumps( data )
resp, content = self.http.request( uri, method, data, headers = { 'Content-Type': 'application/json' } )
else:
raise Exception( "Method %s unknown when requesting URL %s" % (method, rel_url) )
print "%s %s: %s" % (method, uri, resp['status'])
if resp['status'] in ['200', '201']:
# The DELETE method does not return any content
return method == 'DELETE' and True or json.loads( content )
raise Exception(content)
# Create a User Agent
ua = UserAgent()
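# Illustrative usage (hypothetical values): ua.request( 'GET', 'alleles.json', { 'page': 1 } )
# returns the parsed JSON for page 1 of the alleles listing, while
# ua.request( 'DELETE', 'alleles/1.json' ) simply returns True on success.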
def find( url, params ):
results = ua.request( 'GET', url, params )
if len(results) > 1:
raise "Your search returned more than one result."
if not results:
return None
return results[0]
#
# Allele specific methods
#
def create_allele( data ):
return ua.request( 'POST', 'alleles.json', { 'allele' : data } )
def update_allele( id, data ):
return ua.request( 'PUT', 'alleles/%s.json' % id, { 'allele' : data } )
def create_or_update_allele( data ):
allele_found = find('alleles.json', {
'mgi_accession_id' : data['mgi_accession_id'],
'assembly' : data['assembly'],
'chromosome' : data['chromosome'],
'strand' : data['strand'],
'cassette' : data['cassette'],
'backbone' : data['backbone'],
'homology_arm_start': data['homology_arm_start'],
'homology_arm_end' : data['homology_arm_end'],
'cassette_start' : data['cassette_start'],
'cassette_end' : data['cassette_end'],
'loxp_start' : data['loxp_start'] or 'null',
'loxp_end' : data['loxp_end'] or 'null'
})
if not allele_found:
return create_allele( data )
else:
return update_allele( allele_found['id'], data )
def delete_allele( id ):
ua.request( 'DELETE', "alleles/%s.json" % id )
#
# Targeting Vector specific methods
#
def create_targeting_vector( data ):
return ua.request( 'POST', 'targeting_vectors.json', { 'targeting_vector' : data } )
def update_targeting_vector( id, data ):
return ua.request( 'PUT', 'targeting_vectors/%s.json' % id, { 'targeting_vector' : data } )
def create_or_update_vector( data ):
vector_found = find( "targeting_vectors.json", { 'name': data['name'] } )
if not vector_found:
return create_targeting_vector( data )
else:
return update_targeting_vector( vector_found['id'], data )
def delete_targeting_vector( id ):
ua.request( 'DELETE', "targeting_vectors/%s.json" % id )
#
# ES Cell specific methods
#
def create_es_cell( data ):
return ua.request( 'POST', 'es_cells.json', { 'es_cell' : data } )
def update_es_cell( id, data ):
return ua.request( 'PUT', 'es_cells/%s.json' % id, { 'es_cell' : data } )
def create_or_update_es_cell( data ):
es_cell_found = find( "es_cells.json", { 'name': data['name'] } )
if not es_cell_found:
return create_es_cell( data )
else:
return update_es_cell( es_cell_found['id'], data )
def delete_es_cell( id ):
ua.request( 'DELETE', "es_cells/%s.json" % id )
##
## Main script scenario:
## - We create a data structure containing all the objects we want to create or update in the database
## - We loop over this data structure and follow this procedure:
## 1- Search the object
## 2- Object found ? Yes: Update; No: Create
##
# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
pipeline_list = ua.request( 'GET', 'pipelines.json' )
for pipeline in pipeline_list:
if pipeline['name'] == 'EUCOMM':
break
# Create our data structure
alleles = [
# First allele
{
'mgi_accession_id' : "MGI:123",
'project_design_id' : 23640,
'cassette' : "L1L2_gt2",
'backbone' : "L3L4_pZero_kan",
'assembly' : "NCBIM37",
'chromosome' : "1",
'strand' : "+",
'design_type' : "Knock Out",
'design_subtype' : "Frameshift",
'homology_arm_start' : 10,
'homology_arm_end' : 10000,
'cassette_start' : 50,
'cassette_end' : 500,
'loxp_start' : 1000,
'loxp_end' : 1500,
# Targeting vectors for the first allele
'targeting_vectors' : [
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD001',
'intermediate_vector' : 'PGS001',
'ikmc_project_id' : 9801
},
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD002',
'intermediate_vector' : 'PGS001',
'ikmc_project_id' : 9801
}
],
# ES Cells for the first allele
'es_cells' : [
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD001', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD002', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD003', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD004', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD005', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD006', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' }
],
# Genbank File for the first allele
'genbank_file' : {
'escell_clone' : "A GENBANK FILE IN PLAIN TEXT",
'targeting_vector' : "A GENBANK FILE IN PLAIN TEXT"
}
},
# Second allele
{
'mgi_accession_id' : "MGI:456",
'project_design_id' : 29871,
'cassette' : "L1L2_gt2",
'backbone' : "L3L4_pZero_kan",
'assembly' : "NCBIM37",
'chromosome' : "1",
'strand' : "+",
'design_type' : "Knock Out",
'design_subtype' : "Frameshift",
'homology_arm_start' : 10,
'homology_arm_end' : 10000,
'cassette_start' : 50,
'cassette_end' : 500,
'loxp_start' : 1000,
'loxp_end' : 1500,
# Targeting vectors for the second allele
'targeting_vectors' : [
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD003',
'intermediate_vector' : 'PGS002',
'ikmc_project_id' : 6809480
},
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD004',
'intermediate_vector' : 'PGS002',
'ikmc_project_id' : 6809480
}
],
# ES Cells for the second allele
'es_cells' : [
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD007', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD008', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD009', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD010', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD011', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD012', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' }
]
}
]
# Create or Update Alleles - keep track of what we create so we can clean it up at the end
created_vectors, created_es_cells = [], []
for allele_hash in alleles:
# allele_hash should not contain unknown fields
targeting_vectors = allele_hash.pop( 'targeting_vectors' )
es_cells = allele_hash.pop( 'es_cells' )
genbank_file = allele_hash.pop( 'genbank_file', None ) # genbank data is not used further in this example
created_vectors.extend( targeting_vectors )
created_es_cells.extend( es_cells )
allele = create_or_update_allele( allele_hash )
allele_hash['id'] = allele['id']
# Create or Update Targeting Vectors
for vector_hash in targeting_vectors:
vector_hash['allele_id'] = allele['id']
vector = create_or_update_vector( vector_hash )
vector_hash['id'] = vector['id']
# Find, Create or Update ES Cells
for es_cell_hash in es_cells:
es_cell_hash['allele_id'] = allele['id']
# Find targeting vector ID from its name or set it to nil
# if ES Cell is not linked to a targeting vector
if 'targeting_vector' in es_cell_hash:
targ_vec_name = es_cell_hash.pop('targeting_vector')
for vector in targeting_vectors:
if vector['name'] == targ_vec_name:
break
es_cell_hash['targeting_vector_id'] = vector['id']
else:
es_cell_hash['targeting_vector_id'] = None
es_cell = create_or_update_es_cell( es_cell_hash )
es_cell_hash['id'] = es_cell['id']
# DELETE everything we just created (cleanup for the example)
for es_cell in created_es_cells: delete_es_cell( es_cell['id'] )
for vector in created_vectors: delete_targeting_vector( vector['id'] )
for allele in alleles: delete_allele( allele['id'] )
#! /usr/bin/env ruby -wKU
# Author:: Sébastien Briois (mailto:[email protected])
require "rubygems"
require "rest_client"
require "json"
DOMAIN = RestClient::Resource.new( "http://user:password@localhost:3000" )
# Generic helper method for handling the web calls to the repository.
def request( method, url, data = nil )
response =
case method.upcase
when "GET" then DOMAIN[url].get
when "POST" then DOMAIN[url].post data, :content_type => "application/json"
when "PUT" then DOMAIN[url].put data, :content_type => "application/json"
when "DELETE" then DOMAIN[url].delete
else
raise "Method #{method} unknown when requesting url #{url}"
end
puts "#{method} #{url} - #{response.code} #{RestClient::STATUSES[response.code]}"
return response.body
end
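# Illustrative usage: request( 'GET', 'pipelines.json' ) returns the raw JSON
# string listing the pipelines; for POST/PUT pass a JSON body, e.g.
# request( 'POST', 'alleles.json', JSON.generate( { :allele => data } ) )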
#
# Allele specific methods
#
def find_allele( allele )
params = ""
params << "mgi_accession_id=" + allele[:mgi_accession_id]
params << "&assembly=" + allele[:assembly]
params << "&chromosome=" + allele[:chromosome]
params << "&strand=" + allele[:strand]
params << "&cassette=" + allele[:cassette]
params << "&backbone=" + allele[:backbone]
params << "&homology_arm_start=" + allele[:homology_arm_start]
params << "&homology_arm_end=" + allele[:homology_arm_end]
params << "&cassette_start=" + allele[:cassette_start]
params << "&cassette_end=" + allele[:cassette_end]
# Will find a conditional allele or a non-conditional allele
if allele[:loxp_start] and allele[:loxp_end]
params << "&loxp_start=#{allele[:loxp_start]}&loxp_end=#{allele[:loxp_end]}"
else
params << "&loxp_start=null&loxp_end=null"
end
# Request for all the alleles that match the params.
# The '.json' indicates that we want a JSON string as a response.
response = request( 'GET', "alleles.json?#{params}" )
# This will be a list whether the request returned one allele or more.
allele_list = JSON.parse( response )
# If the search is not specific enough and returns more than 1 allele
if allele_list.length > 1
raise "Your search returned more than one allele, please refine it."
end
if allele_list.length == 1
return allele_list[0]
end
return nil
end
def create_allele( data )
json = JSON.generate({ :allele => data })
response = request( 'POST', 'alleles.json', json )
allele = JSON.parse( response )
return allele
end
def update_allele( id, data )
json = JSON.generate({ :allele => data })
response = request( 'PUT', "alleles/#{id}.json", json )
allele = JSON.parse( response )
return allele
end
def delete_allele( id )
request( 'DELETE', "alleles/#{id}" )
end
#
# Targeting Vector specific methods
#
def find_targeting_vector( vector )
response = request( 'GET', "targeting_vectors.json?name=#{vector['name']}" )
targeting_vector_list = JSON.parse( response )
if targeting_vector_list == 1
return targeting_vector_list[0]
end
return nil
end
def create_targeting_vector( data )
json = JSON.generate({ :targeting_vector => data })
response = request( 'POST', 'targeting_vectors.json', json )
vector = JSON.parse( response )
return vector
end
def update_targeting_vector( id, data )
json = JSON.generate({ :targeting_vector => data })
response = request( 'PUT', "targeting_vectors/#{id}.json", json )
vector = JSON.parse( response )
return vector
end
def delete_targeting_vector( id )
request( 'DELETE', "targeting_vectors/#{id}" )
end
#
# ES Cell specific methods
#
def find_es_cell( cell )
response = request( 'GET', "es_cells.json?name=#{cell['name']}" )
es_cell_list = JSON.parse( response )
if es_cell_list == 1
return es_cell_list[0]
end
return nil
end
def create_es_cell( cell )
json = JSON.generate({ :es_cell => cell })
response = request( 'POST', 'es_cells.json', json )
cell = JSON.parse( response )
return cell
end
def update_es_cell( id, data )
json = JSON.generate({ :es_cell => data })
response = request( 'PUT', "es_cells/#{id}.json", json )
cell = JSON.parse( response )
return cell
end
def delete_es_cell( id )
request( 'DELETE', "es_cells/#{id}" )
end
##
## Main script scenario:
## - We create a data structure containing all the objects we want to create or update in the database
## - We loop over this data structure and follow this procedure:
## 1- Search the object
## 2- Object found ? Yes: Update; No: Create
##
# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
response = request( 'GET', 'pipelines.json' )
pipeline_list = JSON.parse( response )
pipeline = pipeline_list.find { |pipeline| pipeline['name'] == 'EUCOMM' }
# Create our data structure
alleles = [
# First allele
{
:mgi_accession_id => "MGI:123",
:project_design_id => 23640,
:cassette => "L1L2_gt2",
:backbone => "L3L4_pZero_kan",
:assembly => "NCBIM37",
:chromosome => "1",
:strand => "+",
:design_type => "Knock Out",
:design_subtype => "Frameshift",
:homology_arm_start => 10,
:homology_arm_end => 10000,
:cassette_start => 50,
:cassette_end => 500,
:loxp_start => 1000,
:loxp_end => 1500,
# Targeting vectors for the first allele
:targeting_vectors => [
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD001',
:intermediate_vector => 'PGS001',
:ikmc_project_id => 9801
},
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD002',
:intermediate_vector => 'PGS001',
:ikmc_project_id => 9801
}
],
# ES Cells for the first allele
:es_cells => [
{ :pipeline_id => pipeline['id'], :name => 'EPD001', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD002', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD003', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD004', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
{ :pipeline_id => pipeline['id'], :name => 'EPD005', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
{ :pipeline_id => pipeline['id'], :name => 'EPD006', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' }
],
# Genbank File for the first allele
:genbank_file => {
:escell_clone => "A GENBANK FILE IN PLAIN TEXT",
:targeting_vector => "A GENBANK FILE IN PLAIN TEXT"
}
},
# Second allele
{
:mgi_accession_id => "MGI:456",
:project_design_id => 29871,
:cassette => "L1L2_gt2",
:backbone => "L3L4_pZero_kan",
:assembly => "NCBIM37",
:chromosome => "1",
:strand => "+",
:design_type => "Knock Out",
:design_subtype => "Frameshift",
:homology_arm_start => 10,
:homology_arm_end => 10000,
:cassette_start => 50,
:cassette_end => 500,
:loxp_start => 1000,
:loxp_end => 1500,
# Targeting vectors for the second allele
:targeting_vectors => [
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD003',
:intermediate_vector => 'PGS002',
:ikmc_project_id => 6809480
},
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD004',
:intermediate_vector => 'PGS002',
:ikmc_project_id => 6809480
}
],
# ES Cells for the second allele
:es_cells => [
{ :pipeline_id => pipeline['id'], :name => 'EPD007', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD008', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD009', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD010', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
{ :pipeline_id => pipeline['id'], :name => 'EPD011', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
{ :pipeline_id => pipeline['id'], :name => 'EPD012', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' }
]
}
]
created_vectors, created_es_cells = [], []   # remember what we create so we can clean up at the end
alleles.each do |allele_hash|
# allele_hash should not contain unknown fields
targeting_vectors = allele_hash.delete( :targeting_vectors )
es_cells = allele_hash.delete( :es_cells )
allele_hash.delete( :genbank_file ) # genbank data is not used further in this example
created_vectors.concat( targeting_vectors )
created_es_cells.concat( es_cells )
# Find, Create or Update allele
allele_found = find_allele( allele_hash )
if allele_found.nil?
allele = create_allele( allele_hash )
else
# If allele has been found, it has an "id"
allele = update_allele( allele_found['id'], allele_hash )
end
# Our allele now has an ID
allele_hash[:id] = allele['id']
# Find, Create or Update Targeting Vector
targeting_vectors.each do |vector_hash|
vector_hash[:allele_id] = allele_hash[:id]
vector_found = find_targeting_vector( vector_hash )
if vector_found.nil?
vector = create_targeting_vector( vector_hash )
else
vector = update_targeting_vector( vector_found['id'], vector_hash )
end
vector_hash[:id] = vector['id']
end
# Find, Create or Update ES Cell
es_cells.each do |es_cell_hash|
# ES Cell must be linked to a Molecular Structure
es_cell_hash[:allele_id] = allele_hash[:id]
# If ES Cell is linked to a targeting vector, retrieve its ID
if es_cell_hash.include? :targeting_vector
es_cell_hash[:targeting_vector_id] =
targeting_vectors.find { |v| v[:name] == es_cell_hash[:targeting_vector] }['id']
else
es_cell_hash[:targeting_vector_id] = nil
end
# Find, Create or Update ES Cell
es_cell_found = find_es_cell( es_cell_hash )
if es_cell_found.nil?
es_cell = create_es_cell( es_cell_hash )
else
es_cell = update_es_cell( es_cell_found['id'], es_cell_hash )
end
es_cell_hash[:id] = es_cell['id']
end
end
# DELETE everything we just created (cleanup for the example)
created_es_cells.each { |es_cell| delete_es_cell( es_cell[:id] ) }
created_vectors.each { |vector| delete_targeting_vector( vector[:id] ) }
alleles.each { |allele| delete_allele( allele[:id] ) }