I-DCC targ_rep wiki (example) scripts
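Example scripts (in Perl, Python and Ruby) for creating, updating and deleting alleles, targeting vectors, ES cells and GenBank files in the IKMC Targeting Repository via its JSON REST API.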
#!/usr/bin/env perl
#
# Helper functions for interacting with the IKMC Targeting Repository
#
# Authors:: Darren Oakley (mailto:[email protected])
#           Nelo Onyiah (mailto:[email protected])
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
use URI::Escape;    # used below to escape query-string values (e.g. a "+" strand)
#
# Create a data object of the alleles and products we need to load
# - in your script this should come from your database.
#
# For NorCOMM products, use pipeline_id = 3.
#
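# A minimal sketch of pulling that structure from a database with DBI - the
# DSN, table and column names here are hypothetical:
#
#   use DBI;
#   my $dbh = DBI->connect( 'dbi:mysql:my_lims', 'db_user', 'db_pass' );
#   my $alleles_and_products = $dbh->selectall_arrayref(
#       'SELECT * FROM alleles_to_load',
#       { Slice => {} }    # return an array of hash-refs
#   );
#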
my $DOMAIN   = 'localhost:3000';
my $USER     = 'user';
my $PASS     = 'pass';
my $PIPELINE = 3;

my $alleles_and_products = [
    {
        mgi_accession_id   => "MGI:123456",
        project_design_id  => 2,
        cassette           => "L1L2_gt2",
        cassette_type      => "Promotorless",
        backbone           => "L3L4_pZero_kan",
        assembly           => "NCBIM37",
        chromosome         => "1",
        strand             => "+",
        design_type        => "Knock Out",
        design_subtype     => "Frameshift",
        homology_arm_start => 10,
        homology_arm_end   => 10000,
        cassette_start     => 50,
        cassette_end       => 500,
        loxp_start         => 1000,
        loxp_end           => 1500,
        targeting_vectors  => [
            {
                pipeline_id         => $PIPELINE,
                name                => 'PRPGD001',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { pipeline_id => $PIPELINE, name => 'EPD00001', allele_symbol_superscript => 'tm1a' },
                    { pipeline_id => $PIPELINE, name => 'EPD00002', allele_symbol_superscript => 'tm1a' },
                    { pipeline_id => $PIPELINE, name => 'EPD00003', allele_symbol_superscript => 'tm1a' },
                ],
            },
            {
                pipeline_id         => $PIPELINE,
                name                => 'PRPGD002',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { pipeline_id => $PIPELINE, name => 'EPD00004', allele_symbol_superscript => 'tm1a' },
                    { pipeline_id => $PIPELINE, name => 'EPD00005', allele_symbol_superscript => 'tm1a' },
                    { pipeline_id => $PIPELINE, name => 'EPD00006', allele_symbol_superscript => 'tm1a' },
                ],
            }
        ],
    },
    {
        mgi_accession_id   => "MGI:123456",
        project_design_id  => 2,
        cassette           => "L1L2_gt2",
        cassette_type      => "Promotorless",
        backbone           => "L3L4_pZero_kan",
        assembly           => "NCBIM37",
        chromosome         => "1",
        strand             => "+",
        design_type        => "Knock Out",
        design_subtype     => "Frameshift",
        homology_arm_start => 10,
        homology_arm_end   => 10000,
        cassette_start     => 50,
        cassette_end       => 500,
        loxp_start         => undef,
        loxp_end           => undef,
        targeting_vectors  => [
            {
                pipeline_id         => $PIPELINE,
                name                => 'PRPGD001',
                intermediate_vector => 'PGS001',
                ikmc_project_id     => 1,
                es_cells            => [
                    { pipeline_id => $PIPELINE, name => 'EPD00007', allele_symbol_superscript => 'tm1a' },
                    { pipeline_id => $PIPELINE, name => 'EPD00008', allele_symbol_superscript => 'tm1a' },
                ],
            },
        ],
    }
];
#
# Now iterate over the alleles/products and load them
#
process_alleles_and_products($alleles_and_products);
exit 0;

#
# Wrapper function to handle the whole data loading process
#
sub process_alleles_and_products {
    my ($alleles_and_products) = @_;
    foreach my $allele_data ( @{$alleles_and_products} ) {
        # extract and remove the genbank file and product data
        my $genbank_file_data      = $allele_data->{genbank_file};
        my $targeting_vectors_data = $allele_data->{targeting_vectors};
        delete $allele_data->{genbank_file};
        delete $allele_data->{targeting_vectors};

        # Find/Update/Create the allele
        my $allele = find_create_update_allele($allele_data);

        if ($genbank_file_data) {
            # Find/Update/Create the genbank file
            $genbank_file_data->{allele_id} = $allele->{id};
            my $genbank_file = find_create_update_genbank($genbank_file_data);
        }
        foreach my $vector_data ( @{$targeting_vectors_data} ) {
            # extract and remove the es cell data
            my $es_cell_clones_data = $vector_data->{es_cells};
            delete $vector_data->{es_cells};

            # Find/Update/Create the vector
            my $vector = find_create_update_vector( $vector_data, $allele->{id} );

            foreach my $clone_data ( @{$es_cell_clones_data} ) {
                # Find/Update/Create the clone
                $clone_data->{allele_id}           = $allele->{id};
                $clone_data->{targeting_vector_id} = $vector->{id};
                my $clone = find_create_update_clone($clone_data);
            }
        }
        sleep(1);
    }
}
#
# Generic helper functions
#

# Checks every key in $original to see if it's the same in $new.
# Returns 1 if they match, 0 if not.
sub compare {
    my ( $original, $new ) = @_;
    foreach my $field ( keys %{$original} ) {
        if ( defined $original->{$field} ) {
            return 0 unless defined $new->{$field};
            return 0 unless $original->{$field} eq $new->{$field};
        }
        else {
            return 0 if defined $new->{$field};
        }
    }
    return 1;
}
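# Note that compare() only inspects the keys present in $original, so extra
# fields on the repository copy (id, timestamps, etc.) do not force an
# update. For example (made-up data):
#
#   compare( { name => 'EPD00001' }, { id => 7, name => 'EPD00001' } );   # 1
#   compare( { name => 'EPD00001' }, { id => 7, name => 'EPD00002' } );   # 0
#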
sub request {
    my ( $method, $url, $data ) = @_;
    die "Method $method unknown when requesting URL $url"
        unless $method =~ m/DELETE|GET|POST|PUT/;

    # Build the argument list explicitly ("my @args = $data if $data" is
    # unreliable in Perl)
    my @args = $data ? ($data) : ();

    my $client = REST::Client->new( { host => "http://$USER:$PASS\@$DOMAIN" } );

    # Set the Content-Type and call the method with @args
    $client->addHeader( content_type => "application/json" );
    $client->$method( $url, @args );

    # A small update message
    warn join( " ", $method, $url, '-', $client->responseCode ), "\n";

    # Handle failures here -- only codes 200 and 201 are OK
    die "Bad HTTP response " . $client->responseCode . " " . $client->responseContent
        unless $client->responseCode =~ m/20[01]/;

    return $client->responseContent;
}
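# For example, fetching the full pipeline list as a Perl structure:
#
#   my $pipelines = from_json( request( "GET", "/pipelines.json" ) );
#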
sub find {
    my ( $search_url, $error_string ) = @_;
    my $response = from_json( request( "GET", $search_url ) );
    if    ( scalar( @{$response} ) > 1 )  { die $error_string . "\n"; }
    elsif ( scalar( @{$response} ) == 1 ) { return $response->[0]; }
    else                                  { return undef; }
}

sub create {
    my ( $data, $type, $controller ) = @_;
    my $json = to_json( { $type => $data } );
    return from_json( request( "POST", "/$controller.json", $json ) );
}

sub update {
    my ( $data, $type, $controller ) = @_;
    my $json = to_json( { $type => $data } );
    return from_json( request( "PUT", "/$controller/$data->{id}.json", $json ) );
}

sub delete_entry {
    my ( $data, $type, $controller ) = @_;
    return from_json( request( "DELETE", "/$controller/$data->{id}.json", undef ) );
}
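# A minimal usage sketch tying the generic helpers together (the names and
# values here are made up):
#
#   my $cell = create( { pipeline_id => 3, name => 'EPD99999' }, 'es_cell', 'es_cells' );
#   $cell->{allele_symbol_superscript} = 'tm1a';
#   update( $cell, 'es_cell', 'es_cells' );
#   delete_entry( $cell, 'es_cell', 'es_cells' );
#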
#
# Allele Methods
#
sub find_allele {
    my ($allele_data) = @_;
    my $loxp_start = $allele_data->{loxp_start} ? $allele_data->{loxp_start} : 'null';
    my $loxp_end   = $allele_data->{loxp_end}   ? $allele_data->{loxp_end}   : 'null';
    # Escape each value going into the query string -- a raw "+" strand, for
    # example, would otherwise be decoded as a space on the server side
    my $search_url =
          "/alleles.json"
        . "?mgi_accession_id="   . uri_escape( $allele_data->{mgi_accession_id} )
        . "&assembly="           . uri_escape( $allele_data->{assembly} )
        . "&chromosome="         . uri_escape( $allele_data->{chromosome} )
        . "&strand="             . uri_escape( $allele_data->{strand} )
        . "&cassette="           . uri_escape( $allele_data->{cassette} )
        . "&backbone="           . uri_escape( $allele_data->{backbone} )
        . "&homology_arm_start=" . $allele_data->{homology_arm_start}
        . "&homology_arm_end="   . $allele_data->{homology_arm_end}
        . "&cassette_start="     . $allele_data->{cassette_start}
        . "&cassette_end="       . $allele_data->{cassette_end}
        . "&loxp_start="         . $loxp_start
        . "&loxp_end="           . $loxp_end;
    my $error_string =
          "Error: found more than one allele for:"
        . "\n - mgi_accession_id: "   . $allele_data->{mgi_accession_id}
        . "\n - assembly: "           . $allele_data->{assembly}
        . "\n - chromosome: "         . $allele_data->{chromosome}
        . "\n - strand: "             . $allele_data->{strand}
        . "\n - cassette: "           . $allele_data->{cassette}
        . "\n - backbone: "           . $allele_data->{backbone}
        . "\n - homology_arm_start: " . $allele_data->{homology_arm_start}
        . "\n - homology_arm_end: "   . $allele_data->{homology_arm_end}
        . "\n - cassette_start: "     . $allele_data->{cassette_start}
        . "\n - cassette_end: "       . $allele_data->{cassette_end}
        . "\n - loxp_start: "         . $loxp_start
        . "\n - loxp_end: "           . $loxp_end
        . "\n";
    return find( $search_url, $error_string );
}
sub create_allele {
    my ($allele_data) = @_;
    return create( $allele_data, 'allele', 'alleles' );
}

sub update_allele {
    my ($allele_data) = @_;
    return update( $allele_data, 'allele', 'alleles' );
}

sub delete_allele {
    my ($allele_data) = @_;
    return delete_entry( $allele_data, 'allele', 'alleles' );
}

sub find_create_update_allele {
    my ($allele_data) = @_;
    my $allele = find_allele($allele_data);
    if ( defined $allele ) {
        # We already have an allele entry -- update it only if our data differs
        unless ( compare( $allele_data, $allele ) ) {
            $allele_data->{id} = $allele->{id};
            $allele = update_allele($allele_data);
        }
    }
    else {
        $allele = create_allele($allele_data);
    }
    return $allele;
}

#
# Genbank File Methods
#
sub find_genbank {
    my ($genbank_data) = @_;
    my $search_url   = "/genbank_files.json?allele_id=" . $genbank_data->{allele_id};
    my $error_string = "Error: found more than one genbank_file for "
        . $genbank_data->{allele_id};
    return find( $search_url, $error_string );
}

sub create_genbank {
    my ($genbank_data) = @_;
    return create( $genbank_data, 'genbank_file', 'genbank_files' );
}

sub update_genbank {
    my ($genbank_data) = @_;
    return update( $genbank_data, 'genbank_file', 'genbank_files' );
}

sub delete_genbank {
    my ($genbank_data) = @_;
    return delete_entry( $genbank_data, 'genbank_file', 'genbank_files' );
}

sub find_create_update_genbank {
    my ($genbank_data) = @_;
    my $genbank = find_genbank($genbank_data);
    if ( defined $genbank ) {
        # We already have a genbank entry -- update it only if our data differs
        unless ( compare( $genbank_data, $genbank ) ) {
            $genbank_data->{id} = $genbank->{id};
            $genbank = update_genbank($genbank_data);
        }
    }
    else {
        $genbank = create_genbank($genbank_data);
    }
    return $genbank;
}

#
# Targeting Vector Methods
#
sub find_vector {
    my ($vector_data) = @_;
    my $search_url   = "/targeting_vectors.json?name=" . $vector_data->{name};
    my $error_string = "Error: found more than one vector called " . $vector_data->{name};
    return find( $search_url, $error_string );
}

sub create_vector {
    my ($vector_data) = @_;
    return create( $vector_data, 'targeting_vector', 'targeting_vectors' );
}

sub update_vector {
    my ($vector_data) = @_;
    return update( $vector_data, 'targeting_vector', 'targeting_vectors' );
}

sub delete_vector {
    my ($vector_data) = @_;
    return delete_entry( $vector_data, 'targeting_vector', 'targeting_vectors' );
}

sub find_create_update_vector {
    my ( $vector_data, $allele_id ) = @_;
    my $vector = find_vector($vector_data);
    if ( defined $vector ) {
        # We already have a vector entry -- update it only if our data differs
        unless ( compare( $vector_data, $vector ) ) {
            $vector_data->{id}        = $vector->{id};
            $vector_data->{allele_id} = $vector->{allele_id};
            $vector = update_vector($vector_data);
        }
    }
    else {
        $vector_data->{allele_id} = $allele_id;
        $vector = create_vector($vector_data);
    }
    return $vector;
}

#
# ES Cell Clone Methods
#
sub find_clone {
    my ($clone_data) = @_;
    my $search_url   = "/es_cells.json?name=" . $clone_data->{name};
    my $error_string = "Error: found more than one es cell called " . $clone_data->{name};
    return find( $search_url, $error_string );
}

sub create_clone {
    my ($clone_data) = @_;
    return create( $clone_data, 'es_cell', 'es_cells' );
}

sub update_clone {
    my ($clone_data) = @_;
    return update( $clone_data, 'es_cell', 'es_cells' );
}

sub delete_clone {
    my ($clone_data) = @_;
    return delete_entry( $clone_data, 'es_cell', 'es_cells' );
}

sub find_create_update_clone {
    my ($clone_data) = @_;
    my $clone = find_clone($clone_data);
    if ( defined $clone ) {
        # We already have a clone entry -- update it only if our data differs
        unless ( compare( $clone_data, $clone ) ) {
            $clone_data->{id} = $clone->{id};
            $clone = update_clone($clone_data);
        }
    }
    else {
        $clone = create_clone($clone_data);
    }
    return $clone;
}
#!/usr/bin/env perl
#
# Author:: Nelo Onyiah (mailto:[email protected])
#
# In this example we are going to update all the alleles
# for the KOMP-Regeneron pipeline. We are updating the
# "allele_symbol_superscript" for each ES Cell we find
# that needs updating.
#
# This example demonstrates that data is retrieved from
# the repository one page at a time. It also makes use
# of a generic "request" function (see earlier examples).
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;

my $domain = 'localhost:3000';
my $user   = 'user';
my $pass   = 'pass';
my $page   = 0;

#
# Retrieve the available pipelines
my $response  = request( "GET", "/pipelines.json" );
my $pipelines = from_json($response);
#
# We are only interested in the KOMP-Regeneron pipeline, so let's get that
my ($regeneron) = grep { $_->{name} eq 'KOMP-Regeneron' } @{$pipelines};

#
# Due to size limits, data is returned from the repository in pages.
# Therefore, we need to process the data as such ... one page at a time.
while (1) {
    my $alleles = update_es_cells_on_page( $regeneron, ++$page );
    last unless @{$alleles};
}
exit 0;

#
# Generic helper function for handling the web requests to the repository.
sub request {
    my ( $method, $url, $data ) = @_;
    die "Method $method unknown when requesting URL $url"
        unless $method =~ m/DELETE|GET|POST|PUT/;

    # Build the argument list explicitly ("my @args = $data if $data" is
    # unreliable in Perl)
    my @args = $data ? ($data) : ();

    my $client = REST::Client->new( { host => "http://$user:$pass\@$domain" } );

    # Set the Content-Type and call the method with @args
    $client->addHeader( content_type => "application/json" );
    $client->$method( $url, @args );

    # A small update message
    warn join( " ", $method, $url, '-', $client->responseCode ), "\n";

    # Handle failures here -- only codes 200 and 201 are OK
    die "Bad HTTP response ", $client->responseCode
        unless $client->responseCode =~ m/20[01]/;

    return $client->responseContent;
}
#
# Generic function to process the data retrieved from a specified page
sub update_es_cells_on_page {
    my ( $pipeline, $page ) = @_;

    #
    # Now let's fetch all the alleles from $page (this may take a while)
    my $search_params = "es_cells_pipeline_id_is=$pipeline->{id}&page=$page";
    my $response      = request( "GET", "/alleles.json?$search_params" );
    my $alleles       = from_json($response);

    #
    # Loop through the alleles ...
    for my $allele ( @{$alleles} ) {
        for my $es_cell ( @{ $allele->{es_cells} } ) {
            # ... updating the es_cells that need fixing along the way
            if ( $es_cell->{allele_symbol_superscript} =~ m/^.+\<(.+)\>$/ ) {
                $es_cell->{allele_symbol_superscript} = $1;
                my $es_cell_json = to_json( { es_cell => $es_cell } );
                request( "PUT", "/es_cells/$es_cell->{id}.json", $es_cell_json );
            }
        }
    }

    #
    # When there is no data, we are on the last page
    warn "Found 0 KOMP-Regeneron alleles on page $page\n" unless scalar @{$alleles};

    # Return the list of alleles
    return $alleles;
}
#!/usr/bin/python
# Author:: Sébastien Briois (mailto:[email protected])
import httplib2  # http://httplib2.googlecode.com/files/httplib2-0.6.0.zip
import urllib
from urlparse import urljoin  # needed by UserAgent.uri_for() below

try:
    import json  # Python 2.6
except ImportError:
    import simplejson as json  # Python 2.4+ - http://pypi.python.org/pypi/simplejson/2.0.9

DOMAIN   = 'localhost:3000'
USERNAME = 'htgt'
PASSWORD = 'htgt'
BASE_URL = 'http://%s/' % DOMAIN  # base for all request URLs
# Generic helper class for handling the web requests to the repository.
class UserAgent(object):
    def __init__(self):
        self.http = httplib2.Http()
        self.http.add_credentials(USERNAME, PASSWORD)
        self.base_url = BASE_URL

    def uri_for(self, rel_url, params=None):
        if params:
            params = urllib.urlencode(params)
            return urljoin(self.base_url, "%s?%s" % (rel_url, params))
        return urljoin(self.base_url, rel_url)

    def request(self, method, rel_url, data={}):
        if method in ['GET', 'DELETE']:
            uri = self.uri_for(rel_url, data)
            resp, content = self.http.request(uri, method, headers={ 'Content-Type': 'application/json' })
        elif method in ['POST', 'PUT']:
            uri = self.uri_for(rel_url)
            data = json.dumps(data)
            resp, content = self.http.request(uri, method, data, headers={ 'Content-Type': 'application/json' })
        else:
            raise Exception("Method %s unknown when requesting URL %s" % (method, rel_url))
        print "%s %s: %s" % (method, uri, resp['status'])
        if resp['status'] in ['200', '201']:
            # DELETE requests do not return any content
            if method == 'DELETE':
                return True
            return json.loads(content)
        raise Exception(content)
# Create a User Agent
ua = UserAgent()

def find(url, params):
    results = ua.request('GET', url, params)
    if len(results) > 1:
raise "Your search returned more than one result." | |
    if not results:
        return None
    return results[0]
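# For example, looking up a single targeting vector by name (the name here
# is one of the example records created in the main script below):
#
#   vector = find('targeting_vectors.json', { 'name': 'PRPGD001' })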
#
# Allele specific methods
#
def create_allele(data):
    return ua.request('POST', 'alleles.json', { 'allele': data })

def update_allele(id, data):
    return ua.request('PUT', 'alleles/%s.json' % id, { 'allele': data })

def create_or_update_allele(data):
    allele_found = find('alleles.json', {
        'mgi_accession_id'  : data['mgi_accession_id'],
        'assembly'          : data['assembly'],
        'chromosome'        : data['chromosome'],
        'strand'            : data['strand'],
        'cassette'          : data['cassette'],
        'backbone'          : data['backbone'],
        'homology_arm_start': data['homology_arm_start'],
        'homology_arm_end'  : data['homology_arm_end'],
        'cassette_start'    : data['cassette_start'],
        'cassette_end'      : data['cassette_end'],
        'loxp_start'        : data['loxp_start'] or 'null',
        'loxp_end'          : data['loxp_end'] or 'null'
    })
    if not allele_found:
        return create_allele(data)
    else:
        return update_allele(allele_found['id'], data)

def delete_allele(id):
    ua.request('DELETE', "alleles/%s.json" % id)

#
# Targeting Vector specific methods
#
def create_targeting_vector(data):
    return ua.request('POST', 'targeting_vectors.json', { 'targeting_vector': data })

def update_targeting_vector(id, data):
    return ua.request('PUT', 'targeting_vectors/%s.json' % id, { 'targeting_vector': data })

def create_or_update_vector(data):
    vector_found = find("targeting_vectors.json", { 'name': data['name'] })
    if not vector_found:
        return create_targeting_vector(data)
    else:
        return update_targeting_vector(vector_found['id'], data)

def delete_targeting_vector(id):
    ua.request('DELETE', "targeting_vectors/%s.json" % id)

#
# ES Cell specific methods
#
def create_es_cell(data):
    return ua.request('POST', 'es_cells.json', { 'es_cell': data })

def update_es_cell(id, data):
    # Updates go via PUT, just like the other record types
    return ua.request('PUT', 'es_cells/%s.json' % id, { 'es_cell': data })
def create_or_update_es_cell(data):
    es_cell_found = find("es_cells.json", { 'name': data['name'] })
    if not es_cell_found:
        return create_es_cell(data)
    else:
        return update_es_cell(es_cell_found['id'], data)

def delete_es_cell(id):
    ua.request('DELETE', "es_cells/%s.json" % id)
##
## Main script scenario:
##  - We create a data structure containing all the objects we want to create or update in the database
##  - We loop over this data structure and follow this procedure:
##      1- Search the object
##      2- Object found ? Yes: Update; No: Create
##

# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
pipeline_list = ua.request('GET', 'pipelines.json')
for pipeline in pipeline_list:
    if pipeline['name'] == 'EUCOMM':
        break
else:
    raise Exception("Pipeline 'EUCOMM' not found")
# Create our data structure
alleles = [
    # First allele
    {
        'mgi_accession_id'  : "MGI:123",
        'project_design_id' : 23640,
        'cassette'          : "L1L2_gt2",
        'backbone'          : "L3L4_pZero_kan",
        'assembly'          : "NCBIM37",
        'chromosome'        : "1",
        'strand'            : "+",
        'design_type'       : "Knock Out",
        'design_subtype'    : "Frameshift",
        'homology_arm_start': 10,
        'homology_arm_end'  : 10000,
        'cassette_start'    : 50,
        'cassette_end'      : 500,
        'loxp_start'        : 1000,
        'loxp_end'          : 1500,

        # Targeting vectors for the first allele
        'targeting_vectors' : [
            {
                'pipeline_id'        : pipeline['id'],
                'name'               : 'PRPGD001',
                'intermediate_vector': 'PGS001',
                'ikmc_project_id'    : 9801
            },
            {
                'pipeline_id'        : pipeline['id'],
                'name'               : 'PRPGD002',
                'intermediate_vector': 'PGS001',
                'ikmc_project_id'    : 9801
            }
        ],

        # ES Cells for the first allele
        'es_cells' : [
            { 'pipeline_id': pipeline['id'], 'name': 'EPD001', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD001' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD002', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD001' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD003', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD001' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD004', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD002' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD005', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD002' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD006', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD002' }
        ],

        # Genbank File for the first allele
        'genbank_file' : {
            'escell_clone'    : "A GENBANK FILE IN PLAIN TEXT",
            'targeting_vector': "A GENBANK FILE IN PLAIN TEXT"
        }
    },

    # Second allele
    {
        'mgi_accession_id'  : "MGI:456",
        'project_design_id' : 29871,
        'cassette'          : "L1L2_gt2",
        'backbone'          : "L3L4_pZero_kan",
        'assembly'          : "NCBIM37",
        'chromosome'        : "1",
        'strand'            : "+",
        'design_type'       : "Knock Out",
        'design_subtype'    : "Frameshift",
        'homology_arm_start': 10,
        'homology_arm_end'  : 10000,
        'cassette_start'    : 50,
        'cassette_end'      : 500,
        'loxp_start'        : 1000,
        'loxp_end'          : 1500,

        # Targeting vectors for the second allele
        'targeting_vectors' : [
            {
                'pipeline_id'        : pipeline['id'],
                'name'               : 'PRPGD003',
                'intermediate_vector': 'PGS002',
                'ikmc_project_id'    : 6809480
            },
            {
                'pipeline_id'        : pipeline['id'],
                'name'               : 'PRPGD004',
                'intermediate_vector': 'PGS002',
                'ikmc_project_id'    : 6809480
            }
        ],

        # ES Cells for the second allele
        'es_cells' : [
            { 'pipeline_id': pipeline['id'], 'name': 'EPD007', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD003' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD008', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD003' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD009', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD003' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD010', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD004' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD011', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD004' },
            { 'pipeline_id': pipeline['id'], 'name': 'EPD012', 'allele_symbol_superscript': 'tm1a', 'targeting_vector': 'PRPGD004' }
        ]
    }
]
# Create or Update Alleles. Keep running lists of everything we touch so
# the DELETE section at the bottom can clean up all of it, not just the
# records from the last allele processed.
all_targeting_vectors = []
all_es_cells          = []
for allele_hash in alleles:
    # allele_hash should not contain unknown fields
    targeting_vectors = allele_hash.pop('targeting_vectors')
    es_cells          = allele_hash.pop('es_cells')
    genbank_file      = allele_hash.pop('genbank_file', None)  # not loaded in this example
    all_targeting_vectors.extend(targeting_vectors)
    all_es_cells.extend(es_cells)

    allele = create_or_update_allele(allele_hash)
    allele_hash['id'] = allele['id']
    # Create or Update Targeting Vectors
    for vector_hash in targeting_vectors:
        vector_hash['allele_id'] = allele['id']
        vector = create_or_update_vector(vector_hash)
        vector_hash['id'] = vector['id']

    # Find, Create or Update ES Cells
    for es_cell_hash in es_cells:
        es_cell_hash['allele_id'] = allele['id']
        # Find the targeting vector ID from its name, or set it to None
        # if the ES Cell is not linked to a targeting vector
        if 'targeting_vector' in es_cell_hash:
            targ_vec_name = es_cell_hash.pop('targeting_vector')
            for vector in targeting_vectors:
                if vector['name'] == targ_vec_name:
                    break
            else:
                raise Exception("Unknown targeting vector: %s" % targ_vec_name)
            es_cell_hash['targeting_vector_id'] = vector['id']
        else:
            es_cell_hash['targeting_vector_id'] = None
        es_cell = create_or_update_es_cell(es_cell_hash)
        es_cell_hash['id'] = es_cell['id']
# DELETE everything we just created, using the accumulated lists
for es_cell in all_es_cells: delete_es_cell(es_cell['id'])
for vector in all_targeting_vectors: delete_targeting_vector(vector['id'])
for allele in alleles: delete_allele(allele['id'])
#!/usr/bin/env ruby -wKU
# Author:: Sébastien Briois (mailto:[email protected])
require "rubygems"
require "rest_client"
require "json"
require "cgi"  # for CGI.escape when building query strings
DOMAIN = RestClient::Resource.new( "http://user:password@localhost:3000" )

# Generic helper method for handling the web calls to the repository.
def request( method, url, data = nil )
  response =
    case method.upcase
    when "GET"    then DOMAIN[url].get
    when "POST"   then DOMAIN[url].post data, :content_type => "application/json"
    when "PUT"    then DOMAIN[url].put  data, :content_type => "application/json"
    when "DELETE" then DOMAIN[url].delete
    else
      raise "Method #{method} unknown when requesting url #{url}"
    end
  puts "#{method} #{url} - #{response.code} #{RestClient::STATUSES[response.code]}"
  return response.body
end
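# For example, listing the available pipelines (exactly what the main
# script below does first):
#
#   pipelines = JSON.parse( request( 'GET', 'pipelines.json' ) )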
#
# Allele specific methods
#
def find_allele( allele )
  # CGI.escape each string value -- a raw "+" strand, for example, would
  # otherwise be decoded as a space on the server side. Numeric values are
  # interpolated directly (string concatenation with an integer would raise
  # a TypeError).
  params = ""
  params << "mgi_accession_id=#{CGI.escape( allele[:mgi_accession_id].to_s )}"
  params << "&assembly=#{CGI.escape( allele[:assembly].to_s )}"
  params << "&chromosome=#{CGI.escape( allele[:chromosome].to_s )}"
  params << "&strand=#{CGI.escape( allele[:strand].to_s )}"
  params << "&cassette=#{CGI.escape( allele[:cassette].to_s )}"
  params << "&backbone=#{CGI.escape( allele[:backbone].to_s )}"
  params << "&homology_arm_start=#{allele[:homology_arm_start]}"
  params << "&homology_arm_end=#{allele[:homology_arm_end]}"
  params << "&cassette_start=#{allele[:cassette_start]}"
  params << "&cassette_end=#{allele[:cassette_end]}"

  # Will find a conditional allele or a non-conditional allele
  if ( allele.include? :loxp_start and allele[:loxp_start] ) and ( allele.include? :loxp_end and allele[:loxp_end] )
    params << "&loxp_start=#{allele[:loxp_start]}&loxp_end=#{allele[:loxp_end]}"
  else
    params << "&loxp_start=null&loxp_end=null"
  end

  # Request all the alleles that match the params.
  # The '.json' indicates that we want a JSON string as a response.
  response = request( 'GET', "alleles.json?#{params}" )

  # This will be a list whether the request returned one allele or more.
  allele_list = JSON.parse( response )

  # If the search is not specific enough and returns more than 1 allele
  if allele_list.length > 1
    raise "Your search returned more than one allele, please refine it."
  end

  if allele_list.length == 1
    return allele_list[0]
  end

  return nil
end
def create_allele( data )
  json     = JSON.generate({ :allele => data })
  response = request( 'POST', 'alleles.json', json )
  return JSON.parse( response )
end

def update_allele( id, data )
  json     = JSON.generate({ :allele => data })
  response = request( 'PUT', "alleles/#{id}.json", json )
  return JSON.parse( response )
end
def delete_allele( id )
  request( 'DELETE', "alleles/#{id}.json" )
end
#
# Targeting Vector specific methods
#
def find_targeting_vector( vector )
  # The vector hashes in this script are keyed by symbol, not string
  response = request( 'GET', "targeting_vectors.json?name=#{CGI.escape( vector[:name].to_s )}" )
  targeting_vector_list = JSON.parse( response )
  if targeting_vector_list.length == 1
    return targeting_vector_list[0]
  end
  return nil
end
def create_targeting_vector( data )
  json     = JSON.generate({ :targeting_vector => data })
  response = request( 'POST', 'targeting_vectors.json', json )
  return JSON.parse( response )
end

def update_targeting_vector( id, data )
  json     = JSON.generate({ :targeting_vector => data })
  response = request( 'PUT', "targeting_vectors/#{id}.json", json )
  return JSON.parse( response )
end
def delete_targeting_vector( id )
  request( 'DELETE', "targeting_vectors/#{id}.json" )
end
#
# ES Cell specific methods
#
def find_es_cell( cell )
  # As above, the ES Cell hashes are keyed by symbol
  response = request( 'GET', "es_cells.json?name=#{CGI.escape( cell[:name].to_s )}" )
  es_cell_list = JSON.parse( response )
  if es_cell_list.length == 1
    return es_cell_list[0]
  end
  return nil
end
def create_es_cell( cell )
  json     = JSON.generate({ :es_cell => cell })
  response = request( 'POST', 'es_cells.json', json )
  return JSON.parse( response )
end

def update_es_cell( id, data )
  json     = JSON.generate({ :es_cell => data })
  response = request( 'PUT', "es_cells/#{id}.json", json )
  return JSON.parse( response )
end
def delete_es_cell( id )
  request( 'DELETE', "es_cells/#{id}.json" )
end
##
## Main script scenario:
##  - We create a data structure containing all the objects we want to create or update in the database
##  - We loop over this data structure and follow this procedure:
##      1- Search the object
##      2- Object found ? Yes: Update; No: Create
##

# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
response      = request( 'GET', 'pipelines.json' )
pipeline_list = JSON.parse( response )
pipeline      = pipeline_list.find { |pipeline| pipeline['name'] == 'EUCOMM' }
# Create our data structure
alleles = [
  # First allele
  {
    :mgi_accession_id   => "MGI:123",
    :project_design_id  => 23640,
    :cassette           => "L1L2_gt2",
    :backbone           => "L3L4_pZero_kan",
    :assembly           => "NCBIM37",
    :chromosome         => "1",
    :strand             => "+",
    :design_type        => "Knock Out",
    :design_subtype     => "Frameshift",
    :homology_arm_start => 10,
    :homology_arm_end   => 10000,
    :cassette_start     => 50,
    :cassette_end       => 500,
    :loxp_start         => 1000,
    :loxp_end           => 1500,

    # Targeting vectors for the first allele
    :targeting_vectors => [
      {
        :pipeline_id         => pipeline['id'],
        :name                => 'PRPGD001',
        :intermediate_vector => 'PGS001',
        :ikmc_project_id     => 9801
      },
      {
        :pipeline_id         => pipeline['id'],
        :name                => 'PRPGD002',
        :intermediate_vector => 'PGS001',
        :ikmc_project_id     => 9801
      }
    ],

    # ES Cells for the first allele
    :es_cells => [
      { :pipeline_id => pipeline['id'], :name => 'EPD001', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
      { :pipeline_id => pipeline['id'], :name => 'EPD002', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
      { :pipeline_id => pipeline['id'], :name => 'EPD003', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
      { :pipeline_id => pipeline['id'], :name => 'EPD004', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
      { :pipeline_id => pipeline['id'], :name => 'EPD005', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
      { :pipeline_id => pipeline['id'], :name => 'EPD006', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' }
    ],

    # Genbank File for the first allele
    :genbank_file => {
      :escell_clone     => "A GENBANK FILE IN PLAIN TEXT",
      :targeting_vector => "A GENBANK FILE IN PLAIN TEXT"
    }
  },

  # Second allele
  {
    :mgi_accession_id   => "MGI:456",
    :project_design_id  => 29871,
    :cassette           => "L1L2_gt2",
    :backbone           => "L3L4_pZero_kan",
    :assembly           => "NCBIM37",
    :chromosome         => "1",
    :strand             => "+",
    :design_type        => "Knock Out",
    :design_subtype     => "Frameshift",
    :homology_arm_start => 10,
    :homology_arm_end   => 10000,
    :cassette_start     => 50,
    :cassette_end       => 500,
    :loxp_start         => 1000,
    :loxp_end           => 1500,

    # Targeting vectors for the second allele
    :targeting_vectors => [
      {
        :pipeline_id         => pipeline['id'],
        :name                => 'PRPGD003',
        :intermediate_vector => 'PGS002',
        :ikmc_project_id     => 6809480
      },
      {
        :pipeline_id         => pipeline['id'],
        :name                => 'PRPGD004',
        :intermediate_vector => 'PGS002',
        :ikmc_project_id     => 6809480
      }
    ],

    # ES Cells for the second allele
    :es_cells => [
      { :pipeline_id => pipeline['id'], :name => 'EPD007', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
      { :pipeline_id => pipeline['id'], :name => 'EPD008', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
      { :pipeline_id => pipeline['id'], :name => 'EPD009', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
      { :pipeline_id => pipeline['id'], :name => 'EPD010', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
      { :pipeline_id => pipeline['id'], :name => 'EPD011', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
      { :pipeline_id => pipeline['id'], :name => 'EPD012', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' }
    ]
  }
]
# Keep running lists of everything we create -- the per-allele variables are
# local to the block below, so the DELETE section at the bottom needs these
all_targeting_vectors = []
all_es_cells          = []

alleles.each do |allele_hash|
  # allele_hash should not contain unknown fields
  targeting_vectors = allele_hash.delete( :targeting_vectors )
  es_cells          = allele_hash.delete( :es_cells )
  genbank_file      = allele_hash.delete( :genbank_file )  # not loaded in this example
  all_targeting_vectors.concat( targeting_vectors )
  all_es_cells.concat( es_cells )
  # Find, Create or Update allele
  allele_found = find_allele( allele_hash )
  if allele_found.nil?
    allele = create_allele( allele_hash )
  else
    # If allele has been found, it has an "id"
    allele = update_allele( allele_found['id'], allele_hash )
  end

  # Our allele now has an ID
  allele_hash[:id] = allele['id']

  # Find, Create or Update Targeting Vector
  targeting_vectors.each do |vector_hash|
    vector_hash[:allele_id] = allele_hash[:id]
    vector_found = find_targeting_vector( vector_hash )
    if vector_found.nil?
      vector = create_targeting_vector( vector_hash )
    else
      vector = update_targeting_vector( vector_found['id'], vector_hash )
    end
    vector_hash[:id] = vector['id']
  end

  # Find, Create or Update ES Cell
  es_cells.each do |es_cell_hash|
    # ES Cell must be linked to a Molecular Structure
    es_cell_hash[:allele_id] = allele_hash[:id]

    # If the ES Cell is linked to a targeting vector, swap the vector name
    # for its ID (the repository expects :targeting_vector_id)
    if es_cell_hash.include? :targeting_vector
      targ_vec_name = es_cell_hash.delete( :targeting_vector )
      es_cell_hash[:targeting_vector_id] =
        targeting_vectors.find { |v| v[:name] == targ_vec_name }[:id]
    else
      es_cell_hash[:targeting_vector_id] = nil
    end
    # Find, Create or Update ES Cell
    es_cell_found = find_es_cell( es_cell_hash )
    if es_cell_found.nil?
      es_cell = create_es_cell( es_cell_hash )
    else
      es_cell = update_es_cell( es_cell_found['id'], es_cell_hash )
    end
    es_cell_hash[:id] = es_cell['id']
  end
end
# DELETE everything we just created, using the accumulated lists
all_es_cells.each          { |es_cell| delete_es_cell( es_cell[:id] ) }
all_targeting_vectors.each { |vector| delete_targeting_vector( vector[:id] ) }
alleles.each               { |allele| delete_allele( allele[:id] ) }