I-DCC targ_rep wiki (example) scripts - Perl, Python and Ruby examples for loading and updating data in the IKMC Targeting Repository
#!/usr/bin/env perl
#
# Helper functions for interacting with the IKMC Targeting Repository
#
# Authors:: Darren Oakley (mailto:[email protected])
# Nelo Onyiah (mailto:[email protected])
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
#
# Create a data object of the alleles and products we need to load
# - in your script this should come from your database.
#
# For NorCOMM products, use pipeline_id = 3.
#
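# (Illustrative only.) If this data were pulled from a database rather than
# hard-coded as below, a minimal sketch - assuming a hypothetical DBI handle
# and a hypothetical "alleles_to_load" table - might look like:
#
#   use DBI;
#   my $dbh = DBI->connect( 'dbi:Pg:dbname=my_pipeline_db', 'db_user', 'db_pass' );
#   my $alleles_and_products = $dbh->selectall_arrayref(
#       'select * from alleles_to_load',
#       { Slice => {} }    # return an array of hash references
#   );
#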
my $DOMAIN = 'localhost:3000';
my $USER = 'user';
my $PASS = 'pass';
my $PIPELINE = 3;
my $alleles_and_products = [
{
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
cassette_type => "Promotorless",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => 1000,
loxp_end => 1500,
targeting_vectors => [
{
pipeline_id => $PIPELINE,
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00001', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00002', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00003', allele_symbol_superscript => 'tm1a' },
],
},
{
pipeline_id => $PIPELINE,
name => 'PRPGD002',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00004', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00005', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00006', allele_symbol_superscript => 'tm1a' },
],
}
],
},
{
mgi_accession_id => "MGI:123456",
project_design_id => 2,
cassette => "L1L2_gt2",
cassette_type => "Promotorless",
backbone => "L3L4_pZero_kan",
assembly => "NCBIM37",
chromosome => "1",
strand => "+",
design_type => "Knock Out",
design_subtype => "Frameshift",
homology_arm_start => 10,
homology_arm_end => 10000,
cassette_start => 50,
cassette_end => 500,
loxp_start => undef,
loxp_end => undef,
targeting_vectors => [
{
pipeline_id => $PIPELINE,
name => 'PRPGD001',
intermediate_vector => 'PGS001',
ikmc_project_id => 1,
es_cells => [
{ pipeline_id => $PIPELINE, name => 'EPD00007', allele_symbol_superscript => 'tm1a' },
{ pipeline_id => $PIPELINE, name => 'EPD00008', allele_symbol_superscript => 'tm1a' },
],
},
],
}
];
#
# Now iterate over the alleles/products and load them
#
process_alleles_and_products( $alleles_and_products );
exit 0;
#
# Wrapper function to handle the whole data loading process
#
sub process_alleles_and_products {
my ($alleles_and_products) = @_;
foreach my $allele_data ( @{$alleles_and_products} ) {
# extract and remove the genbank file and product data
my $genbank_file_data = $allele_data->{genbank_file};
my $targeting_vectors_data = $allele_data->{targeting_vectors};
delete $allele_data->{genbank_file} if $allele_data->{genbank_file};
delete $allele_data->{targeting_vectors} if $allele_data->{targeting_vectors};
# Find, Update or Create the allele
my $allele = find_create_update_allele($allele_data);
if ( $genbank_file_data ) {
# Find, Update or Create the genbank_file entry
$genbank_file_data->{allele_id} = $allele->{id};
my $genbank_file = find_create_update_genbank($genbank_file_data);
}
foreach my $vector_data ( @{$targeting_vectors_data} ) {
# extract and remove the es cell data
my $es_cell_clones_data = $vector_data->{es_cells};
delete $vector_data->{es_cells} if $vector_data->{es_cells};
# Find, Update or Create the vector
my $vector =
find_create_update_vector( $vector_data, $allele->{id} );
foreach my $clone_data ( @{$es_cell_clones_data} ) {
# Find, Update or Create the clone
$clone_data->{allele_id} = $allele->{id};
$clone_data->{targeting_vector_id} = $vector->{id};
my $clone = find_create_update_clone($clone_data);
}
}
sleep(1);
}
}
#
# Generic helper functions
#
sub compare {
my ( $original, $new ) = @_;
# Checks every key in $original to see if it's the same in $new.
# Returns 1 if the same, 0 if not.
foreach my $field ( keys %{$original} ) {
if ( defined $original->{$field} ) {
return 0 unless defined $new->{$field};
return 0 unless $original->{$field} eq $new->{$field};
}
else {
return 0 if defined $new->{$field};
}
}
return 1;
}
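# For example, compare( { a => 1 }, { a => 1, b => 2 } ) returns 1 - only the
# keys present in the first (original) hash are checked, so extra fields
# returned by the repository are ignored.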
sub request {
my ( $method, $url, $data ) = @_;
die "Method $method unknown when requesting URL $url"
unless $method =~ m/DELETE|GET|POST|PUT/;
my @args = $data ? ( $data ) : ();
my $client = REST::Client->new( { host => "http://$USER:$PASS\@$DOMAIN" } );
# Set the Content-Type and call the method with @args
$client->addHeader( content_type => "application/json" );
$client->$method( $url, @args );
# A small update message
warn join( " ", $method, $url, '-', $client->responseCode ), "\n";
# Handle failures here -- only code 200 | 201 are OK
die "Bad HTTP response " . $client->responseCode . " " . $client->responseContent
unless $client->responseCode =~ m/20[01]/;
return $client->responseContent;
}
sub find {
my ( $search_url, $error_string ) = @_;
my $response = from_json( request( "GET", $search_url ) );
if ( scalar( @{$response} ) > 1 ) { die $error_string . "\n"; }
elsif ( scalar( @{$response} ) == 1 ) { return $response->[0]; }
else { return undef; }
}
sub create {
my ( $data, $type, $controller ) = @_;
my $json = to_json( { $type => $data } );
my $return = from_json( request( "POST", "/$controller.json", $json ) );
return $return;
}
sub update {
my ( $data, $type, $controller ) = @_;
my $json = to_json( { $type => $data } );
my $return = from_json( request( "PUT", "/$controller/$data->{id}.json", $json ) );
return $return;
}
sub delete_entry {
my ( $data, $type, $controller ) = @_;
my $return = from_json( request( "DELETE", "/$controller/$data->{id}.json", undef ) );
return $return;
}
#
# Allele Methods
#
sub find_allele {
my ($allele_data) = @_;
my $loxp_start = $allele_data->{loxp_start} ? $allele_data->{loxp_start} : 'null';
my $loxp_end = $allele_data->{loxp_end} ? $allele_data->{loxp_end} : 'null';
my $search_url =
"/alleles.json"
. "?mgi_accession_id=" . $allele_data->{mgi_accession_id}
. "&assembly=" . $allele_data->{assembly}
. "&chromosome=" . $allele_data->{chromosome}
. "&strand=" . $allele_data->{strand}
. "&cassette=" . $allele_data->{cassette}
. "&backbone=" . $allele_data->{backbone}
. "&homology_arm_start=" . $allele_data->{homology_arm_start}
. "&homology_arm_end=" . $allele_data->{homology_arm_end}
. "&cassette_start=" . $allele_data->{cassette_start}
. "&cassette_end=" . $allele_data->{cassette_end}
. "&loxp_start=" . $loxp_start
. "&loxp_end=" . $loxp_end;
my $error_string =
"Error: found more than one allele for:"
. "\n - mgi_accession_id: " . $allele_data->{mgi_accession_id}
. "\n - assembly: " . $allele_data->{assembly}
. "\n - chromosome: " . $allele_data->{chromosome}
. "\n - strand: " . $allele_data->{strand}
. "\n - cassette: " . $allele_data->{cassette}
. "\n - backbone: " . $allele_data->{backbone}
. "\n - homology_arm_start: " . $allele_data->{homology_arm_start}
. "\n - homology_arm_end: " . $allele_data->{homology_arm_end}
. "\n - cassette_start: " . $allele_data->{cassette_start}
. "\n - cassette_end: " . $allele_data->{cassette_end}
. "\n - loxp_start: " . $loxp_start
. "\n - loxp_end: " . $loxp_end
. "\n";
return find( $search_url, $error_string );
}
sub create_allele {
my ($allele_data) = @_;
return create( $allele_data, 'allele', 'alleles' );
}
sub update_allele {
my ($allele_data) = @_;
return update( $allele_data, 'allele', 'alleles' );
}
sub delete_allele {
my ($allele_data) = @_;
return delete_entry( $allele_data, 'allele', 'alleles' );
}
sub find_create_update_allele {
my ($allele_data) = @_;
my $allele = find_allele($allele_data);
if ( defined $allele ) {
# We already have an allele entry, see if an update is required
my $records_match = compare( $allele_data, $allele );
unless ($records_match) {
$allele_data->{id} = $allele->{id};
$allele = update_allele($allele_data);
}
}
else {
$allele = create_allele($allele_data);
}
return $allele;
}
#
# Genbank File Methods
#
sub find_genbank {
my ($genbank_data) = @_;
my $search_url
= "/genbank_files.json" . "?allele_id=" . $genbank_data->{allele_id};
my $error_string = "Error: found more than one genbank_file for "
. $genbank_data->{allele_id};
return find( $search_url, $error_string );
}
sub create_genbank {
my ($genbank_data) = @_;
return create( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub update_genbank {
my ($genbank_data) = @_;
return update( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub delete_genbank {
my ($genbank_data) = @_;
return delete_entry( $genbank_data, 'genbank_file', 'genbank_files' );
}
sub find_create_update_genbank {
my ($genbank_data) = @_;
my $genbank = find_genbank($genbank_data);
if ( defined $genbank ) {
# We already have a genbank entry, see if an update is required
my $records_match = compare( $genbank_data, $genbank );
unless ($records_match) {
$genbank_data->{id} = $genbank->{id};
$genbank = update_genbank($genbank_data);
}
}
else {
$genbank = create_genbank($genbank_data);
}
return $genbank;
}
#
# Targeting Vector Methods
#
sub find_vector {
my ($vector_data) = @_;
my $search_url
= "/targeting_vectors.json" . "?name=" . $vector_data->{name};
my $error_string
= "Error: found more than one vector called " . $vector_data->{name};
return find( $search_url, $error_string );
}
sub create_vector {
my ($vector_data) = @_;
return create( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub update_vector {
my ($vector_data) = @_;
return update( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub delete_vector {
my ($vector_data) = @_;
return delete_entry( $vector_data, 'targeting_vector', 'targeting_vectors' );
}
sub find_create_update_vector {
my ( $vector_data, $allele_id ) = @_;
my $vector = find_vector($vector_data);
if ( defined $vector ) {
# We already have a vector entry, see if an update is required
my $records_match = compare( $vector_data, $vector );
unless ($records_match) {
$vector_data->{id} = $vector->{id};
$vector_data->{allele_id} = $vector->{allele_id};
$vector = update_vector($vector_data);
}
}
else {
$vector_data->{allele_id} = $allele_id;
$vector = create_vector($vector_data);
}
return $vector;
}
#
# ES Cell Clone Methods
#
sub find_clone {
my ($clone_data) = @_;
my $search_url = "/es_cells.json" . "?name=" . $clone_data->{name};
my $error_string
= "Error: found more than one es cell called " . $clone_data->{name};
return find( $search_url, $error_string );
}
sub create_clone {
my ($clone_data) = @_;
return create( $clone_data, 'es_cell', 'es_cells' );
}
sub update_clone {
my ($clone_data) = @_;
return update( $clone_data, 'es_cell', 'es_cells' );
}
sub delete_clone {
my ($clone_data) = @_;
return delete_entry( $clone_data, 'es_cell', 'es_cells' );
}
sub find_create_update_clone {
my ($clone_data) = @_;
my $clone = find_clone($clone_data);
if ( defined $clone ) {
# We already have a clone entry, see if an update is required
my $records_match = compare( $clone_data, $clone );
unless ($records_match) {
$clone_data->{id} = $clone->{id};
$clone = update_clone($clone_data);
}
}
else {
$clone = create_clone($clone_data);
}
return $clone;
}
#!/usr/bin/env perl
# Author:: Nelo Onyiah (mailto:[email protected])
#
# In this example we update the alleles for the
# KOMP-Regeneron pipeline, fixing the
# "allele_symbol_superscript" of each ES Cell that
# needs it.
#
# This example demonstrates that data is retrieved from
# the repository one page at a time. It also makes use
# of a generic "request" function (see earlier examples).
#
use strict;
use warnings FATAL => 'all';
use JSON;
use REST::Client;
my $domain = 'localhost:3000';
my $user = 'user';
my $pass = 'pass';
my $page = 0;
#
# Retrieve the available pipelines
my $response = request( "GET", "pipelines.json" );
my $pipelines = from_json($response);
#
# We are only interested in the KOMP-Regeneron pipeline, so let's get that
my ($regeneron) = grep { $_->{name} eq 'KOMP-Regeneron' } @{$pipelines};
#
# Due to size limits, data is returned from the repository in pages.
# Therefore, we need to process the data as such ... one page at a time.
while (1) {
my $alleles = update_es_cells_on_page( $regeneron, ++$page );
last unless @{$alleles};
}
exit 0;
#
# Generic helper function for handling the web requests to the repository.
sub request {
my ( $method, $url, $data ) = @_;
die "Method $method unknown when requesting URL $url"
unless $method =~ m/DELETE|GET|POST|PUT/;
my @args = $data ? ( $data ) : ();
my $client = REST::Client->new( { host => "http://$user:$pass\@$domain" } );
# Set the Content-Type and call the method with @args
$client->addHeader( content_type => "application/json" );
$client->$method( $url, @args );
# A small update message
warn join( " ", $method, $url, '-', $client->responseCode ), "\n";
# Handle failures here -- only code 200 | 201 are OK
die "Bad HTTP response ", $client->responseCode
unless $client->responseCode =~ m/20[01]/;
return $client->responseContent;
}
#
# Generic function to process the data retrieved from a specified page
sub update_es_cells_on_page {
my ( $pipeline, $page ) = @_;
#
# Now let's fetch all the alleles from $page (this may take a while)
my $search_params = "es_cells_pipeline_id_is=$pipeline->{id}&page=$page";
my $response = request( "GET", "alleles.json?$search_params" );
my $alleles = from_json($response);
#
# Loop through the alleles ...
for my $allele (@{$alleles}) {
for my $es_cell ( @{ $allele->{es_cells} } ) {
# ... updating the es_cells that need fixing along the way
if ( $es_cell->{allele_symbol_superscript} =~ m/^.+\<(.+)\>$/ ) {
$es_cell->{allele_symbol_superscript} = $1;
my $es_cell_json = to_json( { es_cell => $es_cell } );
request( "PUT", "es_cells/$es_cell->{id}.json", $es_cell_json );
}
}
}
#
# When there is no data, we are on the last page
warn "Found 0 KOMP-Regeneron alleles on page $page\n" unless scalar @{$alleles};
# Return the list of alleles
return $alleles;
}
#! /usr/bin/python
# Author:: Sébastien Briois (mailto:[email protected])
import httplib2 # http://httplib2.googlecode.com/files/httplib2-0.6.0.zip
import urllib
import base64
from urlparse import urljoin
try:
import json # Python 2.6
except ImportError:
import simplejson as json # Python 2.4+ - http://pypi.python.org/pypi/simplejson/2.0.9
DOMAIN = 'localhost:3000'
BASE_URL = 'http://%s/' % DOMAIN
USERNAME = 'htgt'
PASSWORD = 'htgt'
# Generic helper class for handling the web requests to the repository.
class UserAgent(object):
def __init__(self):
self.http = httplib2.Http()
self.http.add_credentials(USERNAME, PASSWORD)
self.base_url = BASE_URL
def uri_for(self, rel_url, params = None):
if params:
params = urllib.urlencode(params)
return urljoin( self.base_url, "%s?%s" % (rel_url, params) )
return urljoin( self.base_url, rel_url )
def request(self, method, rel_url, data = {}):
if method in ['GET', 'DELETE']:
uri = self.uri_for( rel_url, data )
resp, content = self.http.request( uri, method, headers = { 'Content-Type': 'application/json' } )
elif method in ['POST', 'PUT']:
uri = self.uri_for( rel_url )
data = json.dumps( data )
resp, content = self.http.request( uri, method, data, headers = { 'Content-Type': 'application/json' } )
else:
raise Exception( "Method %s unknown when requesting URL %s" % (method, rel_url) )
print "%s %s: %s" % (method, uri, resp['status'])
if resp['status'] in ['200', '201']:
# The DELETE method does not return any content
return method == 'DELETE' and True or json.loads( content )
raise Exception(content)
# Create a User Agent
ua = UserAgent()
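# Illustrative usage (hypothetical values): ua.request( 'GET', 'alleles.json', { 'page': 1 } )
# returns the parsed JSON for page 1 of the alleles listing, while
# ua.request( 'DELETE', 'alleles/1.json' ) simply returns True on success.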
def find( url, params ):
results = ua.request( 'GET', url, params )
if len(results) > 1:
raise "Your search returned more than one result."
if not results:
return None
return results[0]
#
# Allele specific methods
#
def create_allele( data ):
return ua.request( 'POST', 'alleles.json', { 'allele' : data } )
def update_allele( id, data ):
return ua.request( 'PUT', 'alleles/%s.json' % id, { 'allele' : data } )
def create_or_update_allele( data ):
allele_found = find('alleles.json', {
'mgi_accession_id' : data['mgi_accession_id'],
'assembly' : data['assembly'],
'chromosome' : data['chromosome'],
'strand' : data['strand'],
'cassette' : data['cassette'],
'backbone' : data['backbone'],
'homology_arm_start': data['homology_arm_start'],
'homology_arm_end' : data['homology_arm_end'],
'cassette_start' : data['cassette_start'],
'cassette_end' : data['cassette_end'],
'loxp_start' : data['loxp_start'] or 'null',
'loxp_end' : data['loxp_end'] or 'null'
})
if not allele_found:
return create_allele( data )
else:
return update_allele( allele_found['id'], data )
def delete_allele( id ):
ua.request( 'DELETE', "alleles/%s.json" % id )
#
# Targeting Vector specific methods
#
def create_targeting_vector( data ):
return ua.request( 'POST', 'targeting_vectors.json', { 'targeting_vector' : data } )
def update_targeting_vector( id, data ):
return ua.request( 'PUT', 'targeting_vectors/%s.json' % id, { 'targeting_vector' : data } )
def create_or_update_vector( data ):
vector_found = find( "targeting_vectors.json", { 'name': data['name'] } )
if not vector_found:
return create_targeting_vector( data )
else:
return update_targeting_vector( vector_found['id'], data )
def delete_targeting_vector( id ):
ua.request( 'DELETE', "targeting_vectors/%s.json" % id )
#
# ES Cell specific methods
#
def create_es_cell( data ):
return ua.request( 'POST', 'es_cells.json', { 'es_cell' : data } )
def update_es_cell( id, data ):
return ua.request( 'PUT', 'es_cells/%s.json' % id, { 'es_cell' : data } )
def create_or_update_es_cell( data ):
es_cell_found = find( "es_cells.json", { 'name': data['name'] } )
if not es_cell_found:
return create_es_cell( data )
else:
return update_es_cell( es_cell_found['id'], data )
def delete_es_cell( id ):
ua.request( 'DELETE', "es_cells/%s.json" % id )
##
## Main script scenario:
## - We create a data structure containing all the objects we want to create or update in the database
## - We loop over this data structure and follow this procedure:
## 1- Search the object
## 2- Object found ? Yes: Update; No: Create
##
# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
pipeline_list = ua.request( 'GET', 'pipelines.json' )
for pipeline in pipeline_list:
if pipeline['name'] == 'EUCOMM':
break
# Create our data structure
alleles = [
# First allele
{
'mgi_accession_id' : "MGI:123",
'project_design_id' : 23640,
'cassette' : "L1L2_gt2",
'backbone' : "L3L4_pZero_kan",
'assembly' : "NCBIM37",
'chromosome' : "1",
'strand' : "+",
'design_type' : "Knock Out",
'design_subtype' : "Frameshift",
'homology_arm_start' : 10,
'homology_arm_end' : 10000,
'cassette_start' : 50,
'cassette_end' : 500,
'loxp_start' : 1000,
'loxp_end' : 1500,
# Targeting vectors for the first allele
'targeting_vectors' : [
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD001',
'intermediate_vector' : 'PGS001',
'ikmc_project_id' : 9801
},
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD002',
'intermediate_vector' : 'PGS001',
'ikmc_project_id' : 9801
}
],
# ES Cells for the first allele
'es_cells' : [
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD001', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD002', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD003', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD001' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD004', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD005', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD006', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD002' }
],
# Genbank File for the first allele
'genbank_file' : {
'escell_clone' : "A GENBANK FILE IN PLAIN TEXT",
'targeting_vector' : "A GENBANK FILE IN PLAIN TEXT"
}
},
# Second allele
{
'mgi_accession_id' : "MGI:456",
'project_design_id' : 29871,
'cassette' : "L1L2_gt2",
'backbone' : "L3L4_pZero_kan",
'assembly' : "NCBIM37",
'chromosome' : "1",
'strand' : "+",
'design_type' : "Knock Out",
'design_subtype' : "Frameshift",
'homology_arm_start' : 10,
'homology_arm_end' : 10000,
'cassette_start' : 50,
'cassette_end' : 500,
'loxp_start' : 1000,
'loxp_end' : 1500,
# Targeting vectors for the second allele
'targeting_vectors' : [
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD003',
'intermediate_vector' : 'PGS002',
'ikmc_project_id' : 6809480
},
{
'pipeline_id' : pipeline['id'],
'name' : 'PRPGD004',
'intermediate_vector' : 'PGS002',
'ikmc_project_id' : 6809480
}
],
# ES Cells for the second allele
'es_cells' : [
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD007', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD008', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD009', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD003' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD010', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD011', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' },
{ 'pipeline_id' : pipeline['id'], 'name' : 'EPD012', 'allele_symbol_superscript' : 'tm1a', 'targeting_vector' : 'PRPGD004' }
]
}
]
# Create or Update Alleles - keep track of what we create so we can clean it up at the end
created_vectors, created_es_cells = [], []
for allele_hash in alleles:
# allele_hash should not contain unknown fields
targeting_vectors = allele_hash.pop( 'targeting_vectors' )
es_cells = allele_hash.pop( 'es_cells' )
genbank_file = allele_hash.pop( 'genbank_file', None ) # genbank data is not used further in this example
created_vectors.extend( targeting_vectors )
created_es_cells.extend( es_cells )
allele = create_or_update_allele( allele_hash )
allele_hash['id'] = allele['id']
# Create or Update Targeting Vectors
for vector_hash in targeting_vectors:
vector_hash['allele_id'] = allele['id']
vector = create_or_update_vector( vector_hash )
vector_hash['id'] = vector['id']
# Find, Create or Update ES Cells
for es_cell_hash in es_cells:
es_cell_hash['allele_id'] = allele['id']
# Find targeting vector ID from its name or set it to nil
# if ES Cell is not linked to a targeting vector
if 'targeting_vector' in es_cell_hash:
targ_vec_name = es_cell_hash.pop('targeting_vector')
for vector in targeting_vectors:
if vector['name'] == targ_vec_name:
break
es_cell_hash['targeting_vector_id'] = vector['id']
else:
es_cell_hash['targeting_vector_id'] = None
es_cell = create_or_update_es_cell( es_cell_hash )
es_cell_hash['id'] = es_cell['id']
# DELETE everything we just created (cleanup for the example)
for es_cell in created_es_cells: delete_es_cell( es_cell['id'] )
for vector in created_vectors: delete_targeting_vector( vector['id'] )
for allele in alleles: delete_allele( allele['id'] )
#! /usr/bin/env ruby -wKU
# Author:: Sébastien Briois (mailto:[email protected])
require "rubygems"
require "rest_client"
require "json"
DOMAIN = RestClient::Resource.new( "http://user:password@localhost:3000" )
# Generic helper method for handling the web calls to the repository.
def request( method, url, data = nil )
response =
case method.upcase
when "GET" then DOMAIN[url].get
when "POST" then DOMAIN[url].post data, :content_type => "application/json"
when "PUT" then DOMAIN[url].put data, :content_type => "application/json"
when "DELETE" then DOMAIN[url].delete
else
raise "Method #{method} unknown when requesting url #{url}"
end
puts "#{method} #{url} - #{response.code} #{RestClient::STATUSES[response.code]}"
return response.body
end
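# Illustrative usage: request( 'GET', 'pipelines.json' ) returns the raw JSON
# string listing the pipelines; for POST/PUT pass a JSON body, e.g.
# request( 'POST', 'alleles.json', JSON.generate( { :allele => data } ) )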
#
# Allele specific methods
#
def find_allele( allele )
params = ""
params << "mgi_accession_id=" + allele[:mgi_accession_id]
params << "&assembly=" + allele[:assembly]
params << "&chromosome=" + allele[:chromosome]
params << "&strand=" + allele[:strand]
params << "&cassette=" + allele[:cassette]
params << "&backbone=" + allele[:backbone]
params << "&homology_arm_start=" + allele[:homology_arm_start]
params << "&homology_arm_end=" + allele[:homology_arm_end]
params << "&cassette_start=" + allele[:cassette_start]
params << "&cassette_end=" + allele[:cassette_end]
# Will find a conditional allele or a non-conditional allele
if allele[:loxp_start] and allele[:loxp_end]
params << "&loxp_start=#{allele[:loxp_start]}&loxp_end=#{allele[:loxp_end]}"
else
params << "&loxp_start=null&loxp_end=null"
end
# Request for all the alleles that match the params.
# The '.json' indicates that we want a JSON string as a response.
response = request( 'GET', "alleles.json?#{params}" )
# This will be a list whether the request returned one allele or more.
allele_list = JSON.parse( response )
# If the search is not specific enough and returns more than 1 allele
if allele_list.length > 1
raise "Your search returned more than one allele, please refine it."
end
if allele_list.length == 1
return allele_list[0]
end
return nil
end
def create_allele( data )
json = JSON.generate({ :allele => data })
response = request( 'POST', 'alleles.json', json )
allele = JSON.parse( response )
return allele
end
def update_allele( id, data )
json = JSON.generate({ :allele => data })
response = request( 'PUT', "alleles/#{id}.json", json )
allele = JSON.parse( response )
return allele
end
def delete_allele( id )
request( 'DELETE', "alleles/#{id}" )
end
#
# Targeting Vector specific methods
#
def find_targeting_vector( vector )
response = request( 'GET', "targeting_vectors.json?name=#{vector['name']}" )
targeting_vector_list = JSON.parse( response )
if targeting_vector_list == 1
return targeting_vector_list[0]
end
return nil
end
def create_targeting_vector( data )
json = JSON.generate({ :targeting_vector => data })
response = request( 'POST', 'targeting_vectors.json', json )
vector = JSON.parse( response )
return vector
end
def update_targeting_vector( id, data )
json = JSON.generate({ :targeting_vector => data })
response = request( 'PUT', "targeting_vectors/#{id}.json", json )
vector = JSON.parse( response )
return vector
end
def delete_targeting_vector( id )
request( 'DELETE', "targeting_vectors/#{id}" )
end
#
# ES Cell specific methods
#
def find_es_cell( cell )
response = request( 'GET', "es_cells.json?name=#{cell['name']}" )
es_cell_list = JSON.parse( response )
if es_cell_list == 1
return es_cell_list[0]
end
return nil
end
def create_es_cell( cell )
json = JSON.generate({ :es_cell => cell })
response = request( 'POST', 'es_cells.json', json )
cell = JSON.parse( response )
return cell
end
def update_es_cell( id, data )
json = JSON.generate({ :es_cell => data })
response = request( 'PUT', "es_cells/#{id}.json", json )
cell = JSON.parse( response )
return cell
end
def delete_es_cell( id )
request( 'DELETE', "es_cells/#{id}" )
end
##
## Main script scenario:
## - We create a data structure containing all the objects we want to create or update in the database
## - We loop over this data structure and follow this procedure:
## 1- Search the object
## 2- Object found ? Yes: Update; No: Create
##
# We will work with the data linked to the pipeline named "EUCOMM", let's find its ID
response = request( 'GET', 'pipelines.json' )
pipeline_list = JSON.parse( response )
pipeline = pipeline_list.find { |pipeline| pipeline['name'] == 'EUCOMM' }
# Create our data structure
alleles = [
# First allele
{
:mgi_accession_id => "MGI:123",
:project_design_id => 23640,
:cassette => "L1L2_gt2",
:backbone => "L3L4_pZero_kan",
:assembly => "NCBIM37",
:chromosome => "1",
:strand => "+",
:design_type => "Knock Out",
:design_subtype => "Frameshift",
:homology_arm_start => 10,
:homology_arm_end => 10000,
:cassette_start => 50,
:cassette_end => 500,
:loxp_start => 1000,
:loxp_end => 1500,
# Targeting vectors for the first allele
:targeting_vectors => [
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD001',
:intermediate_vector => 'PGS001',
:ikmc_project_id => 9801
},
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD002',
:intermediate_vector => 'PGS001',
:ikmc_project_id => 9801
}
],
# ES Cells for the first allele
:es_cells => [
{ :pipeline_id => pipeline['id'], :name => 'EPD001', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD002', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD003', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD001' },
{ :pipeline_id => pipeline['id'], :name => 'EPD004', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
{ :pipeline_id => pipeline['id'], :name => 'EPD005', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' },
{ :pipeline_id => pipeline['id'], :name => 'EPD006', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD002' }
],
# Genbank File for the first allele
:genbank_file => {
:escell_clone => "A GENBANK FILE IN PLAIN TEXT",
:targeting_vector => "A GENBANK FILE IN PLAIN TEXT"
}
},
# Second allele
{
:mgi_accession_id => "MGI:456",
:project_design_id => 29871,
:cassette => "L1L2_gt2",
:backbone => "L3L4_pZero_kan",
:assembly => "NCBIM37",
:chromosome => "1",
:strand => "+",
:design_type => "Knock Out",
:design_subtype => "Frameshift",
:homology_arm_start => 10,
:homology_arm_end => 10000,
:cassette_start => 50,
:cassette_end => 500,
:loxp_start => 1000,
:loxp_end => 1500,
# Targeting vectors for the second allele
:targeting_vectors => [
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD003',
:intermediate_vector => 'PGS002',
:ikmc_project_id => 6809480
},
{
:pipeline_id => pipeline['id'],
:name => 'PRPGD004',
:intermediate_vector => 'PGS002',
:ikmc_project_id => 6809480
}
],
# ES Cells for the second allele
:es_cells => [
{ :pipeline_id => pipeline['id'], :name => 'EPD007', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD008', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD009', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD003' },
{ :pipeline_id => pipeline['id'], :name => 'EPD010', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
{ :pipeline_id => pipeline['id'], :name => 'EPD011', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' },
{ :pipeline_id => pipeline['id'], :name => 'EPD012', :allele_symbol_superscript => 'tm1a', :targeting_vector => 'PRPGD004' }
]
}
]
created_vectors, created_es_cells = [], []   # remember what we create so we can clean up at the end
alleles.each do |allele_hash|
# allele_hash should not contain unknown fields
targeting_vectors = allele_hash.delete( :targeting_vectors )
es_cells = allele_hash.delete( :es_cells )
allele_hash.delete( :genbank_file ) # genbank data is not used further in this example
created_vectors.concat( targeting_vectors )
created_es_cells.concat( es_cells )
# Find, Create or Update allele
allele_found = find_allele( allele_hash )
if allele_found.nil?
allele = create_allele( allele_hash )
else
# If allele has been found, it has an "id"
allele = update_allele( allele_found['id'], allele_hash )
end
# Our allele now has an ID
allele_hash[:id] = allele['id']
# Find, Create or Update Targeting Vector
targeting_vectors.each do |vector_hash|
vector_hash[:allele_id] = allele_hash[:id]
vector_found = find_targeting_vector( vector_hash )
if vector_found.nil?
vector = create_targeting_vector( vector_hash )
else
vector = update_targeting_vector( vector_found['id'], vector_hash )
end
vector_hash[:id] = vector['id']
end
# Find, Create or Update ES Cell
es_cells.each do |es_cell_hash|
# ES Cell must be linked to a Molecular Structure
es_cell_hash[:allele_id] = allele_hash[:id]
# If ES Cell is linked to a targeting vector, retrieve its ID
if es_cell_hash.include? :targeting_vector
es_cell_hash[:targeting_vector_id] =
targeting_vectors.find { |v| v[:name] == es_cell_hash[:targeting_vector] }['id']
else
es_cell_hash[:targeting_vector_id] = nil
end
# Find, Create or Update ES Cell
es_cell_found = find_es_cell( es_cell_hash )
if es_cell_found.nil?
es_cell = create_es_cell( es_cell_hash )
else
es_cell = update_es_cell( es_cell_found['id'], es_cell_hash )
end
es_cell_hash[:id] = es_cell['id']
end
end
# DELETE everything we just created (cleanup for the example)
created_es_cells.each { |es_cell| delete_es_cell( es_cell[:id] ) }
created_vectors.each { |vector| delete_targeting_vector( vector[:id] ) }
alleles.each { |allele| delete_allele( allele[:id] ) }