Skip to content

Instantly share code, notes, and snippets.

@bwalsh
Created April 15, 2016 15:59
Show Gist options
  • Select an option

  • Save bwalsh/c4655f7006281daa924a612ee12e6eb8 to your computer and use it in GitHub Desktop.

Select an option

Save bwalsh/c4655f7006281daa924a612ee12e6eb8 to your computer and use it in GitHub Desktop.
Initial setup of a new biodata backend for Elasticsearch
# git diff
diff --git a/ga4gh/datamodel/datasets.py b/ga4gh/datamodel/datasets.py
index 25f3f7f..c07537e 100644
--- a/ga4gh/datamodel/datasets.py
+++ b/ga4gh/datamodel/datasets.py
@@ -358,3 +358,66 @@ class FileSystemDataset(AbstractDataset):
except KeyError as err:
raise exceptions.MissingDatasetMetadataException(
metadataFileName, str(err))
+
+class FileSystemElasticSearchDataset(FileSystemDataset):
+ """
+ Similar to FileSystemDataset, except the biodata is served from
+ elasticsearch.
+ """
+ # NOTE(review): every biodata method below is still a placeholder that
+ # falls through and returns None; none of them calls elasticsearch yet.
+
+ def __init__(self, localId, dataDir, dataRepository):
+ # NOTE(review): dataDir and dataRepository are accepted but unused here;
+ # only localId is forwarded to the parent initialiser - confirm that
+ # this matches the parent's signature.
+ super(FileSystemElasticSearchDataset, self).__init__(localId)
+
+ def addBioSample(self, bioSample):
+ # Placeholder: the ID is read from the BioSample but nothing is stored.
+ id_ = bioSample.getId()
+ # TODO - call ES
+ pass
+
+ def addIndividual(self, individual):
+ # Placeholder: the ID is read from the Individual but nothing is stored.
+ id_ = individual.getId()
+ # TODO - call ES
+ pass
+
+ def getBioSample(self, id_):
+ """
+ Returns the BioSample with the specified ID, or raises a
+ BioSampleNotFoundException otherwise.
+ """
+ # NOTE(review): placeholder - currently returns None for any id_ and
+ # never raises the exception described above.
+ # TODO - call ES
+ pass
+
+ def getBioSamples(self):
+ """
+ Returns all BioSamples in this dataset
+ """
+ # NOTE(review): placeholder - currently returns None, not a collection.
+ # TODO - call ES
+ pass
+
+ def getIndividual(self, id_):
+ """
+ Returns the Individual with the specified ID, or raises a
+ IndividualNotFoundException otherwise.
+ """
+ # NOTE(review): placeholder - currently returns None for any id_ and
+ # never raises the exception described above.
+ # TODO - call ES
+ pass
+
+ def getIndividuals(self):
+ """
+ Returns all Individuals in this dataset
+ """
+ # NOTE(review): placeholder - currently returns None, not a collection.
+ # TODO - call ES
+ pass
+
+ def getBioSampleByIndex(self, index):
+ """
+ Returns the BioSample set at the specified index in this dataset.
+ """
+ # NOTE(review): placeholder - index is ignored and None is returned.
+ # TODO - call ES
+ pass
+
+ def getIndividualByIndex(self, index):
+ """
+ Returns the Individual set at the specified index in this dataset.
+ """
+ # NOTE(review): placeholder - index is ignored and None is returned.
+ # TODO - call ES
+ pass
diff --git a/ga4gh/datarepo.py b/ga4gh/datarepo.py
index 14a265c..9571e2c 100644
--- a/ga4gh/datarepo.py
+++ b/ga4gh/datarepo.py
@@ -212,3 +212,11 @@ class FileSystemDataRepository(AbstractDataRepository):
readGroupSet.checkConsistency(self)
for variantSet in dataset.getVariantSets():
variantSet.checkConsistency()
+
+class FileSystemElasticSearchDataRepository(FileSystemDataRepository):
+    """
+    A FileSystemDataRepository variant intended to build its datasets
+    with datasets.FileSystemElasticSearchDataset.
+    """
+
+    def __init__(self, dataDir):
+        """
+        Initialises the repository from the specified data directory.
+        """
+        # FileSystemDataRepository is constructed with a single path
+        # argument (see frontend.configure), so dataDir must be forwarded;
+        # calling the parent initialiser with no arguments raises TypeError.
+        super(FileSystemElasticSearchDataRepository, self).__init__(dataDir)
+        # NOTE(review): this list is assigned but never used in this
+        # method; wiring it into repository construction is still TODO.
+        constructors = [
+            references.HtslibReferenceSet, ontologies.FileSystemOntologies,
+            datasets.FileSystemElasticSearchDataset]
diff --git a/ga4gh/frontend.py b/ga4gh/frontend.py
index 849e154..84d6257 100644
# Ideally, adding a backend shouldn't require changing so many (or any) existing core Python files.
--- a/ga4gh/frontend.py
+++ b/ga4gh/frontend.py
@@ -213,6 +213,10 @@ def configure(configFile=None, baseConfig="ProductionConfig",
elif dataSource.scheme == "file":
dataRepository = datarepo.FileSystemDataRepository(os.path.join(
dataSource.netloc, dataSource.path))
+ elif dataSource.scheme == "file-elasticsearch":
+ dataRepository = datarepo.FileSystemElasticSearchDataRepository(
+ os.path.join(dataSource.netloc, dataSource.path))
+
else:
raise exceptions.ConfigurationException(
"Unsupported data source scheme: " + dataSource.scheme)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment