Python scripts to compare Solr and Elasticsearch response times for the same set of InterPro queries: one script records Elasticsearch's `took` value per search, the other records Solr's `QTime`, and each prints a timing table when the run finishes.
The first script targets Elasticsearch, using terms aggregations (with cardinality for distinct counts and top_hits to emulate Solr's result grouping):
import unittest
import http.client
import json


class InterproElasticTest(unittest.TestCase):
    """Benchmarks the InterPro Elasticsearch index."""

    server = "hmmer-prod-db01"
    port = 9200
    response_times = []
    # Lucene-style query strings (already URL-encoded) mapped to report labels.
    queries = {
        "*:*": "Number of Docs per entry_db",
        "protein_db:s": "Number of Swissprot Docs per entry_db",
        "protein_db:t": "Number of Trembl Docs per entry_db",
        "protein_acc:a0a0a2t3z9": "Number of Docs with protein Acc per entry_db",
        "structure_acc:*": "Number of Docs with structure per entry_db",
        "structure_acc:3nyw": "Number of Docs with structure Acc per entry_db",
        "!entry_db:interpro%20AND%20!integrated:*": "Number of Unintegrated Docs per entry_db",
    }

    def setUp(self):
        self._connection = http.client.HTTPConnection(self.server, self.port)

    def tearDown(self):
        self._connection.close()

    @classmethod
    def tearDownClass(cls):
        # Print the timing table collected across all tests.
        print("\n" + ("*" * 80))
        print("-= ElasticSearch response times =-".center(80) + "\n")
        for k, v in cls.response_times:
            print("{:>68}: {:>8}".format(k, v))

    def test_elastic_index_exists(self):
        self._connection.request("GET", "/interpro")
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")

    def test_elastic_search_returns_json(self):
        self._connection.request("GET", "/interpro/relationship/_search")
        response = self._connection.getresponse()
        data = response.read().decode()
        try:
            json.loads(data)
        except json.JSONDecodeError:
            self.fail("The document is not JSON")

    def _elastic_json_query(self, q, query_obj):
        # Elasticsearch accepts a JSON body on GET _search requests.
        self._connection.request(
            "GET",
            "/interpro/relationship/_search?pretty&q=" + q,
            json.dumps(query_obj)
        )
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")
        data = response.read().decode()
        try:
            obj = json.loads(data)
            self.assertIn("took", obj)
            self.assertIn("hits", obj)
            self.assertIn("aggregations", obj)
            return obj
        except json.JSONDecodeError:
            self.fail("The document is not JSON")

    def test_number_of_docs_per_entry_db(self):
        facet = {
            "aggs": {
                "rscount": {
                    "terms": {"field": "entry_db"}
                }
            },
            "size": 0
        }
        for q, tag in self.queries.items():
            response = self._elastic_json_query(q, facet)
            self.response_times.append((tag, response["took"]))

    def test_number_of_unique_entries_per_entry_db(self):
        facet = {
            "aggs": {
                "rscount": {
                    "terms": {"field": "entry_db"},
                    # Count distinct entry accessions within each database bucket.
                    "aggs": {
                        "unique_entries": {
                            "cardinality": {"field": "entry_acc"}
                        }
                    }
                }
            },
            "size": 0
        }
        for q, tag in self.queries.items():
            response = self._elastic_json_query(q, facet)
            self.response_times.append((tag + " (unique entries)", response["took"]))

    def test_number_of_unique_proteins_per_entry_db(self):
        facet = {
            "aggs": {
                "rscount": {
                    "terms": {"field": "entry_db"},
                    "aggs": {
                        "unique_proteins": {
                            "cardinality": {"field": "protein_acc"}
                        }
                    }
                }
            },
            "size": 0
        }
        for q, tag in self.queries.items():
            response = self._elastic_json_query(q, facet)
            self.response_times.append((tag + " (unique proteins)", response["took"]))

    def test_grouping_entries(self):
        fq = "{}:*%20AND%20{}_acc:{}".format("entry_acc", "protein", "protein_64440985")
        for q, tag in self.queries.items():
            response = self._elastic_group_query(q, "entry_acc", 1, 0)
            self.response_times.append((tag + " (group)", response["took"]))
            response = self._elastic_group_query(q, "entry_acc", 1, 0, fq)
            self.response_times.append((tag + " (group+fq)", response["took"]))

    def _elastic_group_query(self, q, field, rows, start, fq=""):
        # Emulate Solr result grouping: a terms aggregation with a top_hits
        # sub-aggregation. `start` is kept for signature parity with the Solr
        # version; pagination is not applied here.
        query_obj = {
            "size": 0,
            "aggs": {
                "by_entry": {
                    "terms": {
                        "field": field,
                        "size": rows
                    },
                    "aggs": {
                        "tops": {
                            "top_hits": {"size": 1}
                        }
                    }
                }
            }
        }
        if fq != "":
            fq = "%20AND%20" + fq
        self._connection.request(
            "GET",
            "/interpro/relationship/_search?pretty&q=" + q + fq,
            json.dumps(query_obj)
        )
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")
        data = response.read().decode()
        try:
            obj = json.loads(data)
            self.assertIn("took", obj)
            self.assertIn("hits", obj)
            self.assertIn("aggregations", obj)
            return obj
        except json.JSONDecodeError:
            self.fail("The document is not JSON")


if __name__ == '__main__':
    unittest.main()
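For reference, the grouping emulation above (a terms aggregation wrapping a top_hits sub-aggregation) can be exercised on its own. This is a minimal sketch, not part of the original gist: the host is a placeholder (localhost here), and it assumes the same interpro index and fields as the test class.

import http.client
import json

# Standalone run of the terms + top_hits query built in _elastic_group_query.
# Host and index are assumptions; adjust to your cluster.
body = {
    "size": 0,
    "aggs": {
        "by_entry": {
            "terms": {"field": "entry_acc", "size": 1},
            "aggs": {"tops": {"top_hits": {"size": 1}}}
        }
    }
}
conn = http.client.HTTPConnection("localhost", 9200)
conn.request("GET", "/interpro/relationship/_search?q=*:*", json.dumps(body))
result = json.loads(conn.getresponse().read().decode())
print(result["took"])  # the same millisecond figure the tests record
for bucket in result["aggregations"]["by_entry"]["buckets"]:
    print(bucket["key"], bucket["doc_count"])
conn.close()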
The second script runs the equivalent queries against Solr, using the JSON Facet API and result grouping, and records each response's `QTime`:
import unittest
import http.client
import json
import urllib.parse


class InterproSolrTest(unittest.TestCase):
    """Benchmarks the InterPro Solr core with the same set of queries."""

    server = "hmmer-prod-db02"
    port = 8983
    response_times = {}
    # Solr query strings mapped to report labels.
    queries = {
        "*:*": "Number of Docs per entry_db",
        "protein_db:swissprot": "Number of Swissprot Docs per entry_db",
        "protein_db:trembl": "Number of Trembl Docs per entry_db",
        "protein_acc:protein_64440985": "Number of Docs with protein Acc per entry_db",
        "structure_acc:*": "Number of Docs with structure per entry_db",
        "structure_acc:protein_32860": "Number of Docs with structure Acc per entry_db",
        "!entry_db:interpro && !integrated:*": "Number of Unintegrated Docs per entry_db",
    }

    def setUp(self):
        self._connection = http.client.HTTPConnection(self.server, self.port)

    def tearDown(self):
        self._connection.close()

    @classmethod
    def tearDownClass(cls):
        # Print the timing table collected across all tests.
        print("\n*********\n-= Solr response times =-\n")
        for k, v in cls.response_times.items():
            print("{:>60}: {:>8}".format(k, v))

    def test_solr_core_exists(self):
        self._connection.request("GET", "/solr/generated/select")
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")

    def test_solr_core_returns_json(self):
        self._connection.request("GET", "/solr/generated/select?indent=on&wt=json")
        response = self._connection.getresponse()
        data = response.read().decode()
        try:
            json.loads(data)
        except json.JSONDecodeError:
            self.fail("The document is not JSON")

    def _solr_json_query(self, q, json_facet):
        params = {
            "indent": "on",
            "wt": "json",
            "q": q,
            "rows": "0",
            "facet": "on",
            # Serialize explicitly rather than relying on Solr's lenient
            # parser accepting Python's dict repr.
            "json.facet": json.dumps(json_facet)
        }
        self._connection.request("GET", "/solr/generated/select?" + urllib.parse.urlencode(params))
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")
        data = response.read().decode()
        try:
            obj = json.loads(data)
            self.assertIn("responseHeader", obj)
            self.assertIn("response", obj)
            self.assertIn("facets", obj)
            return obj
        except json.JSONDecodeError:
            self.fail("The document is not JSON")

    @unittest.skip("yes")
    def test_number_of_docs_per_entry_db(self):
        facet = {
            "databases": {
                "type": "terms",
                "field": "entry_db",
            }
        }
        for q, tag in self.queries.items():
            response = self._solr_json_query(q, facet)
            self.response_times[tag] = response["responseHeader"]["QTime"]

    @unittest.skip("yes")
    def test_number_of_unique_entries_per_entry_db(self):
        facet = {
            "databases": {
                "type": "terms",
                "field": "entry_db",
                # unique() is Solr's counterpart to the Elasticsearch
                # cardinality aggregation.
                "facet": {
                    "unique": "unique(entry_acc)"
                }
            }
        }
        for q, tag in self.queries.items():
            response = self._solr_json_query(q, facet)
            self.response_times[tag + " (unique entries)"] = response["responseHeader"]["QTime"]

    @unittest.skip("yes")
    def test_number_of_unique_proteins_per_entry_db(self):
        facet = {
            "databases": {
                "type": "terms",
                "field": "entry_db",
                "facet": {
                    "unique": "unique(protein_acc)"
                }
            }
        }
        for q, tag in self.queries.items():
            response = self._solr_json_query(q, facet)
            self.response_times[tag + " (unique proteins)"] = response["responseHeader"]["QTime"]

    def test_grouping_entries(self):
        fq = "{}:* && {}_acc:{}".format("entry_acc", "protein", "protein_64440985")
        for q, tag in self.queries.items():
            response = self._solr_group_query(q, "entry_acc", 1, 0)
            self.response_times[tag + " (group)"] = response["responseHeader"]["QTime"]
            response = self._solr_group_query(q, "entry_acc", 1, 0, fq)
            self.response_times[tag + " (group+fq)"] = response["responseHeader"]["QTime"]

    def _solr_group_query(self, q, field, rows, start, fq=None):
        # Native Solr result grouping; ngroups adds the total group count.
        params = {
            "indent": "on",
            "wt": "json",
            "group": "true",
            "group.field": field,
            "group.ngroups": "true",
            "rows": rows,
            "start": start,
            "q": q,
        }
        if fq is not None:
            params["fq"] = fq.lower()
        self._connection.request("GET", "/solr/generated/select?" + urllib.parse.urlencode(params))
        response = self._connection.getresponse()
        self.assertEqual(response.status, 200, "The response should be OK")
        data = response.read().decode()
        try:
            obj = json.loads(data)
            self.assertIn("responseHeader", obj)
            self.assertIn("grouped", obj)
            return obj
        except json.JSONDecodeError:
            self.fail("The document is not JSON")


if __name__ == '__main__':
    unittest.main()
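The Solr side can be exercised the same way. Again a minimal sketch rather than part of the gist, with host and core name as placeholder assumptions:

import http.client
import json
import urllib.parse

# Standalone run of the JSON facet used in the unique-entries test.
# Host and core name are assumptions; adjust to your deployment.
facet = {
    "databases": {
        "type": "terms",
        "field": "entry_db",
        "facet": {"unique": "unique(entry_acc)"}
    }
}
params = urllib.parse.urlencode({
    "wt": "json",
    "q": "*:*",
    "rows": "0",
    "json.facet": json.dumps(facet),
})
conn = http.client.HTTPConnection("localhost", 8983)
conn.request("GET", "/solr/generated/select?" + params)
result = json.loads(conn.getresponse().read().decode())
print(result["responseHeader"]["QTime"])  # Solr's own timing, in milliseconds
for bucket in result["facets"]["databases"]["buckets"]:
    print(bucket["val"], bucket["count"], bucket["unique"])
conn.close()

Either script can be run directly (it ends with unittest.main()); the timing table is printed once per run by tearDownClass.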