Skip to content

Instantly share code, notes, and snippets.

@lbjay
Created February 1, 2011 19:01
Show Gist options
  • Save lbjay/806397 to your computer and use it in GitHub Desktop.
Save lbjay/806397 to your computer and use it in GitHub Desktop.
script for testing facet queries using a compressed bitset
import sys
import urllib2
from invenio import intbitset
from optparse import OptionParser
import simplejson
import mimetools
def _read_and_close(response):
    """Return the whole body of *response*, closing it even if the read fails."""
    try:
        return response.read()
    finally:
        response.close()


def _multipart_file_body(boundary, payload):
    """Wrap *payload* as a single-file multipart/form-data request body.

    The payload is presented as an uploaded file named "bitset" so that solr
    will read our data as a stream.

    boundary -- multipart boundary string (without the leading dashes)
    payload  -- raw data to place in the file part, inserted unmodified
    """
    return ''.join([
        '--%s\r\n' % boundary,
        'Content-Disposition: form-data; name="bitset"; filename="bitset"\r\n',
        'Content-Type: application/octet-stream\r\n',
        '\r\n',
        payload,
        '\r\n',
        '--%s--\r\n\r\n' % boundary,
    ])


if __name__ == '__main__':
    # Command-line driver: run a query through the invenio_query handler,
    # load the response into an intbitset, then post that bitset to the
    # invenio_facets handler and print the decoded JSON reply.
    op = OptionParser()
    op.set_usage("usage: bitset_facet_query.py [options] ")
    op.add_option('--query', dest='query', action='store',
                  help='solr query', type=str, default="galaxy cluster")
    op.add_option('--solr_url', dest='solr_url', action='store',
                  help='solr url', type=str,
                  default="http://localhost:8983/solr")
    opts, args = op.parse_args()

    facet_query_url = "%s/invenio_facets" % opts.solr_url
    invenio_query_url = "%s/select?qt=invenio_query&q=%s" % (
        opts.solr_url, urllib2.quote(opts.query))
    print(invenio_query_url)

    # query to get a bitset; the response body is fed straight to fastload()
    bitset = intbitset.intbitset()
    bitset.fastload(_read_and_close(urllib2.urlopen(invenio_query_url)))
    print("query result: " + str(bitset))
    print("results length: %d" % len(bitset.tolist()))

    # now use the bitset to fetch the facet data
    r = urllib2.Request(facet_query_url)
    boundary = mimetools.choose_boundary()
    # fool solr into thinking we're uploading a file so it will read our
    # data as a stream
    r.add_data(_multipart_file_body(boundary, bitset.fastdump()))
    r.add_unredirected_header(
        'Content-Type', 'multipart/form-data; boundary=%s' % boundary)

    # post the request and get back the facets as json
    u = urllib2.urlopen(r)
    try:
        facets = simplejson.load(u)
    finally:
        # always release the connection, even if the reply is not valid JSON
        u.close()
    print(facets)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment