/*
 * This file is part of the LIRE project: http://www.semanticmetadata.net/lire
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * We kindly ask you to cite any or one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE,
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2013 by Mathias Lux ([email protected])
 * http://www.semanticmetadata.net/lire, http://www.lire-project.net
 */

package net.semanticmetadata.lire.solr;

import net.semanticmetadata.lire.imageanalysis.EdgeHistogram;
import net.semanticmetadata.lire.imageanalysis.LireFeature;
import net.semanticmetadata.lire.impl.SimpleResult;
import net.semanticmetadata.lire.indexing.hashing.BitSampling;
import net.semanticmetadata.lire.utils.ImageUtils;
import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.util.*;

/**
 * This is the main LIRE RequestHandler for the Solr Plugin. It supports query by example using the indexed id,
 * a URL, or a feature vector. Furthermore, feature extraction and random selection of images are supported.
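 * <p>
 * Example requests (the handler path "/lireq" and all parameter values are illustrative only):
 * <pre>
 * /lireq?id=img_000123                       ... search by the id of an indexed document
 * /lireq?url=http://example.com/img.jpg      ... search by image URL
 * /lireq?hashes=1f03,2ab4&amp;feature=...    ... search by hashes and a Base64 feature vector
 * /lireq?extract=http://example.com/img.jpg  ... extract histogram and hashes only
 * /lireq                                     ... random documents
 * </pre>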
 *
 * @author Mathias Lux, [email protected], 07.07.13
 */

public class LireRequestHandler extends RequestHandlerBase {

    private HashMap<Integer, Integer> docCount = new HashMap<Integer, Integer>();

    // private static HashMap<String, Class> fieldToClass = new HashMap<String, Class>(5);
    private long time = 0;
    private int countRequests = 0;
    private int defaultNumberOfResults = 60;
    /**
     * Number of candidate results retrieved from the index. The higher this number, the slower
     * but the more accurate the retrieval will be. 10k is a good value for starters.
     */
    private int numberOfCandidateResults = 10000;
    private static final int DEFAULT_NUMBER_OF_CANDIDATES = 10000;

    /**
     * The number of query terms that go along with the TermsFilter search. We need some to get a
     * score; the fewer they are, the faster the search runs. I put down a minimum of three in the
     * method; this value gives the percentage of the overall number used (selected randomly).
     */
    private double numberOfQueryTerms = 0.33;
    private static final double DEFAULT_NUMBER_OF_QUERY_TERMS = 0.33;

    static {
        // one-time read of the hash functions ...
        try {
            BitSampling.readHashFunctions();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void init(NamedList args) {
        super.init(args);
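        // Load the per-hash document frequencies. The loader below assumes one
        // "<hash>,<count>" pair per line, both as decimal integers, e.g. "12345,678"
        // (the example values are illustrative only).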
        try (BufferedReader br = new BufferedReader(new FileReader("/var/solr/data/anime_cl/histogram.csv"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(",");
                docCount.put(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
            }
        } catch (Exception e) {
            // Log instead of swallowing silently; the handler then starts with an empty frequency map.
            e.printStackTrace();
        }
    }

    /**
     * Handles five types of requests:
     * <ol>
     * <li>search by hashes and a pre-extracted feature vector,</li>
     * <li>search by an image URL,</li>
     * <li>search by the id of an indexed document,</li>
     * <li>feature extraction from an image URL,</li>
     * <li>random results otherwise.</li>
     * </ol>
     *
     * @param req
     * @param rsp
     * @throws Exception
     */
    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        // (1) check if the necessary parameters are here
        if (req.getParams().get("hashes") != null) { // we are searching by hashes ...
            handleHashSearch(req, rsp);
        } else if (req.getParams().get("url") != null) { // we are searching for an image based on a URL
            handleUrlSearch(req, rsp);
        } else if (req.getParams().get("id") != null) { // we are searching for an image based on its indexed id
            handleIdSearch(req, rsp);
        } else if (req.getParams().get("extract") != null) { // we are trying to extract features from an image URL
            handleExtract(req, rsp);
        } else { // let's return random results.
            handleRandomSearch(req, rsp);
        }
    }

    /**
     * Handles the GET parameters id, field and rows.
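     * Example (handler path and id are illustrative): {@code /lireq?id=img_000123&field=cl_ha&rows=20}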
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleIdSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrIndexSearcher searcher = req.getSearcher();
        try {
            TopDocs hits = searcher.search(new TermQuery(new Term("id", req.getParams().get("id"))), 1);
            String paramField = "cl_ha";
            if (req.getParams().get("field") != null)
                paramField = req.getParams().get("field");
            LireFeature queryFeature = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            rsp.add("QueryField", paramField);
            rsp.add("QueryFeature", queryFeature.getClass().getName());
            numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
            numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
            if (hits.scoreDocs.length > 0) {
                // Using DocValues to get the actual data from the index.
                BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), FeatureRegistry.getFeatureFieldName(paramField));
                if (binaryValues == null)
                    System.err.println("Could not find the DocValues of the query document. Are they in the index?");
                BytesRef bytesRef = binaryValues.get(hits.scoreDocs[0].doc);
                // Document d = searcher.getIndexReader().document(hits.scoreDocs[0].doc);
                // String histogramFieldName = paramField.replace("_ha", "_hi");
                queryFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
                int paramRows = defaultNumberOfResults;
                if (req.getParams().getInt("rows") != null)
                    paramRows = req.getParams().getInt("rows");
                // Re-generating the hashes to save space (instead of storing them in the index).
                int[] hashes = BitSampling.generateHashes(queryFeature.getDoubleHistogram());
                List<Term> termFilter = createTermFilter(hashes, paramField);
                doSearch(req, rsp, searcher, paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, "*"), queryFeature);
            } else {
                rsp.add("Error", "Did not find an image with the given id " + req.getParams().get("id"));
            }
        } catch (Exception e) {
            rsp.add("Error", "There was an error with your search for the image with the id " + req.getParams().get("id")
                    + ": " + e.getMessage());
        }
    }

    /**
     * Returns a random set of documents from the index. Mainly for testing purposes.
     *
     * @param req
     * @param rsp
     * @throws IOException
     */
    private void handleRandomSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
        SolrIndexSearcher searcher = req.getSearcher();
        DirectoryReader indexReader = searcher.getIndexReader();
        double maxDoc = indexReader.maxDoc();
        int paramRows = defaultNumberOfResults;
        if (req.getParams().getInt("rows") != null)
            paramRows = req.getParams().getInt("rows");
        LinkedList<HashMap<String, String>> list = new LinkedList<HashMap<String, String>>();
        while (list.size() < paramRows) {
            HashMap<String, String> m = new HashMap<String, String>(2);
            Document d = indexReader.document((int) Math.floor(Math.random() * maxDoc));
            m.put("id", d.getValues("id")[0]);
            m.put("title", d.getValues("title")[0]);
            list.add(m);
        }
        rsp.add("docs", list);
    }

    /**
     * Searches for an image given by a URL. Note that (i) extracting image features takes time and
     * (ii) not every image is readable by Java.
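     * Example (handler path and URL are illustrative): {@code /lireq?url=http://example.com/image.jpg&accuracy=0.5&candidates=5000}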
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleUrlSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("url");
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        int paramRows = defaultNumberOfResults;
        if (params.get("rows") != null)
            paramRows = params.getInt("rows");
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        LireFeature feat = null;
        List<Term> termFilter = null;
        int[] hashes = null;
        // wrapping the whole part in a try block
        try {
            BufferedImage img = ImageIO.read(new URL(paramUrl).openStream());
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (paramField == null || FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new EdgeHistogram();
            else {
                feat = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);
            hashes = BitSampling.generateHashes(feat.getDoubleHistogram());
            termFilter = createTermFilter(hashes, paramField);

            ArrayList<String> hashStrings = new ArrayList<String>(hashes.length);
            for (int i = 0; i < hashes.length; i++) {
                hashStrings.add(Integer.toHexString(hashes[i]));
            }
            rsp.add("hashes", hashStrings);
        } catch (Exception e) {
            rsp.add("Error", "Error reading image from URL " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
        // search if the feature has been extracted.
        if (feat != null)
            doSearch(req, rsp, req.getSearcher(), paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, "*"), feat);
    }

    private void handleExtract(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("extract");
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        // int paramRows = defaultNumberOfResults;
        // if (params.get("rows") != null)
        //     paramRows = params.getInt("rows");
        LireFeature feat = null;
        // BooleanQuery query = null;
        // wrapping the whole part in a try block
        try {
            BufferedImage img = ImageIO.read(new URL(paramUrl).openStream());
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (paramField == null || FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new EdgeHistogram();
            else {
                feat = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);
            rsp.add("histogram", Base64.encodeBase64String(feat.getByteArrayRepresentation()));
            int[] hashes = BitSampling.generateHashes(feat.getDoubleHistogram());
            ArrayList<String> hashStrings = new ArrayList<String>(hashes.length);
            for (int i = 0; i < hashes.length; i++) {
                hashStrings.add(Integer.toHexString(hashes[i]));
            }
            // Collections.shuffle(hashStrings);
            rsp.add("hashes", hashStrings);
            // just use 50% of the hashes for search ...
            // query = createTermFilter(hashes, paramField, 0.5d);
        } catch (Exception e) {
            // rsp.add("Error", "Error reading image from URL: " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
        // search if the feature has been extracted.
        // if (feat != null) doSearch(rsp, req.getSearcher(), paramField, paramRows, query, feat);
    }

    /**
     * Search based on the given image hashes.
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void handleHashSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, IllegalAccessException, InstantiationException {
        SolrParams params = req.getParams();
        SolrIndexSearcher searcher = req.getSearcher();
        // get the params needed:
        // hashes=<hex>,<hex>,... (comma separated, as parsed below)
        // feature=<base64 encoded feature vector>
        // field=<cl_ha|ph_ha|...>

        String[] hashStrings = params.get("hashes").trim().split(",");
        int[] hashes = new int[hashStrings.length]; // sized to the input (was a fixed 100, which overflowed for longer hash lists)
        byte[] featureVector = Base64.decodeBase64(params.get("feature"));
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        int paramRows = defaultNumberOfResults;
        if (params.getInt("rows") != null)
            paramRows = params.getInt("rows");
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        // parse the hashes:
        // System.out.println("** Creating query.");
        LinkedList<Term> termFilter = new LinkedList<Term>();
        for (int i = 0; i < hashStrings.length; i++) {
            hashes[i] = Integer.parseInt(hashStrings[i], 16);
            // be aware that the hash functions file of the field must match the one used when the hashes were indexed.
            // hashStrings[i] = hashStrings[i].trim();
            // if (hashStrings[i].length() > 0) {
            //     termFilter.add(new Term(paramField, hashStrings[i].trim()));
            //     System.out.println("** " + field + ": " + hashes[i].trim());
            // }
        }
        // Collections.shuffle(termFilter);
        // for (int k = 0; k < termFilter.size() * numberOfQueryTerms; k++) {
        //     query.add(new BooleanClause(new TermQuery(termFilter.get(k)), BooleanClause.Occur.SHOULD));
        // }
        // System.out.println("** Doing search.");

        // query feature
        LireFeature queryFeature = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
        queryFeature.setByteArrayRepresentation(featureVector);

        // get results:
        // doSearch(req, rsp, searcher, paramField, paramRows, termFilter, new MatchAllDocsQuery(), queryFeature);
        String idFilter = req.getParams().get("filter", "*"); // default to "*" (no id filter) to avoid an NPE in createQuery
        doSearch(req, rsp, req.getSearcher(), paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, idFilter), queryFeature);
    }

    /**
     * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
     *
     * @param req
     * @param rsp
     * @param searcher
     * @param hashFieldName the hash field name
     * @param maximumHits
     * @param terms
     * @param query
     * @param queryFeature
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher, String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature) throws IOException, IllegalAccessException, InstantiationException {
        // temp feature instance
        LireFeature tmpFeature = queryFeature.getClass().newInstance();
        // Taking the time of search for statistical purposes.
        time = System.currentTimeMillis();

        Filter filter = null;
        // if the request contains a filter:
        if (req.getParams().get("fq") != null) {
            // only filters of the form [<field>:<value> ]+ are supported
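            // e.g. fq=year:2010 type:photo (field names and values are illustrative; tokens are split at ':')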
            StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
            LinkedList<Term> filterTerms = new LinkedList<Term>();
            while (st.hasMoreElements()) {
                String[] tmpToken = st.nextToken().split(":");
                if (tmpToken.length > 1) {
                    filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
                }
            }
            if (filterTerms.size() > 0)
                filter = new TermsFilter(filterTerms);
        }

        TopDocs docs; // with query only.
        if (filter == null) {
            docs = searcher.search(query, numberOfCandidateResults);
        } else {
            docs = searcher.search(query, filter, numberOfCandidateResults);
        }
        // TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults); // with TermsFilter and boosting by simple query
        // TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
        time = time == 0 ? 0 : System.currentTimeMillis() - time;
        rsp.add("RawDocsCount", docs.scoreDocs.length + "");
        rsp.add("RawDocsSearchTime", time + "");
        // re-rank
        time = System.currentTimeMillis();
        TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
        float maxDistance = -1f;
        float tmpScore;

        String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
        // iterating and re-ranking the documents.
        BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);
        BytesRef bytesRef;
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            // using DocValues to retrieve the field values ...
            bytesRef = binaryValues.get(docs.scoreDocs[i].doc);
            tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
            // Getting the document from the index instead would be the slow step, based on the
            // field compression of stored fields:
            // tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
            tmpScore = queryFeature.getDistance(tmpFeature);
            // if (tmpScore > 20) continue;
            if (resultScoreDocs.size() < maximumHits) { // todo: There's potential here for a memory saver, think of a clever data structure that can do the trick without creating a new SimpleResult for each result.
                resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
                maxDistance = resultScoreDocs.last().getDistance();
            } else if (tmpScore < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                resultScoreDocs.remove(resultScoreDocs.last());
                // add the new one ...
                resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
                // and set our new distance border ...
                maxDistance = resultScoreDocs.last().getDistance();
            }
        }
        // System.out.println("** Creating response.");
        time = time == 0 ? 0 : System.currentTimeMillis() - time;
        rsp.add("ReRankSearchTime", time + "");
        LinkedList<HashMap<String, Object>> list = new LinkedList<HashMap<String, Object>>();
        for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext(); ) {
            SimpleResult result = it.next();
            HashMap<String, Object> m = new HashMap<String, Object>(2);
            m.put("d", result.getDistance());
            // add fields as requested:
            if (req.getParams().get("fl") == null) {
                m.put("id", result.getDocument().get("id"));
                if (result.getDocument().get("title") != null)
                    m.put("title", result.getDocument().get("title"));
            } else {
                String fieldsRequested = req.getParams().get("fl");
                if (fieldsRequested.contains("score")) {
                    m.put("score", result.getDistance());
                }
                if (fieldsRequested.contains("*")) {
                    // all fields
                    for (IndexableField field : result.getDocument().getFields()) {
                        String tmpField = field.name();
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                } else {
                    StringTokenizer st;
                    if (fieldsRequested.contains(","))
                        st = new StringTokenizer(fieldsRequested, ",");
                    else
                        st = new StringTokenizer(fieldsRequested, " ");
                    while (st.hasMoreElements()) {
                        String tmpField = st.nextToken();
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                }
            }
            // m.put(field, result.getDocument().get(field));
            // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
            list.add(m);
        }
        rsp.add("docs", list);
        // rsp.add("Test-name", "Test-val");
    }

    @Override
    public String getDescription() {
        return "LIRE Request Handler to add images to an index and search them. Search images by id, by URL and by extracted features.";
    }

    @Override
    public String getSource() {
        return "http://lire-project.net";
    }

    @Override
    public NamedList<Object> getStatistics() {
        // Change stats here to get insight in the admin console.
        NamedList<Object> statistics = super.getStatistics();
        statistics.add("Number of Requests", countRequests);
        return statistics;
    }

    private BooleanQuery createQuery(int[] hashes, String paramField, double size, String idFilter) {

        List<Integer> hList = new ArrayList<Integer>(hashes.length);

        // debug output: dump the query hashes and their document frequencies ...
        try {
            PrintWriter writer = new PrintWriter("/tmp/!.txt", "UTF-8");
            for (int i = 0; i < hashes.length; i++) {
                // containsKey guards against an NPE from unboxing when a hash is missing from the histogram.
                if (docCount.containsKey(hashes[i]) && docCount.get(hashes[i]) > 0 && docCount.get(hashes[i]) < 300000000) {
                    writer.println(hashes[i]);
                    writer.println(docCount.get(hashes[i]));
                }
            }
            writer.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }

        // keep only the hashes with a known, sane document frequency.
        for (int i = 0; i < hashes.length; i++) {
            if (docCount.containsKey(hashes[i]) && docCount.get(hashes[i]) > 0 && docCount.get(hashes[i]) < 300000000) {
                hList.add(hashes[i]);
            }
        }

        // remove duplicates
        Set<Integer> hs = new HashSet<>();
        hs.addAll(hList);
        hList.clear();
        hList.addAll(hs);

        // sort ascending by document frequency, i.e. rarest hashes first.
        Comparator<Integer> compareByFrequency = new Comparator<Integer>() {
            @Override
            public int compare(Integer h1, Integer h2) {
                return Integer.compare(docCount.get(h1), docCount.get(h2));
            }
        };
        Collections.sort(hList, compareByFrequency);

        // debug output: the sorted document frequencies ...
        try {
            PrintWriter writer = new PrintWriter("/tmp/!!.txt", "UTF-8");
            for (int i = 0; i < hList.size(); i++) {
                // writer.println(hList.get(i));
                writer.println(docCount.get(hList.get(i)));
            }
            writer.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }

        int[] offsets1 = {0, 1, 2, 3, 1, 2, 1, 0, 0, 0};
        int[] offsets2 = {1, 2, 3, 4, 3, 4, 4, 2, 3, 4};
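        // Hash term selection (as implemented below): 'size' carries the "accuracy" parameter and
        // offset = (int) (size - 1). For offset 0..3 the query requires two of the rarest hashes,
        // picked via offsets1[offset] and offsets2[offset] from the frequency-sorted list; for
        // larger values a single hash at position offset - 4 is required instead.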
        BooleanQuery query = new BooleanQuery();
        int offset = (int) (size - 1);
        if (offset < 4) {
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offsets1[offset])))), BooleanClause.Occur.MUST));
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offsets2[offset])))), BooleanClause.Occur.MUST));
            if (!idFilter.equals("*")) {
                query.add(new BooleanClause(new WildcardQuery(new Term("id", idFilter)), BooleanClause.Occur.MUST));
            }
        } else {
            offset = offset - 4;
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offset)))), BooleanClause.Occur.MUST));
            if (!idFilter.equals("*")) {
                query.add(new BooleanClause(new WildcardQuery(new Term("id", idFilter)), BooleanClause.Occur.MUST));
            }
        }
        return query;
    }

    /**
     * Creates a TermsFilter ... should be used to select candidates from the index based on many terms.
     * We still need a minimum query along with it, else we would not get appropriate results.
     *
     * @param hashes
     * @param paramField
     * @return
     */
    private List<Term> createTermFilter(int[] hashes, String paramField) {
        LinkedList<Term> termFilter = new LinkedList<Term>();
        for (int i = 0; i < hashes.length; i++) {
            // be aware that the hash functions file of the field must match the one used when the hashes were indexed.
            termFilter.add(new Term(paramField, Integer.toHexString(hashes[i])));
        }
        return termFilter;
    }
}