/*
 * This file is part of the LIRE project: http://www.semanticmetadata.net/lire
 * LIRE is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * LIRE is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with LIRE; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * We kindly ask you to cite any or one of the following publications in
 * any publication mentioning or employing Lire:
 *
 * Lux Mathias, Savvas A. Chatzichristofis. Lire: Lucene Image Retrieval –
 * An Extensible Java CBIR Library. In proceedings of the 16th ACM International
 * Conference on Multimedia, pp. 1085-1088, Vancouver, Canada, 2008
 * URL: http://doi.acm.org/10.1145/1459359.1459577
 *
 * Lux Mathias. Content Based Image Retrieval with LIRE. In proceedings of the
 * 19th ACM International Conference on Multimedia, pp. 735-738, Scottsdale,
 * Arizona, USA, 2011
 * URL: http://dl.acm.org/citation.cfm?id=2072432
 *
 * Mathias Lux, Oge Marques. Visual Information Retrieval using Java and LIRE,
 * Morgan & Claypool, 2013
 * URL: http://www.morganclaypool.com/doi/abs/10.2200/S00468ED1V01Y201301ICR025
 *
 * Copyright statement:
 * --------------------
 * (c) 2002-2013 by Mathias Lux ([email protected])
 * http://www.semanticmetadata.net/lire, http://www.lire-project.net
 */

package net.semanticmetadata.lire.solr;

import net.semanticmetadata.lire.imageanalysis.EdgeHistogram;
import net.semanticmetadata.lire.imageanalysis.LireFeature;
import net.semanticmetadata.lire.impl.SimpleResult;
import net.semanticmetadata.lire.indexing.hashing.BitSampling;
import net.semanticmetadata.lire.utils.ImageUtils;
import org.apache.commons.codec.binary.Base64;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queries.TermsFilter;
import org.apache.lucene.search.*;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.SolrIndexSearcher;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.*;
import java.net.URL;
import java.util.*;

/**
 * This is the main LIRE RequestHandler for the Solr Plugin. It supports query by example using the indexed id,
 * a URL, or a feature vector. Furthermore, feature extraction and random selection of images are supported.
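 * <p>
 * Example requests (the handler path "/lireq" and all parameter values are illustrative only):
 * <pre>
 * /lireq?id=img_000123                       ... search by the id of an indexed document
 * /lireq?url=http://example.com/img.jpg      ... search by image URL
 * /lireq?hashes=1f03,2ab4&amp;feature=...    ... search by hashes and a Base64 feature vector
 * /lireq?extract=http://example.com/img.jpg  ... extract histogram and hashes only
 * /lireq                                     ... random documents
 * </pre>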
 *
 * @author Mathias Lux, [email protected], 07.07.13
 */

public class LireRequestHandler extends RequestHandlerBase {

    private HashMap<Integer, Integer> docCount = new HashMap<Integer, Integer>();

    // private static HashMap<String, Class> fieldToClass = new HashMap<String, Class>(5);
    private long time = 0;
    private int countRequests = 0;
    private int defaultNumberOfResults = 60;
    /**
     * Number of candidate results retrieved from the index. The higher this number, the slower
     * but the more accurate the retrieval will be. 10k is a good value for starters.
     */
    private int numberOfCandidateResults = 10000;
    private static final int DEFAULT_NUMBER_OF_CANDIDATES = 10000;

    /**
     * The number of query terms that go along with the TermsFilter search. We need some to get a
     * score; the fewer they are, the faster the search runs. I put down a minimum of three in the
     * method; this value gives the percentage of the overall number used (selected randomly).
     */
    private double numberOfQueryTerms = 0.33;
    private static final double DEFAULT_NUMBER_OF_QUERY_TERMS = 0.33;

    static {
        // one-time read of the hash functions ...
        try {
            BitSampling.readHashFunctions();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void init(NamedList args) {
        super.init(args);
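        // Load the per-hash document frequencies. The loader below assumes one
        // "<hash>,<count>" pair per line, both as decimal integers, e.g. "12345,678"
        // (the example values are illustrative only).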
        try (BufferedReader br = new BufferedReader(new FileReader("/var/solr/data/anime_cl/histogram.csv"))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] parts = line.split(",");
                docCount.put(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]));
            }
        } catch (Exception e) {
            // Log instead of swallowing silently; the handler then starts with an empty frequency map.
            e.printStackTrace();
        }
    }

    /**
     * Handles five types of requests:
     * <ol>
     * <li>search by hashes and a pre-extracted feature vector,</li>
     * <li>search by an image URL,</li>
     * <li>search by the id of an indexed document,</li>
     * <li>feature extraction from an image URL,</li>
     * <li>random results otherwise.</li>
     * </ol>
     *
     * @param req
     * @param rsp
     * @throws Exception
     */
    @Override
    public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
        // (1) check if the necessary parameters are here
        if (req.getParams().get("hashes") != null) { // we are searching by hashes ...
            handleHashSearch(req, rsp);
        } else if (req.getParams().get("url") != null) { // we are searching for an image based on a URL
            handleUrlSearch(req, rsp);
        } else if (req.getParams().get("id") != null) { // we are searching for an image based on its indexed id
            handleIdSearch(req, rsp);
        } else if (req.getParams().get("extract") != null) { // we are trying to extract features from an image URL
            handleExtract(req, rsp);
        } else { // let's return random results.
            handleRandomSearch(req, rsp);
        }
    }

    /**
     * Handles the GET parameters id, field and rows.
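     * Example (handler path and id are illustrative): {@code /lireq?id=img_000123&field=cl_ha&rows=20}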
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleIdSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrIndexSearcher searcher = req.getSearcher();
        try {
            TopDocs hits = searcher.search(new TermQuery(new Term("id", req.getParams().get("id"))), 1);
            String paramField = "cl_ha";
            if (req.getParams().get("field") != null)
                paramField = req.getParams().get("field");
            LireFeature queryFeature = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            rsp.add("QueryField", paramField);
            rsp.add("QueryFeature", queryFeature.getClass().getName());
            numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
            numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
            if (hits.scoreDocs.length > 0) {
                // Using DocValues to get the actual data from the index.
                BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), FeatureRegistry.getFeatureFieldName(paramField));
                if (binaryValues == null)
                    System.err.println("Could not find the DocValues of the query document. Are they in the index?");
                BytesRef bytesRef = binaryValues.get(hits.scoreDocs[0].doc);
                // Document d = searcher.getIndexReader().document(hits.scoreDocs[0].doc);
                // String histogramFieldName = paramField.replace("_ha", "_hi");
                queryFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
                int paramRows = defaultNumberOfResults;
                if (req.getParams().getInt("rows") != null)
                    paramRows = req.getParams().getInt("rows");
                // Re-generating the hashes to save space (instead of storing them in the index).
                int[] hashes = BitSampling.generateHashes(queryFeature.getDoubleHistogram());
                List<Term> termFilter = createTermFilter(hashes, paramField);
                doSearch(req, rsp, searcher, paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, "*"), queryFeature);
            } else {
                rsp.add("Error", "Did not find an image with the given id " + req.getParams().get("id"));
            }
        } catch (Exception e) {
            rsp.add("Error", "There was an error with your search for the image with the id " + req.getParams().get("id")
                    + ": " + e.getMessage());
        }
    }

    /**
     * Returns a random set of documents from the index. Mainly for testing purposes.
     *
     * @param req
     * @param rsp
     * @throws IOException
     */
    private void handleRandomSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
        SolrIndexSearcher searcher = req.getSearcher();
        DirectoryReader indexReader = searcher.getIndexReader();
        double maxDoc = indexReader.maxDoc();
        int paramRows = defaultNumberOfResults;
        if (req.getParams().getInt("rows") != null)
            paramRows = req.getParams().getInt("rows");
        LinkedList<HashMap<String, String>> list = new LinkedList<HashMap<String, String>>();
        while (list.size() < paramRows) {
            HashMap<String, String> m = new HashMap<String, String>(2);
            Document d = indexReader.document((int) Math.floor(Math.random() * maxDoc));
            m.put("id", d.getValues("id")[0]);
            m.put("title", d.getValues("title")[0]);
            list.add(m);
        }
        rsp.add("docs", list);
    }

    /**
     * Searches for an image given by a URL. Note that (i) extracting image features takes time and
     * (ii) not every image is readable by Java.
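     * Example (handler path and URL are illustrative): {@code /lireq?url=http://example.com/image.jpg&accuracy=0.5&candidates=5000}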
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws InstantiationException
     * @throws IllegalAccessException
     */
    private void handleUrlSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("url");
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        int paramRows = defaultNumberOfResults;
        if (params.get("rows") != null)
            paramRows = params.getInt("rows");
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        LireFeature feat = null;
        List<Term> termFilter = null;
        int[] hashes = null;
        // wrapping the whole part in a try block
        try {
            BufferedImage img = ImageIO.read(new URL(paramUrl).openStream());
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (paramField == null || FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new EdgeHistogram();
            else {
                feat = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);
            hashes = BitSampling.generateHashes(feat.getDoubleHistogram());
            termFilter = createTermFilter(hashes, paramField);

            ArrayList<String> hashStrings = new ArrayList<String>(hashes.length);
            for (int i = 0; i < hashes.length; i++) {
                hashStrings.add(Integer.toHexString(hashes[i]));
            }
            rsp.add("hashes", hashStrings);
        } catch (Exception e) {
            rsp.add("Error", "Error reading image from URL " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
        // search if the feature has been extracted.
        if (feat != null)
            doSearch(req, rsp, req.getSearcher(), paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, "*"), feat);
    }

    private void handleExtract(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException {
        SolrParams params = req.getParams();
        String paramUrl = params.get("extract");
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        // int paramRows = defaultNumberOfResults;
        // if (params.get("rows") != null)
        //     paramRows = params.getInt("rows");
        LireFeature feat = null;
        // BooleanQuery query = null;
        // wrapping the whole part in a try block
        try {
            BufferedImage img = ImageIO.read(new URL(paramUrl).openStream());
            img = ImageUtils.trimWhiteSpace(img);
            // getting the right feature per field:
            if (paramField == null || FeatureRegistry.getClassForHashField(paramField) == null) // if the feature is not registered.
                feat = new EdgeHistogram();
            else {
                feat = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
            }
            feat.extract(img);
            rsp.add("histogram", Base64.encodeBase64String(feat.getByteArrayRepresentation()));
            int[] hashes = BitSampling.generateHashes(feat.getDoubleHistogram());
            ArrayList<String> hashStrings = new ArrayList<String>(hashes.length);
            for (int i = 0; i < hashes.length; i++) {
                hashStrings.add(Integer.toHexString(hashes[i]));
            }
            // Collections.shuffle(hashStrings);
            rsp.add("hashes", hashStrings);
            // just use 50% of the hashes for search ...
            // query = createTermFilter(hashes, paramField, 0.5d);
        } catch (Exception e) {
            // rsp.add("Error", "Error reading image from URL: " + paramUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
        // search if the feature has been extracted.
        // if (feat != null) doSearch(rsp, req.getSearcher(), paramField, paramRows, query, feat);
    }

    /**
     * Search based on the given image hashes.
     *
     * @param req
     * @param rsp
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void handleHashSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, IllegalAccessException, InstantiationException {
        SolrParams params = req.getParams();
        SolrIndexSearcher searcher = req.getSearcher();
        // get the params needed:
        // hashes=<hex>,<hex>,... (comma separated, as parsed below)
        // feature=<base64 encoded feature vector>
        // field=<cl_ha|ph_ha|...>

        String[] hashStrings = params.get("hashes").trim().split(",");
        int[] hashes = new int[hashStrings.length]; // sized to the input (was a fixed 100, which overflowed for longer hash lists)
        byte[] featureVector = Base64.decodeBase64(params.get("feature"));
        String paramField = "cl_ha";
        if (req.getParams().get("field") != null)
            paramField = req.getParams().get("field");
        int paramRows = defaultNumberOfResults;
        if (params.getInt("rows") != null)
            paramRows = params.getInt("rows");
        numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS);
        numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES);
        // parse the hashes:
        // System.out.println("** Creating query.");
        LinkedList<Term> termFilter = new LinkedList<Term>();
        for (int i = 0; i < hashStrings.length; i++) {
            hashes[i] = Integer.parseInt(hashStrings[i], 16);
            // be aware that the hash functions file of the field must match the one used when the hashes were indexed.
            // hashStrings[i] = hashStrings[i].trim();
            // if (hashStrings[i].length() > 0) {
            //     termFilter.add(new Term(paramField, hashStrings[i].trim()));
            //     System.out.println("** " + field + ": " + hashes[i].trim());
            // }
        }
        // Collections.shuffle(termFilter);
        // for (int k = 0; k < termFilter.size() * numberOfQueryTerms; k++) {
        //     query.add(new BooleanClause(new TermQuery(termFilter.get(k)), BooleanClause.Occur.SHOULD));
        // }
        // System.out.println("** Doing search.");

        // query feature
        LireFeature queryFeature = (LireFeature) FeatureRegistry.getClassForHashField(paramField).newInstance();
        queryFeature.setByteArrayRepresentation(featureVector);

        // get results:
        // doSearch(req, rsp, searcher, paramField, paramRows, termFilter, new MatchAllDocsQuery(), queryFeature);
        String idFilter = req.getParams().get("filter", "*"); // default to "*" (no id filter) to avoid an NPE in createQuery
        doSearch(req, rsp, req.getSearcher(), paramField, paramRows, termFilter, createQuery(hashes, paramField, numberOfQueryTerms, idFilter), queryFeature);
    }

    /**
     * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking.
     *
     * @param req
     * @param rsp
     * @param searcher
     * @param hashFieldName the hash field name
     * @param maximumHits
     * @param terms
     * @param query
     * @param queryFeature
     * @throws IOException
     * @throws IllegalAccessException
     * @throws InstantiationException
     */
    private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher, String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature) throws IOException, IllegalAccessException, InstantiationException {
        // temp feature instance
        LireFeature tmpFeature = queryFeature.getClass().newInstance();
        // Taking the time of search for statistical purposes.
        time = System.currentTimeMillis();

        Filter filter = null;
        // if the request contains a filter:
        if (req.getParams().get("fq") != null) {
            // only filters of the form [<field>:<value> ]+ are supported
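            // e.g. fq=year:2010 type:photo (field names and values are illustrative; tokens are split at ':')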
            StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " ");
            LinkedList<Term> filterTerms = new LinkedList<Term>();
            while (st.hasMoreElements()) {
                String[] tmpToken = st.nextToken().split(":");
                if (tmpToken.length > 1) {
                    filterTerms.add(new Term(tmpToken[0], tmpToken[1]));
                }
            }
            if (filterTerms.size() > 0)
                filter = new TermsFilter(filterTerms);
        }

        TopDocs docs; // with query only.
        if (filter == null) {
            docs = searcher.search(query, numberOfCandidateResults);
        } else {
            docs = searcher.search(query, filter, numberOfCandidateResults);
        }
        // TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults); // with TermsFilter and boosting by simple query
        // TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter
        time = time == 0 ? 0 : System.currentTimeMillis() - time;
        rsp.add("RawDocsCount", docs.scoreDocs.length + "");
        rsp.add("RawDocsSearchTime", time + "");
        // re-rank
        time = System.currentTimeMillis();
        TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>();
        float maxDistance = -1f;
        float tmpScore;

        String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName);
        // iterating and re-ranking the documents.
        BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName);
        BytesRef bytesRef;
        for (int i = 0; i < docs.scoreDocs.length; i++) {
            // using DocValues to retrieve the field values ...
            bytesRef = binaryValues.get(docs.scoreDocs[i].doc);
            tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length);
            // Getting the document from the index instead would be the slow step, based on the
            // field compression of stored fields:
            // tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length);
            tmpScore = queryFeature.getDistance(tmpFeature);
            // if (tmpScore > 20) continue;
            if (resultScoreDocs.size() < maximumHits) { // todo: There's potential here for a memory saver, think of a clever data structure that can do the trick without creating a new SimpleResult for each result.
                resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
                maxDistance = resultScoreDocs.last().getDistance();
            } else if (tmpScore < maxDistance) {
                // if it is nearer to the sample than at least one of the current set:
                // remove the last one ...
                resultScoreDocs.remove(resultScoreDocs.last());
                // add the new one ...
                resultScoreDocs.add(new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc));
                // and set our new distance border ...
                maxDistance = resultScoreDocs.last().getDistance();
            }
        }
        // System.out.println("** Creating response.");
        time = time == 0 ? 0 : System.currentTimeMillis() - time;
        rsp.add("ReRankSearchTime", time + "");
        LinkedList<HashMap<String, Object>> list = new LinkedList<HashMap<String, Object>>();
        for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext(); ) {
            SimpleResult result = it.next();
            HashMap<String, Object> m = new HashMap<String, Object>(2);
            m.put("d", result.getDistance());
            // add fields as requested:
            if (req.getParams().get("fl") == null) {
                m.put("id", result.getDocument().get("id"));
                if (result.getDocument().get("title") != null)
                    m.put("title", result.getDocument().get("title"));
            } else {
                String fieldsRequested = req.getParams().get("fl");
                if (fieldsRequested.contains("score")) {
                    m.put("score", result.getDistance());
                }
                if (fieldsRequested.contains("*")) {
                    // all fields
                    for (IndexableField field : result.getDocument().getFields()) {
                        String tmpField = field.name();
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                } else {
                    StringTokenizer st;
                    if (fieldsRequested.contains(","))
                        st = new StringTokenizer(fieldsRequested, ",");
                    else
                        st = new StringTokenizer(fieldsRequested, " ");
                    while (st.hasMoreElements()) {
                        String tmpField = st.nextToken();
                        if (result.getDocument().getFields(tmpField).length > 1) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField));
                        } else if (result.getDocument().getFields(tmpField).length > 0) {
                            m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue());
                        }
                    }
                }
            }
            // m.put(field, result.getDocument().get(field));
            // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field));
            list.add(m);
        }
        rsp.add("docs", list);
        // rsp.add("Test-name", "Test-val");
    }

    @Override
    public String getDescription() {
        return "LIRE Request Handler to add images to an index and search them. Search images by id, by URL and by extracted features.";
    }

    @Override
    public String getSource() {
        return "http://lire-project.net";
    }

    @Override
    public NamedList<Object> getStatistics() {
        // Change stats here to get insight in the admin console.
        NamedList<Object> statistics = super.getStatistics();
        statistics.add("Number of Requests", countRequests);
        return statistics;
    }

    private BooleanQuery createQuery(int[] hashes, String paramField, double size, String idFilter) {

        List<Integer> hList = new ArrayList<Integer>(hashes.length);

        // debug output: dump the query hashes and their document frequencies ...
        try {
            PrintWriter writer = new PrintWriter("/tmp/!.txt", "UTF-8");
            for (int i = 0; i < hashes.length; i++) {
                // containsKey guards against an NPE from unboxing when a hash is missing from the histogram.
                if (docCount.containsKey(hashes[i]) && docCount.get(hashes[i]) > 0 && docCount.get(hashes[i]) < 300000000) {
                    writer.println(hashes[i]);
                    writer.println(docCount.get(hashes[i]));
                }
            }
            writer.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }

        // keep only the hashes with a known, sane document frequency.
        for (int i = 0; i < hashes.length; i++) {
            if (docCount.containsKey(hashes[i]) && docCount.get(hashes[i]) > 0 && docCount.get(hashes[i]) < 300000000) {
                hList.add(hashes[i]);
            }
        }

        // remove duplicates
        Set<Integer> hs = new HashSet<>();
        hs.addAll(hList);
        hList.clear();
        hList.addAll(hs);

        // sort ascending by document frequency, i.e. rarest hashes first.
        Comparator<Integer> compareByFrequency = new Comparator<Integer>() {
            @Override
            public int compare(Integer h1, Integer h2) {
                return Integer.compare(docCount.get(h1), docCount.get(h2));
            }
        };
        Collections.sort(hList, compareByFrequency);

        // debug output: the sorted document frequencies ...
        try {
            PrintWriter writer = new PrintWriter("/tmp/!!.txt", "UTF-8");
            for (int i = 0; i < hList.size(); i++) {
                // writer.println(hList.get(i));
                writer.println(docCount.get(hList.get(i)));
            }
            writer.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }

        int[] offsets1 = {0, 1, 2, 3, 1, 2, 1, 0, 0, 0};
        int[] offsets2 = {1, 2, 3, 4, 3, 4, 4, 2, 3, 4};
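        // Hash term selection (as implemented below): 'size' carries the "accuracy" parameter and
        // offset = (int) (size - 1). For offset 0..3 the query requires two of the rarest hashes,
        // picked via offsets1[offset] and offsets2[offset] from the frequency-sorted list; for
        // larger values a single hash at position offset - 4 is required instead.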
        BooleanQuery query = new BooleanQuery();
        int offset = (int) (size - 1);
        if (offset < 4) {
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offsets1[offset])))), BooleanClause.Occur.MUST));
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offsets2[offset])))), BooleanClause.Occur.MUST));
            if (!idFilter.equals("*")) {
                query.add(new BooleanClause(new WildcardQuery(new Term("id", idFilter)), BooleanClause.Occur.MUST));
            }
        } else {
            offset = offset - 4;
            query.add(new BooleanClause(new TermQuery(new Term(paramField, Integer.toHexString(hList.get(offset)))), BooleanClause.Occur.MUST));
            if (!idFilter.equals("*")) {
                query.add(new BooleanClause(new WildcardQuery(new Term("id", idFilter)), BooleanClause.Occur.MUST));
            }
        }
        return query;
    }

    /**
     * Creates a TermsFilter ... should be used to select candidates from the index based on many terms.
     * We still need a minimum query along with it, else we would not get appropriate results.
     *
     * @param hashes
     * @param paramField
     * @return
     */
    private List<Term> createTermFilter(int[] hashes, String paramField) {
        LinkedList<Term> termFilter = new LinkedList<Term>();
        for (int i = 0; i < hashes.length; i++) {
            // be aware that the hash functions file of the field must match the one used when the hashes were indexed.
            termFilter.add(new Term(paramField, Integer.toHexString(hashes[i])));
        }
        return termFilter;
    }
}