anvie · June 8, 2012 15:40
diff --git a/AgeAndContentScoreQueryTest.java b/AgeAndContentScoreQueryTest.java
 package tests;

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.WhitespaceAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.NumericField;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.search.ScoreDoc;
  import org.apache.lucene.search.TopDocs;
  import org.apache.lucene.search.function.CustomScoreQuery;
  import org.apache.lucene.search.function.IntFieldSource;
  import org.apache.lucene.search.function.ValueSourceQuery;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.util.Version;

  import junit.framework.TestCase;

  /**
   * Demonstrates how to blend a Lucene content (tf-idf) score with a
   * Gaussian-shaped "age relevance" score using a {@link CustomScoreQuery}.
   *
   * <p>Three small documents are indexed in memory, each with a text field and
   * a numeric year-of-birth field. The test then searches the text field and
   * re-scores every hit by how close the person's age is to a preferred age.
   */
  public class AgeAndContentScoreQueryTest extends TestCase
  {
     /** Year against which ages are derived from the indexed year of birth. */
     protected static final int REFERENCE_YEAR = 2011;

     /**
      * Custom score query that multiplies the sub-query (content) score by a
      * Gaussian weight centred on a preferred age.
      */
     public class AgeAndContentScoreQuery extends CustomScoreQuery
     {
        /** Age at which the age relevance reaches its maximum of 1.0. */
        protected float peakX;
        /** Standard deviation of the Gaussian, in years. */
        protected float sigma;

        /**
         * @param subQuery    query producing the content score
         * @param valSrcQuery query supplying the year-of-birth value per document
         * @param peakX       age for which the age relevance is best
         * @param sigma       standard deviation of the age relevance curve; must be positive
         * @throws IllegalArgumentException if {@code sigma} is not strictly positive
         */
        public AgeAndContentScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery, float peakX, float sigma) {
           super(subQuery, valSrcQuery);
           // Rejects zero, negative and NaN: any of them would yield NaN or a flat curve.
           if (!(sigma > 0.0f)) {
              throw new IllegalArgumentException("sigma must be > 0, got " + sigma);
           }
           this.setStrict(true); // do not normalize score values from ValueSourceQuery!
           this.peakX = peakX;
           this.sigma = sigma;
        }

        /**
         * Computes the Gaussian age relevance {@code exp(-(age - peakX)^2 / (2 * sigma^2))}.
         *
         * @param age age in years
         * @return a weight in the range [0, 1], equal to 1 when {@code age == peakX}
         */
        protected float ageRelevance(float age) {
           double deviation = age - peakX;
           // The whole denominator must be parenthesised: "/ 2*sigma*sigma" would
           // divide by 2 and then MULTIPLY by sigma squared.
           double variance2 = 2.0 * sigma * sigma;
           return (float) Math.exp(-(deviation * deviation) / variance2);
        }

        /**
         * Combines the content score with the age relevance of the document.
         *
         * @param doc           document id
         * @param subQueryScore tf-idf score from the content query
         * @param valSrcScore   value of the date-of-birth field, represented as a float
         * @return {@code subQueryScore} multiplied by the age relevance
         */
        @Override
        public float customScore(int doc, float subQueryScore, float valSrcScore){
           float contentScore = subQueryScore;

           float age = REFERENCE_YEAR - valSrcScore;
           float ageScore = ageRelevance(age);

           float finalScore = ageScore * contentScore;

           // Diagnostic trace; note that "#ageValue" shows the raw field value (year of birth).
           System.out.println("#contentScore: " + contentScore);
           System.out.println("#ageValue:     " + (int)valSrcScore);
           System.out.println("#ageScore:     " + ageScore);
           System.out.println("#finalScore:   " + finalScore);
           System.out.println("+++++++++++++++++");

           return finalScore;
        }
     }

     protected Directory directory;
     protected Analyzer analyzer = new WhitespaceAnalyzer();
     protected String fieldNameContent = "content";
     protected String fieldNameDOB = "dob";

     /** Builds a fresh in-memory index holding three documents. */
     @Override
     protected void setUp() throws Exception
     {
        super.setUp();
        directory = new RAMDirectory();
        analyzer = new WhitespaceAnalyzer();

        // indexed documents
        String[] contents = {"foo baz1", "foo baz2 baz3", "baz4"};
        int[] dobs = {1991, 1981, 1987}; // date of birth

        IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
        try
        {
           for (int i = 0; i < contents.length; i++)
           {
              Document doc = new Document();
              doc.add(new Field(fieldNameContent, contents[i], Field.Store.YES, Field.Index.ANALYZED)); // store & index
              doc.add(new NumericField(fieldNameDOB, Field.Store.YES, true).setIntValue(dobs[i]));      // store & index
              writer.addDocument(doc);
           }
        }
        finally
        {
           writer.close(); // release the write lock even if indexing fails
        }
     }

     /** Releases the in-memory index created by {@link #setUp()}. */
     @Override
     protected void tearDown() throws Exception
     {
        if (directory != null)
        {
           directory.close();
        }
        super.tearDown();
     }

     /**
      * Searches for "foo bar" and re-scores the hits by age relevance.
      * Only the two documents containing "foo" can match.
      */
     public void testSearch() throws Exception
     {
        String inputTextQuery = "foo bar";
        float peak = 27.0f;
        // Standard deviation in years. With the correct Gaussian this yields the
        // same scores the former "sigma = 0.1" produced under the mis-parenthesised formula.
        float sigma = 10.0f;

        QueryParser parser = new QueryParser(Version.LUCENE_30, fieldNameContent, analyzer);
        Query contentQuery = parser.parse(inputTextQuery);

        ValueSourceQuery dobQuery = new ValueSourceQuery( new IntFieldSource(fieldNameDOB) );
         // or: FieldScoreQuery dobQuery = new FieldScoreQuery(fieldNameDOB,Type.INT);

        CustomScoreQuery finalQuery = new AgeAndContentScoreQuery(contentQuery, dobQuery, peak, sigma);

        IndexSearcher searcher = new IndexSearcher(directory);
        try
        {
           TopDocs docs = searcher.search(finalQuery, 10);

           assertEquals("only the documents containing 'foo' should match", 2, docs.totalHits);

           System.out.println("\nDocuments found:\n");
           for(ScoreDoc match : docs.scoreDocs)
           {
              Document d = searcher.doc(match.doc);
              System.out.println("CONTENT: " + d.get(fieldNameContent) );
              System.out.println("D.O.B.:  " + d.get(fieldNameDOB) );
              System.out.println("SCORE:   " + match.score );
              System.out.println("-----------------");

              assertTrue("combined score should be positive", match.score > 0.0f);
           }
        }
        finally
        {
           searcher.close();
        }
     }
  }
	package tests;

	import org.apache.lucene.analysis.Analyzer;
	import org.apache.lucene.analysis.WhitespaceAnalyzer;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.NumericField;
	import org.apache.lucene.index.IndexWriter;
	import org.apache.lucene.queryParser.QueryParser;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.TopDocs;
	import org.apache.lucene.search.function.CustomScoreQuery;
	import org.apache.lucene.search.function.IntFieldSource;
	import org.apache.lucene.search.function.ValueSourceQuery;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.store.RAMDirectory;
	import org.apache.lucene.util.Version;

	import junit.framework.TestCase;

	public class AgeAndContentScoreQueryTest extends TestCase
	{
	public class AgeAndContentScoreQuery extends CustomScoreQuery
	{
	protected float peakX;
	protected float sigma;

	public AgeAndContentScoreQuery(Query subQuery, ValueSourceQuery valSrcQuery, float peakX, float sigma) {
	super(subQuery, valSrcQuery);
	this.setStrict(true); // do not normalize score values from ValueSourceQuery!
	this.peakX = peakX; // age for which the age-relevance is best
	this.sigma = sigma;
	}

	@Override
	public float customScore(int doc, float subQueryScore, float valSrcScore){
	// subQueryScore is td-idf score from content query
	float contentScore = subQueryScore;

	// valSrcScore is a value of date-of-birth field, represented as a float
	// let's convert age value to gaussian-like age relevance score
	float x = (2011 - valSrcScore); // age
	float ageScore = (float) Math.exp(-Math.pow(x - peakX, 2) / 2sigmasigma);

	float finalScore = ageScore * contentScore;

	System.out.println("#contentScore: " + contentScore);
	System.out.println("#ageValue: " + (int)valSrcScore);
	System.out.println("#ageScore: " + ageScore);
	System.out.println("#finalScore: " + finalScore);
	System.out.println("+++++++++++++++++");

	return finalScore;
	}
	}

	protected Directory directory;
	protected Analyzer analyzer = new WhitespaceAnalyzer();
	protected String fieldNameContent = "content";
	protected String fieldNameDOB = "dob";

	protected void setUp() throws Exception
	{
	directory = new RAMDirectory();
	analyzer = new WhitespaceAnalyzer();

	// indexed documents
	String[] contents = {"foo baz1", "foo baz2 baz3", "baz4"};
	int[] dobs = {1991, 1981, 1987}; // date of birth

	IndexWriter writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
	for (int i = 0; i < contents.length; i++)
	{
	Document doc = new Document();
	doc.add(new Field(fieldNameContent, contents[i], Field.Store.YES, Field.Index.ANALYZED)); // store & index
	doc.add(new NumericField(fieldNameDOB, Field.Store.YES, true).setIntValue(dobs[i])); // store & index
	writer.addDocument(doc);
	}
	writer.close();
	}

	public void testSearch() throws Exception
	{
	String inputTextQuery = "foo bar";
	float peak = 27.0f;
	float sigma = 0.1f;

	QueryParser parser = new QueryParser(Version.LUCENE_30, fieldNameContent, analyzer);
	Query contentQuery = parser.parse(inputTextQuery);

	ValueSourceQuery dobQuery = new ValueSourceQuery( new IntFieldSource(fieldNameDOB) );
	// or: FieldScoreQuery dobQuery = new FieldScoreQuery(fieldNameDOB,Type.INT);

	CustomScoreQuery finalQuery = new AgeAndContentScoreQuery(contentQuery, dobQuery, peak, sigma);

	IndexSearcher searcher = new IndexSearcher(directory);
	TopDocs docs = searcher.search(finalQuery, 10);

	System.out.println("\nDocuments found:\n");
	for(ScoreDoc match : docs.scoreDocs)
	{
	Document d = searcher.doc(match.doc);
	System.out.println("CONTENT: " + d.get(fieldNameContent) );
	System.out.println("D.O.B.: " + d.get(fieldNameDOB) );
	System.out.println("SCORE: " + match.score );
	System.out.println("-----------------");
	}
	}
	}