Created
December 1, 2019 12:32
-
-
Save mocobeta/a5b18506ebc933c0afa7ab61d1dd2295 to your computer and use it in GitHub Desktop.
POC example of approximate k-NN vector search (LUCENE-9004)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.VectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.KnnGraphQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/** See also https://github.com/mocobeta/lucene-solr-mirror/tree/jira/LUCENE-9004-aknn-2 */ | |
public class VectorFieldTest { | |
public static void main(String[] args) { | |
String indexDir = "/tmp/vector-field"; | |
try { | |
//cleanUp(indexDir); | |
Directory dir = FSDirectory.open(Paths.get(indexDir)); | |
IndexWriterConfig config = new IndexWriterConfig(); | |
config.setUseCompoundFile(false); | |
IndexWriter writer = new IndexWriter(dir, config); | |
int numDimensions = 100; // the number of dimensions of vector values | |
int numDocs = 100000; // the number of docs | |
// indexing vectors | |
long _start = System.currentTimeMillis(); | |
for (int i = 0; i < numDocs; i++) { | |
Document doc = new Document(); | |
// add a vector field (with randomly generated vector value) | |
// here, the Manhattan distance is used for similarity calculation | |
doc.add(new VectorField("vector", generateRandomVector(numDimensions), VectorValues.DistanceFunction.MANHATTAN)); | |
writer.addDocument(doc); | |
} | |
long _end = System.currentTimeMillis(); | |
System.out.println(numDocs + " docs were written. Num dims=" + numDimensions + ", Elapsed=" + (_end - _start) + " msec, RamBytesUsed=" + writer.ramBytesUsed()); | |
writer.close(); | |
// searching vectors | |
IndexReader reader = DirectoryReader.open(dir); | |
IndexSearcher searcher = new IndexSearcher(reader); | |
// query vector; this must have the same dimensions to indexed vectors | |
float[] queryVector = generateRandomVector(numDimensions); | |
// Knn graph query | |
KnnGraphQuery query = new KnnGraphQuery("vector", queryVector, KnnGraphQuery.DEFAULT_EF, reader); | |
System.out.println("Query: " + Arrays.toString(queryVector) + "\n RamBytesUsed=" + query.ramBytesUsed()); | |
long _start2 = System.currentTimeMillis(); | |
// executes the query and collects top 5 results (same as ordinary Lucene query) | |
TopDocs hits = searcher.search(query, 5); | |
long _end2 = System.currentTimeMillis(); | |
System.out.println("Total hits: " + hits.totalHits + " (elapsed: " + (_end2 - _start2) + " msec)"); | |
int rank = 0; | |
// show result documents with scores | |
for (ScoreDoc sd : hits.scoreDocs) { | |
System.out.println("Rank " + ++rank + ": doc=" + sd.doc + " score=" + sd.score); | |
} | |
} catch (Throwable e) { | |
e.printStackTrace(); | |
} | |
} | |
private static void cleanUp(String dir) throws IOException { | |
Path path = Paths.get(dir); | |
if (Files.exists(path)) { | |
Files.walkFileTree(path, new SimpleFileVisitor<Path>() { | |
@Override | |
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { | |
Files.delete(file); | |
return FileVisitResult.CONTINUE; | |
} | |
@Override | |
public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { | |
Files.delete(dir); | |
return FileVisitResult.CONTINUE; | |
} | |
}); | |
} | |
} | |
static final Random random = new Random(System.currentTimeMillis()); | |
private static float[] generateRandomVector(int numDims) { | |
float[] vector = new float[numDims]; | |
for (int i = 0; i < numDims; i++) { | |
vector[i] = random.nextFloat(); | |
} | |
return vector; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment