Created June 14, 2015 12:03
-
-
Save gilinachum/a297b743b3b05bb69888 to your computer and use it in GitHub Desktop.
Cost of fields in Lucene index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.IOException; | |
import java.lang.management.ManagementFactory; | |
import java.util.Random; | |
import java.util.UUID; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.WhitespaceAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.Field.Index; | |
import org.apache.lucene.document.Field.Store; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.queryParser.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.store.FSDirectory; | |
import org.apache.lucene.store.LockObtainFailedException; | |
import org.apache.lucene.util.Version; | |
public class FieldsIndexingMemTest { | |
private static IndexReader ireader; | |
private static IndexSearcher isearcher; | |
private static FSDirectory directory; | |
private static Analyzer analyzer; | |
private static QueryParser parser; | |
private static IndexWriter iwriter; | |
private static Random random = new Random(); | |
private enum Mode { | |
fewFields, manyFields | |
}; | |
private static Mode mode = Mode.fewFields; | |
// private static Mode mode = Mode.manyFields; | |
/** | |
* @param args | |
*/ | |
@SuppressWarnings("deprecation") | |
public static void main(String[] args) throws Exception { | |
System.out.println(ManagementFactory.getRuntimeMXBean().getName()); | |
System.out.println("mode=" + mode); | |
long before = System.currentTimeMillis(); | |
printOutMemory("Before starting"); | |
File indexFolder = new File("C:\\temp\\index\\" + mode.toString()); | |
directory = FSDirectory.open(new File("C:\\temp\\index\\" + mode.toString())); | |
analyzer = new WhitespaceAnalyzer(); | |
parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer); | |
if (indexFolder.exists()) { | |
openWriterOverExistingIndex(); | |
} else { | |
createNewIndex(); | |
} | |
printOutMemory("Before opening reader+searcher+running dummy query"); | |
ireader = IndexReader.open(directory); | |
isearcher = new IndexSearcher(ireader); | |
Query query = parser.parse("name:a*"); | |
System.out.println(query.rewrite(ireader)); | |
ScoreDoc[] hits = isearcher.search(query, null, 1000000).scoreDocs; | |
printOutMemory("After opening reader+searcher+running dummy query"); | |
printOutMemory("Before closing Lucene objects"); | |
System.out.println("Hit enter key to continue..."); | |
System.in.read(); | |
ireader.close(); | |
iwriter.close(); | |
directory.close(); | |
System.out.println(); | |
System.out.println("Done. Runtime duration=" + (System.currentTimeMillis() - before) + "ms"); | |
printOutMemory("After closing Lucene objects"); | |
} | |
private static void printOutMemory(String prefixMessage) { | |
Runtime runtime = Runtime.getRuntime(); | |
long beforeUsedMemory = runtime.totalMemory() - runtime.freeMemory(); | |
for (int i = 0; i < 10; i++) { | |
System.gc(); | |
} | |
try { | |
Thread.sleep(500); | |
} catch (InterruptedException e) { | |
e.printStackTrace(); | |
} | |
for (int i = 0; i < 10; i++) { | |
System.gc(); | |
} | |
long afterUsedMemory = runtime.totalMemory() - runtime.freeMemory(); | |
System.out.println(prefixMessage + " - Used memory=" + (afterUsedMemory / 1024 / 1024) + "MB (beforeUsedMemory=" + beforeUsedMemory / 1024 / 1024 | |
+ "MB)"); | |
} | |
private static void openWriterOverExistingIndex() throws CorruptIndexException, LockObtainFailedException, IOException { | |
System.out.println("Opening existing index"); | |
long before = System.currentTimeMillis(); | |
iwriter = new IndexWriter(directory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED); | |
System.out.println("open existing index duration=" + (System.currentTimeMillis() - before) + "ms"); | |
} | |
private static void createNewIndex() throws CorruptIndexException, LockObtainFailedException, IOException { | |
System.out.println("Creating index from scratch"); | |
iwriter = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); | |
int numUniqueTerms = 100 * 1000; | |
String[] uniqueTerms = new String[numUniqueTerms]; | |
for (int i = 0; i < uniqueTerms.length; i++) { | |
uniqueTerms[i] = String.valueOf(i); | |
} | |
int numUniqueFieldNames = 1 * 1000 * 1000; | |
String[] uniqueFieldNames = new String[numUniqueFieldNames]; | |
for (int i = 0; i < uniqueFieldNames.length; i++) { | |
uniqueFieldNames[i] = "community_tag_" + UUID.randomUUID().toString(); | |
} | |
int numOfDocs = 100 * 1000; | |
for (int i = 0; i < numOfDocs; i++) { | |
addNewDocument(numUniqueTerms, uniqueTerms, numUniqueFieldNames, uniqueFieldNames); | |
if (i % 1000 == 0) { | |
System.out.println("Progress: " + (100 * i / numOfDocs) + "% (wrote " + i + " documents)"); | |
} | |
} | |
// release mem | |
uniqueTerms = null; | |
uniqueFieldNames = null; | |
printOutMemory("before commit()"); | |
iwriter.commit(); | |
printOutMemory("after commit()"); | |
} | |
private static void addNewDocument(int numUniqueTerms, String[] uniqueTerms, int numUniqueFieldNames, String[] uniqueFieldNames) | |
throws CorruptIndexException, IOException { | |
Document doc = new Document(); | |
for (int j = 0; j < 10; j++) { | |
String fieldName = (mode == Mode.fewFields) ? ("community_tag_" + j) : uniqueFieldNames[random.nextInt(numUniqueFieldNames)]; | |
String fieldValue = getFieldValue(numUniqueTerms, uniqueTerms); | |
doc.add(new Field(fieldName, fieldValue, Store.YES, Index.NOT_ANALYZED_NO_NORMS)); | |
} | |
iwriter.addDocument(doc); | |
} | |
private static String getFieldValue(int numUniqueTerms, String[] uniqueTerms) { | |
int termsInField = random.nextInt(10); | |
StringBuilder sb = new StringBuilder(); | |
for (int w = 0; w < termsInField; w++) { | |
sb.append(uniqueTerms[random.nextInt(numUniqueTerms)]).append(" "); | |
} | |
String fieldValue = sb.toString(); | |
return fieldValue; | |
} | |
} |
Sign up for free to join this conversation on GitHub.
Already have an account?
Sign in to comment