Created
December 10, 2012 13:11
-
-
Save madan712/4250486 to your computer and use it in GitHub Desktop.
Lucene - Updating the index for an existing file. How do you delete documents from the index?
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* LuceneExample.java */
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
public class LuceneExample { | |
public static final String files = "C:/TestLucene/files"; | |
public static final String index = "C:/TestLucene/index"; | |
public static void createIndex() { | |
System.out.println("Creating index...."); | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); | |
try { | |
// Store the index in file | |
Directory directory = new SimpleFSDirectory(new File(index)); | |
IndexWriter iwriter = new IndexWriter(directory, analyzer, true, | |
MaxFieldLength.UNLIMITED); | |
File dir = new File(files); | |
File[] files = dir.listFiles(); | |
int i = 1; | |
for (File file : files) { | |
System.out.println(file.getPath()); | |
Document doc = new Document(); | |
doc.add(new Field("id",""+i,Field.Store.YES,Field.Index.ANALYZED)); | |
doc.add(new Field("path", file.getPath(), Field.Store.YES, | |
Field.Index.ANALYZED)); | |
Reader reader = new FileReader(file.getCanonicalPath()); | |
doc.add(new Field("contents", reader)); | |
iwriter.addDocument(doc); | |
i++; | |
} | |
iwriter.optimize(); | |
iwriter.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void searchIndex(String searchString) { | |
System.out.println("Searching.... '" + searchString + "'"); | |
try { | |
IndexReader reader = IndexReader.open(FSDirectory.open(new File( | |
index)), true); | |
IndexSearcher searcher = new IndexSearcher(reader); | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);// construct our usual analyzer | |
QueryParser qp = new QueryParser(Version.LUCENE_30, "contents", | |
analyzer); | |
Query query = qp.parse(searchString); // parse the query and construct the Query object | |
TopDocs hits = searcher.search(query, 100); // run the query | |
if (hits.totalHits == 0) { | |
System.out.println("No data found."); | |
} else { | |
for (int i = 0; i < hits.totalHits; i++) { | |
Document doc = searcher.doc(hits.scoreDocs[i].doc); // get the next document | |
String id = doc.get("id"); // get its id field | |
String url = doc.get("path"); // get its path field | |
System.out.println("Found in :: "+ id +" "+ url); | |
} | |
} | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void updateIndex(File newFile) { | |
System.out.println("Updating index...."); | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); | |
try { | |
// Store the index in file | |
Directory directory = new SimpleFSDirectory(new File(index)); | |
IndexWriter iwriter = new IndexWriter(directory, analyzer, false, | |
MaxFieldLength.UNLIMITED); | |
System.out.println(newFile.getPath()); | |
Document doc = new Document(); | |
//get max id | |
IndexReader iReader = IndexReader.open(FSDirectory.open(new File(index)), true); | |
int i = iReader.maxDoc(); | |
i++; | |
doc.add(new Field("id",""+i,Field.Store.YES,Field.Index.ANALYZED)); | |
doc.add(new Field("path", newFile.getPath(), Field.Store.YES, | |
Field.Index.ANALYZED)); | |
Reader reader = new FileReader(newFile.getCanonicalPath()); | |
doc.add(new Field("contents", reader)); | |
iwriter.addDocument(doc); | |
iwriter.optimize(); | |
iwriter.close(); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void deleteIndex(String id) { | |
System.out.println("Deleting index...."+id); | |
try { | |
Term term = new Term("id", id); | |
Directory directory = FSDirectory.open(new File(index)); | |
IndexReader indexReader = IndexReader.open(directory, false); | |
indexReader.deleteDocuments(term); | |
indexReader.flush(); | |
indexReader.close(); | |
} | |
catch (Exception e) { | |
e.printStackTrace(); | |
} | |
} | |
public static void main(String[] args) { | |
createIndex(); | |
searchIndex("Object"); | |
updateIndex(new File("C:/TestLucene/newFiles/PHP.txt")); | |
searchIndex("Object"); | |
deleteIndex("2");//id to be deleted | |
searchIndex("Object"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello. I use Lucene.Net version 2.9.4.1 in ASP.NET MVC 5, but I can't delete a document from the index. I use WhitespaceAnalyzer for the index. To add a field to the Lucene document I use this code:
var luceneDoc = new Document();
var fieldId = new NumericField("Id", Field.Store.YES, true);
fieldId.SetIntValue(2211);
luceneDoc.Add(fieldId);
Then I add the document to the index:
private readonly Analyzer _whitespaceAnalyzer=new WhitespaceAnalyzer();
using (FSDirectory indexDirectory = FSDirectory.Open(new DirectoryInfo(indexLocationAbsolute)))
using (IndexWriter writer = new IndexWriter(indexDirectory, _whitespaceAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED))
{
writer.AddDocument(document);
writer.Flush(false, true, true);
}
This works well. I can find this document by "Id" with this query:
var query = NumericRangeQuery.NewIntRange("Id", id, id, true, true);
But I can't delete this document from the index:
private readonly Analyzer _whitespaceAnalyzer=new WhitespaceAnalyzer();
using (FSDirectory indexDirectory = FSDirectory.Open(new DirectoryInfo(indexLocationAbsolute)))
using (IndexWriter writer = new IndexWriter(indexDirectory, _whitespaceAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED))
{
var query = NumericRangeQuery.NewIntRange("Id", id, id, true, true);
writer.DeleteDocuments(query);
writer.Flush(false, true, true);
}
What is the problem? Thanks!