Created
October 8, 2012 16:59
-
-
Save maxpert/3853598 to your computer and use it in GitHub Desktop.
Lucene redis example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package mxp; | |
import java.io.IOException; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.document.NumericField; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.LockObtainFailedException; | |
import org.apache.lucene.util.Version; | |
public class DumpIndexWriter { | |
private Directory dir; | |
private IndexWriter writer; | |
public DumpIndexWriter(Directory d) { | |
dir = d; | |
} | |
public void open() throws CorruptIndexException, LockObtainFailedException, IOException { | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); | |
IndexWriterConfig writerConfig = new IndexWriterConfig( | |
Version.LUCENE_36, analyzer); | |
writer = new IndexWriter(dir, writerConfig); | |
} | |
public boolean parseAndIndex(String line){ | |
//3281929|MacGyver.S01E02.DVDRip.XviD-MEDiEVAL|376764032|1|0|6e43d748d9446e3cddec69ce9b2ababb51bbf827 | |
String[] fields = line.split("\\|"); | |
if( fields.length < 6 ) return false; | |
Document doc = new Document(); | |
System.out.printf("%s -> %s \n", fields[1], fields[5]); | |
doc.add(new Field("title", fields[1], Field.Store.YES, Field.Index.ANALYZED)); | |
doc.add(new Field("link", fields[5], Field.Store.YES, Field.Index.NOT_ANALYZED)); | |
try { | |
writer.addDocument(doc); | |
} catch (Exception e) { | |
e.printStackTrace(); | |
return false; | |
} | |
return true; | |
} | |
public void close() throws CorruptIndexException, IOException { | |
writer.close(); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package mxp; | |
import java.io.BufferedReader; | |
import java.io.DataInputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileNotFoundException; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.util.ArrayList; | |
import java.util.List; | |
import java.util.zip.CRC32; | |
import mxp.lucene.store.RedisDirectory; | |
import mxp.lucene.store.RedisFile; | |
import org.apache.commons.pool.impl.GenericObjectPool; | |
import org.apache.lucene.analysis.Analyzer; | |
import org.apache.lucene.analysis.standard.StandardAnalyzer; | |
import org.apache.lucene.document.Document; | |
import org.apache.lucene.document.Field; | |
import org.apache.lucene.index.CorruptIndexException; | |
import org.apache.lucene.index.IndexReader; | |
import org.apache.lucene.index.IndexWriter; | |
import org.apache.lucene.index.IndexWriterConfig; | |
import org.apache.lucene.queryParser.ParseException; | |
import org.apache.lucene.queryParser.QueryParser; | |
import org.apache.lucene.search.IndexSearcher; | |
import org.apache.lucene.search.Query; | |
import org.apache.lucene.search.ScoreDoc; | |
import org.apache.lucene.search.TopScoreDocCollector; | |
import org.apache.lucene.store.Directory; | |
import org.apache.lucene.store.FSDirectory; | |
import org.apache.lucene.store.LockObtainFailedException; | |
import org.apache.lucene.store.RAMDirectory; | |
import org.apache.lucene.util.Version; | |
import redis.clients.jedis.JedisShardInfo; | |
import redis.clients.jedis.ShardedJedisPool; | |
public class Main { | |
/** | |
* @param args | |
* @throws IOException | |
* @throws CorruptIndexException | |
* @throws ParseException | |
* @throws InterruptedException | |
*/ | |
public static void main(String[] args) throws CorruptIndexException, | |
IOException, ParseException, InterruptedException { | |
List<JedisShardInfo> shards = new ArrayList<JedisShardInfo>(); | |
JedisShardInfo si = new JedisShardInfo("localhost", 6379); | |
JedisShardInfo si2 = new JedisShardInfo("localhost", 6389); | |
JedisShardInfo si3 = new JedisShardInfo("localhost", 6399); | |
shards.add(si); | |
shards.add(si2); | |
shards.add(si3); | |
ShardedJedisPool pool = new ShardedJedisPool(new GenericObjectPool.Config(), shards); | |
RedisDirectory redisDir = new RedisDirectory("piratebay", pool); | |
FSDirectory fsDir = FSDirectory.open(new File("/Users/maxpert/labs/pbindex")); | |
long start = System.currentTimeMillis(); | |
System.out.println("Indexing in Redis..."); | |
DumpPirateBay(redisDir); | |
System.out.printf("Redis indexing took %d...", System.currentTimeMillis() - start); | |
Thread.sleep(5000); | |
//Test searching ;) | |
search( redisDir, "title", "batman" ); | |
pool.destroy(); | |
} | |
private static void DumpPirateBay(Directory redisDir) | |
throws CorruptIndexException, LockObtainFailedException, IOException | |
{ | |
DumpIndexWriter writer = new DumpIndexWriter(redisDir); | |
FileInputStream fstream = new FileInputStream("/Users/maxpert/labs/complete"); | |
DataInputStream in = new DataInputStream(fstream); | |
BufferedReader br = new BufferedReader(new InputStreamReader(in)); | |
String strLine; | |
writer.open(); | |
while ((strLine = br.readLine()) != null) { | |
writer.parseAndIndex(strLine); | |
} | |
writer.close(); | |
in.close(); | |
} | |
private static void search(Directory index, String field, String query) | |
throws ParseException, CorruptIndexException, IOException | |
{ | |
int hitsPerPage = 10; | |
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36); | |
QueryParser qp = new QueryParser(Version.LUCENE_36, field, analyzer); | |
Query q = qp.parse(query); | |
IndexReader reader = IndexReader.open(index); | |
IndexSearcher searcher = new IndexSearcher(reader); | |
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); | |
searcher.search(q, collector); | |
ScoreDoc[] hits = collector.topDocs().scoreDocs; | |
System.out.println("Found " + hits.length + " hits."); | |
for(int i=0;i<hits.length;++i) { | |
int docId = hits[i].doc; | |
Document d = searcher.doc(docId); | |
System.out.println((i + 1) + ". " + d.get("title")); | |
} | |
searcher.close(); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment