Last active
August 29, 2015 14:00
-
-
Save remeniuk/11180836 to your computer and use it in GitHub Desktop.
Lucene memory matcher
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.lucene.index.memory.MemoryIndex | |
import org.joda.time.format.{DateTimeFormat, ISODateTimeFormat} | |
import java.util.Date | |
import org.apache.lucene.analysis.miscellaneous.PatternAnalyzer | |
/**
 * Mixes an in-memory Lucene index into a [[User]], so that a single user can be
 * matched against Lucene queries without touching a persistent index.
 *
 * The index is built lazily on first access and marked `@transient` so it is
 * rebuilt (rather than serialized) after deserialization.
 */
trait LuceneUserIndex extends UserLuceneAnalyzers {
  self: User =>

  /** One-document in-memory index holding every searchable field of this user. */
  @transient lazy val memoryIndex = {
    val index = new MemoryIndex

    // Custom fields: skip empty values; multi-valued fields are space-joined
    // so the whitespace-splitting defaultAnalyzer tokenizes each value.
    for {
      customField <- customFields if !customField.value.isEmpty
    } index.addField(customField.id, customField.value.mkString(" "), defaultAnalyzer)

    // Mandatory identity fields.
    index.addField(f_id, user_id, defaultAnalyzer)
    index.addField(f_api_key, api_key, defaultAnalyzer)
    index.addField(f_login_type, login_type.toString, defaultAnalyzer)
    index.addField(f_application_user_id, application_user_id, defaultAnalyzer)

    // Optional fields: `foreach` (not `map`) — these calls are pure side effects.
    registration_date.foreach(d => index.addField(f_registration_date, LuceneMarshaller.formatDate(d), defaultAnalyzer))

    // Events use a dedicated analyzer (see UserLuceneAnalyzers.eventStringAnalyzer).
    index.addField(f_events, events, eventStringAnalyzer)
    index.addField(f_platforms, platforms.mkString(" "), defaultAnalyzer)

    // Optional profile fields.
    import profile._
    nick_name.foreach(index.addField(f_nick_name, _, defaultAnalyzer))
    first_name.foreach(index.addField(f_first_name, _, defaultAnalyzer))
    last_name.foreach(index.addField(f_last_name, _, defaultAnalyzer))
    source.foreach(index.addField(f_source, _, defaultAnalyzer))
    email.foreach(index.addField(f_email, _, defaultAnalyzer))
    birth_day.foreach(d => index.addField(f_birth_day, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    gender.foreach(g => index.addField(f_gender, g.toString, defaultAnalyzer))
    lang_code.foreach(index.addField(f_lang_code, _, defaultAnalyzer))
    country_code.foreach(index.addField(f_country, _, defaultAnalyzer))

    // Purchase statistics (dates formatted via LuceneMarshaller, numbers as strings).
    purchaseStats.stats_first_deposit_date.foreach(d => index.addField(stats_first_deposit_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    purchaseStats.stats_last_deposit_date.foreach(d => index.addField(stats_last_deposit_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    purchaseStats.stats_total_purchase_amount.foreach(pa => index.addField(stats_total_purchase_amount.fqn, pa.toString, defaultAnalyzer))
    purchaseStats.stats_purchases_count.foreach(pc => index.addField(stats_purchase_count.fqn, pc.toString, defaultAnalyzer))

    // Online-activity statistics.
    onlineStats.stats_last_login_date.foreach(d => index.addField(stats_last_login_date.fqn, LuceneMarshaller.formatDate(d), defaultAnalyzer))
    onlineStats.stats_logins_count.foreach(l => index.addField(stats_logins_count.fqn, l.toString, defaultAnalyzer))
    onlineStats.stats_total_session_time_millis.foreach(t => index.addField(stats_total_session_time_millis.fqn, t.toString, defaultAnalyzer))

    index
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Translates a domain [[Query]] filter tree into an executable Lucene query.
 *
 * @param filter the domain filter to translate
 * @return the equivalent `lucene.search.Query`
 */
def marshal(filter: Query): lucene.search.Query = {
  filter match {
    case f: and =>
      val parentQuery = new BooleanQuery
      f.filters.foreach {
        case subFilter: neql =>
          // A pure MUST_NOT clause matches nothing on its own, so pair the
          // negation with a match-all positive clause.
          parentQuery.add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST))
          parentQuery.add(subFilter.toLuceneQuery, BooleanClause.Occur.MUST_NOT)
        case subFilter =>
          // Option(...) guards against marshal returning null for a sub-filter.
          Option(marshal(subFilter))
            .foreach(parentQuery.add(_, BooleanClause.Occur.MUST))
      }
      parentQuery
    case f: or =>
      // (Removed a stray `f.filters.map(marshal)` that marshalled every
      // sub-filter once, discarded the result, then marshalled again below.)
      val parentQuery = new BooleanQuery
      f.filters.foreach(subFilter => parentQuery.add(marshal(subFilter), BooleanClause.Occur.SHOULD))
      parentQuery
    case f: in =>
      val subQuery = new BooleanQuery
      // NOTE(review): MUST requires the field to match *every* value, which for a
      // single-valued field can never succeed with more than one term. If `in`
      // means set membership, this should probably be Occur.SHOULD — confirm
      // intended semantics before changing.
      f.value.asInstanceOf[Iterable[Any]].foreach {
        value =>
          subQuery.add(
            new TermQuery(new Term(s"${f.column.fqn}", value.toString)),
            BooleanClause.Occur.MUST
          )
      }
      subQuery
    case p: pattern =>
      // Each sequence of regex terms becomes an ordered SpanNear (in-order,
      // slop-limited); alternatives are OR-ed together.
      val parentQuery = new SpanOrQuery
      p.value.foreach {
        sequence =>
          parentQuery.addClause(new SpanNearQuery(
            sequence.map {
              regexTerm => new SpanMultiTermQueryWrapper(new RegexpQuery(new Term(p.column.fqn, regexTerm)))
            }.toArray, MAX_SLOP, true
          ))
      }
      parentQuery
    case other => other.toLuceneQuery
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Analyzers used when indexing user fields into the in-memory Lucene index.
 */
trait UserLuceneAnalyzers {

  /**
   * Whitespace-splitting analyzer with no lower-casing (third arg `false`)
   * and no stop words (`null` stop set).
   */
  protected def defaultAnalyzer =
    new PatternAnalyzer(Version.LUCENE_45, PatternAnalyzer.WHITESPACE_PATTERN, false, null)

  /**
   * Analyzer for event strings: rewrites ':' to '_' before tokenizing, then
   * captures event-name prefixes (full prefix and its 2-char head) from
   * tokens shaped like `<alnum>_<digits>` via pattern capture groups.
   */
  protected def eventStringAnalyzer = {
    // Char-level mapping applied before tokenization: ':' becomes '_'.
    val builder = new NormalizeCharMap.Builder()
    builder.add(":", "_")
    val mapping = builder.build

    new Analyzer {
      def createComponents(fieldName: String, reader: Reader): TokenStreamComponents = {
        val source = new StandardTokenizer(Version.LUCENE_45,
          new MappingCharFilter(mapping, reader))
        val filter = new PatternCaptureGroupTokenFilter(source,
          true, Pattern.compile("(([0-9a-z]{2})(?:[0-9a-z]{1,})?)_[0-9]+"))
        // Last expression is the result — no `return` needed in Scala.
        new TokenStreamComponents(source, filter)
      }
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment