Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created September 15, 2011 21:47
Show Gist options
  • Save marcelcaraciolo/1220584 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1220584 to your computer and use it in GitHub Desktop.
bm25f
# BM25F Model
def bm25(idf, tf, fl, avgfl, B, K1):
    """Return the BM25 score of a single term for a single document field.

    :param idf: inverse document frequency of the term.
    :param tf: term frequency in the current document.
    :param fl: field length in the current document.
    :param avgfl: average field length across documents in the collection.
    :param B: free parameter; weights how much the relative field length
        (``fl / avgfl``) contributes to the denominator.
    :param K1: free parameter; scales the length term in the denominator
        and, as ``K1 + 1``, the term frequency in the numerator.
    """
    # Blend a constant 1 with the relative field length according to B:
    # B == 0 ignores length entirely, B == 1 uses fl / avgfl unchanged.
    length_norm = 1 - B + B * (fl / avgfl)
    numerator = tf * (K1 + 1)
    denominator = tf + K1 * length_norm
    return idf * (numerator / denominator)
class BM25F(WeightingModel):
    """Weighting model that implements the BM25F scoring algorithm."""

    def __init__(self, B=0.75, K1=1.2, **kwargs):
        """Store the global free parameters and any per-field B overrides.

        >>> from whoosh import scoring
        >>> # Set a custom B value for the "content" field
        >>> w = scoring.BM25F(B=0.75, content_B=1.0, K1=1.5)

        :param B: free parameter, see the BM25 literature. Keyword arguments
            of the form ``fieldname_B`` (for example, ``body_B``) set field-
            specific values for B.
        :param K1: free parameter, see the BM25 literature.
        """
        self.B = B
        self.K1 = K1
        # Maps a field name to the B value that overrides ``self.B`` for it.
        self._field_B = {}
        for key, value in iteritems(kwargs):
            if not key.endswith("_B"):
                # Keyword arguments without the "_B" suffix are ignored.
                continue
            # Drop the two-character "_B" suffix to recover the field name.
            self._field_B[key[:-2]] = value

    def supports_block_quality(self):
        """Return True: this model reports that it supports block quality."""
        return True

    def scorer(self, searcher, fieldname, text, qf=1):
        """Build the scorer for ``text`` in ``fieldname``.

        Fields whose schema entry is not scorable get the scorer produced by
        ``WeightScorer.for_``; every other field gets a ``BM25FScorer``
        configured with the field-specific B when one was given, otherwise
        with the model-wide B.
        """
        if searcher.schema[fieldname].scorable:
            field_b = self._field_B.get(fieldname, self.B)
            return BM25FScorer(searcher, fieldname, text, field_b, self.K1,
                               qf=qf)
        return WeightScorer.for_(searcher, fieldname, text)
class BM25FScorer(WeightLengthScorer):
    """Scorer that applies the ``bm25`` formula to a weight and a length."""

    def __init__(self, searcher, fieldname, text, B, K1, qf=1):
        """Collect the collection-wide statistics and the free parameters.

        :param searcher: searcher to score against; its parent (or itself
            when it has none) supplies the global statistics.
        :param fieldname: name of the field being scored.
        :param text: the term text being scored.
        :param B: free parameter passed through to ``bm25``.
        :param K1: free parameter passed through to ``bm25``.
        :param qf: stored on the instance as ``self.qf``.
        """
        # IDF and average field length are global statistics, so they are
        # read from the top-level searcher (get_parent returns the searcher
        # itself when there is no parent).
        top_searcher = searcher.get_parent()
        self.idf = top_searcher.idf(fieldname, text)
        mean_length = top_searcher.avg_field_length(fieldname)
        # Fall back to 1 when the average is falsy (e.g. 0) so the division
        # inside bm25 cannot hit a zero average length.
        self.avgfl = mean_length if mean_length else 1
        self.B = B
        self.K1 = K1
        self.qf = qf
        self.setup(searcher, fieldname, text)

    def _score(self, weight, length):
        """Return the BM25 score for the given term weight and field length."""
        return bm25(self.idf, weight, length, self.avgfl, self.B, self.K1)
@amirouche
Copy link

Thanks!

@amirouche
Copy link

How do you use boost value on fields?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment