Skip to content

Instantly share code, notes, and snippets.

View martinthenext's full-sized avatar

Martin martinthenext

View GitHub Profile
# Example corpus: three short documents, each a two-line string literal.
# The line break inside every literal is part of the document text.
docs = [
    # Company description
    """About us. We deliver Artificial Intelligence & Machine Learning
solutions to solve business challenges.""",
    # Contact details
    """Contact information. Email [martin davtyan at filament dot ai]
if you have any questions""",
    # Product description
    """Filament Chat. A framework for building and maintaining a scalable
chatbot capability""",
]
# Assign two feature weights to the scorer, then score the query with them.
# NOTE(review): the bare `array([...])` line and the quoted-string line below
# look like recorded interactive output rather than statements — confirm
# before running this file as a script.
scorer.feature_weights = [0.6, 0.4]
scorer.score(query)
array([0.18856181, 0.23136585, 0.15411592])
# Pick the document at the position of the largest score; with the recorded
# scores above that is index 1.
docs[scorer.score(query).argmax()]
'Contact information. Email [martin davtyan at filament dot ai] if you have any questions'
# Pass the relevance feedback to the scorer, then score the same query again.
# NOTE(review): `feedback` is never assigned in the visible part of this file;
# elsewhere it is used via `.keys()` and indexed by a query to yield
# (doc index, feedback value) pairs — presumably a dict, confirm.
scorer.learn_feedback(feedback)
scorer.score(query)
array([0.94280904, 0.69987944, 0.25685987])
# Pick the document at the position of the largest score; with the recorded
# scores above that is index 0.
docs[scorer.score(query).argmax()]
'About us. We deliver Artificial Intelligence & Machine Learning solutions to solve business challenges.'
# Look up the document whose score for `query` is the largest.
# NOTE(review): the quoted string below looks like the recorded result of the
# expression (the third entry of `docs`), not a statement — confirm.
docs[scorer.score(query).argmax()]
'Filament Chat. A framework for building and maintaining a scalable chatbot capability'
# Build a Scorer over the example documents and score a free-text query.
scorer = Scorer(docs)
# Echo the query text (the next line appears to be its recorded value).
query
'who is making chatbots information'
# Score the query; the recorded array below has three values, matching the
# three entries of `docs` — presumably one score per document, confirm.
scorer.score(query)
array([0. , 0.22847492, 0.25685987])
# NOTE(review): in this excerpt only the class docstring and the __init__
# docstring are visible; no statements follow the __init__ docstring and the
# original indentation has been lost. Confirm the body against the full source
# before relying on this definition.
class Scorer():
""" Scores documents for a search query based on tf-idf
similarity and relevance feedback
"""
def __init__(self, docs):
""" Initialize a scorer with a collection of documents, fit a
vectorizer and list feature functions
"""
# Largest value in `similarity`, i.e. the similarity of the closest feedback
# query to the current query.
# NOTE(review): `similarity` and `pos_feedback_proportions` are assigned
# further down in this file; the snippets appear out of execution order.
nn_similarity = np.max(similarity)
# One value per document: nn_similarity scaled by that document's share of
# positive feedback, or by 0. when the document index has no entry.
pos_feedback_feature = [nn_similarity * pos_feedback_proportions.get(idx, 0.)
for idx, _ in enumerate(docs)]
# The bracketed list below appears to be the recorded value of the feature.
pos_feedback_feature
[0.4714045207910317, 0.23570226039551584, 0.0]
from collections import Counter
# Count how many times each document index occurs in pos_feedback_doc_idx.
counts = Counter(pos_feedback_doc_idx)
# The Counter literal below appears to be the recorded value of `counts`.
counts
Counter({0: 2, 1: 1})
# Share of all positive feedback that each document index received:
# the per-document count divided by the total count over all documents.
# The total is computed once rather than once per dictionary entry.
total_pos_feedback = sum(counts.values())
pos_feedback_proportions = {
    doc_idx: count / total_pos_feedback for doc_idx, count in counts.items()
}
# Collect the document index of every feedback entry whose value equals 1.,
# taken from the feedback stored under the query at position max_idx of
# feedback_queries. An index appears once per matching entry, so repeats are
# kept (see the recorded value below).
# NOTE(review): `max_idx` is not assigned in the visible part of this file —
# presumably the position of the most similar feedback query; confirm.
pos_feedback_doc_idx = [idx for idx, feedback_value
in feedback[feedback_queries[max_idx]]
if feedback_value == 1.]
pos_feedback_doc_idx
[0, 1, 0]
import numpy as np
# Query text to compare against the queries that already have feedback.
query = 'who is making chatbots information'
# The queries for which feedback exists (the keys of `feedback`).
feedback_queries = list(feedback.keys())
# Cosine similarity between the vectorized query and each vectorized feedback
# query.
# NOTE(review): `vectorizer`, `cosine_similarity` and `feedback` are not
# defined or imported in the visible part of this file — presumably a fitted
# tf-idf vectorizer and scikit-learn's cosine_similarity; confirm.
similarity = cosine_similarity(vectorizer.transform([query]),
vectorizer.transform(feedback_queries))
# The array below appears to be the recorded value: one row, one column per
# feedback query.
similarity
array([[0.70710678, 0. ]])