Skip to content

Instantly share code, notes, and snippets.

@alenegro81
Created March 20, 2021 10:23
Show Gist options
  • Save alenegro81/108986771fb5e924818626ade07cd462 to your computer and use it in GitHub Desktop.
Save alenegro81/108986771fb5e924818626ade07cd462 to your computer and use it in GitHub Desktop.
A piece of code for evaluating the embedding
def evaluate(self):
    """Score how well the FastRP embeddings predict a node's category.

    Samples up to 10000 random ``DrkgNode`` nodes, keeps those that have both
    an embedding and a category (the node's second label, ``labels(node)[1]``),
    splits them 80/20 into train and test parts, standardizes the embeddings
    with statistics learned on the training part only, fits a one-vs-rest
    logistic regression and evaluates it on the held-out part.

    The predicted labels, the gold labels and the weighted F1 score are
    printed, in that order.

    Returns:
        The weighted-average F1 score on the test split.

    Raises:
        ValueError: If the sample contains no node with both an embedding
            and a category.
    """
    query = """
        MATCH (node:DrkgNode)
        WITH node, rand() as rand
        order by rand
        LIMIT 10000
        RETURN coalesce(node.symbol, node.id) as nodeId, node.embeddingVectorFastRP as embedding, labels(node)[1] as category
        """
    # Fetch every record while the session is still open; the modelling below
    # does not need the database, so the session is released before training.
    with self._driver.session(database=self.get_database()) as session:
        rows = [dict(record) for record in session.run(query)]

    df = pd.DataFrame(rows)
    if not df.empty:
        # Nodes without an embedding vector or without a second label come
        # back as None; they can be neither scaled nor used as a target.
        df = df.dropna(subset=["embedding", "category"])
    if df.empty:
        raise ValueError(
            "No DrkgNode with both an embedding and a category was sampled; "
            "nothing to evaluate"
        )

    train, test = train_test_split(df, test_size=0.2)

    X = train.embedding.values.tolist()
    y = train.category.values.tolist()
    # Fit the scaler on the training part only so that no statistics of the
    # test part leak into the model.
    scaler = StandardScaler().fit(X)
    X_std = scaler.transform(X)
    clf = LogisticRegression(random_state=0, solver='liblinear', multi_class='ovr', max_iter=1000)
    model = clf.fit(X_std, y)

    X_test = test.embedding.values.tolist()
    gold = test.category.values.tolist()
    prediction = model.predict(scaler.transform(X_test))

    print(list(prediction))
    print(gold)
    weighted = precision_recall_fscore_support(gold, prediction, average='weighted')
    # Index 2 of the (precision, recall, f-score, support) tuple is the F-score.
    f1 = weighted[2]
    print(f1)
    return f1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment