Skip to content

Instantly share code, notes, and snippets.

@andrea-dagostino
Last active March 27, 2023 11:59
Show Gist options
  • Save andrea-dagostino/38ba6640383b10942f565a2e2c2368a8 to your computer and use it in GitHub Desktop.
Save andrea-dagostino/38ba6640383b10942f565a2e2c2368a8 to your computer and use it in GitHub Desktop.
fuzzy_logic_tagging
def fuzzy_tagging(tags, articles, *, posts=None, limit=4, threshold=55):
    """
    Tag articles by fuzzy-matching each predefined tag against the article texts.

    For every tag, the ``limit`` best-matching articles are retrieved with
    ``process.extract`` (``process`` must be available at module level, as in
    the original code). A tag is attached to an article only when the match
    confidence is at least ``threshold``; several tags on the same article are
    joined with ``', '``.

    Args:
        tags: Iterable of predefined tags.
        articles: Iterable of textual contents to be tagged.
        posts: Optional DataFrame the ``tag`` column is appended to. Its index
            is assumed to hold the positions of ``articles`` (0..n-1) -- TODO
            confirm against callers. When omitted, a module-level ``posts``
            object is used if one exists (legacy behaviour); otherwise a
            one-column DataFrame (``article``) is built from ``articles``.
        limit: How many best-matching articles to consider per tag.
        threshold: Minimum confidence a match needs for the tag to be kept
            (presumably on the matcher's 0-100 scale -- verify against the
            fuzzy-matching library in use).

    Returns:
        A pandas DataFrame: ``posts`` plus a ``tag`` column. Articles without
        any sufficiently confident match get a missing value in ``tag``.
    """
    articles = list(articles)
    # Key every article by its position: with dict-like choices the matcher
    # returns (text, score, key), so each match carries its own position.
    # This avoids an O(n) ``list.index`` scan per match and keeps duplicate
    # article texts from collapsing onto the first occurrence.
    choices = dict(enumerate(articles))

    tags_by_article = {}
    if choices:  # nothing to match against otherwise; skip the matcher entirely
        for tag in tags:
            for match in process.extract(tag, choices, limit=limit):
                score, position = match[1], match[2]
                # Apply the threshold to every match, including the first tag
                # seen for an article (previously only later tags were checked).
                if score >= threshold:
                    tags_by_article.setdefault(position, []).append(str(tag))

    # dtype=object keeps the column string-typed even when no tag matched.
    tag_column = pd.Series(
        {position: ", ".join(found) for position, found in tags_by_article.items()},
        name="tag",
        dtype=object,
    )

    if posts is None:
        # Legacy fallback: earlier versions read a module-level ``posts``.
        posts = globals().get("posts")
    if posts is None:
        posts = pd.DataFrame({"article": articles})

    # create the final dataset
    return pd.concat([posts, tag_column], axis=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment