andrea-dagostino · March 27, 2023 11:59
diff --git a/fuzzy_logic_tagging_eng3.py b/fuzzy_logic_tagging_eng3.py
 def fuzzy_tagging(tags, articles, *, limit=4, threshold=55):
     """
     Tag articles by fuzzy-matching each predefined tag against the article texts.

     For every tag, ``process.extract`` ranks the articles and the ``limit``
     best matches become candidates for that tag. The candidates are then
     grouped per article into a single comma-separated string.

     Args:
         tags: iterable of predefined tags.
         articles: list of hashable textual contents (e.g. strings) to be tagged.
         limit: number of best-matching articles kept for each tag.
         threshold: minimum confidence needed to append a further tag to an
             article that already has one.

     Returns:
         A Pandas dataframe: ``posts`` concatenated column-wise with a ``tag``
         column indexed by article position. When there are no tags or no
         articles the ``tag`` column is simply empty.
     """
     # Position of the first occurrence of every text, computed once so that
     # each match does not rescan the list (same result as articles.index()).
     first_position = {}
     for position, text in enumerate(articles):
         first_position.setdefault(text, position)

     # article position -> tags assigned so far, in order of discovery
     tags_by_article = {}
     for tag in tags:
         # every match starts with (matched text, confidence)
         for match in process.extract(tag, articles, limit=limit):
             article_id = first_position[match[0]]
             assigned = tags_by_article.get(article_id)
             if assigned is None:
                 # NOTE(review): the first tag seen for an article is kept
                 # whatever its confidence; only later tags are filtered by
                 # the threshold. Kept as in the original - confirm intent.
                 tags_by_article[article_id] = [str(tag)]
             elif match[1] >= threshold:
                 assigned.append(str(tag))

     # create the final dataset
     joined = {idx: ', '.join(names) for idx, names in tags_by_article.items()}
     tag_column = pd.Series(joined, name='tag')
     # NOTE(review): `posts` is not a parameter; it is read from the enclosing
     # scope. Presumably it is the dataframe whose rows line up with
     # `articles` - verify against the caller.
     return pd.concat([posts, tag_column], axis=1)
	def fuzzy_tagging(tags, articles, *, limit=4, threshold=55):
	    """
	    Tag articles by fuzzy-matching each predefined tag against the article texts.

	    For every tag, ``process.extract`` ranks the articles and the ``limit``
	    best matches become candidates for that tag. The candidates are then
	    grouped per article into a single comma-separated string.

	    Args:
	        tags: iterable of predefined tags.
	        articles: list of hashable textual contents (e.g. strings) to be tagged.
	        limit: number of best-matching articles kept for each tag.
	        threshold: minimum confidence needed to append a further tag to an
	            article that already has one.

	    Returns:
	        A Pandas dataframe: ``posts`` concatenated column-wise with a ``tag``
	        column indexed by article position. When there are no tags or no
	        articles the ``tag`` column is simply empty.
	    """
	    # Position of the first occurrence of every text, computed once so that
	    # each match does not rescan the list (same result as articles.index()).
	    first_position = {}
	    for position, text in enumerate(articles):
	        first_position.setdefault(text, position)

	    # article position -> tags assigned so far, in order of discovery
	    tags_by_article = {}
	    for tag in tags:
	        # every match starts with (matched text, confidence)
	        for match in process.extract(tag, articles, limit=limit):
	            article_id = first_position[match[0]]
	            assigned = tags_by_article.get(article_id)
	            if assigned is None:
	                # NOTE(review): the first tag seen for an article is kept
	                # whatever its confidence; only later tags are filtered by
	                # the threshold. Kept as in the original - confirm intent.
	                tags_by_article[article_id] = [str(tag)]
	            elif match[1] >= threshold:
	                assigned.append(str(tag))

	    # create the final dataset
	    joined = {idx: ', '.join(names) for idx, names in tags_by_article.items()}
	    tag_column = pd.Series(joined, name='tag')
	    # NOTE(review): `posts` is not a parameter; it is read from the enclosing
	    # scope. Presumably it is the dataframe whose rows line up with
	    # `articles` - verify against the caller.
	    return pd.concat([posts, tag_column], axis=1)