# Source: GitHub Gist 827034f408588119a9154581151596d3 by @jinnosux
# (created January 8, 2025 15:38).
import xlsxwriter
from collections import Counter
import re
from difflib import SequenceMatcher
# Output workbook; it is written to "example.xlsx" in the current working
# directory.
workbook = xlsxwriter.Workbook("example.xlsx")
# Single worksheet (default name) that search_str() writes its results into.
worksheet = workbook.add_worksheet()
# Award categories to tally. Each entry is searched for verbatim in the
# votes file, and the list order determines the output row order.
categories = [
    "Best Cop",
    "Best Paramedic",
    "Best Hitman",
    "Best Gunrunner",
    "Best Thief",
    "Best Driver",
    "Best Pilot",
    "Best Disappearer",
    "Best Kidnapper",
    "Best Dressed Player",
    "Best Gold Rush Finder",
    "Funniest Player",
    "Most Helpful Player",
    "Best New Player",
    "Favorite Couple",
    "Server Bitch",
    "Biggest Asshole",
    "Biggest Rager",
    "Most Friendly Admin",
    "Most Effective Admin",
    "Most Abusive Admin",
    "Favorite Vehicle",
    "Best NG Video",
    "Best New Feature",
    "Best DJ",
]
def are_names_similar(name1, name2, threshold=0.8):
    """Return True when two nicknames look like the same name.

    Two names are considered similar when one is a substring of the other,
    or when their ``difflib.SequenceMatcher`` ratio is strictly greater than
    ``threshold``. The comparison is case-sensitive; callers are expected to
    normalise case beforehand.

    Args:
        name1: First name to compare.
        name2: Second name to compare.
        threshold: Similarity ratio (0.0-1.0) that must be exceeded.

    Returns:
        bool: True if the names are similar, False otherwise.
    """
    # The empty string is a substring of every string, so without this guard
    # an empty name would be reported as similar to every other name.
    if not name1 or not name2:
        return name1 == name2

    # If one name is fully contained in the other, consider them similar.
    if name1 in name2 or name2 in name1:
        return True

    return SequenceMatcher(None, name1, name2).ratio() > threshold
def extract_base_nickname(nickname):
    """Reduce a raw vote value to a normalised base nickname.

    Leading ``@`` characters are dropped, digits and underscores are removed,
    and the result is lower-cased. If the nickname contains a dot it is split
    at the first dot and each side is normalised separately: when both sides
    have text the longer one wins (the right side on a tie); when only one
    side has text that side is used, so a dangling dot never ends up in the
    result.

    Args:
        nickname: Raw nickname string as it appears in a vote.

    Returns:
        str: The base nickname; may be empty if nothing remains.
    """
    # Remove @ symbol(s) if present.
    nickname = nickname.lstrip('@')

    if '.' not in nickname:
        # Extract base nickname by removing digits and underscores.
        return re.sub(r'[\d_]', '', nickname).lower()

    left_part, right_part = nickname.split('.', 1)
    left_base = re.sub(r'[\d_]', '', left_part).lower()
    right_base = re.sub(r'[\d_]', '', right_part).lower()

    if left_base and right_base:
        # Both sides carry text: choose the longer one.
        return left_base if len(left_base) > len(right_base) else right_base

    # Only one side (or neither) carries text, e.g. "john." or "1.john".
    # Use that side so the separator dot is not kept in the base nickname.
    return left_base or right_base
def merge_similar_nicknames(votes, nickname_variations):
    """Merge similar nicknames and their recorded spelling variants.

    Names are visited in first-seen order. The first name of each group of
    similar names (as judged by ``are_names_similar``) becomes the canonical
    name for that group, so the result is the same on every run for the same
    input.

    Args:
        votes: List of base nicknames, one entry per vote.
        nickname_variations: Dict mapping a base nickname to the set of raw
            spellings seen for it. This dict is updated in place: entries of
            absorbed names are folded into their canonical name's entry.

    Returns:
        tuple: ``(merged_votes, nickname_variations)`` where ``merged_votes``
        is ``votes`` with every name replaced by its canonical name, and
        ``nickname_variations`` is the same (mutated) dict that was passed in.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order; iterating a
    # set instead would make the choice of canonical name vary between runs.
    unique_names = list(dict.fromkeys(votes))

    # Maps every name to the canonical name of its group.
    canonical_for = {}
    for index, name in enumerate(unique_names):
        if name in canonical_for:
            # Already absorbed into an earlier group.
            continue
        canonical_for[name] = name

        for other in unique_names[index + 1:]:
            if other in canonical_for or not are_names_similar(name, other):
                continue
            canonical_for[other] = name
            # Move the absorbed name's variants under the canonical name,
            # even when the canonical name had no entry of its own yet.
            if other in nickname_variations:
                nickname_variations.setdefault(name, set()).update(
                    nickname_variations.pop(other)
                )

    # Apply the mapping to every vote.
    merged_votes = [canonical_for.get(vote, vote) for vote in votes]
    return merged_votes, nickname_variations
def search_str(word, col, path="file.txt"):
    """Tally the votes for one category and write them to the worksheet.

    Every line of the votes file that contains ``"<word>:"`` contributes one
    vote: the text after the marker is trimmed of surrounding whitespace and
    dashes, reduced to a base nickname with ``extract_base_nickname``, and
    similar nicknames are then merged with ``merge_similar_nicknames``.

    The results are written to one row of the module-level ``worksheet``:
    the category name in the first column, the top three nicknames (with vote
    counts and spelling variants) in the next three columns, and every merged
    vote from the fifth column onwards. A summary line is printed to stdout.

    Args:
        word: Category name to look for in the votes file.
        col: Zero-based worksheet ROW index to write to. The name is
            historical; it is passed as the row argument of
            ``worksheet.write``.
        path: Path of the UTF-8 votes file to read.

    Raises:
        OSError: If the votes file cannot be opened.
    """
    # Only lines that really contain "<category>:" carry a vote; matching on
    # the bare category name would turn a whole unrelated line into a vote.
    marker = word + ":"

    votes = []
    nickname_variations = {}  # base nickname -> set of raw spellings seen
    with open(path, 'r', encoding="utf8") as file:
        for line in file:
            if marker not in line:
                continue
            # Trim whitespace first so that leading/trailing dashes are
            # actually exposed to strip('-'), then tidy up again.
            vote_value = line.split(marker)[-1].strip().strip('-').strip()
            # Exclude empty, "-" (now empty after stripping) and "/" votes.
            if not vote_value or vote_value == "/":
                continue
            base_nickname = extract_base_nickname(vote_value)
            if base_nickname:
                votes.append(base_nickname)
                # Record the raw spelling as a variation of the base nickname.
                nickname_variations.setdefault(base_nickname, set()).add(vote_value)

    # Merge similar nicknames.
    merged_votes, merged_variations = merge_similar_nicknames(votes, nickname_variations)

    # Get the 1st, 2nd and 3rd most common votes.
    most_common_votes = Counter(merged_votes).most_common(3)

    # Category name in column A, top three in columns B-D.
    worksheet.write(col, 0, word)
    for column, (vote, count) in enumerate(most_common_votes, start=1):
        # Include all variations of the nickname in parentheses.
        variations = sorted(merged_variations.get(vote, {vote}))
        variation_str = f"{vote} ({count} votes - variants: {', '.join(variations)})"
        worksheet.write(col, column, variation_str)

    # Write all votes starting from column E.
    worksheet.write_row(col, 4, merged_votes)

    # Print the number of votes processed.
    print(f"{word}: {len(merged_votes)} votes processed")
# Process every category on its own worksheet row (row index == position in
# the categories list), then close the workbook.
for col, category in enumerate(categories):
    search_str(category, col)
workbook.close()
# (GitHub page footer: sign in on GitHub to comment on this gist.)