# Created January 8, 2025 15:38
# Source: GitHub gist jinnosux/827034f408588119a9154581151596d3
# Note: this file contains bidirectional Unicode text that may be interpreted
# or compiled differently than what appears below. To review, open the file in
# an editor that reveals hidden Unicode characters.
import re
from collections import Counter
from difflib import SequenceMatcher

import xlsxwriter
# Output workbook and its single worksheet. Both are module-level globals:
# search_str() writes into `worksheet`, and the script closes `workbook` at
# the very end.
workbook = xlsxwriter.Workbook("example.xlsx")
worksheet = workbook.add_worksheet()
# Award categories to tally. Each entry is searched for in the input file as
# "<category>:" and gets its own row in the output worksheet, in this order.
categories = [
    'Best Cop',
    'Best Paramedic',
    'Best Hitman',
    'Best Gunrunner',
    'Best Thief',
    'Best Driver',
    'Best Pilot',
    'Best Disappearer',
    'Best Kidnapper',
    'Best Dressed Player',
    'Best Gold Rush Finder',
    'Funniest Player',
    'Most Helpful Player',
    'Best New Player',
    'Favorite Couple',
    'Server Bitch',
    'Biggest Asshole',
    'Biggest Rager',
    'Most Friendly Admin',
    'Most Effective Admin',
    'Most Abusive Admin',
    'Favorite Vehicle',
    'Best NG Video',
    'Best New Feature',
    'Best DJ',
]
def are_names_similar(name1, name2, threshold=0.8):
    """
    Decide whether two names should be treated as the same nickname.

    Two names are similar when they are equal, when one is fully contained
    in the other, or when their SequenceMatcher similarity ratio is strictly
    above ``threshold``.

    Args:
        name1: First name to compare.
        name2: Second name to compare.
        threshold: Minimum (exclusive) similarity ratio, between 0 and 1.

    Returns:
        True if the names are considered similar, otherwise False.
    """
    if name1 == name2:
        return True
    # An empty string is a substring of every string, so without this guard
    # an empty name would be reported as similar to any other name.
    if not name1 or not name2:
        return False
    # If one name is fully contained in the other, consider them similar
    if name1 in name2 or name2 in name1:
        return True
    return SequenceMatcher(None, name1, name2).ratio() > threshold
def extract_base_nickname(nickname):
    """
    Reduce a raw nickname to a normalized "base" form used for grouping votes.

    Leading '@' characters are dropped, digits and underscores are removed,
    and the result is lower-cased. If the nickname contains a dot it is split
    at the first dot and the longer cleaned side is used (the right side wins
    a tie). When only one side has anything left after cleaning, that side is
    used on its own so the dot does not leak into the result
    (e.g. "john.123" -> "john").

    Args:
        nickname: Raw nickname text as written in a vote.

    Returns:
        The normalized base nickname; may be an empty string if nothing
        remains after cleaning.
    """
    def _clean(text):
        # Remove digits and underscores, then normalize case.
        return re.sub(r'[\d_]', '', text).lower()

    # Remove @ symbol if present
    nickname = nickname.lstrip('@')

    if '.' not in nickname:
        return _clean(nickname)

    left_part, right_part = nickname.split('.', 1)
    left_base = _clean(left_part)
    right_base = _clean(right_part)

    # If both left and right base nicknames are present, choose the longer one
    if left_base and right_base:
        return left_base if len(left_base) > len(right_base) else right_base

    # Only one side (or neither) survived cleaning: use it, and trim any
    # dots still attached to it from additional separators.
    return (left_base or right_base).strip('.')
def merge_similar_nicknames(votes, nickname_variations):
    """
    Merge similar nicknames and their vote counts.

    Names are clustered greedily in order of first appearance in ``votes``:
    the first name seen becomes the canonical name of its cluster, and every
    later, not-yet-assigned name that are_names_similar() accepts is folded
    into it. An absorbed name never becomes a canonical name itself.

    Args:
        votes: List of base nicknames, one entry per vote.
        nickname_variations: Dict mapping base nickname -> set of raw
            spellings. It is updated IN PLACE: the spellings of an absorbed
            name are added to its canonical name's set and the absorbed key
            is removed.

    Returns:
        A tuple ``(merged_votes, nickname_variations)`` where ``merged_votes``
        has the same length and order as ``votes`` with each name replaced by
        its canonical name, and ``nickname_variations`` is the same (mutated)
        dict that was passed in.
    """
    # dict.fromkeys() de-duplicates while keeping first-appearance order.
    # Iterating a set of strings instead could pick a different canonical
    # name from one run to the next.
    unique_names = list(dict.fromkeys(votes))

    canonical_of = {}
    for index, name in enumerate(unique_names):
        if name in canonical_of:
            # Already absorbed by an earlier name; it must not start a
            # cluster of its own.
            continue
        canonical_of[name] = name
        for other in unique_names[index + 1:]:
            if other in canonical_of or not are_names_similar(name, other):
                continue
            canonical_of[other] = name
            # Merge nickname variations
            if name in nickname_variations and other in nickname_variations:
                nickname_variations[name].update(nickname_variations.pop(other))

    # Apply the mapping to votes
    merged_votes = [canonical_of[vote] for vote in votes]
    return merged_votes, nickname_variations
def search_str(word, col):
    """
    Tally the votes for one category and write the results to the worksheet.

    Reads "file.txt" (UTF-8) and, for every line containing "<word>:", takes
    the text after the last "<word>:" as the vote. Empty, "-" and "/" votes
    are ignored. Each vote is reduced with extract_base_nickname(), similar
    nicknames are merged with merge_similar_nicknames(), and the result is
    written to the module-level ``worksheet``:

        column A      the category name
        columns B-D   the three most common nicknames, with vote counts and
                      the raw spellings that were merged into each
        column E on   every merged vote, one per cell

    A one-line summary is also printed to stdout.

    Args:
        word: Category label to look for (without the trailing colon).
        col: Zero-based worksheet ROW to write into. The name is historical;
            it is passed as the first (row) argument of worksheet.write().
    """
    marker = word + ":"
    votes = []
    nickname_variations = {}  # Dictionary to store variations of each base nickname

    with open("file.txt", 'r', encoding="utf8") as file:
        for line in file:
            # Require the full "<word>:" marker. Matching on the bare word
            # would treat a line that merely mentions the category as a vote
            # consisting of the entire line.
            if marker not in line:
                continue
            vote_value = line.rpartition(marker)[2].strip('-').strip()
            # Exclude empty, "-", or "/" votes
            if vote_value in ("", "-", "/"):
                continue
            base_nickname = extract_base_nickname(vote_value)
            if not base_nickname:
                continue
            votes.append(base_nickname)
            # Add the current vote_value as a variation for the base nickname
            nickname_variations.setdefault(base_nickname, set()).add(vote_value)

    # Merge similar nicknames
    merged_votes, merged_variations = merge_similar_nicknames(votes, nickname_variations)

    # Count occurrences of each merged vote and keep the top three
    most_common_votes = Counter(merged_votes).most_common(3)

    # Write the category name, then 1st, 2nd, and 3rd place
    worksheet.write(col, 0, word)
    for i, (vote, count) in enumerate(most_common_votes):
        # Include all variations of the nickname in parentheses
        variations = sorted(merged_variations.get(vote, {vote}))
        variation_str = f"{vote} ({count} votes - variants: {', '.join(variations)})"
        worksheet.write(col, i + 1, variation_str)

    # Write all votes starting from column E
    worksheet.write_row(col, 4, merged_votes)

    # Print the number of votes processed
    print(f"{word}: {len(merged_votes)} votes processed")
# Process every category in order; each one is written to its own worksheet
# row (search_str's second argument is used as the row index).
for row, category in enumerate(categories):
    search_str(category, row)

# Closing the workbook is what saves example.xlsx to disk.
workbook.close()
# (GitHub page footer captured with the gist, not part of the script:)
# Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment.