Last active
August 29, 2015 14:15
-
-
Save TheEnigmaBlade/50e364c2d7dff2986949 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3

######################
# User configuration #
######################

# Account used to log in, and the subreddit (name without "/r/") to scan.
username = ""
password = ""
subreddit = ""

# What to do with every user whose flair fails the checks below.
remove_flair = False
message_user = False
output_file = "bad_flair.txt"  # Set to None to not output anything

# NOTE: flair text is lower-cased before it is checked, so write the domains
# and keywords below in lower case.

check_domains = False
link_exclusive = False  # Flair may only be a link (unaffected by check_domains)
domain_whitelist = []  # Leave empty to allow all domains
domain_blacklist = []  # Overrides domain whitelist

check_keywords = False
keywords_as_words = False  # Check with word boundaries
keyword_whitelist = []  # Leave empty to allow everything
keyword_blacklist = []  # Overrides keyword whitelist

# Message sent to each offending user when message_user is True.
message_body = ""
message_subject = ""
message_as_sub = False  # Send the message from the subreddit instead of the account

##########################################
# DO NOT TOUCH ANYTHING AFTER THIS POINT #
##########################################
# The wildcard import stays first so the explicit imports below win any name clash.
import praw
from praw.errors import *
from urllib.parse import urlparse
import string
import re
from requests.exceptions import HTTPError
# requests' ConnectionError is not a subclass of the builtin ConnectionError,
# so it is imported under its own name to be caught explicitly.
from requests.exceptions import ConnectionError as RequestsConnectionError
from time import sleep
user_agent = "Flair format enforcer, by /u/TheEnigmaBlade"

# Start reddit session.
# Every failure below exits with status 1 so that shells and schedulers can
# tell an aborted run from a successful one.
if len(username) == 0 or len(password) == 0:
    print("Username and password required")
    raise SystemExit(1)

try:
    print("Connecting to reddit...", end=" ")
    r = praw.Reddit(user_agent=user_agent)
    print("logging in...", end=" ")
    r.login(username, password)
    print("done!")
except InvalidUserPass:
    print("Failed to connect to reddit: invalid password or account")
    raise SystemExit(1)
except Exception as e:
    # Top-level boundary: report anything unexpected and stop.
    print("Failed to connect to reddit, {}: {}".format(e.__class__.__name__, e))
    raise SystemExit(1)

# From here on `subreddit` is the PRAW subreddit object, not the configured name.
subreddit = r.get_subreddit(subreddit)
# Process flair
def check_flair(flair_text):
    """Return True when *flair_text* violates the configured flair rules.

    Missing or empty flair is never reported. The text is lower-cased, then:

    * if ``check_domains`` or ``link_exclusive`` is set, it is parsed as a URL
      (first as written, then with ``http://`` prepended). With
      ``check_domains`` the host is rejected when it is blacklisted, or when a
      whitelist exists and the host is not on it. With ``link_exclusive`` the
      flair is rejected when no host can be extracted at all.
    * if ``check_keywords`` is set, the flair is rejected when it contains a
      blacklisted keyword, or when a whitelist exists and none of its keywords
      occur. ``keywords_as_words`` selects whole-word matching.
    """
    if not flair_text:
        return False

    flair_text = flair_text.lower()

    if check_domains or link_exclusive:
        def domain_rejected(netloc):
            # The blacklist always wins; the whitelist only applies when non-empty.
            if domain_in(netloc, domain_blacklist):
                return True
            return bool(domain_whitelist) and not domain_in(netloc, domain_whitelist)

        try:
            # Attempt one: well-formatted URL (ex. http://reddit.com/r/anime)
            url = urlparse(flair_text)
            if not url.netloc:
                # Attempt two: protocol-less URL (ex. reddit.com/r/anime)
                url = urlparse("http://" + flair_text)

            if url.netloc:
                if check_domains and domain_rejected(url.netloc):
                    return True
            # Otherwise stop if we NEED a link
            elif link_exclusive:
                return True
        # URL parse error (not always thrown)
        except ValueError:
            if link_exclusive:
                return True

    if check_keywords:
        # Pass the configured matching mode through; without it the
        # keywords_as_words setting would have no effect.
        if keyword_in(flair_text, keyword_blacklist, keywords_as_words):
            return True
        if keyword_whitelist and not keyword_in(flair_text, keyword_whitelist, keywords_as_words):
            return True

    return False
# Matches one or more consecutive ASCII punctuation characters.
_keyword_re = re.compile("[{}]+".format(re.escape(string.punctuation)))


def keyword_in(text, keyword_list, as_words=False):
    """Return True when any keyword from *keyword_list* occurs in *text*.

    With ``as_words=False`` a keyword matches anywhere in the text (plain
    substring test). With ``as_words=True`` punctuation in both the text and
    the keyword is replaced by spaces and the keyword must be delimited by
    spaces or by the start/end of the text.

    Matching is case-sensitive; empty keywords are ignored.
    """
    if as_words:
        # Pad with spaces so a keyword at either end of the text still has delimiters.
        haystack = " " + _keyword_re.sub(" ", text) + " "
    else:
        haystack = text

    for keyword in keyword_list:
        if as_words:
            # Normalise the keyword like the text, otherwise a keyword
            # containing punctuation could never match.
            needle = _keyword_re.sub(" ", keyword).strip()
            if needle and " " + needle + " " in haystack:
                return True
        elif keyword and keyword in haystack:
            return True
    return False
def domain_in(text, domain_list):
    """Return True when host *text* is one of *domain_list* or a subdomain of one.

    A listed domain matches the host itself and any subdomain, but only on a
    label boundary, so ``reddit.com`` matches ``www.reddit.com`` but not
    ``notreddit.com``. A leading dot on a listed domain is ignored, the
    comparison is case-insensitive, and empty entries are skipped.
    """
    host = text.lower()
    for domain in domain_list:
        bare = domain.lstrip(".").lower()
        if not bare:
            continue
        if host == bare or host.endswith("." + bare):
            return True
    return False
# Walk the subreddit's flair list in chunks and collect every (user, flair)
# pair that fails check_flair().
num_processed = 0
num_removed = 0
removed_users = []

print("Finding bad flair...")
chunk_size = 1000
after = None  # Pagination cursor: "t2_<id>" of the last user of the previous chunk

while True:
    try:
        print("Processing {} to {}".format(num_processed + 1, num_processed + chunk_size))
        flairs = subreddit.get_flair_list(limit=chunk_size, params={"after": after, "show": "all"})

        # Collect this chunk's results locally and only commit them once the
        # whole chunk, including the next cursor, has been obtained. A chunk
        # that fails part-way is retried from the same cursor, so committing
        # early would duplicate entries and inflate the counters.
        chunk_bad = []
        num_processed_now = 0
        last_user = None
        for flair in flairs:
            last_user = flair["user"]
            flair_text = flair["flair_text"]
            if check_flair(flair_text):
                chunk_bad.append((last_user, flair_text))
            num_processed_now += 1

        # A full chunk means there may be more; a short one is the last page.
        next_after = None
        if num_processed_now >= chunk_size:
            after_user = r.get_redditor(last_user)
            next_after = "t2_" + after_user.id

        for user, flair_text in chunk_bad:
            print("Bad flair on {}: {}".format(user, flair_text))
        removed_users.extend(chunk_bad)
        num_removed += len(chunk_bad)
        num_processed += num_processed_now

        if next_after is None:
            break
        after = next_after

    # Don't have permission to access
    except (ModeratorRequired, ModeratorOrScopeRequired) as e:
        print("Error: Mod authorization required")
        raise SystemExit(1)

    except HTTPError as e:
        # The exception may carry no response object at all.
        code = e.response.status_code if e.response is not None else None
        if code == 403:
            print("Error: Mod authorization required (HTTP 403)")
            raise SystemExit(1)
        else:
            print("Error: Failed to load page, {} ({}) returned by server".format(code, e.response))
            sleep(5)  # Back off, then retry the same chunk

    # Couldn't connect to reddit. requests' ConnectionError does not derive
    # from the builtin one, so both are listed.
    except (ConnectionError, RequestsConnectionError) as e:
        print("Error: Connection failed, {}".format(e))
        sleep(5)  # Back off, then retry the same chunk
# Process results: write one "user | flair" line per offender to the report
# file, unless reporting is disabled (output_file set to None).
if output_file is not None:
    with open(output_file, "w", encoding='utf-8') as report:
        for name, text in removed_users:
            print(name, text, sep=" | ", file=report)
# Apply the configured actions (remove flair and/or send a message) to every
# user found with bad flair. Users whose action failed with a network or HTTP
# error are recorded in not_processed.
not_processed = []

if remove_flair or message_user:
    print("Processing bad users...")
    sender = subreddit if message_as_sub else None

    # removed_users holds (user, flair_text) pairs; only the user is needed here.
    for user, _bad_flair in removed_users:
        try:
            if remove_flair:
                subreddit.delete_flair(user)
            if message_user:
                r.send_message(user, message_subject, message_body, from_sr=sender)
        # requests' ConnectionError does not derive from the builtin one, so
        # both are listed.
        except (ConnectionError, RequestsConnectionError, HTTPError) as e:
            print("Error: Failed to do a thing, {}: {}".format(e.__class__.__name__, e))
            not_processed.append(user)
# Final summary: totals first, then any users whose action failed.
print("\nDone!\n")
print("Num flair: {}".format(num_processed))
print("Num bad flair: {}".format(num_removed))
print()

if not_processed:
    print("The following users weren't processed:")
    # One entry per line, same output as printing each entry separately.
    print(*not_processed, sep="\n")
    print()

# Clean up
r.clear_authentication()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment