Skip to content

Instantly share code, notes, and snippets.

@TheEnigmaBlade
Last active August 29, 2015 14:15
Show Gist options
  • Save TheEnigmaBlade/50e364c2d7dff2986949 to your computer and use it in GitHub Desktop.
Save TheEnigmaBlade/50e364c2d7dff2986949 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
######################
# User configuration #
######################
# Reddit account passed to r.login(); the script exits early if either is empty.
username = ""
password = ""
# Subreddit name handed to r.get_subreddit(); its flair list is what gets scanned.
subreddit = ""
# When True, subreddit.delete_flair() is called for users with flagged flair.
remove_flair = False
# When True, r.send_message() is called for users with flagged flair.
message_user = False
output_file = "bad_flair.txt" # Set to None to not output anything
# When True, flair that parses as a URL is checked against the domain lists below.
check_domains = False
link_exclusive = False # Flair can only be a link (unaffected by check_domains)
domain_whitelist = [] # Leave empty to allow all domains
domain_blacklist = [] # Overrides domain whitelist
# When True, flair text is checked against the keyword lists below.
check_keywords = False
keywords_as_words = False # Check with word boundaries
keyword_whitelist = [] # Leave empty to allow everything
keyword_blacklist = [] # Overrides keyword whitelist
# Body and subject passed to r.send_message() when message_user is True.
message_body = ""
message_subject = ""
# When True, the subreddit is passed as from_sr to r.send_message().
message_as_sub = False
##########################################
# DO NOT TOUCH ANYTHING AFTER THIS POINT #
##########################################
import praw
from praw.errors import *
from urllib.parse import urlparse
import string
import re
from requests.exceptions import HTTPError
from time import sleep
user_agent = "Flair format enforcer, by /u/TheEnigmaBlade"

# Start reddit session.
# Every failure path below exits with status 1 so shells and schedulers can
# tell a failed run from a successful one. SystemExit is raised directly
# because the bare `exit` helper is injected by the `site` module and is not
# guaranteed to be available.
if not username or not password:
    print("Username and password required")
    raise SystemExit(1)
if not subreddit:
    print("Subreddit required")
    raise SystemExit(1)

try:
    print("Connecting to reddit...", end=" ")
    r = praw.Reddit(user_agent=user_agent)
    print("logging in...", end=" ")
    r.login(username, password)
    print("done!")
except InvalidUserPass:
    print("Failed to connect to reddit: invalid password or account")
    raise SystemExit(1)
except Exception as e:
    # Top-level boundary: report whatever went wrong and stop.
    print("Failed to connect to reddit, {}: {}".format(e.__class__.__name__, e))
    raise SystemExit(1)

# From here on `subreddit` is the object returned by r.get_subreddit(),
# no longer the configured name string.
subreddit = r.get_subreddit(subreddit)
# Process flair
def check_flair(flair_text):
    """Return True when *flair_text* breaks the configured flair rules.

    Missing or empty flair is never flagged. The text is lower-cased and
    then checked in two stages, each controlled by module-level settings:

    * Domain stage (``check_domains`` or ``link_exclusive``): the text is
      parsed as a URL, first as given and then with ``http://`` prepended.
      With ``check_domains`` the resulting host is flagged if it is on
      ``domain_blacklist``, or if ``domain_whitelist`` is non-empty and the
      host is not on it. With ``link_exclusive`` the text is flagged when
      no host can be extracted or parsing raises ``ValueError``.
    * Keyword stage (``check_keywords``): the text is flagged if it contains
      a ``keyword_blacklist`` entry, or if ``keyword_whitelist`` is non-empty
      and none of its entries occur. ``keywords_as_words`` selects whole-word
      matching.
    """
    if not flair_text:
        return False
    flair_text = flair_text.lower()

    if check_domains or link_exclusive:
        def domain_rejected(netloc):
            # The blacklist always wins; a non-empty whitelist rejects
            # every host that is not on it.
            if domain_in(netloc, domain_blacklist):
                return True
            return bool(domain_whitelist) and not domain_in(netloc, domain_whitelist)

        try:
            # Attempt one: well-formatted URL (ex. http://reddit.com/r/anime)
            url = urlparse(flair_text)
            if not url.netloc:
                # Attempt two: protocol-less URL (ex. reddit.com/r/anime)
                url = urlparse("http://" + flair_text)

            if url.netloc:
                if check_domains and domain_rejected(url.netloc):
                    return True
            elif link_exclusive:
                # Neither attempt produced a host, and a link is required.
                return True
        # URL parse error (not always thrown)
        except ValueError:
            if link_exclusive:
                return True

    if check_keywords:
        # Pass the configured matching mode through; without it the
        # keywords_as_words setting would have no effect.
        if keyword_in(flair_text, keyword_blacklist, as_words=keywords_as_words):
            return True
        if keyword_whitelist and not keyword_in(flair_text, keyword_whitelist, as_words=keywords_as_words):
            return True

    return False
# Matches one or more consecutive ASCII punctuation characters.
_keyword_re = re.compile("[{}]+".format(re.escape(string.punctuation)))


def keyword_in(text, keyword_list, as_words=False):
    """Return True if any keyword from *keyword_list* occurs in *text*.

    Args:
        text: The string to search. Matching is case-sensitive.
        keyword_list: Iterable of keyword strings. Empty keywords are ignored.
        as_words: When False, a keyword matches as a plain substring. When
            True, a keyword matches only as whole word(s): punctuation in
            both the text and the keyword is treated as a word separator
            and runs of whitespace are collapsed before comparing.

    Returns:
        True on the first matching keyword, otherwise False.
    """
    if not as_words:
        # Plain substring search; the keyword must not be space-padded here
        # or it could never match at the start or end of the text.
        return any(keyword in text for keyword in keyword_list if keyword)

    def normalize(value):
        # Punctuation becomes a separator, whitespace runs collapse to one
        # space, and the result is padded so " word " matches at the edges.
        return " " + " ".join(_keyword_re.sub(" ", value).split()) + " "

    haystack = normalize(text)
    for keyword in keyword_list:
        needle = normalize(keyword)
        # A keyword made only of punctuation/whitespace has no words to match.
        if needle.strip() and needle in haystack:
            return True
    return False
def domain_in(text, domain_list):
    """Return True if host *text* is one of *domain_list* or a subdomain of one.

    Comparison is case-insensitive and respects label boundaries, so
    "www.reddit.com" matches "reddit.com" but "notreddit.com" does not.
    Leading/trailing dots on list entries are ignored (".com" behaves like
    "com"), and empty entries never match. *text* is compared as given: a
    port or userinfo part, if present, is not stripped.

    Args:
        text: Host name to test (e.g. the netloc of a parsed URL).
        domain_list: Iterable of domain strings.
    """
    host = text.lower().rstrip(".")
    for domain in domain_list:
        domain = domain.lower().strip(".")
        if not domain:
            continue
        # A raw suffix test would let "notreddit.com" pass for "reddit.com";
        # require an exact match or a match at a dot boundary.
        if host == domain or host.endswith("." + domain):
            return True
    return False
# Scan the subreddit's flair list page by page and collect bad flair.
num_processed = 0
num_removed = 0
removed_users = []  # (user, flair_text) tuples for every flagged flair

print("Finding bad flair...")
chunk_size = 1000
after = None  # paging cursor passed to get_flair_list; None requests the first page
while True:
    try:
        print("Processing {} to {}".format(num_processed + 1, num_processed + chunk_size))
        flairs = subreddit.get_flair_list(limit=chunk_size, params={"after": after, "show": "all"})

        # Stage this page's results locally. If anything below fails, the
        # same page is requested again, and committing early would then
        # record its users twice.
        chunk_bad = []
        num_processed_now = 0
        user = None
        for flair in flairs:
            user = flair["user"]
            flair_text = flair["flair_text"]
            if check_flair(flair_text):
                print("Bad flair on {}: {}".format(user, flair_text))
                chunk_bad.append((user, flair_text))
            num_processed_now += 1

        # A short page means the list is exhausted. Otherwise resolve the
        # next cursor before committing, since that lookup can fail too.
        last_page = num_processed_now < chunk_size
        next_after = None
        if not last_page:
            after_user = r.get_redditor(user)
            next_after = "t2_" + after_user.id

        removed_users.extend(chunk_bad)
        num_removed += len(chunk_bad)
        num_processed += num_processed_now
        if last_page:
            break
        after = next_after
    # Don't have permission to access
    except (ModeratorRequired, ModeratorOrScopeRequired):
        print("Error: Mod authorization required")
        raise SystemExit(1)
    except HTTPError as e:
        # e.response may be missing, so read the status code defensively.
        code = getattr(e.response, "status_code", None)
        if code == 403:
            print("Error: Mod authorization required (HTTP 403)")
            raise SystemExit(1)
        else:
            print("Error: Failed to load page, {} ({}) returned by server".format(code, e.response))
            sleep(5)
    # Couldn't connect to reddit
    # NOTE(review): unless the praw.errors star-import provides its own
    # ConnectionError, this names the builtin one -- confirm it covers the
    # connection errors actually raised by the HTTP layer.
    except ConnectionError as e:
        print("Error: Connection failed, {}".format(e))
        sleep(5)
# Process results
# Write one "user | flair text" line per flagged flair, if an output file is set.
if output_file is not None:
    with open(output_file, "w", encoding='utf-8') as out:
        for (user, flair_text) in removed_users:
            print(user, flair_text, sep=" | ", file=out)

# Names of users for whom removing flair or sending the message failed.
not_processed = []
if remove_flair or message_user:
    print("Processing bad users...")
    # removed_users holds (user, flair_text) tuples; only the user name is
    # passed on to the reddit calls.
    for (user, flair_text) in removed_users:
        try:
            if remove_flair:
                subreddit.delete_flair(user)
            if message_user:
                r.send_message(user, message_subject, message_body, from_sr=subreddit if message_as_sub else None)
        except (ConnectionError, HTTPError) as e:
            # Record the failure and carry on with the remaining users.
            print("Error: Failed to do a thing, {}: {}".format(e.__class__.__name__, e))
            not_processed.append(user)
# Final report: totals first, then any users that could not be handled.
print("\nDone!\n")
print("Num flair: {}".format(num_processed))
print("Num bad flair: {}".format(num_removed))
print()

if not_processed:
    print("The following users weren't processed:")
    # One entry per line, same as printing each in turn.
    print(*not_processed, sep="\n")
    print()

# Clean up
r.clear_authentication()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment