TheEnigmaBlade · August 29, 2015 14:15
diff --git a/FunkyFlairFinder.py b/FunkyFlairFinder.py
 #!/usr/bin/env python3

 ######################
 # User configuration #
 ######################

 # Reddit account used to log in; the script refuses to start when either is empty.
 username = ""
 password = ""

 # Name of the subreddit whose flair list is scanned.
 subreddit = ""
 # When True, the flair of every offending user is deleted.
 remove_flair = False
 # When True, every offending user is sent a message (see message_* below).
 message_user = False
 output_file = "bad_flair.txt"	# Set to None to not output anything

 check_domains = False
 link_exclusive = False		# Flair can only be a link (unaffected by check_domains)
 domain_whitelist = []		# Leave empty to allow all domains
 domain_blacklist = []		# Overrides domain whitelist

 check_keywords = False
 keywords_as_words = False	# Check with word boundaries
 keyword_whitelist = []		# Leave empty to allow everything
 keyword_blacklist = []		# Overrides keyword whitelist

 # Text of the message sent when message_user is True.
 message_body = ""
 message_subject = ""
 # When True, the message is sent with the subreddit as sender (from_sr).
 message_as_sub = False

 ##########################################
 # DO NOT TOUCH ANYTHING AFTER THIS POINT #
 ##########################################

 import praw
 from praw.errors import *
 from urllib.parse import urlparse
 import string
 import re
 from requests.exceptions import HTTPError
 from time import sleep

 user_agent = "Flair format enforcer, by /u/TheEnigmaBlade"

 def _abort(message):
 	"""Print *message* and end the script (exit status 0, as before)."""
 	print(message)
 	exit(0)

 # Start reddit session: credentials are mandatory, and any login problem
 # is reported before the script stops.
 if not len(username) or not len(password):
 	_abort("Username and password required")

 try:
 	print("Connecting to reddit...", end=" ")
 	r = praw.Reddit(user_agent=user_agent)
 	print("logging in...", end=" ")
 	r.login(username, password)
 	print("done!")
 except InvalidUserPass:
 	_abort("Failed to connect to reddit: invalid password or account")
 except Exception as e:
 	failure = "Failed to connect to reddit, {}: {}".format(e.__class__.__name__, e)
 	_abort(failure)

 # From here on `subreddit` is the subreddit object, not the configured name.
 subreddit = r.get_subreddit(subreddit)

 # Process flair
 def check_flair(flair_text):
 	"""Decide whether a user's flair text breaks the configured rules.

 	Reads the module-level settings ``check_domains``, ``link_exclusive``,
 	``domain_whitelist``, ``domain_blacklist``, ``check_keywords``,
 	``keywords_as_words``, ``keyword_whitelist`` and ``keyword_blacklist``.

 	Args:
 		flair_text: The flair text, or None when the user has no flair.

 	Returns:
 		True when the flair is bad, False otherwise. Missing or empty
 		flair is never bad.
 	"""
 	if not flair_text:
 		return False
 	flair_text = flair_text.lower()

 	if check_domains or link_exclusive:
 		def is_bad_domain(netloc):
 			# The blacklist wins; a non-empty whitelist rejects everything else.
 			if domain_in(netloc, domain_blacklist):
 				return True
 			return len(domain_whitelist) > 0 and not domain_in(netloc, domain_whitelist)

 		def looks_like_host(netloc):
 			# Heuristic for scheme-less text: a host has a dot and no whitespace.
 			return "." in netloc and not any(ch.isspace() for ch in netloc)

 		try:
 			# Attempt one: well-formatted URL (ex. http://reddit.com/r/anime)
 			netloc = urlparse(flair_text).netloc
 			is_link = len(netloc) > 0
 			if not is_link:
 				# Attempt two: protocol-less URL (ex. reddit.com/r/anime).
 				# Prefixing "http://" turns almost any text into a netloc, so
 				# the result only counts as a link when it resembles a host.
 				netloc = urlparse("http://" + flair_text).netloc
 				is_link = looks_like_host(netloc)
 		# URL parse error (not always thrown)
 		except ValueError:
 			if link_exclusive:
 				return True
 		else:
 			# Stop if we NEED a link and did not get one
 			if link_exclusive and not is_link:
 				return True
 			if check_domains and len(netloc) > 0 and is_bad_domain(netloc):
 				return True

 	if check_keywords:
 		# Honour the keywords_as_words setting, which used to be ignored.
 		if keyword_in(flair_text, keyword_blacklist, as_words=keywords_as_words):
 			return True
 		if len(keyword_whitelist) > 0 and not keyword_in(flair_text, keyword_whitelist, as_words=keywords_as_words):
 			return True

 	return False

 # Runs of punctuation and/or whitespace; each run is collapsed to one space
 # so that keywords can be matched on word boundaries.
 _keyword_re = re.compile("[\\s{}]+".format(re.escape(string.punctuation)))
 def keyword_in(text, keyword_list, as_words=False):
 	"""Report whether any keyword occurs in *text*.

 	Args:
 		text: The text to search.
 		keyword_list: Keywords (or multi-word phrases) to look for. Empty
 			entries are ignored.
 		as_words: When True a keyword only matches as a whole word or
 			phrase, with punctuation and whitespace acting as separators.
 			When False a plain substring match is used.

 	Returns:
 		True if at least one keyword matches, otherwise False.
 	"""
 	if not as_words:
 		# Plain substring search; the keyword must not be wrapped in spaces
 		# here, otherwise a flair consisting only of the keyword never matches.
 		return any(keyword and keyword in text for keyword in keyword_list)

 	# Pad both ends so the first and last words have a boundary too.
 	padded = " " + _keyword_re.sub(" ", text) + " "
 	return any(keyword and " " + keyword + " " in padded for keyword in keyword_list)

 def domain_in(text, domain_list):
 	"""Report whether host name *text* belongs to one of the listed domains.

 	A listed domain matches the host itself and any of its subdomains
 	("reddit.com" matches "reddit.com" and "www.reddit.com", but not
 	"notreddit.com"). An entry starting with a dot (".co.uk") is treated
 	as a plain suffix. Matching ignores case; empty entries are skipped.

 	Args:
 		text: The host name (URL netloc) to test.
 		domain_list: Domains to test against.

 	Returns:
 		True if any entry matches, otherwise False.
 	"""
 	host = text.lower().rstrip(".")
 	for domain in domain_list:
 		domain = domain.lower()
 		if not domain:
 			# "" is a suffix of every string and would match every host.
 			continue
 		# Require a label boundary so "notreddit.com" is not taken for "reddit.com".
 		suffix = domain if domain.startswith(".") else "." + domain
 		if host == domain or host.endswith(suffix):
 			return True
 	return False

 num_processed = 0
 num_removed = 0
 removed_users = []	# (user, flair_text) pairs with bad flair

 print("Finding bad flair...")
 chunk_size = 1000
 after = None
 # Walk the flair list chunk by chunk. A chunk's results are buffered and only
 # committed once the whole chunk (and the lookup of the next cursor) has
 # succeeded, so a retry after an error cannot record the same users twice.
 while True:
 	try:
 		# Inclusive range of the entries requested in this chunk
 		print("Processing {} to {}".format(num_processed+1, num_processed+chunk_size))
 		flairs = subreddit.get_flair_list(limit=chunk_size, params={"after": after, "show": "all"})

 		chunk_bad = []
 		chunk_count = 0
 		last_user = None
 		for flair in flairs:
 			last_user = flair["user"]
 			text = flair["flair_text"]
 			if check_flair(text):
 				chunk_bad.append((last_user, text))
 			chunk_count += 1

 		# A short chunk means the end of the list was reached.
 		is_last_chunk = chunk_count < chunk_size
 		next_after = after
 		if not is_last_chunk:
 			# The next chunk starts after the last user seen ("t2_" + user id).
 			next_after = "t2_"+r.get_redditor(last_user).id

 	# Don't have permission to access
 	except (ModeratorRequired, ModeratorOrScopeRequired) as e:
 		print("Error: Mod authorization required")
 		exit(0)
 	except HTTPError as e:
 		code = e.response.status_code
 		if code == 403:
 			print("Error: Mod authorization required (HTTP 403)")
 			exit(0)
 		else:
 			print("Error: Failed to load page, {} ({}) returned by server".format(code, e.response))
 			sleep(5)
 	# Couldn't connect to reddit
 	# NOTE(review): this is the builtin ConnectionError unless the star import
 	# rebinds the name; requests' ConnectionError is presumably not a subclass
 	# of the builtin - confirm this handler fires for network failures.
 	except ConnectionError as e:
 		print("Error: Connection failed, {}".format(e))
 		sleep(5)
 	else:
 		# Chunk fully processed: commit its results.
 		for (user, flair_text) in chunk_bad:
 			print("Bad flair on {}: {}".format(user, flair_text))
 		removed_users.extend(chunk_bad)
 		num_removed += len(chunk_bad)
 		num_processed += chunk_count
 		after = next_after
 		if is_last_chunk:
 			break

 # Process results: one "user | flair" line per offender, unless disabled
 if output_file is not None:
 	with open(output_file, "w", encoding="utf-8") as report:
 		report.writelines("{} | {}\n".format(*entry) for entry in removed_users)

 not_processed = []	# names of users whose removal/message failed
 if remove_flair or message_user:
 	print("Processing bad users...")
 	# removed_users holds (user, flair_text) pairs; only the name is needed here.
 	for (user, _bad_text) in removed_users:
 		try:
 			if remove_flair:
 				subreddit.delete_flair(user)
 			if message_user:
 				r.send_message(user, message_subject, message_body, from_sr=subreddit if message_as_sub else None)
 			# No `break` here: every offending user must be handled, not just the first.
 		except (ConnectionError, HTTPError) as e:
 			print("Error: Failed to do a thing, {}: {}".format(e.__class__.__name__, e))
 			not_processed.append(user)
 	
 # Final report: totals first, then anyone who could not be processed
 print("\nDone!\n")
 for summary_line in (
 	"Num flair:     {}".format(num_processed),
 	"Num bad flair: {}".format(num_removed),
 ):
 	print(summary_line)
 print()

 if not_processed:
 	print("The following users weren't processed:")
 	print(*not_processed, sep="\n")
 	print()

 # Clean up
 r.clear_authentication()
	#!/usr/bin/env python3

	######################
	# User configuration #
	######################

	# Reddit account used to log in; the script refuses to start when either is empty.
	username = ""
	password = ""

	# Name of the subreddit whose flair list is scanned.
	subreddit = ""
	# When True, the flair of every offending user is deleted.
	remove_flair = False
	# When True, every offending user is sent a message (see message_* below).
	message_user = False
	output_file = "bad_flair.txt" # Set to None to not output anything

	check_domains = False
	link_exclusive = False # Flair can only be a link (unaffected by check_domains)
	domain_whitelist = [] # Leave empty to allow all domains
	domain_blacklist = [] # Overrides domain whitelist

	check_keywords = False
	keywords_as_words = False # Check with word boundaries
	keyword_whitelist = [] # Leave empty to allow everything
	keyword_blacklist = [] # Overrides keyword whitelist

	# Text of the message sent when message_user is True.
	message_body = ""
	message_subject = ""
	# When True, the message is sent with the subreddit as sender (from_sr).
	message_as_sub = False

	##########################################
	# DO NOT TOUCH ANYTHING AFTER THIS POINT #
	##########################################

	import praw
	from praw.errors import *
	from urllib.parse import urlparse
	import string
	import re
	from requests.exceptions import HTTPError
	from time import sleep

	user_agent = "Flair format enforcer, by /u/TheEnigmaBlade"

	def _abort(message):
		"""Print *message* and end the script (exit status 0, as before)."""
		print(message)
		exit(0)

	# Start reddit session: credentials are mandatory, and any login problem
	# is reported before the script stops.
	if not len(username) or not len(password):
		_abort("Username and password required")

	try:
		print("Connecting to reddit...", end=" ")
		r = praw.Reddit(user_agent=user_agent)
		print("logging in...", end=" ")
		r.login(username, password)
		print("done!")
	except InvalidUserPass:
		_abort("Failed to connect to reddit: invalid password or account")
	except Exception as e:
		failure = "Failed to connect to reddit, {}: {}".format(e.__class__.__name__, e)
		_abort(failure)

	# From here on `subreddit` is the subreddit object, not the configured name.
	subreddit = r.get_subreddit(subreddit)

	# Process flair
	def check_flair(flair_text):
		"""Decide whether a user's flair text breaks the configured rules.

		Reads the module-level settings ``check_domains``, ``link_exclusive``,
		``domain_whitelist``, ``domain_blacklist``, ``check_keywords``,
		``keywords_as_words``, ``keyword_whitelist`` and ``keyword_blacklist``.

		Args:
			flair_text: The flair text, or None when the user has no flair.

		Returns:
			True when the flair is bad, False otherwise. Missing or empty
			flair is never bad.
		"""
		if not flair_text:
			return False
		flair_text = flair_text.lower()

		if check_domains or link_exclusive:
			def is_bad_domain(netloc):
				# The blacklist wins; a non-empty whitelist rejects everything else.
				if domain_in(netloc, domain_blacklist):
					return True
				return len(domain_whitelist) > 0 and not domain_in(netloc, domain_whitelist)

			def looks_like_host(netloc):
				# Heuristic for scheme-less text: a host has a dot and no whitespace.
				return "." in netloc and not any(ch.isspace() for ch in netloc)

			try:
				# Attempt one: well-formatted URL (ex. http://reddit.com/r/anime)
				netloc = urlparse(flair_text).netloc
				is_link = len(netloc) > 0
				if not is_link:
					# Attempt two: protocol-less URL (ex. reddit.com/r/anime).
					# Prefixing "http://" turns almost any text into a netloc, so
					# the result only counts as a link when it resembles a host.
					netloc = urlparse("http://" + flair_text).netloc
					is_link = looks_like_host(netloc)
			# URL parse error (not always thrown)
			except ValueError:
				if link_exclusive:
					return True
			else:
				# Stop if we NEED a link and did not get one
				if link_exclusive and not is_link:
					return True
				if check_domains and len(netloc) > 0 and is_bad_domain(netloc):
					return True

		if check_keywords:
			# Honour the keywords_as_words setting, which used to be ignored.
			if keyword_in(flair_text, keyword_blacklist, as_words=keywords_as_words):
				return True
			if len(keyword_whitelist) > 0 and not keyword_in(flair_text, keyword_whitelist, as_words=keywords_as_words):
				return True

		return False

	# Runs of punctuation and/or whitespace; each run is collapsed to one space
	# so that keywords can be matched on word boundaries.
	_keyword_re = re.compile("[\\s{}]+".format(re.escape(string.punctuation)))
	def keyword_in(text, keyword_list, as_words=False):
		"""Report whether any keyword occurs in *text*.

		Args:
			text: The text to search.
			keyword_list: Keywords (or multi-word phrases) to look for. Empty
				entries are ignored.
			as_words: When True a keyword only matches as a whole word or
				phrase, with punctuation and whitespace acting as separators.
				When False a plain substring match is used.

		Returns:
			True if at least one keyword matches, otherwise False.
		"""
		if not as_words:
			# Plain substring search; the keyword must not be wrapped in spaces
			# here, otherwise a flair consisting only of the keyword never matches.
			return any(keyword and keyword in text for keyword in keyword_list)

		# Pad both ends so the first and last words have a boundary too.
		padded = " " + _keyword_re.sub(" ", text) + " "
		return any(keyword and " " + keyword + " " in padded for keyword in keyword_list)

	def domain_in(text, domain_list):
		"""Report whether host name *text* belongs to one of the listed domains.

		A listed domain matches the host itself and any of its subdomains
		("reddit.com" matches "reddit.com" and "www.reddit.com", but not
		"notreddit.com"). An entry starting with a dot (".co.uk") is treated
		as a plain suffix. Matching ignores case; empty entries are skipped.

		Args:
			text: The host name (URL netloc) to test.
			domain_list: Domains to test against.

		Returns:
			True if any entry matches, otherwise False.
		"""
		host = text.lower().rstrip(".")
		for domain in domain_list:
			domain = domain.lower()
			if not domain:
				# "" is a suffix of every string and would match every host.
				continue
			# Require a label boundary so "notreddit.com" is not taken for "reddit.com".
			suffix = domain if domain.startswith(".") else "." + domain
			if host == domain or host.endswith(suffix):
				return True
		return False

	num_processed = 0
	num_removed = 0
	removed_users = []	# (user, flair_text) pairs with bad flair

	print("Finding bad flair...")
	chunk_size = 1000
	after = None
	# Walk the flair list chunk by chunk. A chunk's results are buffered and only
	# committed once the whole chunk (and the lookup of the next cursor) has
	# succeeded, so a retry after an error cannot record the same users twice.
	while True:
		try:
			# Inclusive range of the entries requested in this chunk
			print("Processing {} to {}".format(num_processed+1, num_processed+chunk_size))
			flairs = subreddit.get_flair_list(limit=chunk_size, params={"after": after, "show": "all"})

			chunk_bad = []
			chunk_count = 0
			last_user = None
			for flair in flairs:
				last_user = flair["user"]
				text = flair["flair_text"]
				if check_flair(text):
					chunk_bad.append((last_user, text))
				chunk_count += 1

			# A short chunk means the end of the list was reached.
			is_last_chunk = chunk_count < chunk_size
			next_after = after
			if not is_last_chunk:
				# The next chunk starts after the last user seen ("t2_" + user id).
				next_after = "t2_"+r.get_redditor(last_user).id

		# Don't have permission to access
		except (ModeratorRequired, ModeratorOrScopeRequired) as e:
			print("Error: Mod authorization required")
			exit(0)
		except HTTPError as e:
			code = e.response.status_code
			if code == 403:
				print("Error: Mod authorization required (HTTP 403)")
				exit(0)
			else:
				print("Error: Failed to load page, {} ({}) returned by server".format(code, e.response))
				sleep(5)
		# Couldn't connect to reddit
		# NOTE(review): this is the builtin ConnectionError unless the star import
		# rebinds the name; requests' ConnectionError is presumably not a subclass
		# of the builtin - confirm this handler fires for network failures.
		except ConnectionError as e:
			print("Error: Connection failed, {}".format(e))
			sleep(5)
		else:
			# Chunk fully processed: commit its results.
			for (user, flair_text) in chunk_bad:
				print("Bad flair on {}: {}".format(user, flair_text))
			removed_users.extend(chunk_bad)
			num_removed += len(chunk_bad)
			num_processed += chunk_count
			after = next_after
			if is_last_chunk:
				break

	# Process results: write one "user | flair" line per offender, unless
	# output_file is None.
	if output_file is not None:
		with open(output_file, "w", encoding='utf-8') as file:
			for (user, flair_text) in removed_users:
				# The separator is a plain " | "; the backslash that preceded the
				# pipe was an invalid escape that ended up in the output.
				print(user, flair_text, sep=" | ", file=file)

	not_processed = []	# names of users whose removal/message failed
	if remove_flair or message_user:
		print("Processing bad users...")
		# removed_users holds (user, flair_text) pairs; only the name is needed here.
		for (user, _bad_text) in removed_users:
			try:
				if remove_flair:
					subreddit.delete_flair(user)
				if message_user:
					r.send_message(user, message_subject, message_body, from_sr=subreddit if message_as_sub else None)
				# No `break` here: every offending user must be handled, not just the first.
			except (ConnectionError, HTTPError) as e:
				print("Error: Failed to do a thing, {}: {}".format(e.__class__.__name__, e))
				not_processed.append(user)

	# Final report: totals first, then anyone who could not be processed
	print("\nDone!\n")
	for summary_line in (
		"Num flair: {}".format(num_processed),
		"Num bad flair: {}".format(num_removed),
	):
		print(summary_line)
	print()

	if not_processed:
		print("The following users weren't processed:")
		print(*not_processed, sep="\n")
		print()

	# Clean up
	r.clear_authentication()