Skip to content

Instantly share code, notes, and snippets.

@ekager
Last active October 28, 2022 08:18
Show Gist options
  • Save ekager/4c9b634696e0c472619e6dd127e3920a to your computer and use it in GitHub Desktop.
Save ekager/4c9b634696e0c472619e6dd127e3920a to your computer and use it in GitHub Desktop.
Searches for specified search terms and sends emails if found
"""
This script will search 4chan (need to specify a board unfortunately) and then
search all comments on Reddit for specific keywords. If found, it will then send
an email with the links to any matching posts.
Because 4chan posts are archived after ~48 hours I would recommend setting this up
to run on that cadence as well.
The sender's email account needs "Allow less secure apps" (or a similar setting) turned ON.
I followed these instructions for setting that up:
https://realpython.com/python-send-email/#option-1-setting-up-a-gmail-account-for-development
4chan search modified from https://gist.github.com/Taiiwo/7323144
Needs python3
Usage: python3 bad_place_notify.py <mode>, where <mode> is one of:
1. all: Search every reply to every thread on the board.
2. onlyop: Search only the opening post (OP) of each thread.
Example: python3 bad_place_notify.py all
"""
import json, time, sys, smtplib, ssl
from urllib.request import urlopen
# Searches a specific 4chan board for key words.
# Things are archived on 4chan after ~48 hours and they will no longer be searchable.
# API Docs: https://github.com/4chan/4chan-API
def search_4chan_replies(*args):
    """Search one 4chan board for the configured keywords.

    Downloads the board catalog and, depending on the mode, looks for any of
    the search terms either in every post of every thread or only in the
    opening post (OP) comment that the catalog lists for each thread.

    Args:
        *args: Optional positional mode. If the first argument is
            ``'onlyop'`` only the OP comment from the catalog is searched;
            any other value (or no argument at all) fetches every thread
            and searches all of its posts.

    Returns:
        A list of link strings for the matching threads/posts, in the
        order they were found.
    """
    # All the search terms you want to look for.
    strings = ["TODO_1", "TODO_2", "TODO_ETC"]
    # The board you want to search
    board = "g"
    # The 4chan API rules ask for no more than one request per second.
    request_delay = 1
    only_op = bool(args) and args[0] == 'onlyop'
    thread_link = 'http://boards.4chan.org/' + board + '/res/'

    with urlopen('https://a.4cdn.org/' + board + '/catalog.json') as response:
        catalog = json.loads(response.read())
    # Getting around API limits
    time.sleep(request_delay)

    retme = []
    for page_index, page in enumerate(catalog):
        print('Searching 4chan page ' + str(page_index))
        for thread in page['threads']:
            num = thread['no']

            if only_op:
                # The catalog already carries the OP comment; no extra fetch.
                if 'com' in thread and any(x in thread['com'] for x in strings):
                    retme.append(thread_link + str(num))
                continue

            # Thread endpoint as documented by the 4chan API:
            # https://a.4cdn.org/{board}/thread/{no}.json
            thread_url = ('https://a.4cdn.org/' + board + '/thread/'
                          + str(num) + '.json')
            try:
                with urlopen(thread_url) as response:
                    rawreplies = response.read()
            except OSError:
                # URLError/HTTPError are OSError subclasses. A thread can
                # vanish between the catalog fetch and this request; skip it
                # and keep going with the remaining threads on the page.
                print("Thread 404'd")
                continue
            finally:
                time.sleep(request_delay)

            for post in json.loads(rawreplies)['posts']:
                if 'com' in post and any(x in post['com'] for x in strings):
                    link = thread_link + str(num)
                    if post['no'] != num:
                        # A reply rather than the OP: anchor to that post.
                        link += '#p' + str(post['no'])
                    retme.append(link)
    return retme
# Searches all reddit comments for search terms
# API Docs: https://github.com/pushshift/api
def search_reddit_replies():
    """Search Reddit comments (via the Pushshift API) for the keywords.

    Each search term is queried separately with ``after=3d``. A failed or
    malformed response for one term is reported on stdout and skipped so the
    remaining terms are still searched.

    Returns:
        A list of ``https://reddit.com...`` permalinks for the comments
        returned by the API, in the order received.
    """
    from urllib.parse import quote_plus

    # The search terms we will look for in the last 3 days. Not case sensitive.
    strings = ["TODO_1", "TODO_2", "TODO_ETC"]
    results = []
    for string in strings:
        print("Searching Reddit for " + string + "...")
        # Encode the term so spaces, '&', '#', etc. cannot break the query.
        url = ('https://api.pushshift.io/reddit/search/comment/?q='
               + quote_plus(string) + '&after=3d')
        try:
            with urlopen(url) as response:
                parsedjson = json.loads(response.read())
        except (OSError, ValueError) as error:
            # OSError covers URLError/HTTPError; ValueError covers bad JSON.
            print("Reddit search for " + string + " failed: " + str(error))
            continue
        results.extend(
            "https://reddit.com" + data['permalink']
            for data in parsedjson.get('data', [])
            if 'permalink' in data
        )
    return results
def emailMe(results):
    """Email the matching links through Gmail's SMTP server using STARTTLS.

    Args:
        results: The message body as a single string (the caller passes the
            links joined by newlines).

    Connection, TLS, login and send failures are printed to stdout rather
    than raised.
    """
    from email.message import EmailMessage

    print('Emailing ' + results)
    smtp_server = "smtp.gmail.com"
    port = 587  # For starttls
    sender_email = "TODO_sender_email"
    receiver_email = "TODO_receiver_email"
    password = 'TODO_sender_email_password'

    # EmailMessage puts the required blank line between headers and body and
    # handles non-ASCII text, which a hand-built "Subject: ..." string did not.
    message = EmailMessage()
    message["Subject"] = "You've Been Mentioned On A Bad Place :("
    message["From"] = sender_email
    message["To"] = receiver_email
    message.set_content(results)

    # Create a secure SSL context
    context = ssl.create_default_context()
    # Try to log in to server and send email
    try:
        # The context manager sends QUIT on exit, and is only entered once
        # the connection exists, so a failed connect cannot hit an unbound
        # `server` during cleanup.
        with smtplib.SMTP(smtp_server, port) as server:
            server.starttls(context=context)  # Secure the connection
            server.login(sender_email, password)
            server.send_message(
                message, from_addr=sender_email, to_addrs=receiver_email
            )
            print("Email sent!")
    except (smtplib.SMTPException, OSError) as e:
        # Print any error messages to stdout
        print(e)
def main():
    """Run both searches and email the combined links if any were found.

    The search mode is read from the first command-line argument and passed
    to the 4chan search. Exits with a usage message when it is missing.
    """
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaced as a bare IndexError.
        sys.exit("Usage: python3 bad_place_notify.py all|onlyop")
    results = search_4chan_replies(sys.argv[1]) + search_reddit_replies()
    if results:
        emailMe('\n'.join(results))
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment