Skip to content

Instantly share code, notes, and snippets.

@Taiiwo
Last active December 27, 2015 12:09
Show Gist options
  • Save Taiiwo/7323144 to your computer and use it in GitHub Desktop.
Save Taiiwo/7323144 to your computer and use it in GitHub Desktop.
Search strings in any 4chan post. - Only views 1 thread per second to abide by the 4chan API rules (Plus a second to load the page) - Got rid of this. Bored of waiting... - Will error at the end of a search - Fixed. I forgot that threads 404. - Doesn't search thread titles - Debugging is still in there, so be prepared for a lot of output - It su…
"""Inspired by /g/sicp's bot's threadsearching module
https://github.com/gentoomen/bhottu/blob/master/modules/threadsearch.py
irc.rizon.net #/g/sicp
Search intensity added by Taiiwo - github.com/Taiiwo
Usage: python 4chansearch.py "<string>" <board> <mode>
<string>: String to search for
<board>: Board letter
<mode> is one of:
all: Search every reply to every thread on the board
onlyop: Search only the OP.
Example: python 4chansearch.py "ARG" x all
"""
import urllib2, json, time, sys
def _fetch_json(url):
    """Download ``url`` and return its body parsed as JSON.

    The response is always closed, even when reading or parsing fails.
    """
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


def search_replies(string, board, *args):
    """Search a 4chan board for posts whose comment contains ``string``.

    Args:
        string: Text to look for; matched with a plain substring test
            against each post's ``com`` value as returned by the API.
        board: Board letter(s) used to build the request URLs, e.g. ``x``.
        *args: Optional search mode as the first extra argument.
            ``'onlyop'`` checks only the opening posts listed in the
            catalog. Any other value (e.g. ``'all'``), or no value at
            all, additionally downloads every thread and checks each post.

    Returns:
        A list of URL strings, one per matching post. Each URL is also
        printed as soon as it is found, together with progress messages.

    Raises:
        IOError: if the catalog itself cannot be downloaded
            (``urllib2.URLError`` is an ``IOError`` subclass).
    """
    # NOTE(review): the host names and the '/res/' path are kept exactly as
    # originally written; confirm they still match the current 4chan API.
    api_base = 'http://api.4chan.org/' + board
    link_base = 'http://boards.4chan.org/' + board + '/res/'

    # A missing mode used to raise IndexError on args[0]; treat it as a full
    # search, which is what every value other than 'onlyop' already selected.
    only_op = bool(args) and args[0] == 'onlyop'

    catalog = _fetch_json(api_base + '/catalog.json')
    time.sleep(1)

    matches = []
    for page_index, page in enumerate(catalog):
        print('Searching page ' + str(page_index))
        for thread_index, thread in enumerate(page['threads']):
            if only_op:
                # The catalog entry already carries the OP's comment, so no
                # extra request is needed in this mode.
                if 'com' in thread and string in thread['com']:
                    link = link_base + str(thread['no'])
                    matches.append(link)
                    print(link)
                continue

            print('On thread ' + str(thread_index))
            num = thread['no']
            try:
                thread_data = _fetch_json(api_base + '/res/' + str(num) + '.json')
            except IOError:
                # Covers HTTP and network failures only, so Ctrl-C and
                # programming errors still propagate. Skip just this thread:
                # one failed thread must not hide the rest of the page.
                print("Thread 404'd")
                continue
            time.sleep(0.0001)

            # TODO: consider matching against post['name'] as well.
            for post in thread_data['posts']:
                if 'com' not in post or string not in post['com']:
                    continue
                link = link_base + str(num)
                if post['no'] != num:
                    # A reply: point straight at it with an anchor.
                    link += '#p' + str(post['no'])
                matches.append(link)
                print(link)
    return matches
def main():
    """Run a board search from the command line and print each result URL.

    Reads the search string from ``sys.argv[1]``, the board letter from
    ``sys.argv[2]`` and an optional mode from ``sys.argv[3]``. When the
    mode is omitted, ``'all'`` is passed explicitly.

    Raises:
        SystemExit: with status 2, after writing a usage line to stderr,
            when the search string or the board is missing.
    """
    if len(sys.argv) < 3:
        # Fail with a readable hint instead of a bare IndexError traceback.
        sys.stderr.write(
            'Usage: python 4chansearch.py "<string>" <board> [all|onlyop]\n'
        )
        sys.exit(2)

    mode = sys.argv[3] if len(sys.argv) > 3 else 'all'
    for url in search_replies(sys.argv[1], sys.argv[2], mode):
        print(url)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment