Last active
December 27, 2015 12:09
-
-
Save Taiiwo/7323144 to your computer and use it in GitHub Desktop.
Search strings in any 4chan post. - Only views 1 thread per second to abide by the 4chan API rules (Plus a second to load the page) - Got rid of this. Bored of waiting...
- Will error at the end of a search - Fixed. I forgot that threads 404.
- Doesn't search thread titles
- Debugging is still in there, so be prepared for a lot of output - It su…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Inspired by /g/sicp's bot's threadsearching module | |
https://github.com/gentoomen/bhottu/blob/master/modules/threadsearch.py | |
irc.rizon.net #/g/sicp | |
Search intensity added by Taiiwo - github.com/Taiiwo | |
Usage: python 4chansearch.py "STRING" BOARD MODE
STRING: String to search for
BOARD: Board letter
MODE: one of the following
  all: Search every reply to every thread on the board
  onlyop: Search only the OP.
Example: python 4chansearch.py "ARG" x all
""" | |
import urllib2, json, time, sys | |
def _fetch_json(url):
    """Fetch *url* and return its body decoded as JSON.

    The response is closed explicitly, even if reading or decoding raises.
    """
    response = urllib2.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        response.close()


def search_replies(string, board, *args):
    """Search a board's posts for *string* and return links to the matches.

    The board catalog is downloaded once. Depending on the mode, either only
    the opening post of every thread is checked (using the catalog data), or
    every thread is downloaded and each of its posts is checked.

    Each matching URL is printed as soon as it is found, in addition to
    being collected in the returned list. Progress messages are printed too.

    Args:
        string: Text to look for inside each post's ``com`` (comment) field.
        board: Board name used to build the API and result URLs.
        *args: Optional search mode as the first extra argument. ``'onlyop'``
            checks opening posts only; any other value, or no value at all,
            searches every post of every thread.

    Returns:
        A list of URL strings. A match in an opening post links to the
        thread; a match in a reply links to the thread with a ``#p<post no>``
        anchor.

    Raises:
        IOError: If the catalog itself cannot be downloaded (urllib2's
            URLError/HTTPError are IOError subclasses).
        ValueError: If a response body is not valid JSON.
    """
    # A missing mode used to raise IndexError on args[0]; treat it as "all".
    only_op = bool(args) and args[0] == 'onlyop'

    # NOTE(review): these hosts and the '/res/' paths may be outdated --
    # confirm against the current 4chan API documentation.
    api_base = 'http://api.4chan.org/' + board + '/'
    thread_base = 'http://boards.4chan.org/' + board + '/res/'

    retme = []
    catalog = _fetch_json(api_base + 'catalog.json')
    time.sleep(1)

    for page_index, page in enumerate(catalog):
        print('Searching page ' + str(page_index))

        for thread_index, thread in enumerate(page['threads']):
            if only_op:
                # The catalog entry already carries the opening post's text,
                # so no per-thread request is needed in this mode.
                if 'com' in thread and string in thread['com']:
                    url = thread_base + str(thread['no'])
                    retme.append(url)
                    print(url)
                continue

            print('On thread ' + str(thread_index))
            num = thread['no']
            try:
                thread_json = _fetch_json(api_base + 'res/' + str(num) + '.json')
            except IOError:
                # Threads can disappear between the catalog request and this
                # one. Skip just this thread: the old `break` silently
                # dropped every remaining thread on the page.
                print("Thread 404'd")
                continue
            # NOTE(review): this delay is far below the one request per
            # second mentioned in the gist description; kept as the author
            # left it.
            time.sleep(0.0001)

            for post in thread_json['posts']:
                if 'com' in post and string in post['com']:
                    url = thread_base + str(num)
                    if post['no'] != num:
                        # A reply matched: link straight to that post.
                        url += '#p' + str(post['no'])
                    retme.append(url)
                    print(url)

    return retme
def main():
    """Command-line entry point: run a search and print every result URL.

    Reads the search string, the board and an optional mode from
    ``sys.argv``. When the mode is omitted it defaults to ``'all'``. If the
    search string or the board is missing, a usage line is written to stderr
    and the process exits with status 2 instead of failing with an
    IndexError traceback.

    search_replies prints each match as it is found, so every URL appears a
    second time in the summary printed here.
    """
    if len(sys.argv) < 3:
        sys.stderr.write(
            'Usage: python 4chansearch.py "STRING" BOARD [all|onlyop]\n'
        )
        sys.exit(2)

    mode = sys.argv[3] if len(sys.argv) > 3 else 'all'
    for url in search_replies(sys.argv[1], sys.argv[2], mode):
        print(url)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment