-
-
Save crypdick/0a9e2a98e2f3587201c6d4d9c5add43e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib2, re, json, os, time, sys, HTMLParser | |
html_parser = HTMLParser.HTMLParser() | |
auth_address = "1KbV1e1u6P6AsY8XNBydgtbtN8iSB5WMyG" | |
auth_privatekey = "xxxx" | |
site = "1TaLkFrMwvbNsooF4ioKAY9EuxTBTjipT" | |
zeronet_dir = ".." | |
os.chdir(zeronet_dir) | |
json_path = "data/%s/data/users/%s/data.json" % (site, auth_address) | |
data = json.load(open(json_path)) | |
def addNews(title, source, url, descr): | |
global data | |
added_urls = [re.match(".*?(http[s]{0,1}://.*$|$)", topic["body"], re.DOTALL).group(1) for topic in data["topic"]] | |
added_bodys = [topic["body"] for topic in data["topic"]] | |
added_titles = [topic["title"] for topic in data["topic"]] | |
if url not in added_urls and descr[0:30] not in "".join(added_bodys) and title[0:30] not in "".join(added_titles): | |
topic = { | |
"topic_id": data["next_topic_id"], | |
"title": title, | |
"body": descr+"\n\n"+url, | |
"added": int(time.time()), | |
"parent_topic_uri": "1_1KbV1e1u6P6AsY8XNBydgtbtN8iSB5WMyG" | |
} | |
data["next_topic_id"] += 1 | |
data["topic"].append(topic) | |
data["topic"] = [topic for topic in data["topic"] if topic["added"] > time.time()-60*60*24*2 or "type" in topic] # Only keep last 2 day topics + the group | |
print "Added:", repr(title) | |
return True | |
else: | |
return False | |
def getNews(): | |
opener = urllib2.build_opener() | |
opener.addheaders = [('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11')] | |
response = opener.open("https://www.google.hu/search?q=bitcoin+OR+p2p+OR+tor+OR+i2p+OR+decentralized+OR+darkweb+OR+bittorrent+OR+blockchain+OR+namecoin+OR+(p2p+web)&lr=lang_en&safe=images&hl=hu&tbs=qdr:d,lr:lang_1en&source=lnms&tbm=nws&sa=X", timeout=10) | |
added = 0 | |
found = 0 | |
blocks = re.findall('<div class="g">(.*?)</table>', response.read()) | |
if not blocks: | |
print "No blocks" | |
for block in blocks: | |
match = re.match('.*?<h3.*? href=\"(?P<url>.*?)\".*?>(?P<title>.*?)</a>.*<div class="slp"><span.*?>(?P<source>[A-Za-z0-9 ]*).*<div class="st">(?P<descr>.*?)</div>(.*?)</td>', block.replace("<b>", "").replace("</b>", "")) | |
if match: | |
found += 1 | |
url, title, source, descr, more = match.groups() | |
title = html_parser.unescape(title.decode("utf8")) | |
descr = html_parser.unescape(descr.decode("utf8")) | |
more = more.strip() | |
url = re.sub(".*?q=(.*?)&.*", "\\1", url) | |
if not more: | |
added += addNews(title, source, url, descr) | |
if not found: | |
print "Not found any" | |
if added: | |
json.dump(data, open(json_path, "w"), indent=2) | |
print "* Sign and publish..." | |
os.system("python zeronet.py siteSign %s %s --inner_path data/users/%s/content.json --publish" % (site, auth_privatekey, auth_address)) | |
while 1: | |
try: | |
getNews() | |
except Exception, err: | |
print "Exception", err | |
print ".", | |
sys.stdout.flush() | |
time.sleep(60) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment