Skip to content

Instantly share code, notes, and snippets.

@c4pt0r
Last active March 28, 2020 06:56
Show Gist options
  • Save c4pt0r/1a12e8357e430a3f9cc29a16a186de36 to your computer and use it in GitHub Desktop.
Save c4pt0r/1a12e8357e430a3f9cc29a16a186de36 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import time
import slack
import os
import sqlite3
from requests_html import HTMLSession
# Slack Web API client; the bot token is read from the SLACK_BOT_TOKEN
# environment variable (os.environ.get yields None when it is unset).
client = slack.WebClient(os.environ.get('SLACK_BOT_TOKEN'))
# Shared requests_html session used for the search requests in get_feeds().
session = HTMLSession()
# Module-wide SQLite connection to 'item.db' (a path relative to the current
# working directory); used by the item-table helpers below.
db_conn = sqlite3.connect('item.db')
def init_db():
    """Create the ``item`` table and its unique URL index when missing.

    The schema script runs on the shared module-level connection. Both
    statements use ``if not exists``, so running it against an already
    initialised database changes nothing.
    """
    schema = """
create table if not exists item( url text not null, content text not null,
create_at text not null
);
create unique index if not exists idx_item_url
on item(url);
"""
    db_conn.executescript(schema)
def append_item_to_db(item):
    """Insert one record into the ``item`` table and commit immediately.

    ``item`` is a mapping of column name to value. The values are bound
    through ``?`` placeholders, but the column names are interpolated into
    the SQL text, so the keys must come from trusted code.
    """
    columns = ','.join(item)
    placeholders = ','.join('?' for _ in item)
    sql = 'insert into item (%s) values (%s)' % (columns, placeholders)
    db_conn.execute(sql, tuple(item.values()))
    db_conn.commit()
def is_seen_url(url):
    """Return True when a row with this ``url`` already exists in ``item``.

    Args:
        url: The URL to look up; it is bound as a query parameter.

    Returns:
        bool: True if a matching row is stored, otherwise False.
    """
    cur = db_conn.cursor()
    try:
        # Only existence matters, so fetch a constant instead of the full row.
        cur.execute('select 1 from item where url=? limit 1', (url,))
        return cur.fetchone() is not None
    finally:
        # Release the cursor even if the query raises.
        cur.close()
def is_seen_item(item):
    """Tell whether ``item`` was stored before, judged by its ``'url'`` key."""
    url = item['url']
    return is_seen_url(url)
def has_keyword(content, keyword):
    """Return True when ``keyword`` occurs in ``content``, ignoring case.

    Both strings are case-folded before the substring test, so the match is
    caseless for non-ASCII text as well (e.g. ``"ß"`` matches ``"SS"``).

    Args:
        content: Text to search in.
        keyword: Text to search for. An empty keyword matches any content.

    Returns:
        bool: True if the folded keyword is a substring of the folded content.
    """
    return keyword.casefold() in content.casefold()
def get_feeds(keyword, duration='1d'):
    """Search Zhihu for ``keyword`` and return the matching result items.

    Args:
        keyword: Search term. It is also used to filter the results: only
            entries whose text contains it (per ``has_keyword``) are kept.
        duration: Value sent as the ``range`` query parameter of the search.

    Returns:
        list[dict]: One dict per kept result with the keys ``'content'``
        (the entry's text), ``'url'`` (absolute link taken from the title
        anchor) and ``'create_at'`` (``time.ctime()`` at scrape time). The
        list is empty when the page contains no result container.
    """
    # Let the HTTP library encode the query so that keywords containing
    # spaces, '&' or '#' cannot corrupt the URL.
    response = session.get(
        'https://www.zhihu.com/search',
        params={'q': keyword, 'type': 'content', 'range': duration})
    result_list = response.html.find('div.List', first=True)
    if result_list is None:
        # No result container was found on the page; nothing to report.
        return []
    result = []
    for entry in result_list.find('div.List-item'):
        text = entry.text
        if not has_keyword(text, keyword):
            continue
        title = entry.find('h2.ContentItem-title', first=True)
        if not title:
            continue
        link = title.find('a', first=True)
        url = link.attrs.get('href') if link is not None else None
        if not url:
            # A title without a usable link cannot be announced.
            continue
        # Turn protocol-relative and site-relative links into absolute URLs.
        if url.startswith('//'):
            url = 'https:' + url
        elif url.startswith('/'):
            url = 'https://zhihu.com' + url
        result.append({'content': text, 'url': url, 'create_at': time.ctime()})
    return result
def loop(keywords=('pingcap', 'tikv', 'tidb', 'chaos-mesh', 'chaos mesh')):
    """Poll Zhihu forever and announce previously unseen results on Slack.

    Args:
        keywords: Re-iterable collection of search terms. Each one is
            searched with a ``'3m'`` range on every pass. The default is a
            tuple so it cannot be mutated between calls.

    Each unseen item is first stored in the database and then posted to the
    ``#bot-playground`` channel as ``"<content> via <url>"``. The function
    never returns; stop it by interrupting the process.
    """
    while True:
        for keyword in keywords:
            for item in get_feeds(keyword, '3m'):
                if is_seen_item(item):
                    continue
                # Record before posting so the item counts as seen on
                # later passes.
                append_item_to_db(item)
                client.chat_postMessage(
                    channel='#bot-playground',
                    text=item['content'] + ' via ' + item['url'])
                print(item)
            # NOTE(review): the pause is applied after each keyword search;
            # confirm this is the intended polling rate.
            time.sleep(3)
if __name__ == '__main__':
    # Make sure the schema exists before the polling loop starts using it.
    init_db()
    loop()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment