Created
March 31, 2020 02:31
-
-
Save juice500ml/be4e9219b2be68183d98c5104a0adf29 to your computer and use it in GitHub Desktop.
arxiv_bot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ~/.config/systemd/user/arxiv_bot.service | |
[Unit] | |
Description=Slack Arxiv Bot | |
[Service] | |
ExecStart=VENV_PATH RUN_PY_PATH | |
WorkingDirectory=DIR_PATH | |
Restart=always |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
requests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import getpass | |
import json | |
import os | |
import pathlib | |
import pickle | |
import socket | |
import time | |
import traceback | |
import requests | |
class PaperDatabase():
    """Pickle-backed, ordered list of paper records.

    Every record is a dict that must carry a ``'title'`` key (used for
    de-duplication) and an ``'order'`` key (used for positioning). The
    full list is written back to disk after each mutating call.

    NOTE(review): the store is read with ``pickle.load``, which can execute
    arbitrary code; only point this class at files the program wrote itself.
    """

    def __init__(self, filename='/home/kyle/arxiv_bot/paper.pkl'):
        """Load the database from *filename*, creating it when necessary.

        Args:
            filename: Path of the pickle file. A missing or zero-length
                file is initialised with an empty list.
        """
        self._filename = filename
        if not pathlib.Path(filename).is_file() or os.path.getsize(filename) == 0:
            self._db = []
            self._commit()
        else:
            with open(filename, 'rb') as f:
                self._db = pickle.load(f)

    def __len__(self):
        """Return the number of stored records."""
        return len(self._db)

    def pop(self, k=1):
        """Rotate the first *k* records to the back and return them.

        The rotated records are not removed: each one receives a new
        ``'order'`` value larger than the current last record's and is
        appended to the end of the list, then the list is committed.

        Args:
            k: Number of leading records to take.

        Returns:
            A new list holding the rotated records (the dicts themselves
            are the ones kept in the database).
        """
        elems = self._db[:k]
        if elems:
            # Read the tail's order once, before any record is renumbered.
            last_order = self._db[-1]['order']
            for index, elem in enumerate(elems, start=1):
                elem['order'] = last_order + index
        self._db = self._db[k:] + elems
        self._commit()
        return elems.copy()

    def insert(self, elems):
        """Insert copies of the records whose title is not stored yet.

        Each new record is placed in front of the first stored record whose
        ``'order'`` is greater than or equal to its own. The database is
        committed once, after all records have been processed.

        Args:
            elems: Iterable of record dicts with hashable ``'title'`` values.
        """
        # A set gives O(1) duplicate checks instead of a scan per element.
        known_titles = {row['title'] for row in self._db}
        for elem in elems:
            if elem['title'] in known_titles:
                continue
            self._db.insert(self._ordered_index(elem), elem.copy())
            known_titles.add(elem['title'])
        self._commit()

    def _find_elem(self, elem):
        """Return the index of the record sharing *elem*'s title, or -1."""
        for index, row in enumerate(self._db):
            if row['title'] == elem['title']:
                return index
        return -1

    def _ordered_index(self, elem):
        """Return the index at which *elem* should be inserted.

        That is the position of the first record whose ``'order'`` is not
        smaller than *elem*'s, or the list length when there is none.
        """
        for index, row in enumerate(self._db):
            if row['order'] >= elem['order']:
                return index
        return len(self._db)

    def _commit(self):
        """Write the in-memory list to disk.

        The data goes to a sibling ``.tmp`` file first and is then moved
        over the real file with ``os.replace``, so an interrupted write
        cannot leave a truncated database behind.
        """
        target = os.fspath(self._filename)
        scratch = target + '.tmp'
        with open(scratch, 'wb') as f:
            pickle.dump(self._db, f)
            f.flush()
            os.fsync(f.fileno())
        os.replace(scratch, target)
def get_arxiv_papers(url):
    """Download *url* and return the paper list embedded in the page.

    The page is expected to contain a JavaScript assignment of the form
    ``var papers = <json>;`` followed by ``var pid_to_users``; the JSON
    between those two markers is parsed.

    Args:
        url: Address of the page to fetch.

    Returns:
        The parsed list of paper dicts, each given an ``'order'`` key equal
        to its position in the list.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-success HTTP status.
        ValueError: If the markers are missing or the payload is not valid
            JSON.
    """
    start_token = 'var papers = '
    end_token = ';\nvar pid_to_users'
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    page = response.text

    start = page.find(start_token)
    if start == -1:
        raise ValueError('start marker {!r} not found in {}'.format(start_token, url))
    start += len(start_token)
    # Search for the end marker only after the start marker.
    end = page.find(end_token, start)
    if end == -1:
        raise ValueError('end marker {!r} not found in {}'.format(end_token, url))

    parsed_papers = json.loads(page[start:end])
    for index, paper in enumerate(parsed_papers):
        paper['order'] = index
    return parsed_papers
def get_weekly_arxiv_papers():
    """Return the papers listed on arxiv-sanity's "top" page, week filter."""
    weekly_top_url = 'http://www.arxiv-sanity.com/top?timefilter=week&vfilter=all'
    return get_arxiv_papers(weekly_top_url)
def send_to_slack(url, content):
    """POST *content* to *url* as the ``attachments`` field of a JSON body.

    Args:
        url: Address the request is sent to.
        content: List of attachment dicts.

    Raises:
        requests.RequestException: On connection problems or a timeout.
    """
    # Bound the request so an unresponsive endpoint cannot hang the caller.
    requests.post(url, json={'attachments': content}, timeout=30)
def send_paper(paper):
    """Send one paper as a Slack attachment to every configured URL.

    Args:
        paper: Dict with ``'title'``, ``'link'``, ``'published_time'``,
            ``'authors'`` and ``'abstract'`` keys.
    """
    webhook_urls = ['SLACK_URLS']

    title = paper['title']
    link = paper['link']
    published = paper['published_time']
    author_line = ', '.join('%s' % author for author in paper['authors'])
    # Collapse the abstract onto a single line.
    abstract = paper['abstract'].replace('\n', ' ')

    attachments = [{
        'title': title,
        'title_link': link,
        'text': '_Published on ' + published + ' by ' + author_line + '_\n\n' + abstract,
    }]
    for webhook in webhook_urls:
        send_to_slack(webhook, attachments)
def send_debug(string):
    """Send *string* to the debug Slack URL as a code-formatted attachment.

    The attachment title is ``<current user>@<hostname>``.
    """
    debug_url = 'SLACK_URL'
    sender = getpass.getuser() + '@' + socket.gethostname()
    attachment = {
        'title': sender,
        'text': '```{}```'.format(string),
    }
    send_to_slack(debug_url, [attachment])
def check_time(now, delta, time_list):
    """Tell whether *now* lies inside any ``[t, t + delta]`` window.

    Both window ends are inclusive. A window that runs past midnight
    (for example 23:55 plus ten minutes) also matches the times that fall
    just after 00:00.

    Args:
        now: ``datetime.time`` to test.
        delta: ``datetime.timedelta`` giving the width of each window.
        time_list: Iterable of ``datetime.time`` window start times.

    Returns:
        True if *now* is inside at least one window, otherwise False.
    """
    # Times cannot be added to timedeltas, so anchor them to a fixed date.
    base = datetime.date(2000, 1, 1)
    current = datetime.datetime.combine(base, now)
    # The same clock time one day later, to match windows that wrap around.
    current_next_day = current + datetime.timedelta(days=1)
    for t in time_list:
        window_start = datetime.datetime.combine(base, t)
        window_end = window_start + delta
        if window_start <= current <= window_end:
            return True
        if window_start <= current_next_day <= window_end:
            return True
    return False
def db_update_time(now, delta):
    """Return True when *now* is within *delta* after 09:00."""
    refresh_times = [datetime.time(hour=9)]
    return check_time(now, delta, refresh_times)
def message_time(now, delta):
    """Return True when *now* is within *delta* after 10:00, 14:00 or 18:00."""
    posting_times = [datetime.time(hour=hour) for hour in (10, 14, 18)]
    return check_time(now, delta, posting_times)
def main(delta):
    """Run one polling step: refresh the database and/or post papers.

    The current wall-clock time is taken in a fixed UTC+9 offset. In the
    update window the weekly paper list is fetched and inserted into the
    database; in a message window the first three papers are popped and
    sent to Slack.

    Args:
        delta: ``datetime.timedelta`` width of the time windows.
    """
    utc_plus_9 = datetime.timezone(datetime.timedelta(hours=9))
    now = datetime.datetime.now(tz=utc_plus_9).time()

    if db_update_time(now, delta):
        fetched = get_weekly_arxiv_papers()
        PaperDatabase().insert(fetched)
        print('db updated')

    if message_time(now, delta):
        batch = PaperDatabase().pop(3)
        for entry in batch:
            send_paper(entry)
        print('slack messaged')
if __name__ == '__main__':
    # Poll forever: run one step, report any failure to Slack, then sleep.
    # NOTE(review): the gap between polls is `delta` plus the time `main`
    # takes, while the time windows are `delta` wide, so a window could in
    # principle be stepped over — confirm this is acceptable.
    delta = datetime.timedelta(minutes=10)
    while True:
        try:
            main(delta)
        except KeyboardInterrupt:
            break
        except Exception:
            report = traceback.format_exc()
            try:
                send_debug(report)
            except Exception:
                # Reporting failed as well (e.g. Slack unreachable). Print
                # the chained traceback instead of letting the loop die.
                traceback.print_exc()
        time.sleep(delta.total_seconds())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
systemctl --user daemon-reload | |
systemctl --user status arxiv_bot.service | |
systemctl --user start arxiv_bot.service |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment