Skip to content

Instantly share code, notes, and snippets.

@juice500ml
Created March 31, 2020 02:31
Show Gist options
  • Save juice500ml/be4e9219b2be68183d98c5104a0adf29 to your computer and use it in GitHub Desktop.
Save juice500ml/be4e9219b2be68183d98c5104a0adf29 to your computer and use it in GitHub Desktop.
arxiv_bot
# ~/.config/systemd/user/arxiv_bot.service
# systemd user unit that runs the Slack arXiv bot script as a service.
[Unit]
Description=Slack Arxiv Bot
[Service]
# NOTE(review): VENV_PATH and RUN_PY_PATH look like placeholders (presumably the
# virtualenv's Python interpreter and the bot script) -- replace them with
# absolute paths before installing the unit.
ExecStart=VENV_PATH RUN_PY_PATH
# NOTE(review): DIR_PATH looks like a placeholder for the directory the bot runs in.
WorkingDirectory=DIR_PATH
# Restart the process whenever it exits, regardless of the exit reason.
Restart=always
import datetime
import getpass
import json
import os
import pathlib
import pickle
import socket
import time
import traceback
import requests
class PaperDatabase():
    """Pickle-backed rotating queue of paper dicts.

    The database is a list of dicts kept sorted by their ``'order'`` key.
    Every mutation is written back to ``filename`` immediately. Each dict
    must carry at least the ``'title'`` key (used for de-duplication) and
    the ``'order'`` key (used for positioning).

    NOTE: the file is read with ``pickle.load``; only point ``filename`` at a
    file this program wrote itself, never at untrusted data.
    """

    def __init__(self, filename='/home/kyle/arxiv_bot/paper.pkl'):
        """Load the database from ``filename``, creating it if needed.

        A missing or zero-byte file is treated as an empty database and is
        (re)initialised on disk with an empty list.
        """
        self._filename = filename
        if not pathlib.Path(filename).is_file() or os.path.getsize(filename) == 0:
            self._db = []
            self._commit()
        else:
            with open(filename, 'rb') as f:
                self._db = pickle.load(f)

    def __len__(self):
        """Return the number of stored papers."""
        return len(self._db)

    def pop(self, k=1):
        """Rotate the first ``k`` papers to the back of the queue and return them.

        The rotated papers are not removed: each receives a new ``'order'``
        value larger than the current last paper's, so it sorts after
        everything else. The change is persisted before returning.

        Returns:
            A new list holding the rotated paper dicts (the dicts themselves
            are the stored objects, not copies).
        """
        elems = self._db[:k]
        if elems:
            # Read the tail order once, before any element is renumbered.
            last_order = self._db[-1]['order']
            for offset, elem in enumerate(elems, start=1):
                elem['order'] = last_order + offset
        self._db = self._db[k:] + elems
        self._commit()
        return elems.copy()

    def insert(self, elems):
        """Insert papers whose title is not stored yet, keeping order sorted.

        Each new paper is copied before being stored, so later changes to the
        caller's dict do not affect the database. Papers with an already
        known title are ignored. The database is persisted once at the end.
        """
        for elem in elems:
            if self._find_elem(elem) != -1:
                continue
            self._db.insert(self._ordered_index(elem), elem.copy())
        self._commit()

    def _find_elem(self, elem):
        """Return the index of the stored paper with the same title, or -1."""
        for index, row in enumerate(self._db):
            if row['title'] == elem['title']:
                return index
        return -1

    def _ordered_index(self, elem):
        """Return the first index whose paper has ``order >= elem['order']``.

        Returns ``len(self)`` when every stored paper sorts before ``elem``.
        """
        for index, row in enumerate(self._db):
            if row['order'] >= elem['order']:
                return index
        return len(self._db)

    def _commit(self):
        """Atomically write the in-memory database to disk.

        The data is first written to a sibling temporary file and then moved
        over the real file with ``os.replace``, so an interruption mid-write
        cannot leave a truncated or half-written database behind.
        """
        tmp_filename = '{}.tmp'.format(self._filename)
        with open(tmp_filename, 'wb') as f:
            pickle.dump(self._db, f)
            f.flush()
            os.fsync(f.fileno())
        os.replace(tmp_filename, self._filename)
def get_arxiv_papers(url, timeout=30):
    """Fetch a page and return the paper list embedded in its JavaScript.

    The page is expected to contain ``var papers = <JSON>;`` immediately
    followed by a ``var pid_to_users`` declaration; the JSON in between is
    parsed and returned.

    Args:
        url: Page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the bot forever.

    Returns:
        The parsed list of paper dicts, each given an ``'order'`` key equal
        to its position in the page.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the expected markers are missing or the embedded
            payload is not valid JSON.
    """
    start_token = 'var papers = '
    end_token = ';\nvar pid_to_users'

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    text = response.text

    start = text.find(start_token)
    if start == -1:
        raise ValueError('start token {!r} not found in {}'.format(start_token, url))
    start += len(start_token)

    # Search for the end marker only after the start marker so an earlier
    # occurrence cannot produce an empty or inverted slice.
    end = text.find(end_token, start)
    if end == -1:
        raise ValueError('end token {!r} not found in {}'.format(end_token, url))

    parsed_papers = json.loads(text[start:end])
    for index, paper in enumerate(parsed_papers):
        paper['order'] = index
    return parsed_papers
def get_weekly_arxiv_papers():
    """Return the papers listed on arxiv-sanity's "top" page (week filter)."""
    weekly_top_url = 'http://www.arxiv-sanity.com/top?timefilter=week&vfilter=all'
    return get_arxiv_papers(weekly_top_url)
def send_to_slack(url, content, timeout=10):
    """POST ``content`` as the ``attachments`` of a JSON payload to ``url``.

    Args:
        url: Webhook URL to post to.
        content: List of attachment dicts.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the bot forever.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status, so a rejected message is not silently lost.
    """
    response = requests.post(url, json={'attachments': content}, timeout=timeout)
    response.raise_for_status()
def send_paper(paper):
    """Post one paper as an attachment to every configured webhook URL.

    ``paper`` must provide the keys ``'title'``, ``'link'``,
    ``'published_time'``, ``'authors'`` and ``'abstract'``.
    """
    urls = ['SLACK_URLS']

    title = paper['title']
    link = paper['link']
    published = paper['published_time']
    authors = ', '.join(['%s' % name for name in paper['authors']])
    # Newlines inside the abstract are flattened to spaces.
    abstract = paper['abstract'].replace('\n', ' ')
    text = '_Published on ' + published + ' by ' + authors + '_\n\n' + abstract

    attachment = {
        'title': title,
        'title_link': link,
        'text': text,
    }
    for url in urls:
        send_to_slack(url, [attachment])
def send_debug(string):
    """Post ``string`` as a code block to the debug webhook.

    The attachment title is ``<user>@<hostname>`` of the running process.
    """
    url = 'SLACK_URL'
    sender = getpass.getuser() + '@' + socket.gethostname()
    attachment = {
        'title': sender,
        'text': '```{}```'.format(string),
    }
    send_to_slack(url, [attachment])
def check_time(now, delta, time_list):
    """Return True if ``now`` lies within ``delta`` after any listed time.

    For each ``t`` in ``time_list`` the window is ``[t, t + delta]`` with both
    ends inclusive. A window that runs past midnight (e.g. 23:55 plus ten
    minutes) also matches times shortly after 00:00.

    Args:
        now: ``datetime.time`` to test.
        delta: ``datetime.timedelta`` length of each window.
        time_list: Iterable of ``datetime.time`` window starts.
    """
    # Times cannot be added to timedeltas, so anchor everything to a fixed date.
    base = datetime.date(2000, 1, 1)
    current = datetime.datetime.combine(base, now)
    # The same wall-clock time seen as "the next day", for windows that wrap.
    current_next_day = current + datetime.timedelta(days=1)
    for t in time_list:
        start = datetime.datetime.combine(base, t)
        end = start + delta
        if start <= current <= end or start <= current_next_day <= end:
            return True
    return False
def db_update_time(now, delta):
    """Return True when ``now`` is within ``delta`` after 09:00."""
    update_slots = [datetime.time(hour=9)]
    return check_time(now, delta, update_slots)
def message_time(now, delta):
    """Return True when ``now`` is within ``delta`` after 10:00, 14:00 or 18:00."""
    message_slots = [datetime.time(hour=hour) for hour in (10, 14, 18)]
    return check_time(now, delta, message_slots)
def main(delta):
    """Run one scheduler tick.

    Reads the current wall-clock time in UTC+9, then:
    - inside the DB-update window, fetches the weekly papers and inserts them
      into the database;
    - inside a message window, rotates three papers out of the database and
      posts each of them.

    ``delta`` is the window length passed on to the time checks.
    """
    utc_plus_9 = datetime.timezone(datetime.timedelta(hours=9))
    now = datetime.datetime.now(tz=utc_plus_9).time()

    if db_update_time(now, delta):
        # Fetch first so the database is only opened after a successful download.
        fetched = get_weekly_arxiv_papers()
        database = PaperDatabase()
        database.insert(fetched)
        print('db updated')

    if message_time(now, delta):
        batch = PaperDatabase().pop(3)
        for paper in batch:
            send_paper(paper)
        print('slack messaged')
if __name__ == '__main__':
    # Poll forever: run one tick, then sleep for the window length, so each
    # scheduled window is normally hit by a single tick.
    delta = datetime.timedelta(minutes=10)
    while True:
        try:
            main(delta)
        except KeyboardInterrupt:
            break
        except Exception:
            report = traceback.format_exc()
            try:
                send_debug(report)
            except Exception:
                # Reporting can fail for the same reason the tick failed
                # (e.g. the network is down). Do not let that kill the loop;
                # print the chained tracebacks to stderr instead.
                traceback.print_exc()
        time.sleep(delta.total_seconds())
# Make the systemd user manager re-read unit files (needed after adding or editing the unit).
systemctl --user daemon-reload
# Show the current state of the bot service.
systemctl --user status arxiv_bot.service
# Start the bot service.
systemctl --user start arxiv_bot.service
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment