Skip to content

Instantly share code, notes, and snippets.

@dunhamsteve
Created January 15, 2021 17:10
Show Gist options
  • Save dunhamsteve/c471105fca75af9c427a0f4c8cabe55a to your computer and use it in GitHub Desktop.
Script to back up dropbox paper
#!/usr/local/bin/python3
# Backup dropbox paper.
# This expects a ~/.auth/dropbox file that contains your dropbox bearer token.
import requests, os, json, re
from pprint import pprint
oj = os.path.join
class DBox(object):
    """Minimal Dropbox API v2 client authenticated with a bearer token.

    The token is read from ~/.auth/dropbox (one line, the raw token).
    """

    def __init__(self):
        self.base = "https://api.dropboxapi.com/2"
        # FIX: original used open(...).read() and leaked the file handle;
        # a context manager guarantees it is closed.
        with open(os.path.expanduser('~/.auth/dropbox')) as fh:
            self.key = fh.read().strip()
        self.headers = {'Authorization': f"Bearer {self.key}"}

    def post(self, path, **args):
        """POST keyword args as JSON to an RPC endpoint; return decoded JSON.

        Prints the response body before failing so the Dropbox error is visible.
        """
        if path[0] != '/':
            path = '/' + path
        r = requests.post(self.base + path, json=args, headers=self.headers)
        if not r.ok:
            print(r.text)
        assert r.ok
        return r.json()

    def download(self, path, **args):
        """Call a content-download endpoint; args travel in Dropbox-API-Arg.

        Returns (metadata dict parsed from the Dropbox-Api-Result header,
        raw response bytes).
        """
        headers = dict(self.headers)
        headers['Dropbox-API-Arg'] = json.dumps(args)
        r = requests.post(self.base + path, headers=headers)
        if not r.ok:
            print(r.headers)
            print(r.text)
        assert r.ok
        meta = json.loads(r.headers['Dropbox-Api-Result'])
        return meta, r.content
def sync(db, key):
    """Back up one Paper doc (markdown + html + meta) under the backup tree.

    Returns False when the stored revision already matches the remote one
    (the caller iterates newest-first and stops at the first unchanged doc),
    True after writing a new revision.
    """
    # REVIEW - maybe backup HTML too?  (html is in fact fetched below)
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    meta, md = db.download('/paper/docs/download', doc_id=key, export_format='markdown')
    _, html = db.download('/paper/docs/download', doc_id=key, export_format='html')
    os.makedirs(oj(base, key, 'media'), exist_ok=True)
    mf = oj(base, key, 'meta.json')
    if os.path.exists(mf):
        # FIX: all file handles below now use `with`; the original leaked them.
        with open(mf) as fh:
            prev = json.load(fh)
        print(key, meta['revision'], 'previous', prev['revision'], repr(meta['title']))
        if prev['revision'] == meta['revision']:
            return False
    print('write', key, meta.get('title'))
    with open(oj(base, key, 'content.md'), 'wb') as fh:
        fh.write(md)
    with open(oj(base, key, 'content.html'), 'wb') as fh:
        fh.write(html)
    with open(mf, 'w') as fh:
        json.dump(meta, fh, indent=True)
    saveinfo(db, key)
    return True
def saveinfo(db, key):
    """Fetch a doc's folder metadata and write it to <base>/<key>/folder.json.

    Assumes the <base>/<key> directory already exists (sync creates it).
    """
    print("save info", key)
    info = db.post('/paper/docs/get_folder_info', doc_id=key)
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    infofn = os.path.join(base, key, 'folder.json')
    # FIX: `with` closes the handle; the original left it to the GC.
    with open(infofn, "w", encoding="utf8") as fh:
        json.dump(info, fh, indent=True)
def getmedia(key):
    """Download image assets referenced by a doc's markdown into media/.

    Skips files already on disk and relative URLs ('://' absent).
    NOTE(review): when folder.json is missing this reads the module-level
    `db` created in the __main__ block — it is not a parameter.
    """
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    infofn = oj(base, key, 'folder.json')
    if not os.path.exists(infofn):
        saveinfo(db, key)  # `db` is the module-level client from __main__
    # FIX: file handles now use `with`; the original leaked them.
    with open(oj(base, key, 'content.md'), encoding='utf8') as fh:
        md = fh.read()
    for m in re.finditer(r'!\[.*?\]\((.*?)\)', md):
        url = m.group(1)
        # Strip any query string so the local filename is stable.
        bn = os.path.basename(url).split('?')[0]
        fn = oj(base, key, 'media', bn)
        if os.path.exists(fn):
            continue
        print(key, bn, url)
        if '://' not in url:
            print("bad url", url)
            continue
        r = requests.get(url)
        assert r.ok
        with open(fn, 'wb') as fh:
            fh.write(r.content)
if __name__ == '__main__':
    import glob, subprocess
    # Backup dropbox paper.
    db = DBox()
    # If you have a lot of docs, you'll need to deal with the continuation stuff here
    docs = db.post('/paper/docs/list', sort_by="modified", sort_order='descending')
    for key in docs['doc_ids']:
        # Docs arrive newest-first: the first unchanged doc means the
        # rest are already backed up, so stop early.
        if not sync(db, key):
            break
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    for fn in glob.glob(oj(base, '*')):
        if not os.path.isdir(fn):
            continue
        key = os.path.basename(fn)
        getmedia(key)
        # Backfill HTML for docs synced before HTML export was added,
        # unless the doc was already marked DELETED.
        if not os.path.exists(oj(base, key, 'content.html')) and not os.path.exists(oj(base, key, 'DELETED')):
            print("backfill html", key)
            try:
                _, html = db.download('/paper/docs/download', doc_id=key, export_format='html')
                with open(oj(base, key, 'content.html'), 'wb') as fh:
                    fh.write(html)
            # FIX: was a bare `except:` which also swallowed KeyboardInterrupt
            # and SystemExit; narrow to Exception (best-effort marker file).
            except Exception:
                # Doc is no longer downloadable: leave a marker so the
                # next run does not retry it.
                with open(oj(base, key, 'DELETED'), 'wb'):
                    pass
    # FIX: original chdir'd to '~/Dropbox/backup/paper' (lowercase 'backup'),
    # which only matched on case-insensitive filesystems; reuse `base`.
    os.chdir(base)
    subprocess.run("git add .".split())
    subprocess.run("git commit -am checkpoint".split())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment