Created
January 15, 2021 17:10
-
-
Save dunhamsteve/c471105fca75af9c427a0f4c8cabe55a to your computer and use it in GitHub Desktop.
Script to back up dropbox paper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
# Back up Dropbox Paper documents.
# This expects a ~/.auth/dropbox file that contains your Dropbox bearer token.
import requests, os, json, re | |
from pprint import pprint | |
oj = os.path.join | |
class DBox(object):
    """Small wrapper around the Dropbox HTTP API (v2) built on ``requests``.

    The bearer token is read from ``~/.auth/dropbox`` when the object is
    created and sent as an ``Authorization`` header on every request.
    """

    def __init__(self):
        self.base = "https://api.dropboxapi.com/2"
        # Context manager so the token file is closed as soon as it is read.
        with open(os.path.expanduser('~/.auth/dropbox')) as f:
            self.key = f.read().strip()
        self.headers = {'Authorization': f"Bearer {self.key}"}

    def _url(self, path):
        """Return the full endpoint URL for *path*, adding a leading '/' if missing."""
        if not path.startswith('/'):
            path = '/' + path
        return self.base + path

    @staticmethod
    def _check(r, show_headers=False):
        """Print the error details of a failed response *r* and raise.

        Raises ``requests.HTTPError`` (via ``raise_for_status``) instead of
        relying on ``assert``, which is stripped when Python runs with ``-O``.
        """
        if r.ok:
            return
        if show_headers:
            print(r.headers)
        print(r.text)
        r.raise_for_status()

    def post(self, path, **args):
        """POST *args* as a JSON body to *path* and return the decoded JSON reply.

        Raises ``requests.HTTPError`` if the response status is not OK.
        """
        r = requests.post(self._url(path), json=args, headers=self.headers)
        self._check(r)
        return r.json()

    def download(self, path, **args):
        """POST to *path* with *args* JSON-encoded in the ``Dropbox-API-Arg`` header.

        Returns a ``(meta, content)`` tuple: ``meta`` is the JSON decoded from
        the ``Dropbox-Api-Result`` response header and ``content`` is the raw
        response body as bytes.

        Raises ``requests.HTTPError`` if the response status is not OK.
        """
        # Copy so the per-request header does not leak into self.headers.
        headers = dict(self.headers)
        headers['Dropbox-API-Arg'] = json.dumps(args)
        r = requests.post(self._url(path), headers=headers)
        self._check(r, show_headers=True)
        meta = json.loads(r.headers['Dropbox-Api-Result'])
        return meta, r.content
def sync(db,key):
    """Back up the Paper doc *key* under ``~/Dropbox/Backup/paper/<key>/``.

    Downloads the markdown export and compares its ``revision`` with the one
    stored in ``meta.json``. If they match nothing is written and ``False`` is
    returned. Otherwise ``content.md``, ``content.html`` and ``meta.json`` are
    (re)written, the folder info is refreshed via ``saveinfo`` and ``True`` is
    returned.

    *db* must provide ``download(path, **args) -> (meta, bytes)``.
    """
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    docdir = os.path.join(base, key)
    meta, md = db.download('/paper/docs/download', doc_id=key, export_format='markdown')
    os.makedirs(os.path.join(docdir, 'media'), exist_ok=True)
    mf = os.path.join(docdir, 'meta.json')
    if os.path.exists(mf):
        with open(mf) as f:
            prev = json.load(f)
        print(key, meta['revision'], 'previous', prev['revision'], repr(meta['title']))
        if prev['revision'] == meta['revision']:
            return False
    # Fetch the HTML export only once we know the doc changed, so an
    # unchanged doc costs a single API call instead of two.
    _, html = db.download('/paper/docs/download', doc_id=key, export_format='html')
    print('write',key,meta.get('title'))
    with open(os.path.join(docdir, 'content.md'), 'wb') as f:
        f.write(md)
    with open(os.path.join(docdir, 'content.html'), 'wb') as f:
        f.write(html)
    # meta.json is written last: it is what marks this revision as backed up.
    with open(mf, 'w') as f:
        json.dump(meta, f, indent=True)
    saveinfo(db,key)
    return True
def saveinfo(db,key):
    """Fetch the folder info for doc *key* and store it as ``folder.json``.

    Calls ``db.post('/paper/docs/get_folder_info', doc_id=key)`` and writes the
    result, JSON-encoded, to ``~/Dropbox/Backup/paper/<key>/folder.json``.
    """
    print("save info",key)
    info = db.post('/paper/docs/get_folder_info',doc_id=key)
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    infofn = os.path.join(base,key,'folder.json')
    # Make sure the doc directory exists so this works for a fresh key too.
    os.makedirs(os.path.dirname(infofn), exist_ok=True)
    # Context manager guarantees the file is flushed and closed.
    with open(infofn,"w",encoding="utf8") as f:
        json.dump(info, f, indent=True)
def getmedia(key, client=None):
    """Download the images referenced by the backed-up markdown of doc *key*.

    Scans ``~/Dropbox/Backup/paper/<key>/content.md`` for markdown image links
    and saves each one that is not already present to the doc's ``media``
    directory, named after the last path component of the URL. Links without a
    ``://`` are reported as bad and skipped.

    If ``folder.json`` is missing it is created first with ``saveinfo``.
    *client* is the API object passed to ``saveinfo``; when omitted, the
    module-level ``db`` (created by the ``__main__`` block) is used.

    Raises ``requests.HTTPError`` if an image download fails.
    """
    base = os.path.expanduser('~/Dropbox/Backup/paper')
    docdir = os.path.join(base, key)
    media_dir = os.path.join(docdir, 'media')
    if not os.path.exists(os.path.join(docdir, 'folder.json')):
        saveinfo(db if client is None else client, key)
    with open(os.path.join(docdir, 'content.md'), encoding='utf8') as f:
        md = f.read()
    for m in re.finditer(r'!\[.*?\]\((.*?)\)', md):
        url = m.group(1)
        # Drop the query string BEFORE taking the basename; otherwise a '/'
        # inside the query (e.g. ?src=a/b) would be mistaken for a path part.
        bn = os.path.basename(url.split('?')[0])
        if not bn:
            # Nothing usable to name the file after (empty link or trailing '/').
            continue
        fn = os.path.join(media_dir, bn)
        if os.path.exists(fn):
            continue
        print(key, bn, url)
        if '://' not in url:
            print("bad url",url)
            continue
        r = requests.get(url)
        # Explicit check instead of `assert`, which is stripped under -O.
        r.raise_for_status()
        os.makedirs(media_dir, exist_ok=True)
        with open(fn, 'wb') as f:
            f.write(r.content)
if __name__ == '__main__':
    import glob, subprocess

    # Backup dropbox paper
    # NOTE: `db` has to stay a module-level name - getmedia() reads it as a global.
    db = DBox()
    # If you have a lot of docs, you'll need to deal with the continuation stuff here
    docs = db.post('/paper/docs/list',sort_by="modified", sort_order='descending')
    for key in docs['doc_ids']:
        # Docs are requested most-recently-modified first, so the first
        # unchanged doc ends the sync pass.
        if not sync(db,key):
            break

    base = os.path.expanduser('~/Dropbox/Backup/paper')
    for fn in glob.glob(os.path.join(base, '*')):
        if not os.path.isdir(fn):
            continue
        key = os.path.basename(fn)
        getmedia(key)
        html_fn = os.path.join(base, key, 'content.html')
        deleted_fn = os.path.join(base, key, 'DELETED')
        if os.path.exists(html_fn) or os.path.exists(deleted_fn):
            continue
        print("backfill html",key)
        try:
            _,html = db.download('/paper/docs/download', doc_id=key, export_format='html')
        except Exception as e:
            # `except Exception` rather than a bare except, so Ctrl-C and
            # SystemExit still stop the script instead of flagging the doc.
            # NOTE(review): any download failure (including a transient network
            # error) marks the doc as DELETED - confirm that is intended.
            print("backfill failed, marking DELETED", key, repr(e))
            with open(deleted_fn, 'wb'):
                pass
        else:
            with open(html_fn, 'wb') as f:
                f.write(html)

    # Run git in the backup directory. The original chdir'ed to
    # '~/Dropbox/backup/paper' (lower-case b), which does not exist on a
    # case-sensitive filesystem; reuse `base` so the path always matches.
    subprocess.run(["git", "add", "."], cwd=base)
    # No check=True: `git commit` exits non-zero when there is nothing to commit.
    subprocess.run(["git", "commit", "-am", "checkpoint"], cwd=base)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment