Last active
July 18, 2018 23:02
-
-
Save mark-cooper/6ca2a5f3604ee90321818088f23c3b21 to your computer and use it in GitHub Desktop.
ArchivesSpace: basic script to download exported files to `exports` folder in current directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
'''
Basic script to download exported files to `exports` folder in current
directory. It will only download if a file does not exist or the remote
file is updated and "newer" than an existing local file. A remote file
with deleted status will be removed if it exists in the local directory.

SETUP:

python3 required [adjust python3 and pip3 refs per system setup]

pip3 install python-dateutil
pip3 install requests[security]

USAGE:

./exports.py \
    --location=https://archivesspace.lyrasistechnology.org/files/exports/

./exports.py \
    --location=https://archivesspace.lyrasistechnology.org/files/exports/ \
    --manifest=manifest_ead_pdf.csv

# basic auth
./exports.py \
    --location=https://archivesspace.lyrasistechnology.org/files/exports/ \
    --username=demo-files \
    --password=abc123
'''
import csv | |
import os | |
import requests | |
import sys | |
import time | |
from argparse import ArgumentParser | |
from dateutil import parser as date_parser | |
from urllib.parse import urljoin | |
def parse_date(updated_at):
    """Return the timestamp string `updated_at` as whole epoch seconds.

    The string is parsed with dateutil's parser, converted to a POSIX
    timestamp, and truncated to an int (fractional seconds are dropped).

    NOTE(review): `datetime.timestamp()` treats a value without timezone
    information as local time - confirm the manifest timestamps carry an
    explicit offset.
    """
    moment = date_parser.parse(updated_at)
    epoch_seconds = moment.timestamp()
    return int(epoch_seconds)
# Command-line interface: where the exports live, which manifest to read,
# and optional basic-auth credentials. Options are registered in table
# order, which is also the order they appear in `--help`.
parser = ArgumentParser()

option_specs = (
    (
        ('-l', '--location'),
        {
            'help': 'location (url) of exports folder',
            'default': 'https://archivesspace.lyrasistechnology.org/files/exports/',
        },
    ),
    (
        ('-m', '--manifest'),
        {'help': 'manifest', 'default': 'manifest_ead_xml.csv'},
    ),
    (
        ('-u', '--username'),
        {'help': 'username', 'default': ''},
    ),
    (
        ('-p', '--password'),
        {'help': 'password', 'default': ''},
    ),
)

for flags, settings in option_specs:
    parser.add_argument(*flags, **settings)
# Main flow: fetch the manifest into ./exports, then walk its rows and
# bring the local folder in line with it (delete, download, or skip).

# Seconds to wait on the server before giving up on a request; without an
# explicit timeout a stalled connection would block the script forever.
REQUEST_TIMEOUT = 60

cwd = os.getcwd()
outputd = os.path.join(cwd, 'exports')

# args parser
args = parser.parse_args()
manifest = args.manifest
manifest_url = urljoin(args.location, manifest)
username = args.username
password = args.password

os.makedirs(outputd, exist_ok=True)

print('Downloading manifest: %s' % manifest)
try:
    response = requests.get(
        manifest_url,
        auth=(username, password),
        timeout=REQUEST_TIMEOUT,
    )
except requests.RequestException as err:
    sys.exit('Failed to download manifest: %s (%s)' % (manifest_url, err))

if not response.ok:
    sys.exit('Failed to download manifest: ' + manifest_url)

manifest_path = os.path.join(outputd, manifest)
with open(manifest_path, 'wb') as f:
    f.write(response.content)

# newline='' lets the csv module handle line endings itself, including
# newlines embedded in quoted fields.
with open(manifest_path, 'r', newline='') as csvfile:
    reader = csv.DictReader(csvfile, quotechar='"')
    for row in reader:
        deleted = row['deleted'].lower() == 'true'
        filename = row['filename']
        url = row['location']
        updated_at = parse_date(row['updated_at'])
        # NOTE(review): filename comes straight from the downloaded manifest
        # and is joined onto the output directory unchecked - confirm the
        # manifest source is trusted (a value containing '..' would resolve
        # outside the exports folder).
        local_path = os.path.join(outputd, filename)
        exists = os.path.exists(local_path)

        if deleted:
            # A deleted entry must never be downloaded; only remove the
            # local copy if one is still present.
            if exists:
                print('Deleting file: %s' % filename)
                os.remove(local_path)
            continue

        # A missing file is always fetched; an existing one only when the
        # manifest reports an update newer than the local modification time.
        if exists and int(os.path.getmtime(local_path)) >= updated_at:
            continue

        print('Downloading file: %s' % filename)
        try:
            response = requests.get(
                url,
                auth=(username, password),
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as err:
            print(
                'Failed to download file: %s (%s)' % (url, err),
                file=sys.stderr,
            )
            continue

        if not response.ok:
            # Do not save an error page as the export: the fresh mtime would
            # make the bad file look up to date on every later run.
            print(
                'Failed to download file: %s (HTTP %s)'
                % (url, response.status_code),
                file=sys.stderr,
            )
            continue

        with open(local_path, 'wb') as f:
            f.write(response.content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment