@jphdotam
Last active November 1, 2024 15:32
Migrate one Orthanc server to another using multiple threads
# this is inspired by the official ImportDicomFiles.py - however, this is MUCH faster because
# it uses multiple workers to dramatically reduce IO bottlenecks
# it instantiates the Http() connection and auth headers once per folder rather than once per file
# it does assume the input folders are structured in the orthanc format, e.g. /00/00/0000a8768bd86...
# and expects PATH_TO_SYNC to be something like:
## "/path/to/orthanc/root/*" for all root folders
## "/path/to/orthanc/root/1*" for all folders beginning with 1
## "/path/to/orthanc/root/23" for root folder 23 only
# It also has a few extra features, including
## keeping a very simple log of folders that have synced (as files in SUCCESS_DIR) so these can be skipped if the session is restarted
## keeping a very simple log of failed syncs (as files in FAILS_DIR), where the contents of each file is the error message
## the ability to delete files that have successfully synced, via the DELETE_SYNCED_FILES setting
import base64
import httplib2
import json
import os
import os.path
import sys
import multiprocessing
from tqdm import tqdm
from glob import glob
from collections import defaultdict
SERVER_PATH = "localhost"
PORT = "8042"
PATH_TO_SYNC = "/path/to/orthanc/root/*" # the folder containing 00,0a,0b...ff
USERNAME = "username" # orthanc username
PASSWORD = "password" # orthanc password
URL = f'http://{SERVER_PATH}:{PORT}/instances'
N_WORKERS = 4 # number of concurrent connections
FAILS_DIR = "./fails/" # Log failed transfers to be retried later
SUCCESS_DIR = "./success/" # Log of successful transfers so can resume later
DELETE_SYNCED_FILES = False


def is_json(content):
    """Return True if the content parses as JSON (i.e. it is an Orthanc
    metadata file rather than a DICOM file and should be skipped)."""
    try:
        json.loads(content.decode())
        return True
    except ValueError:  # covers both UnicodeDecodeError and JSONDecodeError
        return False


def export_helper(study):
    """Upload all DICOM files from one Orthanc storage folder to the target server."""
    dicom_dir, dicom_files = study

    # One HTTP connection and one set of headers per folder (not per file)
    h = httplib2.Http()
    headers = {'content-type': 'application/dicom'}
    creds_str = USERNAME + ':' + PASSWORD
    creds_str_bytes = creds_str.encode('ascii')
    creds_str_bytes_b64 = b'Basic ' + base64.b64encode(creds_str_bytes)
    headers['authorization'] = creds_str_bytes_b64.decode('ascii')

    # Skip folders that were already fully synced in a previous session
    complete_path = os.path.join(SUCCESS_DIR, f"{os.path.basename(os.path.dirname(dicom_dir))}_{os.path.basename(dicom_dir)}.complete")
    if os.path.exists(complete_path):
        print(f"Skipping {os.path.basename(complete_path)}")
        return

    for dicom_file in tqdm(dicom_files):
        try:
            with open(dicom_file, 'rb') as f:
                content = f.read()

            if is_json(content):
                if DELETE_SYNCED_FILES:
                    os.remove(dicom_file)
                continue

            resp, content = h.request(URL, 'POST',
                                      body=content,
                                      headers=headers)
            if resp.status != 200:
                raise ValueError(f'ERROR ({resp.status})\n\t{content}')
            elif DELETE_SYNCED_FILES:
                os.remove(dicom_file)

        except Exception as e:
            print(f"ERROR WITH {dicom_file} ({e})")
            with open(os.path.join(FAILS_DIR, os.path.basename(dicom_file)) + '.fail', 'w') as f:
                f.write(f"{e}")

    # Touch an empty marker file so this folder is skipped on restart
    with open(complete_path, 'w') as f:
        pass


if __name__ == "__main__":
    os.makedirs(SUCCESS_DIR, exist_ok=True)
    os.makedirs(FAILS_DIR, exist_ok=True)

    root_folders = sorted(glob(os.path.join(PATH_TO_SYNC)))
    print(f"Found {len(root_folders)} root folders")

    for root_path in root_folders:
        files = glob(os.path.join(root_path, "**/*"), recursive=True)
        print(f"({os.path.basename(root_path)}): found {len(files)} files, processing...")

        # Group files by parent folder so each worker handles one folder at a time
        dicoms_by_dir = defaultdict(list)
        for file in tqdm(files):
            if not os.path.isdir(file):
                dicoms_by_dir[os.path.dirname(file)].append(file)
        print(f"Found {len(dicoms_by_dir)} folders")

        with multiprocessing.Pool(N_WORKERS) as p:
            for _ in tqdm(p.imap(export_helper, dicoms_by_dir.items()), total=len(dicoms_by_dir)):
                pass
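
Before pointing the script at a large archive, it can be worth confirming the URL and credentials with a single upload to Orthanc's /instances endpoint. The snippet below is a minimal sketch, not part of the gist above; "./example.dcm" is a hypothetical test file, and the placeholder username, password, and URL mirror the ones in the script.

```python
# Quick sanity check: POST one DICOM file to the target Orthanc and print the result.
# "./example.dcm" is a hypothetical path; substitute any DICOM file you have to hand.
import base64
import httplib2

URL = "http://localhost:8042/instances"
creds = base64.b64encode(b"username:password").decode("ascii")
headers = {"content-type": "application/dicom", "authorization": f"Basic {creds}"}

with open("./example.dcm", "rb") as f:
    resp, content = httplib2.Http().request(URL, "POST", body=f.read(), headers=headers)

print(resp.status)  # expect 200 on success
print(content)      # Orthanc replies with a small JSON body describing the new instance
```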
@sscotti commented Dec 12, 2022

I replaced 'from glob import glob' with 'import glob2 # python3 -m pip install glob2' and made some other mods because I have python3 on a Mac with homebrew, e.g.

'root_folders = sorted(glob2.glob(os.path.join(PATH_TO_SYNC)))' and
'files = glob2.glob(os.path.join(root_path, "**/*"), recursive=True)'

and it seems to work fine when using http for the target. One note: the success and fail directories get created relative to the working directory rather than the script's location, although that is easy to fix.
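
One way to address that (a sketch, not part of the original script) is to build the log directories from the script's own location:

```python
import os

# Anchor the success/fail logs to the folder containing this script,
# rather than to whatever the current working directory happens to be.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
SUCCESS_DIR = os.path.join(SCRIPT_DIR, "success")
FAILS_DIR = os.path.join(SCRIPT_DIR, "fails")
```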

When the target is https, I am seeing an issue like this:

ERROR WITH /Users/sscotti/Desktop/ImportSample/IMAGES/IM10 ([SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997))

which I presume is related to my python3 setup on the Mac with homebrew. Haven't quite figured out how to fix that.
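
Two possible workarounds for the CERTIFICATE_VERIFY_FAILED error, sketched here as untested suggestions rather than confirmed fixes: point httplib2 at certifi's CA bundle, or (only on a trusted network) disable certificate validation for the Http object created in export_helper.

```python
import certifi   # pip install certifi
import httplib2

# Option 1: use certifi's CA bundle so httplib2 can verify the server certificate
h = httplib2.Http(ca_certs=certifi.where())

# Option 2 (insecure; only acceptable on a trusted network): skip verification
# h = httplib2.Http(disable_ssl_certificate_validation=True)
```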

@ibby360 commented Feb 20, 2023

@sscotti I got the same error with Python 3.10, but after switching to Python 3.9 it worked fine.
