@jphdotam
Last active November 1, 2024 15:32
Migrate one Orthanc server to another using multiple worker processes
# This is inspired by the official ImportDicomFiles.py - however, it is MUCH faster because
# it uses multiple workers to dramatically reduce IO bottlenecks,
# and it doesn't re-instantiate the Http() connection and headers for every file, only once per folder.
# It assumes the input folders are structured in the Orthanc storage format, e.g. /00/00/0000a8768bd86...
# and expects PATH_TO_SYNC to be something like:
## "/path/to/orthanc/root/*" for all root folders
## "/path/to/orthanc/root/1*" for all folders beginning with 1
## "/path/to/orthanc/root/23" for root folder 23 only
# It also has a couple of features, including:
## keeping a very simple log of folders that have synced (as files in SUCCESS_DIR) so these can be skipped if the session is restarted
## keeping a very simple log of failed syncs (as files in FAILS_DIR), where the contents of each file is the error
## the ability to delete files that have successfully synced, via the DELETE_SYNCED_FILES flag
import base64
import httplib2
import json
import os
import os.path
import sys
import multiprocessing
from tqdm import tqdm
from glob import glob
from collections import defaultdict
SERVER_PATH = "localhost"
PORT = "8042"
PATH_TO_SYNC = "/path/to/orthanc/root/*" # the folder containing 00,0a,0b...ff
USERNAME = "username" # orthanc username
PASSWORD = "password" # orthanc password
URL = f'http://{SERVER_PATH}:{PORT}/instances'
N_WORKERS = 4 # number of concurrent connections
FAILS_DIR = "./fails/" # Log failed transfers to be retried later
SUCCESS_DIR = "./success/" # Log of successful transfers so can resume later
DELETE_SYNCED_FILES = False
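
# (Optional sanity check, not part of the original gist.) Before starting a long migration it is
# worth confirming the target Orthanc is reachable with the credentials above, e.g. from a shell:
#   curl -u username:password http://localhost:8042/system
# which should return a small JSON document describing the server.
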
def is_json(content):
    """The Orthanc storage area can contain JSON metadata files alongside the DICOM instances; detect them so they can be skipped."""
    try:
        json.loads(content.decode())
        return True
    except (UnicodeDecodeError, ValueError):
        return False
def export_helper(study):
    dicom_dir, dicom_files = study

    # one HTTP connection and auth header per folder, rather than per file
    h = httplib2.Http()
    headers = {'content-type': 'application/dicom'}
    creds_str = USERNAME + ':' + PASSWORD
    creds_str_bytes = creds_str.encode('ascii')
    creds_str_bytes_b64 = b'Basic ' + base64.b64encode(creds_str_bytes)
    headers['authorization'] = creds_str_bytes_b64.decode('ascii')

    # skip folders already synced in a previous session
    complete_path = os.path.join(SUCCESS_DIR, f"{os.path.basename(os.path.dirname(dicom_dir))}_{os.path.basename(dicom_dir)}.complete")
    if os.path.exists(complete_path):
        print(f"Skipping {os.path.basename(complete_path)}")
        return

    for dicom_file in tqdm(dicom_files):
        try:
            with open(dicom_file, 'rb') as f:
                content = f.read()

            # skip the JSON metadata files that live alongside the DICOM instances
            if is_json(content):
                if DELETE_SYNCED_FILES:
                    os.remove(dicom_file)
                continue

            resp, content = h.request(URL, 'POST', body=content, headers=headers)
            if resp.status != 200:
                raise ValueError(f'ERROR ({resp.status})\n\t{content}')
            elif DELETE_SYNCED_FILES:
                os.remove(dicom_file)

        except Exception as e:
            print(f"ERROR WITH {dicom_file} ({e})")
            with open(os.path.join(FAILS_DIR, os.path.basename(dicom_file)) + '.fail', 'w') as f:
                f.write(f"{e}")

    # mark the folder as complete so it can be skipped on restart
    with open(complete_path, 'w'):
        pass
if __name__ == "__main__":
    os.makedirs(SUCCESS_DIR, exist_ok=True)
    os.makedirs(FAILS_DIR, exist_ok=True)

    root_folders = sorted(glob(os.path.join(PATH_TO_SYNC)))
    print(f"Found {len(root_folders)} root folders")

    for root_path in root_folders:
        files = glob(os.path.join(root_path, "**/*"), recursive=True)
        print(f"({os.path.basename(root_path)}): found {len(files)} files, processing...")

        # group files by their containing folder so each worker handles one folder at a time
        dicoms_by_dir = defaultdict(list)
        for file in tqdm(files):
            if not os.path.isdir(file):
                dicoms_by_dir[os.path.dirname(file)].append(file)
        print(f"Found {len(dicoms_by_dir)} folders")

        with multiprocessing.Pool(N_WORKERS) as p:
            for _ in tqdm(p.imap(export_helper, dicoms_by_dir.items()), total=len(dicoms_by_dir)):
                pass
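
Failed uploads are recorded as .fail files in FAILS_DIR, named after the DICOM file, with the error message as the file contents. Below is a minimal sketch, not part of the original gist, of how those failures could be retried afterwards; the file name retry_fails.py, the ORTHANC_ROOT constant, and the reconstruction of each path from the first four characters of the filename are assumptions based on the Orthanc storage layout (/00/00/0000a876...) described above.

# retry_fails.py - a minimal sketch (not part of the original gist) for re-sending
# the files recorded in FAILS_DIR once the main migration has finished
import base64
import os
from glob import glob

import httplib2

ORTHANC_ROOT = "/path/to/orthanc/root"   # the directory PATH_TO_SYNC globs over
FAILS_DIR = "./fails/"
URL = "http://localhost:8042/instances"
USERNAME, PASSWORD = "username", "password"

h = httplib2.Http()
token = base64.b64encode(f"{USERNAME}:{PASSWORD}".encode("ascii")).decode("ascii")
headers = {"content-type": "application/dicom", "authorization": f"Basic {token}"}

for fail_path in glob(os.path.join(FAILS_DIR, "*.fail")):
    name = os.path.basename(fail_path)[:-len(".fail")]
    # assumes the standard storage layout: 0000a876... lives under <root>/00/00/0000a876...
    dicom_path = os.path.join(ORTHANC_ROOT, name[:2], name[2:4], name)
    if not os.path.exists(dicom_path):
        print(f"Cannot find {name}, skipping")
        continue
    with open(dicom_path, "rb") as f:
        resp, _ = h.request(URL, "POST", body=f.read(), headers=headers)
    if resp.status == 200:
        os.remove(fail_path)          # clear the marker once the upload succeeds
    else:
        print(f"Still failing: {name} ({resp.status})")
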
ibby360 commented Feb 20, 2023

@sscotti I got the same error using Python 3.10, but when I changed the Python version to 3.9 it worked fine.
