Last active
November 1, 2024 15:32
-
-
Save jphdotam/21581fc4a205072ecf30d2c0c846f117 to your computer and use it in GitHub Desktop.
Migrate one Orthanc server to another using multiple worker processes
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is inspired by the official ImportDicomFiles.py - however, this is MUCH faster because:
## it uses multiple workers to dramatically reduce IO bottlenecks
## it doesn't re-instantiate the Http() and headers for every file, rather every folder
# It does assume the input folders are structured in the orthanc format, e.g. /00/00/0000a8768bd86...
# and expects PATH_TO_SYNC to be something like:
## "/path/to/orthanc/root/*" for all root folders
## "/path/to/orthanc/root/1*" for all folders beginning with 1
## "/path/to/orthanc/root/23" for root folder 23 only
# It also has a couple of features, including:
## keeping a very simple log of folders that have synced (as files in SUCCESS_DIR) so these can be skipped if the session is restarted
## keeping a very simple log of failed syncs (as files in FAILS_DIR) where the contents of each file is the error
## the ability to delete files that have successfully synced, via the DELETE_SYNCED_FILES flag
import base64
import json
import multiprocessing
import os
import os.path
import sys
from collections import defaultdict
from glob import glob

import httplib2
from tqdm import tqdm
# --- Configuration -----------------------------------------------------------
# Host name and port of the target Orthanc server.
SERVER_PATH = "localhost"
PORT = "8042"
# Glob pattern selecting the root storage folders to upload.
PATH_TO_SYNC = "/path/to/orthanc/root/*"  # the folder containing 00,0a,0b...ff
USERNAME = "username"  # orthanc username
PASSWORD = "password"  # orthanc password
# Endpoint every file is POSTed to.
URL = f'http://{SERVER_PATH}:{PORT}/instances'
N_WORKERS = 4  # number of concurrent connections
FAILS_DIR = "./fails/"  # Log failed transfers to be retried later
SUCCESS_DIR = "./success/"  # Log of successful transfers so can resume later
# When True, each file is removed from disk once it has been uploaded.
DELETE_SYNCED_FILES = False
def is_json(content):
    """Return True if *content* is bytes that decode to a valid JSON document.

    Args:
        content: raw file contents (``bytes``).

    Returns:
        ``True`` when ``content.decode()`` parses with ``json.loads``,
        ``False`` otherwise (undecodable bytes, invalid JSON, or an object
        without a ``decode`` method).
    """
    try:
        json.loads(content.decode())
    # UnicodeDecodeError and JSONDecodeError are both ValueError subclasses;
    # AttributeError covers input with no .decode(); RecursionError covers
    # pathologically nested documents. KeyboardInterrupt / SystemExit are
    # deliberately NOT caught so the run can still be interrupted.
    except (ValueError, AttributeError, RecursionError):
        return False
    return True
def export_helper(study):
    """Upload every file of one storage folder to the Orthanc server.

    Args:
        study: a ``(dicom_dir, dicom_files)`` pair - the folder path and the
            list of file paths found in it.

    Returns:
        None.

    Side effects:
        * POSTs each non-JSON file to ``URL`` with basic-auth credentials.
        * Files whose content parses as JSON are not uploaded.
        * When ``DELETE_SYNCED_FILES`` is set, uploaded files and skipped
          JSON files are removed from disk.
        * Each failure is printed and recorded as
          ``FAILS_DIR/<file name>.fail`` containing the error text.
        * An empty ``SUCCESS_DIR/<parent>_<folder>.complete`` marker is
          written only when no file in the folder failed, so a restarted
          session retries folders that had failures instead of skipping them.
    """
    dicom_dir, dicom_files = study

    # Check the resume marker first so an already-synced folder costs nothing.
    marker_name = f"{os.path.basename(os.path.dirname(dicom_dir))}_{os.path.basename(dicom_dir)}.complete"
    complete_path = os.path.join(SUCCESS_DIR, marker_name)
    if os.path.exists(complete_path):
        print(f"Skipping {os.path.basename(complete_path)}")
        return

    # One HTTP client and one header set per folder, reused for every file.
    h = httplib2.Http()
    credentials = f"{USERNAME}:{PASSWORD}".encode('ascii')
    headers = {
        'content-type': 'application/dicom',
        'authorization': 'Basic ' + base64.b64encode(credentials).decode('ascii'),
    }

    n_failed = 0
    for dicom_file in tqdm(dicom_files):
        try:
            with open(dicom_file, 'rb') as f:
                payload = f.read()

            if is_json(payload):
                # JSON files are never uploaded.
                if DELETE_SYNCED_FILES:
                    os.remove(dicom_file)
                continue

            resp, resp_body = h.request(URL, 'POST',
                                        body=payload,
                                        headers=headers)
            if resp.status != 200:
                raise ValueError(f'ERROR ({resp.status})\n\t{resp_body}')
            if DELETE_SYNCED_FILES:
                os.remove(dicom_file)
        except Exception as e:
            # Best effort: record the failure and carry on with the next file.
            n_failed += 1
            print(f"ERROR WITH {dicom_file} ({e})")
            with open(os.path.join(FAILS_DIR, os.path.basename(dicom_file)) + '.fail', 'w') as f:
                f.write(f"{e}")

    if n_failed:
        # Leave the folder unmarked so it is retried on the next run.
        print(f"{n_failed} file(s) failed in {dicom_dir}; not marking as complete")
        return

    # Empty marker file: its existence alone records that the folder synced.
    with open(complete_path, 'w'):
        pass
if __name__ == "__main__":
    # Make sure the marker/log directories exist before any worker writes to them.
    os.makedirs(SUCCESS_DIR, exist_ok=True)
    os.makedirs(FAILS_DIR, exist_ok=True)

    root_folders = sorted(glob(PATH_TO_SYNC))
    print(f"Found {len(root_folders)} root folders")

    # The worker pool is created once for the whole run rather than once per
    # root folder, so worker processes are not repeatedly spawned and torn down.
    with multiprocessing.Pool(N_WORKERS) as p:
        for root_path in root_folders:
            files = glob(os.path.join(root_path, "**/*"), recursive=True)
            print(f"({os.path.basename(root_path)}): found {len(files)} files, processing...")

            # Group file paths by their containing folder; each folder is one
            # unit of work for export_helper.
            dicoms_by_dir = defaultdict(list)
            for path in tqdm(files):
                if not os.path.isdir(path):
                    dicoms_by_dir[os.path.dirname(path)].append(path)
            print(f"Found {len(dicoms_by_dir)} folders")

            # Drain the iterator purely to drive the progress bar;
            # export_helper returns nothing.
            for _ in tqdm(p.imap(export_helper, dicoms_by_dir.items()), total=len(dicoms_by_dir)):
                pass
@sscotti I got the same error using python3.10 but I changed the version of python to Python3.9 and it worked fine.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I replaced 'from glob import glob' with 'import glob2 # python3 -m pip install glob2' and made some other mods because I have python3 on a Mac with homebrew, e.g.
'root_folders = sorted(glob2.glob(os.path.join(PATH_TO_SYNC)))' and
'files = glob2.glob(os.path.join(root_path, "**/*"), recursive=True)'
and it seems to work fine when using http for the target. Note that the success and fail directories get created relative to the working directory, not necessarily the script's directory, but that is easy to fix.
When the target is https, I am seeing an issue like this:
ERROR WITH /Users/sscotti/Desktop/ImportSample/IMAGES/IM10 ([SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:997))
which I presume is related to my python3 setup on the Mac with homebrew. I haven't quite figured out how to fix that.