Created
March 14, 2025 06:56
-
-
Save EKami/74ebd6c64b23931cf792a5b98f05aa30 to your computer and use it in GitHub Desktop.
Darwin script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import click | |
import tempfile | |
import os | |
from datetime import datetime | |
from darwin.client import Client | |
from darwin.importer import get_importer | |
import darwin.importer as importer | |
from darwin.datatypes import AnnotationClass | |
def add_darwin_class(dataset, name, annotation_type):
    """Create a new annotation class on a Darwin dataset.

    Args:
        dataset: Remote dataset object exposing ``add_annotation_class``.
        name: Name of the annotation class to create.
        annotation_type: Kind of annotation; must be one of the names in
            ``valid_types`` below.

    Raises:
        ValueError: If ``annotation_type`` is not an accepted type name.
        RuntimeError: If the call returns an HTTP-style response whose
            ``status_code`` is not 201.
    """
    valid_types = (
        'bounding_box', 'polygon', 'tag', 'line',
        'keypoint', 'mask', 'raster_layer', 'dicom',
    )
    if annotation_type not in valid_types:
        raise ValueError(f"{annotation_type} is not a valid annotation type")

    new_class = AnnotationClass(
        name=name,
        annotation_type=annotation_type,
    )
    response = dataset.add_annotation_class(new_class)

    # NOTE(review): darwin-py appears to return a plain dict (or None) from
    # add_annotation_class rather than an HTTP response object -- confirm
    # against the installed SDK version. Reading ``status_code`` directly
    # would then raise AttributeError even though the call succeeded, so the
    # status is only checked when the returned object actually carries one.
    status_code = getattr(response, "status_code", None)
    if status_code is not None and status_code != 201:
        raise RuntimeError("Could not add annotation")
def get_darwin_item(dataset, slide_filename):
    """Find the remote image whose filename equals ``slide_filename``.

    Args:
        dataset: Remote dataset object exposing ``fetch_remote_files``.
        slide_filename: Exact filename to look for.

    Returns:
        A dict with the keys ``name``, ``path`` and ``source_info`` (holding
        the remote ``item_id``) for the first matching item, or ``None`` when
        no remote image has that filename.
    """
    remote_images = dataset.fetch_remote_files(filters={"types": "image"})

    # Stop at the first filename match, exactly like an early-returning loop.
    match = next(
        (
            candidate
            for candidate in remote_images
            if candidate.filename == slide_filename
        ),
        None,
    )
    if match is None:
        return None

    return {
        "name": slide_filename,
        "path": match.path,
        "source_info": {"item_id": match.id},
    }
def register_annotations_in_darwin(api_key, dataset_slug, slide_name, annotations_json_filepath):
    """Import an externally produced annotations array onto a Darwin slide.

    The annotations are wrapped in a Darwin "2.0" document that points at the
    existing remote item, written to a temporary JSON file, and handed to the
    darwin-py importer.
    See https://docs.v7labs.com/docs/import-annotations-made-outside-of-v7

    Args:
        api_key: Darwin V7 API key used to build the client.
        dataset_slug: Identifier of the remote dataset holding the slide.
        slide_name: Filename of the slide the annotations belong to.
        annotations_json_filepath: Path to a JSON file containing only the
            "annotations" array of a Darwin document.

    Returns:
        A JSON string, currently always ``{"success": "true"}``.

    Raises:
        RuntimeError: If the annotations file is not valid JSON, or if the
            slide cannot be found in the dataset.
    """
    try:
        with open(annotations_json_filepath, encoding="utf-8") as annotations_json_file:
            annotations_json = json.load(annotations_json_file)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Could not decode annotations_json to JSON {e}") from e

    print("Registering annotations items in Darwin...")
    client = Client.from_api_key(api_key)
    dataset = client.get_remote_dataset(dataset_identifier=dataset_slug)

    # Ensures the slide exists in Darwin first
    darwin_item = get_darwin_item(dataset, slide_name)
    if darwin_item is None:
        raise RuntimeError(
            f"Could not find slide {slide_name!r} in Darwin dataset {dataset_slug!r}"
        )

    final_json = {
        "version": "2.0",
        "item": darwin_item,
        "annotations": annotations_json,
    }
    parser = get_importer("darwin")

    # delete=False so the importer can reopen the file by name; the file is
    # removed explicitly in the ``finally`` below instead of being leaked.
    tmp_file = tempfile.NamedTemporaryFile(mode='w', suffix=".json", delete=False)
    try:
        # Close the handle before importing: this flushes the content to disk
        # and lets the importer reopen the file on platforms that forbid
        # opening a file that is still held open for writing.
        with tmp_file:
            json.dump(final_json, tmp_file)

        # FIXME: How do I properly capture the error output out of this function
        # without having to rely on patchy checkers such as checking the stdout
        # and stderr for the "error" word?
        importer.import_annotations(
            dataset, parser, [tmp_file.name], append=True,
            overwrite=True, class_prompt=False
        )
    finally:
        os.remove(tmp_file.name)

    # TODO fix the return value based on the response from `importer.import_annotations`
    return json.dumps({"success": "true"})
@click.command(help='Register AI annotations in V7')
@click.option(
    # Required: without a key the Darwin client would be built from ``None``
    # and only fail later, after the annotations file has been parsed.
    '--darwin_api_key', type=str, required=True,
    help='The API key to use to authenticate to Darwin V7'
)
@click.option(
    '--slide_name', type=str, required=True,
    help='The slide name to register the annotation to'
)
@click.option(
    '--dataset_slug', default="auto-annotation-test", type=str,
    help='The slug of the Darwin dataset that contains the slide'
)
# Example of file input:
# [
#   {
#     "name": "Tumor (AI generated)",
#     "polygon": {
#       "paths": [
#         [
#           {"x": 100.5, "y": 200.3},
#           {"x": 150.2, "y": 180.7},
#           {"x": 200.8, "y": 220.1},
#           {"x": 180.4, "y": 250.9},
#           {"x": 120.6, "y": 240.2}
#         ]
#       ]
#     },
#     "bounding_box": {
#       "x": 100.5,
#       "y": 180.7,
#       "w": 100.3,
#       "h": 70.2
#     },
#     "inference": {
#       "confidence": 0.95,
#       "model": {
#         "id": "1",
#         "name": "auto-annotation-prognostics-ai",
#         "type": "external"
#       }
#     },
#     "text": {
#       "text": "AI generated"
#     }
#   }
# ]
@click.option(
    '--annotations_json_filepath',
    type=click.Path(exists=True), required=True,
    help='A path to annotations array in the Darwin format. Only the "annotations" object is required here'
)
def main(darwin_api_key, slide_name, dataset_slug, annotations_json_filepath):
    """Command-line entry point: register the annotations and report timing.

    Args:
        darwin_api_key: Darwin V7 API key (``--darwin_api_key``).
        slide_name: Slide filename to attach the annotations to
            (``--slide_name``).
        dataset_slug: Dataset identifier (``--dataset_slug``).
        annotations_json_filepath: Path to the annotations JSON file
            (``--annotations_json_filepath``).
    """
    start_time = datetime.now()
    # The helper's return value is not used here, so it is not bound to a name.
    register_annotations_in_darwin(darwin_api_key, dataset_slug, slide_name, annotations_json_filepath)
    print(f"Total time spend: (hh:mm:ss.ms) {datetime.now() - start_time}\n")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment