Last active
August 23, 2022 15:31
-
-
Save jaor/4fd9cf9f8d619717af0997ea021cc566 to your computer and use it in GitHub Desktop.
dataset to image composite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "dataset to image composite", | |
"kind": "script", | |
"description": "Transform back a dataset with images to a source composite", | |
"source_code": "script.whizzml", | |
"inputs": [ | |
{ | |
"name": "dataset", | |
"type": "dataset-id", | |
"description": "The input dataset" | |
}, | |
{ | |
"name": "fields", | |
"type": "list", | |
"default": [], | |
"description": "A list of fields to add as label fields" | |
}, | |
{ | |
"name": "component-batch-size", | |
"type": "number", | |
"default": 100, | |
"description": "Components are added in batches of this size" | |
}, | |
{ | |
"name": "row-value-batch-size", | |
"type": "number", | |
"default": 5, | |
"description": "Values for labels are updated in batches of this size" | |
} | |
], | |
"outputs": [ | |
{ | |
"name": "source", | |
"type": "source-id", | |
"description": "The resulting composite" | |
} | |
] | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Returns a label of the form "<name> (<idx>)" that is not yet in `names`.
;;
;; names: list of labels already taken
;; name:  base label to derive the candidate from
;; idx:   first numeric suffix to try; incremented until a free label is found
(define (unique-name names name idx)
  ;; The candidate gets its own binding so that the recursive call keeps
  ;; receiving the *base* name. Shadowing `name` here would stack suffixes
  ;; ("a (3) (4)") instead of advancing the counter ("a (4)").
  (let (candidate (str name " (" idx ")"))
    (if (member? candidate names)
      (unique-name names name (+ 1 idx))
      candidate)))
;; Adds a "label_name" entry to every field descriptor in `fds`, making sure
;; no two descriptors end up with the same label.
;;
;; When the size of `(set* names)` equals the number of names, each field
;; simply uses its own "name". Otherwise labels are assigned left to right:
;;   1. the plain field name, if still free;
;;   2. "<name> (<provenance>)" (provenance defaults to "2"), if still free;
;;   3. a numbered variant produced by `unique-name`, starting at 3.
;;
;; Returns the list of descriptors, in the same order, with "label_name" set.
(define (with-unique-label-names fds)
  (let (names (map (lambda (f) (f "name")) fds))
    (if (= (count (set* names)) (count names))
      (map (lambda (f) (assoc f "label_name" (f "name"))) fds)
      (let (labels
            (reduce (lambda (ns f)
                      (let (n (f "name")
                            p (str n " (" (f "provenance" "2") ")"))
                        (cond (not (member? n ns)) (append ns n)
                              (not (member? p ns)) (append ns p)
                              ;; The fallback label must be appended too:
                              ;; returning the bare string would turn the
                              ;; accumulator into a string and break both
                              ;; the next iteration and the final `map`.
                              (append ns (unique-name ns n 3)))))
                    []
                    fds))
        (map (lambda (f n) (assoc f "label_name" n)) fds labels)))))
;; True when the field descriptor carries a truthy "auto_generated" entry;
;; descriptors without that key are treated as not auto-generated.
(define (auto-generated? field)
  (field "auto_generated" false))
;; Drops from `fds` every field whose name is `im-name` followed by a dot and
;; at least one more character (e.g. "<im-name>.something"). The image field
;; name is regex-quoted, so it is matched literally.
(define (remove-image-features fds im-name)
  (let (feature-rx (str "^" (re-quote im-name) "\\..+")
        feature? (lambda (field) (matches? feature-rx (field "name"))))
    (filter (lambda (field) (not (feature? field))) fds)))
;; Selects the dataset fields that will take part in the composite.
;;
;; dataset: id of the input dataset
;; fields:  list of field references to use as labels; when empty, every
;;          field of the dataset is considered
;;
;; Returns a list of field descriptors whose head is the single image field
;; and whose tail are the label fields, each one carrying a unique
;; "label_name". Raises an error unless exactly one candidate image field
;; is found.
(define (sample-fields dataset fields)
  (let (all-fields (resource-fields dataset)
        descriptors (values all-fields)
        ;; NOTE(review): text fields are accepted as candidates too, although
        ;; the error message below only mentions image fields -- confirm that
        ;; this is intended.
        candidates (filter (lambda (fd) (or (image-field? fd) (text-field? fd)))
                           descriptors)
        image (if (= 1 (count candidates))
                (head candidates)
                (raise "The input dataset must contain a single image field"))
        requested (if (empty? fields)
                    descriptors
                    (map (lambda (ref) (find-field all-fields ref)) fields))
        ;; Labels exclude auto-generated, image and path fields...
        label? (lambda (fd)
                 (not (or (auto-generated? fd)
                          (image-field? fd)
                          (path-field? fd))))
        ;; ...as well as fields named "<image name>.<suffix>".
        labels (remove-image-features (filter label? requested)
                                      (image "name")))
    (with-unique-label-names (cons image labels))))
;; Converts sample rows into a flat list of row-value entries.
;;
;; rows:  list of rows; the head of each row is the component id and the
;;        remaining cells are the label values
;; names: label field names, positionally paired with the cells of each row
;;
;; Every cell yields a map {"field" <name> "value" <cell> "components" [<id>]},
;; where a falsy cell is replaced by the empty string. Rows whose id is empty
;; contribute nothing.
(define (rows-to-values rows names)
  (reduce (lambda (acc row)
            (let (component (head row)
                  entries (map (lambda (cell label)
                                 {"field" label
                                  "value" (or cell "")
                                  "components" [component]})
                               (tail row)
                               names))
              (if (empty? component)
                acc
                (concat acc entries))))
          []
          rows))
;; Pushes the row values `vs` to the source `src` in consecutive batches of
;; `row-value-batch-size` entries, waiting for the source before and after
;; each update. Does nothing when `vs` is empty.
(define (update-row-values src vs)
  (when (not (empty? vs))
    (let (batch (take row-value-batch-size vs)
          pending (drop row-value-batch-size vs))
      (update-and-wait (wait src) {"row_values" batch})
      (update-row-values src pending))))
;; Copies label values from `sample` into `composite`, page by page.
;;
;; composite: id of the composite source being filled
;; sample:    id of the sample the rows are read from
;; names:     label field names, in the column order of the sample rows
;; offset:    index of the first row still to be processed
;; max-rows:  total number of rows expected in the sample
;;
;; Each page holds up to max(row-value-batch-size, component-batch-size) rows.
;; Progress is reported in the 0.1 - 0.95 range.
;;
;; Always returns `composite`, also when the sample runs out of rows before
;; `offset` reaches `max-rows` (the result is used as the script output, so
;; it must be a source id in every case).
(define (add-rows composite sample names offset max-rows)
  ;; Guard the ratio: with max-rows = 0 there is nothing to do and the
  ;; division below would be by zero.
  (log-progress (+ 0.1 (* 0.85 (if (> max-rows 0)
                                 (- 1 (/ (- max-rows offset) max-rows))
                                 1))))
  (let (size (max row-value-batch-size component-batch-size)
        rows (if (< offset max-rows)
               (let (s (fetch sample {"mode" "linear"
                                      "row_offset" offset
                                      "rows" size}))
                 (s ["sample" "rows"] []))
               []))
    (when (not (empty? rows))
      (log-info "Adding labels [" (+ 1 offset)
                ", " (min (+ offset size) max-rows) "]")
      (update-row-values composite (rows-to-values rows names)))
    ;; An empty page ends the traversal; advancing by the number of rows
    ;; actually received keeps the offset right for short pages.
    (if (empty? rows)
      composite
      (add-rows composite sample names (+ offset (count rows)) max-rows))))
;; Fetches up to `limit` rows of `sample` starting at `offset`, restricted to
;; the field `image-id`, and returns the first cell of every row (an empty
;; list when the response has no rows).
(define (components image-id sample offset limit)
  (let (query {"mode" "linear"
               "row_offset" offset
               "rows" limit
               "fields" [image-id]}
        page (fetch sample query)
        rows (page ["sample" "rows"] []))
    (map head rows)))
;; Adds the image sources referenced by `sample` to `composite`, in batches
;; of `component-batch-size`, starting at row `offset` and stopping at
;; `max-rows` or as soon as a batch comes back empty.
;;
;; iid is the id of the image field in the sample. Progress is reported in
;; the 0 - 0.1 range.
(define (add-components composite iid sample offset max-rows)
  (log-progress (* 0.1 (- 1 (/ (- max-rows offset) max-rows))))
  (when (< offset max-rows)
    (let (first-row (+ 1 offset)
          last-row (min max-rows (+ offset component-batch-size)))
      (log-info "Adding components [" first-row ", " last-row "]"))
    (let (batch (components iid sample offset component-batch-size))
      (when (not (empty? batch))
        (update-and-wait composite {"add_sources" batch})
        (add-components composite
                        iid
                        sample
                        (+ offset (count batch))
                        max-rows)))))
;; Creates an empty composite source called `name`, fills it with the image
;; components found in `sample` and declares one new field per label.
;;
;; fields:   descriptors as returned by `sample-fields`: the image field
;;           first, label fields (with "label_name" and "optype") after it
;; max-rows: number of sample rows to traverse when adding components
;;
;; Returns the finished composite, after waiting for it.
(define (create-image-composite name fields sample max-rows)
  (let (composite (wait (create-source {"sources" [] "name" name}))
        image-field (head fields)
        label-specs (map (lambda (fd)
                           {"name" (fd "label_name")
                            "optype" (fd "optype")})
                         (tail fields)))
    (add-components composite (image-field "id") sample 0 max-rows)
    (if (empty? label-specs)
      (wait composite)
      (wait (update composite {"new_fields" label-specs})))))
;; Entry point: turns `dataset` back into an editable composite source.
;;
;; A temporary sample restricted to the selected fields is created from the
;; dataset; the composite, named "editable <dataset name>", is then built
;; from the sample's image column and finally filled with label values.
;;
;; Returns the result of `add-rows` for the new composite.
(define (create-editable-composite dataset fields)
  (log-progress 0.0)
  (let (selected (sample-fields dataset fields)
        sample (create-sample {"dataset" dataset
                               "input_fields" (map (lambda (fd) (fd "id"))
                                                   selected)
                               "temp" true})
        name (str "editable " (resource-name dataset))
        rows (resource-property (wait sample) "max_rows")
        names (map (lambda (fd) (fd "label_name")) (tail selected))
        labels-note (if (not (empty? names))
                      (str " and " (count names) " label fields " names)
                      ""))
    (log-info "Creating composite with " rows " components" labels-note)
    (let (composite (create-image-composite name selected sample rows))
      (add-rows composite sample names 0 rows))))
;; Script output: the composite built from the `dataset` and `fields` inputs.
(define source
  (create-editable-composite dataset fields))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment