# Prefix the value of the imageFilename attribute in every page/*.xml file
# with "page/" (first occurrence per line, edited in place).
# The optional (page/)? group swallows a prefix that is already present, so
# running this step again does not produce "page/page/...".
sed -i -E 's,imageFilename="(page/)?,imageFilename="page/,' page/*.xml

# Create a new OCR-D workspace in the current directory.
ocrd workspace init
# Add the files under page/ (*.xml) and jpg/ (*.jpg) to the workspace.
# The regex splits each path into
#   <fileGrp>/altstrelitz_friedregister<pageid>.<ext>
# and the captured groups fill the {{ ... }} templates used for the file
# group, page ID, file ID and URL. No --mimetype is passed in this step.
ocrd workspace bulk-add --ignore \
  --regex '^.*/(?P<fileGrp>[^/]+)/altstrelitz_friedregister(?P<pageid>.*)\.(?P<ext>[^\.]*)$' \
  --file-grp '{{ fileGrp }}' \
  --page-id 'PHYS_{{ pageid }}' \
  --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \
  --url '{{ fileGrp }}/altstrelitz_friedregister{{ pageid }}.{{ ext }}' \
  page/*.xml jpg/*.jpg
# The ALTO files must be added in a separate step, because PAGE and ALTO
# both use the .xml file extension.
# Add the files under alto/ (*.xml) to the workspace, with the MIME type
# given explicitly as application/alto+xml. Path parsing and the ID/URL
# templates are the same as for the other file groups:
#   <fileGrp>/altstrelitz_friedregister<pageid>.<ext>
ocrd workspace bulk-add --ignore \
  --regex '^.*/(?P<fileGrp>[^/]+)/altstrelitz_friedregister(?P<pageid>.*)\.(?P<ext>[^\.]*)$' \
  --mimetype 'application/alto+xml' \
  --file-grp '{{ fileGrp }}' \
  --page-id 'PHYS_{{ pageid }}' \
  --file-id 'FILE_{{ fileGrp }}_{{ pageid }}' \
  --url '{{ fileGrp }}/altstrelitz_friedregister{{ pageid }}.{{ ext }}' \
  alto/*.xml