Created
October 16, 2024 17:42
-
-
Save sardinecan/c4c6efcf043545a0bad24b6f56b73c21 to your computer and use it in GitHub Desktop.
Découpage d'images avec Julia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#= | |
Banaka Split | |
Ce script Julia a été réalisé dans le cadre de l'[ANR Experts](https://experts.huma-num.fr/) afin | |
d'évaluer des méthodes de découpage automatique de photographies comportant des doubles pages et accessibles
depuis [Nakala](https://nakala.fr/). | |
Fichier image : https://api.nakala.fr/data/10.34847/nkl.027b840e/5c8e77a046216ab6aed848b2f781deb9495fea76 | |
**Note** une clé API est requise pour récupérer les images depuis l'[API Nakala](https://api.nakala.fr/). | |
=# | |
# chargement des paquets | |
using CSV | |
using DataFrames | |
using ImageSegmentation | |
using HTTP | |
using JSON | |
using Dates | |
using FileIO | |
# Credentials: read the API key of the chosen user from `credentials.csv`,
# which is looked up in the directory containing this script.
path = @__DIR__
credentials = CSV.read(joinpath(path, "credentials.csv"), DataFrame, header=1) # list of users
user = "tnakala" # user to authenticate as (test API = tnakala)
usrCredentials = filter(:user => ==(user), credentials) # rows whose `user` column matches
# Fail with an explicit message rather than a bare BoundsError when the user is not listed.
nrow(usrCredentials) > 0 ||
    error("No credentials found for user \"$user\" in credentials.csv")
apiKey = usrCredentials[1, :apikey] # API key of the first matching row
# Select the API endpoint: the test instance when `apitest` is true,
# the production instance otherwise.
apitest = true
apiurl = apitest ? "https://apitest.nakala.fr" : "https://api.nakala.fr"
# Look up a data record by title among the user's datas.
title = "Z1J434" # title of the data record
lang = "" # language of the title (optional)
# @todo turn these two bounds into a range
firstImage = 1 # first image
lastImage = 2 # last image
scope = ["deposited", "owned", "shared", "editable", "readable", "all"]
s = scope[1]
# URL segments are always separated by "/"; `joinpath` is meant for file-system
# paths and would insert "\" on Windows.
url = join((apiurl, "users", "datas", s), "/")
headers = Dict(
    "X-API-KEY" => apiKey,
    "Content-Type" => "application/json"
)
body = Dict(
    :page => 1,
    :limit => 100,
    :titleSearch => title,
    :titleSearchLang => lang
)
userDatas = HTTP.request("POST", url, headers, JSON.json(body))
userDatasResponse = JSON.parse(String(HTTP.payload(userDatas))) # server response
# Default to an empty list so that a missing "data" key is reported by the
# check below instead of being indexed as a string.
datas = get(userDatasResponse, "data", Any[])
isempty(datas) && error("No data titled \"$title\" found in scope \"$s\"")
data = datas[1] # several results are possible; keep the first one
identifier = get(data, "identifier", "")
files = get(data, "files", Any[])
# Build the list of images to process: one dictionary per requested file,
# holding its download URL and the identifiers needed to address it later.
if !isempty(firstImage:lastImage) && (firstImage < 1 || lastImage > length(files))
    # Report the whole problem up front instead of failing mid-loop on `files[i]`.
    error("Images $(firstImage:lastImage) requested, but the data only has $(length(files)) file(s)")
end
imgs = Dict{String,Any}[]
for i in firstImage:lastImage
    file = files[i]
    fileName = get(file, "name", "unknown")
    fileIdentifier = get(file, "sha1", "unknown")
    # URL segments are joined with "/" explicitly; `joinpath` would use "\" on Windows.
    fileUrl = join((apiurl, "data", identifier, fileIdentifier), "/")
    img = Dict(
        "url" => fileUrl,
        "apiurl" => apiurl,
        "dataIdentifier" => identifier,
        "fileIdentifier" => fileIdentifier,
        "label" => fileName
    )
    push!(imgs, img)
end
imgs
"""
    split(label, file, s)

Cut the image `file` into a left and a right part and save both parts.

Each part extends 100 pixels past the middle column, so the two parts overlap
around the centre. The parts are written with `save` under the name `label`
with `-l` (left) or `-r` (right) inserted before the file extension.

# Arguments
- `label`: file name used to derive the two output names.
- `file`: the image, indexed as `file[rows, columns]`.
- `s`: dimensions of `file`; only `s[2]` (number of columns) is read. The
  caller in this script passes `size(file)`.

Returns the result of the last `save` call.

NOTE(review): when defined at top level this name shadows `Base.split` in the
enclosing module.
"""
function split(label, file, s)
    width = s[2]
    middle = width / 2
    # Clamp both cut positions to the image so that images narrower than the
    # overlap do not produce an out-of-bounds column range.
    leftEnd = min(floor(Int, middle + 100), width)
    rightStart = max(floor(Int, middle - 100), 1)
    leftPart = @view file[:, 1:leftEnd]
    rightPart = @view file[:, rightStart:width]
    stem, ext = splitext(label)
    save(string(stem, "-l", ext), leftPart)
    return save(string(stem, "-r", ext), rightPart)
end
# Download every selected image, rotate it by 180 degrees and save it; images
# whose second dimension is larger than the first are cut in two with `split`.
for img in imgs
    label = get(img, "label", "unknown")
    file = download(get(img, "url", "")) |> load
    # Named `dims` rather than `s`: a global `s` (the search scope) already
    # exists, and assigning to that name inside a top-level loop triggers
    # Julia's soft-scope ambiguity warning when the script is run as a file.
    dims = size(file)
    file = rot180(file) # a 180-degree rotation leaves the dimensions unchanged
    if dims[1] < dims[2]
        split(label, file, dims)
    else
        save(label, file)
    end
end
"""
    spliiit(image)

Locate the pages of a photograph with a segmentation algorithm and return
IIIF URLs addressing the bounding boxes of the detected regions.

`image` is a dictionary with the keys `"url"`, `"apiurl"`, `"dataIdentifier"`
and `"fileIdentifier"`. The image is downloaded from `"url"`, segmented with
`felzenszwalb`, and the (at most) two segments with the highest pixel counts
are kept, smaller one first. For each of them the bounding box is turned into
a `…/iiif/<data>/<file>/x,y,w,h/full/0/default.jpg` URL.

Returns a vector of URL strings (fewer than two if the segmentation yields
fewer than two segments).
"""
function spliiit(image)
    img = download(get(image, "url", "")) |> load
    # The last argument is the minimum segment size: it removes the smallest segments.
    segments = felzenszwalb(img, 5000, 300000)
    segMap = labels_map(segments)
    # Rank the segments by pixel count (ascending) using Base only, instead of
    # relying on a `sort(::Dict; byvalue=true)` method provided by another package.
    ranked = sort!(collect(segment_pixel_count(segments)); by=last)
    # URL segments are joined with "/" explicitly; `joinpath` would use "\" on Windows.
    base = join(
        (
            get(image, "apiurl", ""),
            "iiif",
            get(image, "dataIdentifier", ""),
            get(image, "fileIdentifier", ""),
        ),
        "/",
    )
    regions = String[]
    # `max(1, end - 1)` keeps this valid when fewer than two segments exist.
    for (label, _) in ranked[max(1, end - 1):end]
        coordinates = findall(==(label), segMap)
        ymin, ymax = extrema(c -> c[1], coordinates)
        xmin, xmax = extrema(c -> c[2], coordinates)
        # IIIF regions are 0-based offsets plus a size in pixels, whereas Julia
        # indices are 1-based and the extrema are inclusive.
        x = xmin - 1
        y = ymin - 1
        w = xmax - xmin + 1
        h = ymax - ymin + 1
        push!(regions, "$base/$x,$y,$w,$h/full/0/default.jpg")
    end
    return regions
end
# Compute the IIIF region URLs of every selected image.
t = Vector()
# Iterate over `imgs`, the list built earlier in this script; `urls` was never defined.
for img in imgs
    push!(t, spliiit(img))
end
t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Méthodes