# Livebook setup: install this notebook's dependencies (alphabetized).
Mix.install([
  # OpenCV bindings for the image-processing steps
  {:evision, "~> 0.1"},
  # Livebook widgets (inputs, rendering)
  {:kino, "~> 0.9.2"},
  # numerical tensors used for the corner math
  {:nx, "~> 0.4"},
  # HTTP client used to download the source image
  {:tesla, "~> 1.6"}
])
# Livebook inputs that parameterize the pipeline.

# URL of the picture to analyze.
image_url_input =
  Kino.Input.text(
    "image url",
    default: "https://cdn.tontontresors.com/18ac8c54-81a3-4dd4-be18-01238d5d69dc.jpg"
  )

# Horizontal / vertical padding applied to the detected corner points.
x_input = Kino.Input.number("x", default: 0)
y_input = Kino.Input.number("y", default: 0)

# Gaussian-blur parameters.
kernel_size_input = Kino.Input.number("kernel size", default: 5)
deviation_input = Kino.Input.number("deviation", default: 0)
# Download the image to a temp file so Evision can read it from disk.
image_url = Kino.Input.read(image_url_input)

# Match on status 200 explicitly: Tesla.get!/1 raises only on transport
# errors, so without this check a 404/500 error page would silently be
# written to disk and fail later (confusingly) inside Evision.imread.
%{status: 200, body: body} = Tesla.get!(image_url)

image_path = Path.join([System.tmp_dir!(), Path.basename(image_url)])
File.write!(image_path, body)
# Read the blur parameters from the Livebook inputs.
# NOTE(review): per OpenCV's GaussianBlur docs the kernel size must be a
# positive odd integer — an even or fractional input here will make
# gaussianBlur fail. Consider validating before use.
kernel_size = Kino.Input.read(kernel_size_input)
# A deviation of 0 lets OpenCV derive sigma from the kernel size (per
# OpenCV GaussianBlur docs).
deviation = Kino.Input.read(deviation_input)
# Load original image
original = Evision.imread(image_path)
# Convert it to grayscale
gray = Evision.cvtColor(original, Evision.Constant.cv_COLOR_BGR2GRAY())
# Gaussian blur
blurred = Evision.gaussianBlur(gray, {kernel_size, kernel_size}, deviation)
# Binarize with an adaptive (Gaussian-weighted) threshold, then invert so the
# features of interest become white on black — the form findContours expects.
threshold =
  blurred
  |> Evision.adaptiveThreshold(
    255,
    Evision.Constant.cv_ADAPTIVE_THRESH_GAUSSIAN_C(),
    Evision.Constant.cv_THRESH_BINARY(),
    11,
    2
  )
  |> Evision.Mat.bitwise_not()
# First thing first, we need to find all the contours in the thresholded image
{contours, _hierarchy} =
  Evision.findContours(
    threshold,
    Evision.Constant.cv_RETR_EXTERNAL(),
    Evision.Constant.cv_CHAIN_APPROX_SIMPLE()
  )

# ("Find" -> "Found"); `length/1` is the idiomatic count for a plain list.
IO.puts("Found #{length(contours)} contour(s)")

# Assumption 1: the contour that contains the puzzle should be fairly large,
# hence sort by area in descending order. The `:desc` sorter replaces the
# negate-the-key trick.
contours = Enum.sort_by(contours, &Evision.contourArea/1, :desc)

# Guard against an empty contour list: `Enum.at(contours, 0)` would have
# passed `nil` to contourArea and crashed.
case contours do
  [largest | _] -> IO.puts("area of the largest contour: #{Evision.contourArea(largest)}")
  [] -> IO.puts("no contours found")
end
# Assumption 2: the scarf is rectangular, so its contour's polygonal
# approximation should have exactly 4 corners (keypoints), i.e. an
# approximation whose shape is {4, 1, 2}. Take the first contour that
# qualifies, or nil when none does.
scarf_keypoints =
  Enum.find_value(contours, fn contour ->
    perimeter = Evision.arcLength(contour, true)
    approximated = Evision.approxPolyDP(contour, 0.02 * perimeter, true)

    if approximated.shape == {4, 1, 2}, do: approximated
  end)
drawed =
  if scarf_keypoints do
    IO.puts("Found scarf")
    # Outline the detected quadrilateral on top of the original image.
    Evision.drawContours(original, [scarf_keypoints], -1, {0, 255, 0}, thickness: 2)
  else
    IO.puts("""
    Could not find scarf outline.
    Try debugging your thresholding and contour steps.
    """)

    # Fall back to the untouched image: previously this branch bound the
    # `:ok` returned by IO.puts, so the cell rendered `:ok` instead of a
    # picture.
    original
  end
x = Kino.Input.read(x_input)
y = Kino.Input.read(y_input)

# extract scarf
# Arrange the 4 keypoints as {top-left, top-right, bottom-right, bottom-left},
# padding each corner outward by the user-supplied {x, y} offsets.
order_points = fn pts ->
  # The top-left corner has the smallest x + y sum, the bottom-right the
  # largest.
  coord_sums = Nx.sum(pts, axes: [1])
  # The top-right corner has the smallest y - x difference, the bottom-left
  # the largest.
  coord_diffs = Nx.subtract(pts[[0..3, 1]], pts[[0..3, 0]])

  tl = Nx.add(pts[Nx.argmin(coord_sums)], Nx.tensor([-x, -y]))
  br = Nx.add(pts[Nx.argmax(coord_sums)], Nx.tensor([x, y]))
  tr = Nx.add(pts[Nx.argmin(coord_diffs)], Nx.tensor([x, -y]))
  bl = Nx.add(pts[Nx.argmax(coord_diffs)], Nx.tensor([-x, y]))

  {tl, tr, br, bl}
end
# Reshape the {4, 1, 2} keypoints matrix to {4, 2}, move it into an Nx tensor,
# then order the corners into a tl/tr/br/bl rectangle.
input =
  scarf_keypoints
  |> Evision.Mat.as_shape({4, 2})
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.as_type(:f32)

{tl, tr, br, bl} = order_points.(input)
rect = Nx.stack([tl, tr, br, bl])
# Euclidean distance between two 2-D points, rounded to the nearest integer.
#
# Rewritten without Nx.power/2: that function was renamed to Nx.pow/2 in
# Nx v0.5 and later removed, and the "~> 0.4" requirement above may resolve
# to those versions. sqrt(sum(d * d)) uses only stable Nx primitives and
# computes the same value.
point_distance = fn p1, p2 ->
  delta = Nx.subtract(p1, p2)

  delta
  |> Nx.multiply(delta)
  |> Nx.sum()
  |> Nx.sqrt()
  |> Nx.to_number()
  |> round()
end
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordinates or the top-right and top-left x-coordinates
#
# point_distance already returns a plain integer (round/1), so Kernel.max/2
# suffices — the original Nx.max/Nx.to_number round-trip allocated throwaway
# tensors for the same result.
output_width = max(point_distance.(br, bl), point_distance.(tr, tl))

# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
output_height = max(point_distance.(tr, br), point_distance.(tl, bl))

# Cell result: show the computed output dimensions.
{output_height, output_width}
# Destination corners for the perspective transform, listed in the same
# {tl, tr, br, bl} order as `rect`.
output =
  [
    [0, 0],
    [output_width - 1, 0],
    [output_width - 1, output_height - 1],
    [0, output_height - 1]
  ]
  |> Nx.tensor(type: :f32)

# Homography mapping the source quadrilateral onto the output rectangle.
matrix = Evision.getPerspectiveTransform(rect, output)
# The color and grayscale images get exactly the same warp, so share the
# call through a small helper.
warp = fn mat ->
  Evision.warpPerspective(
    mat,
    matrix,
    {output_width, output_height},
    flags: Evision.Constant.cv_INTER_LINEAR(),
    borderMode: Evision.Constant.cv_BORDER_CONSTANT(),
    borderValue: {0, 0, 0}
  )
end

scarf = warp.(original)
scarf_gray = warp.(gray)
# scarf_gray is single-channel, so .shape is {rows, cols}.
{h, w} = scarf_gray.shape
len = max(h, w)
# NOTE(review): resizing to {len, len} stretches the warped scarf into a
# square, distorting its aspect ratio — confirm this is what downstream
# consumers expect (e.g. a model that wants square input).
output = Evision.resize(scarf, {len, len})