# Livebook setup: install this notebook's dependencies (alphabetized).
Mix.install([
  # OpenCV bindings for the image-processing steps
  {:evision, "~> 0.1"},
  # Livebook widgets (inputs, rendering)
  {:kino, "~> 0.9.2"},
  # numerical tensors used for the corner math
  {:nx, "~> 0.4"},
  # HTTP client used to download the source image
  {:tesla, "~> 1.6"}
])
# Livebook inputs that parameterize the pipeline.

# URL of the picture to analyze.
image_url_input =
  Kino.Input.text(
    "image url",
    default: "https://cdn.tontontresors.com/18ac8c54-81a3-4dd4-be18-01238d5d69dc.jpg"
  )

# Horizontal / vertical padding applied to the detected corner points.
x_input = Kino.Input.number("x", default: 0)
y_input = Kino.Input.number("y", default: 0)

# Gaussian-blur parameters.
kernel_size_input = Kino.Input.number("kernel size", default: 5)
deviation_input = Kino.Input.number("deviation", default: 0)
# Download the image to a temp file so Evision can read it from disk.
image_url = Kino.Input.read(image_url_input)

# Match on status 200 explicitly: Tesla.get!/1 raises only on transport
# errors, so without this check a 404/500 error page would silently be
# written to disk and fail later (confusingly) inside Evision.imread.
%{status: 200, body: body} = Tesla.get!(image_url)

image_path = Path.join([System.tmp_dir!(), Path.basename(image_url)])
File.write!(image_path, body)
# Read the blur parameters from the Livebook inputs.
# NOTE(review): per OpenCV's GaussianBlur docs the kernel size must be a
# positive odd integer — an even or fractional input here will make
# gaussianBlur fail. Consider validating before use.
kernel_size = Kino.Input.read(kernel_size_input)
# A deviation of 0 lets OpenCV derive sigma from the kernel size (per
# OpenCV GaussianBlur docs).
deviation = Kino.Input.read(deviation_input)
# Load original image
original = Evision.imread(image_path)
# Convert it to grayscale
gray = Evision.cvtColor(original, Evision.Constant.cv_COLOR_BGR2GRAY())
# Gaussian blur
blurred = Evision.gaussianBlur(gray, {kernel_size, kernel_size}, deviation)
# Binarize with an adaptive (Gaussian-weighted) threshold, then invert so the
# features of interest become white on black — the form findContours expects.
threshold =
  blurred
  |> Evision.adaptiveThreshold(
    255,
    Evision.Constant.cv_ADAPTIVE_THRESH_GAUSSIAN_C(),
    Evision.Constant.cv_THRESH_BINARY(),
    11,
    2
  )
  |> Evision.Mat.bitwise_not()
# First thing first, we need to find all the contours in the thresholded image
{contours, _hierarchy} =
  Evision.findContours(
    threshold,
    Evision.Constant.cv_RETR_EXTERNAL(),
    Evision.Constant.cv_CHAIN_APPROX_SIMPLE()
  )

# ("Find" -> "Found"); `length/1` is the idiomatic count for a plain list.
IO.puts("Found #{length(contours)} contour(s)")

# Assumption 1: the contour that contains the puzzle should be fairly large,
# hence sort by area in descending order. The `:desc` sorter replaces the
# negate-the-key trick.
contours = Enum.sort_by(contours, &Evision.contourArea/1, :desc)

# Guard against an empty contour list: `Enum.at(contours, 0)` would have
# passed `nil` to contourArea and crashed.
case contours do
  [largest | _] -> IO.puts("area of the largest contour: #{Evision.contourArea(largest)}")
  [] -> IO.puts("no contours found")
end
# Assumption 2: the scarf is rectangular, so its contour's polygonal
# approximation should have exactly 4 corners (keypoints), i.e. an
# approximation whose shape is {4, 1, 2}. Take the first contour that
# qualifies, or nil when none does.
scarf_keypoints =
  Enum.find_value(contours, fn contour ->
    perimeter = Evision.arcLength(contour, true)
    approximated = Evision.approxPolyDP(contour, 0.02 * perimeter, true)

    if approximated.shape == {4, 1, 2}, do: approximated
  end)
drawed =
  if scarf_keypoints do
    IO.puts("Found scarf")
    # Outline the detected quadrilateral on top of the original image.
    Evision.drawContours(original, [scarf_keypoints], -1, {0, 255, 0}, thickness: 2)
  else
    IO.puts("""
    Could not find scarf outline.
    Try debugging your thresholding and contour steps.
    """)

    # Fall back to the untouched image: previously this branch bound the
    # `:ok` returned by IO.puts, so the cell rendered `:ok` instead of a
    # picture.
    original
  end
x = Kino.Input.read(x_input)
y = Kino.Input.read(y_input)

# extract scarf
# Arrange the 4 keypoints as {top-left, top-right, bottom-right, bottom-left},
# padding each corner outward by the user-supplied {x, y} offsets.
order_points = fn pts ->
  # The top-left corner has the smallest x + y sum, the bottom-right the
  # largest.
  coord_sums = Nx.sum(pts, axes: [1])
  # The top-right corner has the smallest y - x difference, the bottom-left
  # the largest.
  coord_diffs = Nx.subtract(pts[[0..3, 1]], pts[[0..3, 0]])

  tl = Nx.add(pts[Nx.argmin(coord_sums)], Nx.tensor([-x, -y]))
  br = Nx.add(pts[Nx.argmax(coord_sums)], Nx.tensor([x, y]))
  tr = Nx.add(pts[Nx.argmin(coord_diffs)], Nx.tensor([x, -y]))
  bl = Nx.add(pts[Nx.argmax(coord_diffs)], Nx.tensor([-x, y]))

  {tl, tr, br, bl}
end
# Reshape the {4, 1, 2} keypoints matrix to {4, 2}, move it into an Nx tensor,
# then order the corners into a tl/tr/br/bl rectangle.
input =
  scarf_keypoints
  |> Evision.Mat.as_shape({4, 2})
  |> Evision.Mat.to_nx(Nx.BinaryBackend)
  |> Nx.as_type(:f32)

{tl, tr, br, bl} = order_points.(input)
rect = Nx.stack([tl, tr, br, bl])
# Euclidean distance between two 2-D points, rounded to the nearest integer.
#
# Rewritten without Nx.power/2: that function was renamed to Nx.pow/2 in
# Nx v0.5 and later removed, and the "~> 0.4" requirement above may resolve
# to those versions. sqrt(sum(d * d)) uses only stable Nx primitives and
# computes the same value.
point_distance = fn p1, p2 ->
  delta = Nx.subtract(p1, p2)

  delta
  |> Nx.multiply(delta)
  |> Nx.sum()
  |> Nx.sqrt()
  |> Nx.to_number()
  |> round()
end
# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
# x-coordinates or the top-right and top-left x-coordinates
#
# point_distance already returns a plain integer (round/1), so Kernel.max/2
# suffices — the original Nx.max/Nx.to_number round-trip allocated throwaway
# tensors for the same result.
output_width = max(point_distance.(br, bl), point_distance.(tr, tl))

# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
output_height = max(point_distance.(tr, br), point_distance.(tl, bl))

# Cell result: show the computed output dimensions.
{output_height, output_width}
# Destination corners for the perspective transform, listed in the same
# {tl, tr, br, bl} order as `rect`.
output =
  [
    [0, 0],
    [output_width - 1, 0],
    [output_width - 1, output_height - 1],
    [0, output_height - 1]
  ]
  |> Nx.tensor(type: :f32)

# Homography mapping the source quadrilateral onto the output rectangle.
matrix = Evision.getPerspectiveTransform(rect, output)
# The color and grayscale images get exactly the same warp, so share the
# call through a small helper.
warp = fn mat ->
  Evision.warpPerspective(
    mat,
    matrix,
    {output_width, output_height},
    flags: Evision.Constant.cv_INTER_LINEAR(),
    borderMode: Evision.Constant.cv_BORDER_CONSTANT(),
    borderValue: {0, 0, 0}
  )
end

scarf = warp.(original)
scarf_gray = warp.(gray)
# scarf_gray is single-channel, so .shape is {rows, cols}.
{h, w} = scarf_gray.shape
len = max(h, w)
# NOTE(review): resizing to {len, len} stretches the warped scarf into a
# square, distorting its aspect ratio — confirm this is what downstream
# consumers expect (e.g. a model that wants square input).
output = Evision.resize(scarf, {len, len})