Calculate the transform required to match a plate to its edit ref.
""" | |
This is a cobbled together "copy paste" abomination which finds the bounding box of one image inside another. | |
Useful for matching compositions. | |
""" | |
import numpy as np | |
import cv2 as cv | |
import matplotlib.pyplot as plt | |
def euclidean_distance(x1, x2):
    """Straight-line distance between two points given as numpy arrays."""
    return np.sqrt(np.sum((x1 - x2) ** 2))
inset_img = cv.imread('/home/work/Downloads/image.png', cv.IMREAD_GRAYSCALE)  # query image (the inset/plate)
orig_rgb = cv.imread('/home/work/Downloads/image(1).png')                     # train image (the original/edit ref)
orig_gray = cv.cvtColor(orig_rgb, cv.COLOR_BGR2GRAY)

# find the keypoints and descriptors with ORB
orb = cv.ORB_create()
kp1, des1 = orb.detectAndCompute(inset_img, None)
kp2, des2 = orb.detectAndCompute(orig_gray, None)
# find matching key points using brute force matching on the binary ORB descriptors
bf = cv.BFMatcher(cv.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)

# sort by descriptor distance so the most reliable matches come first
matches = sorted(matches, key=lambda m: m.distance)
most_confident_matches = matches[:5]
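
# Optional sanity check (a sketch, assuming matplotlib has a working display
# backend): draw the top matches so you can eyeball whether the keypoint pairs
# actually correspond before trusting the scale/origin estimates below.
match_vis = cv.drawMatches(
    inset_img, kp1, orig_gray, kp2, most_confident_matches, None,
    flags=cv.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS,
)
plt.imshow(cv.cvtColor(match_vis, cv.COLOR_BGR2RGB))
plt.show()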
# estimate scale and origin from every pair of confident matches
scale_samples = []
origin_samples = []

for i in range(len(most_confident_matches)):
    for j in range(len(most_confident_matches)):
        if i == j:
            continue

        m1 = most_confident_matches[i]
        m2 = most_confident_matches[j]

        # keypoint positions in the original (train) image
        op1 = np.array(kp2[m1.trainIdx].pt)
        op2 = np.array(kp2[m2.trainIdx].pt)
        orig_distance = euclidean_distance(op1, op2)

        # corresponding keypoint positions in the inset (query) image
        ip1 = np.array(kp1[m1.queryIdx].pt)
        ip2 = np.array(kp1[m2.queryIdx].pt)
        inset_distance = euclidean_distance(ip1, ip2)

        # skip degenerate pairs that share a location, otherwise scale blows up
        if inset_distance == 0:
            continue

        # the ratio of the two distances gives the scale; projecting a keypoint
        # back by that scale gives an estimate of the inset's top-left origin
        scale = orig_distance / inset_distance
        scale_samples.append(scale)

        origin = op1 - (ip1 * scale)
        origin_samples.append(origin)
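
# The per-pair origin estimates aren't used by the template match below, but a
# robust average of them gives a rough cross-check on the same answer.
estimated_origin = np.median(np.array(origin_samples), axis=0)
print("keypoint-based origin estimate (x, y):", estimated_origin)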
# resample the inset image to match the original image
scale = np.mean(scale_samples)
scaled_inset_img = cv.resize(inset_img, (0, 0), fx=scale, fy=scale)

# find the rect of the scaled inset image in the original image.
# this could probably be done above but brain no math do.
res = cv.matchTemplate(orig_gray, scaled_inset_img, cv.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, top_left = cv.minMaxLoc(res)  # for TM_CCOEFF_NORMED the max location is the best match

rect = (top_left[0], top_left[1], scaled_inset_img.shape[1], scaled_inset_img.shape[0])
cv.rectangle(orig_rgb, rect, (0, 0, 255), 2)

cv.imwrite("orig.png", orig_rgb)
cv.imwrite("inset.png", inset_img)