AlcibiadesCleinias · February 26, 2022 21:46
diff --git a/compare-images-with-cv2.py b/compare-images-with-cv2.py
 """
 I want to compare images (Google to Telegram) and save the Telegram version in the result folder.

 My case:
 - Google saved my photos (the ones I want to keep) in poor quality.
 - Many of these photos came from Telegram.
 - In my humble opinion, the photo quality in Telegram is better.
 - What if I go through the Google photos and try to replace them with their Telegram counterparts?
 """
 import cv2

 class CompareImage(object):
     """Score how different two image files are (lower means more similar).

     The first image is typically compared against many candidates in a row,
     so the most recently decoded first image is cached at class level and
     reused for as long as ``image_1_path`` stays the same.
     """

     # Sentinel scores returned instead of a real difference.
     UNREADABLE_DIFF = 9999  # at least one image could not be measured
     PROPORTION_MISMATCH_DIFF = 999  # height/width ratios differ too much
     # Largest allowed |ratio_1 - ratio_2| before histograms are compared.
     MAX_PROPORTION_DIFF = 0.1

     # One-entry cache: path and decoded data of the last first image.
     _last_1_image_path = ''
     _last_1_image_cv2_readed = None

     def __init__(self, image_1_path, image_2_path):
         """Store both file paths; nothing is read until ``get_diff``."""
         self.image_1_path = image_1_path
         self.image_2_path = image_2_path

     def get_diff(self):
         """Return the difference score for the two images.

         Returns:
             ``UNREADABLE_DIFF`` (9999) when either image cannot be measured
             (the paths are printed), ``PROPORTION_MISMATCH_DIFF`` (999) when
             the height/width ratios differ by more than
             ``MAX_PROPORTION_DIFF``, otherwise the result of
             ``get_image_difference``.
         """
         if CompareImage._last_1_image_path != self.image_1_path:
             # Flag 0 asks OpenCV for a single-channel grayscale image.
             image_1 = cv2.imread(self.image_1_path, 0)
             CompareImage._last_1_image_path = self.image_1_path
             CompareImage._last_1_image_cv2_readed = image_1
         else:
             image_1 = CompareImage._last_1_image_cv2_readed

         image_2 = cv2.imread(self.image_2_path, 0)

         # Explicit check instead of a blanket ``except Exception``: an image
         # that failed to load comes back as None and has no usable shape.
         proportion_1 = self._aspect_ratio(image_1)
         proportion_2 = self._aspect_ratio(image_2)
         if proportion_1 is None or proportion_2 is None:
             print('problem with the following imgs')
             print(self.image_1_path)
             print(self.image_2_path)
             return self.UNREADABLE_DIFF

         # Images with clearly different proportions are not the same photo.
         if abs(proportion_1 - proportion_2) > self.MAX_PROPORTION_DIFF:
             return self.PROPORTION_MISMATCH_DIFF

         return self.get_image_difference(image_1, image_2)

     @staticmethod
     def _aspect_ratio(image):
         """Return height / width of a decoded image, or None if unusable."""
         if image is None:
             return None
         height, width = image.shape[:2]
         if width == 0:
             return None
         return height / width

     @staticmethod
     def get_image_difference(image_1, image_2):
         """Combine two histogram-based measures into one difference score.

         Both images are reduced to 256-bin intensity histograms, which are
         then compared with the Bhattacharyya measure and with normalized
         template matching.
         """
         first_image_hist = cv2.calcHist([image_1], [0], None, [256], [0, 256])
         second_image_hist = cv2.calcHist([image_2], [0], None, [256], [0, 256])

         img_hist_diff = cv2.compareHist(first_image_hist, second_image_hist, cv2.HISTCMP_BHATTACHARYYA)
         # Both histograms have the same size, so only element [0][0] of the
         # match result is read.
         img_template_probability_match = cv2.matchTemplate(first_image_hist, second_image_hist, cv2.TM_CCOEFF_NORMED)[0][0]
         img_template_diff = 1 - img_template_probability_match

         # taking only 10% of histogram diff, since it's less accurate than template method
         return (img_hist_diff / 10) + img_template_diff
      

 if __name__ == "__main__":
     # Imported locally: the original used `os` without importing it, and
     # only this script section needs these modules.
     import os
     import shutil

     tg_dir_path = './tg'
     g_dir_path = './google'
     result_dir_path = './result'
     image_extensions = ('jpg', 'jpeg', 'png')

     # NOTE(review): `similarity_coef` was referenced but never defined in the
     # original script; 0.1 is a placeholder threshold -- tune it on real data.
     # It must stay below the 999 / 9999 sentinels returned by get_diff().
     similarity_coef = 0.1

     tg_files = [
         file for file in os.listdir(tg_dir_path)
         if file.rsplit('.', 1)[-1].lower() in image_extensions
     ]
     g_files = [
         file for file in os.listdir(g_dir_path)
         if file.rsplit('.', 1)[-1].lower() in image_extensions
     ]

     # Make sure the destination exists before the first copy.
     os.makedirs(result_dir_path, exist_ok=True)

     # Google images that already received a Telegram replacement.
     processed_g_img_names = set()
     # Google image name -> closest (but not close enough) Telegram image.
     g_image_to_last_tg_image_diff = {}
     for idx, tg_img in enumerate(tg_files):
         print(f"{idx/len(tg_files)} of all")
         tg_img_path = os.path.join(tg_dir_path, tg_img)

         for g_img in g_files:
             if g_img in processed_g_img_names:
                 continue

             g_img_path = os.path.join(g_dir_path, g_img)
             diff = CompareImage(tg_img_path, g_img_path).get_diff()
             if not diff <= similarity_coef:
                 # Not a match: remember the best candidate seen so far.
                 closest = g_image_to_last_tg_image_diff.get(g_img)
                 if closest is None or diff < closest['diff']:
                     g_image_to_last_tg_image_diff[g_img] = {'diff': diff, 'tg_img': tg_img}
                 continue

             print("found similar, save tg image to result folder with g name")
             result_path = os.path.join(result_dir_path, g_img)
             # shutil.copy instead of a shell `cp`: safe for names containing
             # spaces or shell metacharacters.
             shutil.copy(tg_img_path, result_path)

             processed_g_img_names.add(g_img)

     # Google images that were never replaced, with their closest Telegram
     # image, for a manual check.
     g_image_to_last_tg_image_diff_copy = {
         name: candidate
         for name, candidate in g_image_to_last_tg_image_diff.items()
         if name not in processed_g_img_names
     }
     for name, candidate in g_image_to_last_tg_image_diff_copy.items():
         print(f"{name}: closest tg image {candidate['tg_img']} (diff {candidate['diff']})")
	"""
	I want to compare images (Google to Telegram) and save the Telegram version in the result folder.

	My case:
	- Google saved my photos (the ones I want to keep) in poor quality.
	- Many of these photos came from Telegram.
	- In my humble opinion, the photo quality in Telegram is better.
	- What if I go through the Google photos and try to replace them with their Telegram counterparts?
	"""
	import cv2

	class CompareImage(object):
	    """Score how different two image files are (lower means more similar).

	    The first image is typically compared against many candidates in a row,
	    so the most recently decoded first image is cached at class level and
	    reused for as long as ``image_1_path`` stays the same.
	    """

	    # Sentinel scores returned instead of a real difference.
	    UNREADABLE_DIFF = 9999  # at least one image could not be measured
	    PROPORTION_MISMATCH_DIFF = 999  # height/width ratios differ too much
	    # Largest allowed |ratio_1 - ratio_2| before histograms are compared.
	    MAX_PROPORTION_DIFF = 0.1

	    # One-entry cache: path and decoded data of the last first image.
	    _last_1_image_path = ''
	    _last_1_image_cv2_readed = None

	    def __init__(self, image_1_path, image_2_path):
	        """Store both file paths; nothing is read until ``get_diff``."""
	        self.image_1_path = image_1_path
	        self.image_2_path = image_2_path

	    def get_diff(self):
	        """Return the difference score for the two images.

	        Returns:
	            ``UNREADABLE_DIFF`` (9999) when either image cannot be measured
	            (the paths are printed), ``PROPORTION_MISMATCH_DIFF`` (999) when
	            the height/width ratios differ by more than
	            ``MAX_PROPORTION_DIFF``, otherwise the result of
	            ``get_image_difference``.
	        """
	        if CompareImage._last_1_image_path != self.image_1_path:
	            # Flag 0 asks OpenCV for a single-channel grayscale image.
	            image_1 = cv2.imread(self.image_1_path, 0)
	            CompareImage._last_1_image_path = self.image_1_path
	            CompareImage._last_1_image_cv2_readed = image_1
	        else:
	            image_1 = CompareImage._last_1_image_cv2_readed

	        image_2 = cv2.imread(self.image_2_path, 0)

	        # Explicit check instead of a blanket ``except Exception``: an image
	        # that failed to load comes back as None and has no usable shape.
	        proportion_1 = self._aspect_ratio(image_1)
	        proportion_2 = self._aspect_ratio(image_2)
	        if proportion_1 is None or proportion_2 is None:
	            print('problem with the following imgs')
	            print(self.image_1_path)
	            print(self.image_2_path)
	            return self.UNREADABLE_DIFF

	        # Images with clearly different proportions are not the same photo.
	        if abs(proportion_1 - proportion_2) > self.MAX_PROPORTION_DIFF:
	            return self.PROPORTION_MISMATCH_DIFF

	        return self.get_image_difference(image_1, image_2)

	    @staticmethod
	    def _aspect_ratio(image):
	        """Return height / width of a decoded image, or None if unusable."""
	        if image is None:
	            return None
	        height, width = image.shape[:2]
	        if width == 0:
	            return None
	        return height / width

	    @staticmethod
	    def get_image_difference(image_1, image_2):
	        """Combine two histogram-based measures into one difference score.

	        Both images are reduced to 256-bin intensity histograms, which are
	        then compared with the Bhattacharyya measure and with normalized
	        template matching.
	        """
	        first_image_hist = cv2.calcHist([image_1], [0], None, [256], [0, 256])
	        second_image_hist = cv2.calcHist([image_2], [0], None, [256], [0, 256])

	        img_hist_diff = cv2.compareHist(first_image_hist, second_image_hist, cv2.HISTCMP_BHATTACHARYYA)
	        # Both histograms have the same size, so only element [0][0] of the
	        # match result is read.
	        img_template_probability_match = cv2.matchTemplate(first_image_hist, second_image_hist, cv2.TM_CCOEFF_NORMED)[0][0]
	        img_template_diff = 1 - img_template_probability_match

	        # taking only 10% of histogram diff, since it's less accurate than template method
	        return (img_hist_diff / 10) + img_template_diff


	if __name__ == "__main__":
	    # Imported locally: the original used `os` without importing it, and
	    # only this script section needs these modules.
	    import os
	    import shutil

	    tg_dir_path = './tg'
	    g_dir_path = './google'
	    result_dir_path = './result'
	    image_extensions = ('jpg', 'jpeg', 'png')

	    # NOTE(review): `similarity_coef` was referenced but never defined in the
	    # original script; 0.1 is a placeholder threshold -- tune it on real data.
	    # It must stay below the 999 / 9999 sentinels returned by get_diff().
	    similarity_coef = 0.1

	    tg_files = [
	        file for file in os.listdir(tg_dir_path)
	        if file.rsplit('.', 1)[-1].lower() in image_extensions
	    ]
	    g_files = [
	        file for file in os.listdir(g_dir_path)
	        if file.rsplit('.', 1)[-1].lower() in image_extensions
	    ]

	    # Make sure the destination exists before the first copy.
	    os.makedirs(result_dir_path, exist_ok=True)

	    # Google images that already received a Telegram replacement.
	    processed_g_img_names = set()
	    # Google image name -> closest (but not close enough) Telegram image.
	    g_image_to_last_tg_image_diff = {}
	    for idx, tg_img in enumerate(tg_files):
	        print(f"{idx/len(tg_files)} of all")
	        tg_img_path = os.path.join(tg_dir_path, tg_img)

	        for g_img in g_files:
	            if g_img in processed_g_img_names:
	                continue

	            g_img_path = os.path.join(g_dir_path, g_img)
	            diff = CompareImage(tg_img_path, g_img_path).get_diff()
	            if not diff <= similarity_coef:
	                # Not a match: remember the best candidate seen so far.
	                closest = g_image_to_last_tg_image_diff.get(g_img)
	                if closest is None or diff < closest['diff']:
	                    g_image_to_last_tg_image_diff[g_img] = {'diff': diff, 'tg_img': tg_img}
	                continue

	            print("found similar, save tg image to result folder with g name")
	            result_path = os.path.join(result_dir_path, g_img)
	            # shutil.copy instead of a shell `cp`: safe for names containing
	            # spaces or shell metacharacters.
	            shutil.copy(tg_img_path, result_path)

	            processed_g_img_names.add(g_img)

	    # Google images that were never replaced, with their closest Telegram
	    # image, for a manual check.
	    g_image_to_last_tg_image_diff_copy = {
	        name: candidate
	        for name, candidate in g_image_to_last_tg_image_diff.items()
	        if name not in processed_g_img_names
	    }
	    for name, candidate in g_image_to_last_tg_image_diff_copy.items():
	        print(f"{name}: closest tg image {candidate['tg_img']} (diff {candidate['diff']})")