Skip to content

Instantly share code, notes, and snippets.

@hoelzro
Created February 20, 2020 19:41
Show Gist options
  • Save hoelzro/99685bc4fa56d118d9070c98565ed77b to your computer and use it in GitHub Desktop.
Save hoelzro/99685bc4fa56d118d9070c98565ed77b to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use feature qw(say);
use experimental qw(signatures);
# Largest distance() result at which two entries are still reported as a
# pair by the comparison loop at the bottom of this script.
my $THRESHOLD = 8;
# Returns the number of bit positions in which two integer hashes differ
# (their Hamming distance).
#
# Both arguments may be decimal strings; "+ 0" forces numeric context so
# that "^" performs an integer XOR rather than a string-wise XOR.
#
# The parameters are deliberately not called $a/$b: those are the special
# package variables used by sort() and should not be lexicalized.
sub distance($left, $right) {
    my $diff = ($left + 0) ^ ($right + 0);
    my $bits = 0;
    while ($diff) {
        # Clearing the lowest set bit each pass means the loop runs once
        # per differing bit.
        $diff &= $diff - 1;
        $bits++;
    }
    return $bits;
}
# Read "<filename>\t<decimal hash>" records from the files named on the
# command line (or STDIN) and keep those whose file still exists.
my @entries;
while (my $line = <>) {
    chomp $line;
    # The greedy (.+) anchors on the LAST tab, so a filename that itself
    # contains a tab is kept intact. Lines that are blank, have no tab, or
    # whose hash is not a plain decimal number are skipped instead of being
    # stored with an undefined hash (which would later compare as 0).
    my ($filename, $hash) = $line =~ /\A(.+)\t(\d+)\s*\z/s
        or next;
    next unless -e $filename;
    push @entries, [ $filename, $hash ];
}
# Compare every unordered pair of entries exactly once and print the pairs
# whose hashes are within $THRESHOLD of each other as
# "<file1>\t<file2>\t<distance>".
foreach my $first (0 .. $#entries) {
    my ($first_name, $first_hash) = @{ $entries[$first] };
    foreach my $second ($first + 1 .. $#entries) {
        my ($second_name, $second_hash) = @{ $entries[$second] };
        my $bits = distance($first_hash, $second_hash);
        next if $bits > $THRESHOLD;
        say join("\t", $first_name, $second_name, $bits);
    }
}
#include <opencv2/opencv.hpp>
#include <opencv2/highgui.hpp>
#include <cstdint>
#include <cstdio>
#include <string>
#include <unistd.h>
using namespace cv;
using namespace std;
// Reference for the perceptual hash computed by phash() below:
// http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html
static
float median(const Mat &img)
{
Mat copy = img.clone().reshape(0, 64);
cv::sort(copy, copy, cv::SORT_EVERY_COLUMN | cv::SORT_ASCENDING);
return (copy.at<float>(31) + copy.at<float>(32)) / 2.0;
}
static uint64_t
phash(const Mat &img)
{
Mat eight_by_eight;
Mat grayscale;
Mat float_grayscale;
Mat discrete_cosine;
resize(img, eight_by_eight, Size(32, 32));
cvtColor(eight_by_eight, grayscale, cv::COLOR_BGR2GRAY);
grayscale.convertTo(float_grayscale, CV_32FC1);
dct(float_grayscale, discrete_cosine);
Mat top8(discrete_cosine, Rect(1, 1, 8, 8)); // original cuts off the first row and column
//top8.at<float>(0, 0) = 0;
double avg = median(top8);
Mat avg_mat(8, 8, CV_32FC1, Scalar(avg, 0, 0, 0));
avg_mat = (top8 > avg_mat);
uint64_t mask = 1UL << 63;
uint64_t result = 0;
for(int row = 0; row < 8; row++) {
for(int col = 0; col < 8; col++) {
if(avg_mat.at<uchar>(row, col) != 0) {
result |= mask;
}
mask >>= 1;
}
}
return result;
}
int
main(int argc, char **argv)
{
for(int i = 1; i < argc; i++) {
if(access(argv[i], R_OK)) {
continue;
}
try {
Mat img = imread(argv[i]);
uint64_t hash = phash(img);
printf("%s\t%llu\n", argv[i], hash);
} catch(...) {
fprintf(stderr, "exception for %s\n", argv[i]);
}
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment