Created
February 20, 2020 19:41
-
-
Save hoelzro/99685bc4fa56d118d9070c98565ed77b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use feature qw(say); | |
use experimental qw(signatures); | |
# Largest bit distance between two hashes that is still reported as a match.
my $THRESHOLD = 8; | |
# Hamming distance between two hash values: the number of bit positions in
# which they differ.
#
# Both arguments are forced into numeric context before the XOR so that
# values read from text are combined as integers rather than with Perl's
# bitwise string XOR.  Returns the number of set bits in the result.
#
# The parameters are deliberately not called $a/$b: those names belong to the
# package variables used by sort comparators and should not be lexicalized.
sub distance($left, $right) {
    my $diff = ($left + 0) ^ ($right + 0);

    my $count = 0;
    while ($diff > 0) {
        $count += ($diff & 1);
        $diff >>= 1;
    }
    return $count;
}
# Read "filename<TAB>hash" records from the files named on the command line
# (or from STDIN) and keep those whose file currently exists.
my @entries;
while (my $line = <>) {
    chomp $line;

    # Anchor on the trailing decimal hash: the greedy prefix keeps a filename
    # that itself contains tabs in one piece, and a record with no hash is
    # rejected instead of being compared later as an undefined value.
    unless ($line =~ /\A(.+)\t([0-9]+)\z/s) {
        warn "skipping malformed record at line $.: $line\n" if length $line;
        next;
    }
    my ($filename, $hash) = ($1, $2);

    next unless -e $filename;
    push @entries, [ $filename, $hash ];
}
# Visit every unordered pair of entries once and print the pairs whose hashes
# are at most $THRESHOLD bits apart, as "first<TAB>second<TAB>distance".
for my $first (0 .. $#entries) {
    my ($first_name, $first_hash) = @{ $entries[$first] };

    for my $second ($first + 1 .. $#entries) {
        my ($second_name, $second_hash) = @{ $entries[$second] };

        my $bits = distance($first_hash, $second_hash);
        next if $bits > $THRESHOLD;

        say join("\t", $first_name, $second_name, $bits);
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <opencv2/opencv.hpp> | |
#include <opencv2/highgui.hpp> | |
#include <cstdint> | |
#include <cstdio> | |
#include <string> | |
#include <unistd.h> | |
using namespace cv; | |
using namespace std; | |
// http://www.hackerfactor.com/blog/?/archives/432-Looks-Like-It.html | |
static | |
float median(const Mat &img) | |
{ | |
Mat copy = img.clone().reshape(0, 64); | |
cv::sort(copy, copy, cv::SORT_EVERY_COLUMN | cv::SORT_ASCENDING); | |
return (copy.at<float>(31) + copy.at<float>(32)) / 2.0; | |
} | |
static uint64_t | |
phash(const Mat &img) | |
{ | |
Mat eight_by_eight; | |
Mat grayscale; | |
Mat float_grayscale; | |
Mat discrete_cosine; | |
resize(img, eight_by_eight, Size(32, 32)); | |
cvtColor(eight_by_eight, grayscale, cv::COLOR_BGR2GRAY); | |
grayscale.convertTo(float_grayscale, CV_32FC1); | |
dct(float_grayscale, discrete_cosine); | |
Mat top8(discrete_cosine, Rect(1, 1, 8, 8)); // original cuts off the first row and column | |
//top8.at<float>(0, 0) = 0; | |
double avg = median(top8); | |
Mat avg_mat(8, 8, CV_32FC1, Scalar(avg, 0, 0, 0)); | |
avg_mat = (top8 > avg_mat); | |
uint64_t mask = 1UL << 63; | |
uint64_t result = 0; | |
for(int row = 0; row < 8; row++) { | |
for(int col = 0; col < 8; col++) { | |
if(avg_mat.at<uchar>(row, col) != 0) { | |
result |= mask; | |
} | |
mask >>= 1; | |
} | |
} | |
return result; | |
} | |
int | |
main(int argc, char **argv) | |
{ | |
for(int i = 1; i < argc; i++) { | |
if(access(argv[i], R_OK)) { | |
continue; | |
} | |
try { | |
Mat img = imread(argv[i]); | |
uint64_t hash = phash(img); | |
printf("%s\t%llu\n", argv[i], hash); | |
} catch(...) { | |
fprintf(stderr, "exception for %s\n", argv[i]); | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment