|
use std::collections::HashSet; |
|
use image_hasher::{HashAlg, HasherConfig, ImageHash}; |
|
use indicatif::{ProgressBar, ProgressStyle}; |
|
use rayon::prelude::*; |
|
use std::fs; |
|
use std::path::{Path, PathBuf}; |
|
|
|
pub fn stylized_progress_bar(len: u64) -> ProgressBar { |
|
let pb = ProgressBar::new(len); |
|
pb.set_style( |
|
#[allow(clippy::literal_string_with_formatting_args)] |
|
ProgressStyle::with_template("[{elapsed_precise}] {wide_bar} {pos:>}/{len:7} {eta}") |
|
.unwrap() |
|
.progress_chars(">>-"), |
|
); |
|
pb |
|
} |
|
|
|
/// Maximum hash distance at which two images are still reported as similar.
/// The comparison is `<=`, so `0` keeps only pairs whose distance is zero.
const THRESHOLD: u32 = 0; |
|
|
|
fn main() { |
|
let try1 = process_with_alg(HashAlg::Blockhash); |
|
|
|
println!("{}", try1.len()); |
|
for x in try1 { |
|
println!("{}{}",x.0,x.1); |
|
} |
|
} |
|
|
|
/// Returns the distinct pairs that occur in both `v1` and `v2`.
///
/// The result contains each common pair exactly once, in the order of its
/// first occurrence in `v1`, so the output is reproducible from run to run
/// (iterating a `HashSet` directly would yield an arbitrary order).
///
/// Both arguments are slices; callers holding a `Vec` can keep passing
/// `&vec` thanks to deref coercion.
fn intersection(v1: &[(char, char)], v2: &[(char, char)]) -> Vec<(char, char)> {
    let in_second: HashSet<(char, char)> = v2.iter().copied().collect();
    // Tracks pairs already emitted so duplicates inside `v1` appear once.
    let mut emitted: HashSet<(char, char)> = HashSet::new();

    v1.iter()
        .copied()
        .filter(|pair| in_second.contains(pair) && emitted.insert(*pair))
        .collect()
}
|
|
|
|
|
fn process_with_alg(alg: HashAlg) -> Vec<(char, char)> { |
|
let dir = "/mnt/nvme/imgs"; |
|
let threshold = THRESHOLD; // 距离阈值,小于等于此值认为相似 |
|
let hasher = HasherConfig::new() |
|
.hash_alg(alg) |
|
// .hash_alg(HashAlg::Blockhash) |
|
.to_hasher(); |
|
|
|
// 获取图片路径 |
|
let mut image_paths: Vec<PathBuf> = fs::read_dir(dir) |
|
.unwrap() |
|
.filter_map(|e| { |
|
let path = e.ok()?.path(); |
|
let ext = path.extension()?.to_string_lossy().to_lowercase(); |
|
if ["png", "jpg", "jpeg"].contains(&ext.as_str()) { |
|
Some(path) |
|
} else { |
|
None |
|
} |
|
}) |
|
.collect(); |
|
|
|
eprintln!("共找到 {} 张图片", image_paths.len()); |
|
|
|
// 并行计算哈希 |
|
let pb = stylized_progress_bar(image_paths.len() as u64); |
|
let hashes: Vec<Option<ImageHash>> = image_paths |
|
.par_iter() |
|
.map(|path| { |
|
let res = image::ImageReader::open(path) |
|
.and_then(|r| Ok(r.decode().unwrap())) |
|
.map(|img| hasher.hash_image(&img)) |
|
.map_err(|e| e.to_string()); |
|
pb.inc(1); |
|
match res { |
|
Ok(hash) => Some(hash), |
|
Err(err) => { |
|
eprintln!("跳过 {:?}: {}", path, err); |
|
None |
|
} |
|
} |
|
}) |
|
.collect(); |
|
pb.finish_with_message("哈希计算完成"); |
|
|
|
// 去除无效项 |
|
let valid: Vec<(PathBuf, ImageHash)> = image_paths |
|
.into_iter() |
|
.zip(hashes.into_iter()) |
|
.filter_map(|(p, h)| h.map(|hash| (p, hash))) |
|
.collect(); |
|
|
|
eprintln!("开始比较相似图片..."); |
|
let n = valid.len(); |
|
let pb2 = stylized_progress_bar((n * (n - 1) / 2) as u64); |
|
let pb2_ref = &pb2; |
|
|
|
// 并行比较 |
|
let results: Vec<(PathBuf, PathBuf, u32)> = (0..n) |
|
.into_par_iter() |
|
.flat_map(|i| { |
|
let valid = valid.clone(); |
|
(i + 1..n).into_par_iter().filter_map(move |j| { |
|
let dist = valid[i].1.dist(&valid[j].1); |
|
pb2_ref.inc(1); |
|
// eprintln!("dist: {dist}"); |
|
if dist <= threshold { |
|
Some((valid[i].0.clone(), valid[j].0.clone(), dist)) |
|
} else { |
|
None |
|
} |
|
}) |
|
}) |
|
.collect(); |
|
|
|
pb2.finish_with_message("比较完成"); |
|
|
|
let mut collected = Vec::new(); |
|
|
|
for (a, b, d) in results { |
|
let c1 = filename_to_char(&a); |
|
let c2 = filename_to_char(&b); |
|
if c1 < c2 { |
|
collected.push((c1, c2)); |
|
} else { |
|
collected.push((c2, c1)); |
|
} |
|
} |
|
collected |
|
} |
|
|
|
/// Maps a numerically named file to a character.
///
/// The file stem is parsed as a 1-based index `k` and converted to the code
/// point `0x3400 + (k - 1)`, so `1.png` yields U+3400, `2.png` yields U+3401,
/// and so on.
///
/// Returns [`char::REPLACEMENT_CHARACTER`] when the index does not map to a
/// valid character: an index of `0`, an index large enough to overflow the
/// code-point computation, or a result that is not a Unicode scalar value.
///
/// # Panics
///
/// Panics if `name` has no file stem or the stem is not a decimal `u32`.
fn filename_to_char(name: impl AsRef<Path>) -> char {
    /// Code point assigned to the file whose index is 1.
    const BASE_CODE_POINT: u32 = 0x3400;

    let index: u32 = name
        .as_ref()
        .file_stem()
        .expect("image path must have a file stem")
        .to_string_lossy()
        .parse()
        .expect("image file stem must be an unsigned decimal number");

    // Checked arithmetic: plain `-`/`+` would panic in debug builds and wrap
    // silently in release builds for index 0 or very large indices.
    index
        .checked_sub(1)
        .and_then(|offset| BASE_CODE_POINT.checked_add(offset))
        .and_then(char::from_u32)
        .unwrap_or(char::REPLACEMENT_CHARACTER)
}