Skip to content

Instantly share code, notes, and snippets.

@joshgachnang
Last active March 20, 2023 17:14
Show Gist options
  • Save joshgachnang/5f8c918a865a2ad5a616df0356d9bb9e to your computer and use it in GitHub Desktop.
Save joshgachnang/5f8c918a865a2ad5a616df0356d9bb9e to your computer and use it in GitHub Desktop.
Remove duplicate movies
// This is messy as all heck. Sorry! But it works.
// Warning: if you have the same movie from different years (e.g. The Italian Job from 1969 and 2003), one will be deleted
// incorrectly. This mainly catches movies that have different filenames but are still duplicates.
const glob = require("glob");
const fs = require("fs");
/**
 * Recursively list everything underneath a directory.
 *
 * @param {string} src - Root directory to search; "/**\/*" (without the backslash) is appended to it.
 * @param {Function} callback - Passed straight through to `glob` as its completion callback.
 */
const getDirectories = (src, callback) => {
  // Match every entry at any depth below `src`.
  const pattern = src + "/**/*";
  glob(pattern, callback);
};
/**
 * Delete one file out of a pair of duplicates, keeping the larger one.
 *
 * When `a` is strictly larger than `b`, `b` is deleted; otherwise (including
 * equal sizes) `a` is deleted. A failed deletion is logged, not thrown.
 *
 * @param {{path: string, size: number}} a - First candidate.
 * @param {{path: string, size: number}} b - Second candidate.
 * @returns {undefined}
 */
function removeDupe(a, b) {
  const firstIsLarger = a.size > b.size;
  const victim = firstIsLarger ? b : a;
  try {
    return fs.unlinkSync(victim.path);
  } catch (err) {
    // Best effort: report the failure and let the caller carry on.
    console.error("Could not delete", victim, err);
  }
}
// Extensions (dot included, exactly four characters) of files treated as movies.
const MOVIE_EXTENSIONS = [".mp4", ".mkv"];

/**
 * Reduce a movie file name to the key used to detect duplicates.
 *
 * Drops the four-character extension, then removes every space, parenthesis,
 * apostrophe, underscore and hyphen, then every run of four digits (typically
 * a release year), then every dot, and finally lower-cases the result.
 * All replacements are global: a string pattern passed to `replace` only
 * removes the first occurrence, which left most separators in place.
 *
 * @param {string} basename - File name without its directory, e.g. "The.Italian.Job.2003.mkv".
 * @returns {string} The comparison key, e.g. "theitalianjob"; may be empty.
 */
function normalizeTitle(basename) {
  return basename
    .slice(0, -4)
    .replace(/[ ()'_-]/g, "")
    .replace(/\d{4}/g, "")
    .replace(/\./g, "")
    .trim()
    .toLowerCase();
}

/**
 * Order two scanned entries by their comparison key.
 *
 * Uses plain code-unit comparison so that "equal" here means exactly the
 * same thing as the `===` check used when detecting duplicates.
 *
 * @param {{filename: string}} x
 * @param {{filename: string}} y
 * @returns {number} Negative, zero or positive, as `Array.prototype.sort` expects.
 */
function compareByFilename(x, y) {
  if (x.filename < y.filename) {
    return -1;
  }
  return x.filename > y.filename ? 1 : 0;
}

// Every movie file found by the scan: { filename: <comparison key>, path, size }.
const paths = [];
if (require.main === module) {
  getDirectories("/data/Movies", function (err, files) {
    if (err) {
      console.log("Error", err);
      return;
    }
    for (const file of files) {
      if (!MOVIE_EXTENSIONS.includes(file.slice(-4))) {
        continue;
      }
      const basename = file.split("/").slice(-1)[0];
      if (!basename) {
        continue;
      }
      const filename = normalizeTitle(basename);
      if (!filename) {
        // Names made only of digits/separators (e.g. "1917.mkv", "2012.mkv")
        // all collapse to "", which would make unrelated movies look like
        // duplicates of each other. Never consider them for deletion.
        continue;
      }
      let stats;
      try {
        stats = fs.statSync(file);
      } catch (e) {
        // One unreadable entry should not abort the whole scan.
        console.error("Could not stat", file, e);
        continue;
      }
      if (!stats.isFile()) {
        continue;
      }
      paths.push({ filename, path: file, size: stats.size });
    }
    // The default sort would compare the objects as "[object Object]" and
    // leave them unordered; sort by key so duplicates end up adjacent.
    paths.sort(compareByFilename);
    // Walk each run of equal keys, always comparing against the entry that
    // is still on disk so that three or more copies are handled correctly.
    let survivor = paths[0];
    for (let i = 1; i < paths.length; i++) {
      const candidate = paths[i];
      if (candidate.filename === survivor.filename) {
        removeDupe(survivor, candidate);
        // removeDupe keeps its first argument only when it is strictly larger.
        if (!(survivor.size > candidate.size)) {
          survivor = candidate;
        }
      } else {
        survivor = candidate;
      }
    }
  });
}
@MynaITLabs
Copy link

MynaITLabs commented Mar 20, 2023

Might also think about using 'md5sum', 'sha256' or other CRC checksum tools to compare files.
(md5sum works best up to 2G; sha256/sha512 work best with bigger files)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment