Last active
March 20, 2023 17:14
-
-
Save joshgachnang/5f8c918a865a2ad5a616df0356d9bb9e to your computer and use it in GitHub Desktop.
Remove duplicate movies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Deletes duplicate movie files under /data/Movies, removing the smaller file of each pair. This is messy as all heck. Sorry! But it works.
// Warning: years are stripped from file names before comparing, so if you have the same movie from different years
// (e.g. The Italian Job from 1969 and 2003), one will be deleted incorrectly.
// This mainly catches movies that have different filenames but are still duplicates.
const glob = require("glob"); | |
const fs = require("fs"); | |
/**
 * Recursively lists everything beneath `src` by globbing `src/**\/*`.
 *
 * Despite the name, the pattern is not restricted to directories; the caller
 * in this file filters the results down to movie files itself.
 *
 * NOTE(review): this relies on glob's callback-style API, `glob(pattern, cb)`.
 * Presumably an older glob release is installed (newer major versions are
 * Promise-based) — confirm against package.json.
 *
 * @param {string} src - Root directory to search, without a trailing slash.
 * @param {function(?Error, string[]): void} callback - Receives an error or
 *   the list of matched paths.
 * @returns {void}
 */
const getDirectories = (src, callback) => {
  glob(`${src}/**/*`, callback);
};
/**
 * Deletes the smaller of two duplicate movie files from disk.
 *
 * The larger file is kept. When both sizes are equal, `a` is the one deleted.
 * A failed deletion is logged and swallowed so that one bad file does not
 * stop the rest of the run.
 *
 * @param {{path: string, size: number}} a - First candidate.
 * @param {{path: string, size: number}} b - Second candidate.
 * @returns {undefined}
 */
function removeDupe(a, b) {
  // Strict ">" means a tie falls through to deleting `a`.
  const rem = a.size > b.size ? b : a;
  try {
    fs.unlinkSync(rem.path);
  } catch (e) {
    console.error("Could not delete", rem, e);
  }
}
// Every movie file found so far, as { filename, path, size }: `filename` is the
// normalized comparison key, `path` is the path reported by glob, and `size`
// is the file size in bytes.
let paths = [];

/**
 * Reduces a movie file name to a key that is equal for differently-named
 * copies of the same title.
 *
 * Drops the 4-character extension, removes every space, parenthesis,
 * apostrophe, underscore and hyphen, then every run of four digits (years,
 * but also e.g. "1080"), then every dot, and lower-cases the rest.
 *
 * @param {string} filename - Base file name including a ".mp4"/".mkv" extension.
 * @returns {string} The comparison key; may be empty (e.g. for "1917.mkv").
 */
function normalizeTitle(filename) {
  return filename
    .slice(0, -4)
    // A string pattern would only replace the FIRST occurrence, so use a
    // global character class to strip every separator.
    .replace(/[ ()'_-]/g, "")
    .replace(/\d{4}/g, "")
    .replace(/\./g, "")
    .trim()
    .toLowerCase();
}

if (require.main === module) {
  getDirectories("/data/Movies", function (err, files) {
    if (err) {
      console.error("Error", err);
      return;
    }

    for (const file of files) {
      if (!file.endsWith(".mp4") && !file.endsWith(".mkv")) {
        continue;
      }

      const filename = normalizeTitle(file.slice(file.lastIndexOf("/") + 1));
      if (!filename) {
        // An empty key (title made only of stripped characters, such as
        // "1917.mkv") cannot identify a movie; never treat it as a duplicate.
        continue;
      }

      let stats;
      try {
        stats = fs.statSync(file);
      } catch (e) {
        // One unreadable entry should not abort the whole scan.
        console.error("Could not stat", file, e);
        continue;
      }
      if (!stats.isFile()) {
        continue;
      }

      paths.push({ filename, path: file, size: stats.size });
    }

    // Group by key instead of sorting and comparing neighbours: the default
    // sort() cannot order objects, and pairwise comparison breaks down when a
    // title has three or more copies.
    const survivors = new Map();
    for (const entry of paths) {
      const current = survivors.get(entry.filename);
      if (current === undefined) {
        survivors.set(entry.filename, entry);
        continue;
      }

      removeDupe(current, entry);
      // Mirror removeDupe's rule (the first argument is deleted unless it is
      // strictly larger) so later copies are compared against the file that
      // is still on disk.
      if (!(current.size > entry.size)) {
        survivors.set(entry.filename, entry);
      }
    }
  });
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
You might also consider comparing files by checksum, using a tool such as 'md5sum' or 'sha256sum', instead of by file name.
(md5sum works best up to 2G, sha256/sha512 works best with bigger files)