Skip to content

Instantly share code, notes, and snippets.

@edbond
Created February 24, 2011 21:17
Show Gist options
  • Save edbond/842907 to your computer and use it in GitHub Desktop.
Save edbond/842907 to your computer and use it in GitHub Desktop.
mp3 duplicate search
(ns mp3dups.core
(:import (org.apache.commons.io FileUtils)
(java.io File)
(org.jaudiotagger.audio AudioFileIO)
(org.jaudiotagger.tag FieldKey)))
(def ^{:doc "Global tag index shared by the functions in this namespace.
  An atom holding a map of [artist title] -> vector of the files that
  were recorded under that artist/title pair."}
  mp3data
  (atom {}))
(defn init-or-append
  "Returns `map` with `value` added to the collection stored under `key`.
  When `key` has no (truthy) value yet, a fresh one-element vector
  [value] is stored; otherwise `value` is conj'ed onto the existing
  collection."
  [map key value]
  (let [existing (get map key)
        updated  (if existing
                   (conj existing value)
                   [value])]
    (assoc map key updated)))
(defn update-data
  "Reads the audio tags of `file` and records the file in `mp3data`
  under the key [artist title].

  Files that have no tag at all, or whose artist or title is blank, are
  skipped: they would otherwise all share one key (e.g. [\"\" \"\"]) and
  be reported as duplicates of each other.

  Returns the new value of `mp3data` when the file was recorded, nil
  when it was skipped. If reading the file throws, the problem is
  written to *err* and the message string is returned; nothing is
  rethrown, so one unreadable file does not abort a whole scan."
  [file]
  (try
    (let [audio  (AudioFileIO/read file)
          ;; getTag may be nil for files that carry no tag; guard the
          ;; lookups instead of relying on an NPE being caught below.
          tag    (.getTag audio)
          artist (when tag (.getFirst tag FieldKey/ARTIST))
          title  (when tag (.getFirst tag FieldKey/TITLE))]
      ;; `empty?` is true for both nil and "".
      (when-not (some empty? [artist title])
        ;; swap! forwards the extra arguments to init-or-append itself.
        (swap! mp3data init-or-append [artist title] file)))
    (catch Exception e
      (let [msg (str "caught exception: " (.getMessage e))]
        ;; Make the failure visible; the returned string alone is
        ;; discarded by collect-data.
        (binding [*out* *err*]
          (println (str file) "-" msg))
        msg))))
(defn collect-data
  "Calls `update-data` on every element of `seq`, in order, purely for
  its side effects. Returns nil."
  [seq]
  (doseq [entry seq]
    (update-data entry)))
(defn find-duplicates
  "Prints (with prn) the entries of `mp3data` whose value holds more
  than one element, i.e. the artist/title keys recorded for several
  files. Returns nil."
  []
  (prn (for [entry @mp3data
             :when (< 1 (count (val entry)))]
         entry)))
(defn -main
  "Entry point. Resolves `top-dir` to an absolute path, walks the files
  returned by FileUtils/iterateFiles for it (called with a nil
  extension argument and the recursive flag set to true), rebuilds
  `mp3data` from scratch and prints the duplicates found."
  [top-dir]
  (let [root  (.getAbsoluteFile (File. top-dir))
        files (iterator-seq (FileUtils/iterateFiles root nil true))]
    ;; Start from an empty index so repeated runs do not accumulate.
    (reset! mp3data {})
    (collect-data files)
    (find-duplicates)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment