Skip to content

Instantly share code, notes, and snippets.

@zk-phi
Created January 29, 2025 15:21
Show Gist options
  • Save zk-phi/d7cfc7e0330e4f0ec405e507a27ce1fe to your computer and use it in GitHub Desktop.
Save zk-phi/d7cfc7e0330e4f0ec405e507a27ce1fe to your computer and use it in GitHub Desktop.
Directory comparator
#!emacs --script
;; Compare two directory recursively to find updated/outdated files.
;;
;; Dependencies:
;; - brew install --cask emacs
;; - brew install xxh
;;
;; Usage:
;; - ./comparator.el /path/to/directory/A /path/to/directory/B > output
(require 'cl)
(defun time-reliable-p (decoded-time)
(not (eql (decoded-time-year decoded-time) 1970)))
(defconst xxh-buffer-1 (get-buffer-create "*xxh1*"))
(defconst xxh-buffer-2 (get-buffer-create "*xxh2*"))
(defun xxh-check (a b)
"Check if two files A and B are (probably) the same, by comparing xxh digest."
(message "Verifying %s ..." a)
(let ((default-directory (file-name-directory a)))
(call-process "xxhsum" nil xxh-buffer-1 nil "-H3" (file-name-nondirectory a)))
(let ((default-directory (file-name-directory b)))
(call-process "xxhsum" nil xxh-buffer-2 nil "-H3" (file-name-nondirectory b)))
(prog1 (string= (with-current-buffer xxh-buffer-1 (buffer-string))
(with-current-buffer xxh-buffer-2 (buffer-string)))
(with-current-buffer xxh-buffer-1 (erase-buffer))
(with-current-buffer xxh-buffer-2 (erase-buffer))))
;; ------
(defvar compare-backup-new-files nil)
(defvar compare-backup-deleted-files nil)
(defvar compare-backup-suspicious-files nil)
(defvar compare-backup-outdated-files nil)
(defvar compare-backup-updated-files nil)
(defvar compare-backup-unmatched-files nil)
(defvar compare-backup-skipped-files-count 0)
(defvar compare-backup-identical-files-count 0)
(defun compare-files (a b)
"Compare two files A and B and update list as needed."
(let* ((a-attr (file-attributes a))
(b-attr (file-attributes b))
(a-time (file-attribute-modification-time a-attr))
(b-time (file-attribute-modification-time b-attr))
(a-size (file-attribute-size a-attr))
(b-size (file-attribute-size b-attr))
(same-time (time-equal-p a-time b-time))
(same-size (eql a-size b-size)))
(cond ((and same-size same-time)
;; maybe identical
(incf compare-backup-skipped-files-count 1))
((and same-size (xxh-check a b))
;; timestamp is modified, but size and content are identical
(incf compare-backup-identical-files-count 1))
((or same-time (not (time-reliable-p a-time)))
(push a compare-backup-suspicious-files))
((time-less-p a-time b-time)
(push a compare-backup-outdated-files))
(t
(push a compare-backup-updated-files)))))
(defun compare-backup-dirs-r (expected-newer expected-older msg-prefix)
(let ((left-files (directory-files expected-newer t))
(right-files (directory-files expected-older t)))
;; check if (car right-files) is backed up in the left-files
(while right-files
(let ((right (car right-files)))
(if (null left-files)
;; only appears in the right list => deleted
(progn (push right compare-backup-deleted-files) (pop right-files))
(let* ((left (car left-files))
(left-name (file-name-nondirectory left))
(right-name (file-name-nondirectory right)))
(cond ((string< left-name right-name)
;; left right
;; A <- A doesnt exist in the right list (new file)
;; B B
;; C C
(push left compare-backup-new-files)
(pop left-files))
((string< right-name left-name)
(push right compare-backup-deleted-files)
(pop right-files))
(t
(cond ((not (eq (file-directory-p left) (file-directory-p right)))
(push left compare-backup-unmatched-files))
((file-directory-p left)
(unless (string-match "/[.]+$" left)
(message "%s%s" msg-prefix left-name)
(compare-backup-dirs-r left right (concat msg-prefix "| "))))
(t
(compare-files left right)))
(pop right-files)
(pop left-files)))))))))
(princ
(format "Target 1: %s\nTarget 2: %s\n"
(car command-line-args-left)
(cadr command-line-args-left)))
(princ (format "\n%s Started.\n" (format-time-string "%Y-%m-%d %H:%M:%S" (current-time))))
(compare-backup-dirs-r (car command-line-args-left) (cadr command-line-args-left) "")
(princ
(format "%s Finished.\n" (format-time-string "%Y-%m-%d %H:%M:%S" (current-time))))
(when compare-backup-new-files
(princ "\n==== NEW FILES\n")
(dolist (file compare-backup-new-files)
(princ (concat file "\n"))))
(when compare-backup-deleted-files
(princ "\n==== DELETED (MISSING) FILES\n")
(dolist (file compare-backup-deleted-files)
(princ (concat file "\n"))))
(when compare-backup-suspicious-files
(princ "\n==== MODIFIED FILES (timestamp unreliable)\n")
(dolist (file compare-backup-suspicious-files)
(princ (concat file "\n"))))
(when compare-backup-updated-files
(princ "\n==== MODIFIED FILES (updated)\n")
(dolist (file compare-backup-updated-files)
(princ (concat file "\n"))))
(when compare-backup-outdated-files
(princ "\n==== MODIFIED FILES (outdated)\n")
(dolist (file compare-backup-outdated-files)
(princ (concat file "\n"))))
(when compare-backup-unmatched-files
(princ "\n==== MODIFIED FILES (file => dir or vice versa)\n")
(dolist (file compare-backup-unmatched-files)
(princ (concat file "\n"))))
(princ "\n==== SUMMARY\n")
(princ
(format "assumed identity (timestamp and size): %d\n" compare-backup-skipped-files-count))
(princ
(format "confirmed identity (xxh digest): %d\n" compare-backup-identical-files-count))
(princ (format "new files: %d\n" (length compare-backup-new-files)))
(princ (format "deleted (missing) files: %d\n" (length compare-backup-deleted-files)))
(princ (format "modified files:\n"))
(princ (format "- timestamp unreliable: %d\n" (length compare-backup-suspicious-files)))
(princ (format "- updated: %d\n" (length compare-backup-updated-files)))
(princ (format "- outdated: %d\n" (length compare-backup-outdated-files)))
(princ (format "- unmatched (file/dir): %d\n" (length compare-backup-unmatched-files)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment