Created
January 29, 2025 15:21
-
-
Save zk-phi/d7cfc7e0330e4f0ec405e507a27ce1fe to your computer and use it in GitHub Desktop.
Directory comparator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!emacs --script | |
;; Compare two directory recursively to find updated/outdated files. | |
;; | |
;; Dependencies: | |
;; - brew install --cask emacs | |
;; - brew install xxh | |
;; | |
;; Usage: | |
;; - ./comparator.el /path/to/directory/A /path/to/directory/B > output | |
(require 'cl) | |
(defun time-reliable-p (decoded-time) | |
(not (eql (decoded-time-year decoded-time) 1970))) | |
(defconst xxh-buffer-1 (get-buffer-create "*xxh1*")) | |
(defconst xxh-buffer-2 (get-buffer-create "*xxh2*")) | |
(defun xxh-check (a b) | |
"Check if two files A and B are (probably) the same, by comparing xxh digest." | |
(message "Verifying %s ..." a) | |
(let ((default-directory (file-name-directory a))) | |
(call-process "xxhsum" nil xxh-buffer-1 nil "-H3" (file-name-nondirectory a))) | |
(let ((default-directory (file-name-directory b))) | |
(call-process "xxhsum" nil xxh-buffer-2 nil "-H3" (file-name-nondirectory b))) | |
(prog1 (string= (with-current-buffer xxh-buffer-1 (buffer-string)) | |
(with-current-buffer xxh-buffer-2 (buffer-string))) | |
(with-current-buffer xxh-buffer-1 (erase-buffer)) | |
(with-current-buffer xxh-buffer-2 (erase-buffer)))) | |
;; ------ | |
(defvar compare-backup-new-files nil) | |
(defvar compare-backup-deleted-files nil) | |
(defvar compare-backup-suspicious-files nil) | |
(defvar compare-backup-outdated-files nil) | |
(defvar compare-backup-updated-files nil) | |
(defvar compare-backup-unmatched-files nil) | |
(defvar compare-backup-skipped-files-count 0) | |
(defvar compare-backup-identical-files-count 0) | |
(defun compare-files (a b) | |
"Compare two files A and B and update list as needed." | |
(let* ((a-attr (file-attributes a)) | |
(b-attr (file-attributes b)) | |
(a-time (file-attribute-modification-time a-attr)) | |
(b-time (file-attribute-modification-time b-attr)) | |
(a-size (file-attribute-size a-attr)) | |
(b-size (file-attribute-size b-attr)) | |
(same-time (time-equal-p a-time b-time)) | |
(same-size (eql a-size b-size))) | |
(cond ((and same-size same-time) | |
;; maybe identical | |
(incf compare-backup-skipped-files-count 1)) | |
((and same-size (xxh-check a b)) | |
;; timestamp is modified, but size and content are identical | |
(incf compare-backup-identical-files-count 1)) | |
((or same-time (not (time-reliable-p a-time))) | |
(push a compare-backup-suspicious-files)) | |
((time-less-p a-time b-time) | |
(push a compare-backup-outdated-files)) | |
(t | |
(push a compare-backup-updated-files))))) | |
(defun compare-backup-dirs-r (expected-newer expected-older msg-prefix) | |
(let ((left-files (directory-files expected-newer t)) | |
(right-files (directory-files expected-older t))) | |
;; check if (car right-files) is backed up in the left-files | |
(while right-files | |
(let ((right (car right-files))) | |
(if (null left-files) | |
;; only appears in the right list => deleted | |
(progn (push right compare-backup-deleted-files) (pop right-files)) | |
(let* ((left (car left-files)) | |
(left-name (file-name-nondirectory left)) | |
(right-name (file-name-nondirectory right))) | |
(cond ((string< left-name right-name) | |
;; left right | |
;; A <- A doesnt exist in the right list (new file) | |
;; B B | |
;; C C | |
(push left compare-backup-new-files) | |
(pop left-files)) | |
((string< right-name left-name) | |
(push right compare-backup-deleted-files) | |
(pop right-files)) | |
(t | |
(cond ((not (eq (file-directory-p left) (file-directory-p right))) | |
(push left compare-backup-unmatched-files)) | |
((file-directory-p left) | |
(unless (string-match "/[.]+$" left) | |
(message "%s%s" msg-prefix left-name) | |
(compare-backup-dirs-r left right (concat msg-prefix "| ")))) | |
(t | |
(compare-files left right))) | |
(pop right-files) | |
(pop left-files))))))))) | |
(princ | |
(format "Target 1: %s\nTarget 2: %s\n" | |
(car command-line-args-left) | |
(cadr command-line-args-left))) | |
(princ (format "\n%s Started.\n" (format-time-string "%Y-%m-%d %H:%M:%S" (current-time)))) | |
(compare-backup-dirs-r (car command-line-args-left) (cadr command-line-args-left) "") | |
(princ | |
(format "%s Finished.\n" (format-time-string "%Y-%m-%d %H:%M:%S" (current-time)))) | |
(when compare-backup-new-files | |
(princ "\n==== NEW FILES\n") | |
(dolist (file compare-backup-new-files) | |
(princ (concat file "\n")))) | |
(when compare-backup-deleted-files | |
(princ "\n==== DELETED (MISSING) FILES\n") | |
(dolist (file compare-backup-deleted-files) | |
(princ (concat file "\n")))) | |
(when compare-backup-suspicious-files | |
(princ "\n==== MODIFIED FILES (timestamp unreliable)\n") | |
(dolist (file compare-backup-suspicious-files) | |
(princ (concat file "\n")))) | |
(when compare-backup-updated-files | |
(princ "\n==== MODIFIED FILES (updated)\n") | |
(dolist (file compare-backup-updated-files) | |
(princ (concat file "\n")))) | |
(when compare-backup-outdated-files | |
(princ "\n==== MODIFIED FILES (outdated)\n") | |
(dolist (file compare-backup-outdated-files) | |
(princ (concat file "\n")))) | |
(when compare-backup-unmatched-files | |
(princ "\n==== MODIFIED FILES (file => dir or vice versa)\n") | |
(dolist (file compare-backup-unmatched-files) | |
(princ (concat file "\n")))) | |
(princ "\n==== SUMMARY\n") | |
(princ | |
(format "assumed identity (timestamp and size): %d\n" compare-backup-skipped-files-count)) | |
(princ | |
(format "confirmed identity (xxh digest): %d\n" compare-backup-identical-files-count)) | |
(princ (format "new files: %d\n" (length compare-backup-new-files))) | |
(princ (format "deleted (missing) files: %d\n" (length compare-backup-deleted-files))) | |
(princ (format "modified files:\n")) | |
(princ (format "- timestamp unreliable: %d\n" (length compare-backup-suspicious-files))) | |
(princ (format "- updated: %d\n" (length compare-backup-updated-files))) | |
(princ (format "- outdated: %d\n" (length compare-backup-outdated-files))) | |
(princ (format "- unmatched (file/dir): %d\n" (length compare-backup-unmatched-files))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment