Skip to content

Instantly share code, notes, and snippets.

@ashishnegi
Last active June 30, 2021 11:36
Show Gist options
  • Save ashishnegi/d51e2cb116eb133b4a7d to your computer and use it in GitHub Desktop.
Save ashishnegi/d51e2cb116eb133b4a7d to your computer and use it in GitHub Desktop.
diff of two files in clojure
;;;;;;;;;;;; ---- diff.clj ------------------------
(ns diffclj.diff)
;; What is a diff?
;; It compares two files, with the line as the basic unit.
;; The goal is to maximize the number of lines the two files have in common,
;; which is the longest-common-subsequence (LCS) problem.
(defn- make-pairs
  "Zips each item of file-2 with a two-entry window over diff-last.

  Returns a lazy seq of [item [center up]] where, for the item at index j,
  center is (nth diff-last j) and up is (nth diff-last (inc j)).
  The result stops as soon as file-2 or the windows run out."
  [file-2 diff-last]
  (map (fn [item center up]
         [item [center up]])
       file-2
       diff-last
       (rest diff-last)))
(defn- next-diff-matrix
  "Generates the next row of the longest-common-subsequence table.

  str-1     - the item of the first file that this row belongs to.
  diff-last - the previous row of the table; entry j and entry (inc j) are
              read for the item of file-2 at index j.
  file-2    - the items of the second file.

  Returns the new row as a vector: a leading 0 followed by one entry per
  item of file-2. An entry is (inc diagonal) when the two items are equal,
  otherwise the larger of the entry to its left and the entry above it.

  The row is realized eagerly on purpose. A lazy row would be a lazy seq
  built on top of the previous lazy row, so realizing the last row of the
  table would recurse once per row and overflow the stack on long inputs."
  [str-1 diff-last file-2]
  (vec
   (reductions (fn [left [str-2 center up]]
                 ;; center = entry diagonally up-left, up = entry directly above.
                 (if (= str-2 str-1)
                   (inc center)
                   (max left up)))
               0
               (map vector file-2 diff-last (rest diff-last)))))
(defn diff-lcs
  "Builds the longest-common-subsequence table for file-1 and file-2.

  file-1, file-2 - sequences of items (for example lines, or the characters
                   of a string).

  Returns a fully realized seq of (inc (count file-1)) rows. The first row
  holds (inc (count file-2)) zeros; every following row is produced by
  next-diff-matrix from the row before it and the next item of file-1.

  file-1 is walked once from front to back instead of being indexed with
  nth on every step, because nth is a linear scan on a plain sequence."
  [file-1 file-2]
  (let [first-row (vec (repeat (inc (count file-2)) 0))]
    (doall
     (reductions (fn [prev-row item-1]
                   (next-diff-matrix item-1 prev-row file-2))
                 first-row
                 file-1))))
(defn- find-maximum-matching
  "Backtracks through the LCS table and collects the matching positions.

  diff-matrix - seq of rows, first row first; expected to be the table that
                diff-lcs builds for the same file-1 and file-2.
  file-1      - items of the first file.
  file-2      - items of the second file.

  Returns a list of {:file1 index-in-file-1 :file2 index-in-file-2} maps in
  ascending index order, or an empty list when either input is empty.

  Both inputs are copied into vectors once so that every positional lookup
  is constant time; indexing a plain sequence with nth is a linear scan and
  made the backtrack quadratic."
  [diff-matrix file-1 file-2]
  (let [items-1 (vec file-1)
        items-2 (vec file-2)]
    ;; Start at the last item of both files and at the last row of the table.
    (loop [pos-1 (dec (count items-1))
           pos-2 (dec (count items-2))
           rows (reverse diff-matrix)
           matches '()]
      (if (or (neg? pos-1) (neg? pos-2))
        matches
        (if (= (nth items-1 pos-1) (nth items-2 pos-2))
          ;; The items match here, so record the pair and go diagonally.
          (recur (dec pos-1) (dec pos-2)
                 (rest rows)
                 (cons {:file1 pos-1 :file2 pos-2} matches))
          ;; No match: step towards the neighbour with the larger LCS value.
          (let [curr-row (first rows)
                up-row (second rows)
                left-value (nth curr-row pos-2)
                ;; Without a row above, force the move up so the walk ends.
                up-value (if (nil? up-row)
                           (inc left-value)
                           (nth up-row (inc pos-2)))]
            (if (> up-value left-value)
              ;; Moving up also drops the current row of the table.
              (recur (dec pos-1) pos-2 (rest rows) matches)
              (recur pos-1 (dec pos-2) rows matches))))))))
;; Public api
(defn diff-api
  "Computes which items of file-1 and file-2 line up in their diff.

  Builds the LCS table with diff-lcs and backtracks through it with
  find-maximum-matching. Returns the resulting list of
  {:file1 index :file2 index} pairs."
  [file-1 file-2]
  (let [lcs-table (diff-lcs file-1 file-2)]
    (find-maximum-matching lcs-table file-1 file-2)))
;; test cases diff-api :
;; Note -> These test cases diff plain strings character by character, but the
;; same code works on an array of strings or, for that matter, on any sequence
;; of items comparable with `=`.
;; Uncomment the below code for testing.
;; (and (= '({:file1 1, :file2 0} {:file1 3, :file2 1})
;; (diff-api "abcd" "bdfa"))
;; (= '()
;; (diff-api "" "bdfa"))
;; (= '()
;; (diff-api "" ""))
;; (= '()
;; (diff-api "abcd" ""))
;; (= '({:file1 1, :file2 3}
;; {:file1 2, :file2 4}
;; {:file1 3, :file2 5})
;; (diff-api "xabcd" "bdfabcx")))
;;;;;;;;;;;; ---- print.clj ------------------------
(ns diffclj.print)
;;** defn- defines a function that is private to this namespace.
(defn- print-insert-updates-nochange
  "Returns the reducing function that does the actual printing of diffs.

  file1, file2 - the items (lines) of the two files.
  len1         - number of items in file1; a match whose :file1 index is
                 not below len1 is treated as the end-of-file marker and
                 prints no unchanged line.

  The returned function takes the previous match and the current match,
  both {:file1 index :file2 index} maps, and prints:
    1. the file1 items strictly between the two matches, prefixed with -
    2. the file2 items strictly between the two matches, prefixed with +
    3. the matched file1 item itself, prefixed with a blank
  It returns the current match so it can be used with reduce.

  Both files are copied into vectors once, outside the returned function,
  so each printed line is a constant-time lookup; nth on a plain sequence
  is a linear scan."
  [file1 file2 len1]
  (let [lines1 (vec file1)
        lines2 (vec file2)]
    (fn [last-match match]
      ;; Items only present in file1 since the previous match.
      (doseq [pos (range (inc (:file1 last-match)) (:file1 match))]
        (println "- " (nth lines1 pos)))
      ;; Items only present in file2 since the previous match.
      (doseq [pos (range (inc (:file2 last-match)) (:file2 match))]
        (println "+ " (nth lines2 pos)))
      ;; The item both files share, unless this is the end-of-file marker.
      (when (< (:file1 match) len1)
        (println " " (nth lines1 (:file1 match))))
      match)))
(defn print-diffs
  "Prints the diff of file1 and file2 for the given matches.

  matches is the collection of {:file1 index :file2 index} pairs. An
  end-of-file pair is appended to it so that the tail of both files is
  still printed, including when there are no matches at all.
  Returns the last pair handed to the printing function."
  [file1 file2 matches]
  (let [len1 (count file1)
        len2 (count file2)
        ;;** this function is called for each item in matches
        print-step (print-insert-updates-nochange file1 file2 len1)
        ;; position just before the first item of both files
        before-start {:file1 -1 :file2 -1}
        ;; position just past the last item of both files
        end-of-files {:file1 len1 :file2 len2}]
    (reduce print-step
            before-start
            (concat matches [end-of-files]))))
;;;;;;;;;;;; ---- core.clj ------------------------
(ns diffclj.core
(:gen-class)
(:require [diffclj.diff :as diff])
(:require [clojure.java.io :as io])
(:require [diffclj.print :as print]))
(defn printManual
  "Prints the one-line usage hint for the command-line program."
  []
  (let [heading "Run the program with command:"
        invocation "<executable> file1 file2"]
    (println heading invocation)))
(defn -main
  "Diff-main: reads the two files named on the command line into memory
  and prints their diff.

  Prints the usage hint instead when it is not given exactly two
  arguments. Any Exception raised while reading or diffing is caught and
  reported on standard output."
  [& args]
  (if (= 2 (count args))
    (let [[path-1 path-2] args]
      (try
        ;; Both readers stay open until the diff has been printed, because
        ;; the line seqs are read lazily from them.
        (with-open [reader-1 (io/reader path-1)
                    reader-2 (io/reader path-2)]
          (let [lines-1 (line-seq reader-1)
                lines-2 (line-seq reader-2)
                ;; take the diff of the two files and
                matches (diff/diff-api lines-1 lines-2)]
            ;; pass the diff to the printing.
            (print/print-diffs lines-1 lines-2 matches)))
        (catch Exception e
          (println (str "Some Problem occurred: " (.getMessage e))))))
    (printManual)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment