Last active
June 30, 2021 11:36
-
-
Save ashishnegi/d51e2cb116eb133b4a7d to your computer and use it in GitHub Desktop.
diff of two files in clojure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;;;;;;;;;;; ---- diff.clj ------------------------ | |
(ns diffclj.diff) | |
;; What is a diff?
;; A diff compares two files, using the line as the basic unit.
;; The goal is to maximize the number of common (unchanged) lines,
;; i.e. to find the longest common subsequence of lines.
(defn- make-pairs
  "Pairs every element of `file-2` with two neighbouring cells of the
  previous table row `diff-last`.

  Returns a lazy seq of `[item [center up]]` where, for the item at
  index j, `center` is `(nth diff-last j)` and `up` is
  `(nth diff-last (inc j))`."
  [file-2 diff-last]
  (map (fn [item center up] [item [center up]])
       file-2
       diff-last
       (rest diff-last)))
(defn- next-diff-matrix
  "Computes the next row of the longest-common-subsequence table.

  `str-1` is the current element of the first file, `diff-last` is the
  previous table row and `file-2` is the second file. The returned lazy
  seq starts with 0 and then holds one cell per element of `file-2`:
  on a match the cell is the diagonal neighbour plus one, otherwise the
  larger of the left and the upper neighbour."
  [str-1 diff-last file-2]
  (letfn [(cell [left [str-2 [center up]]]
            (if (not= str-2 str-1)
              (max left up)
              (inc center)))]
    (reductions cell 0 (make-pairs file-2 diff-last))))
(defn diff-lcs
  "Builds the longest-common-subsequence table for `file-1` and `file-2`.

  Returns a sequence of `(inc (count file-1))` rows, each a vector of
  `(inc (count file-2))` integers. Row 0 is all zeros; row i is derived
  from row i-1 and the (i-1)th element of `file-1`."
  [file-1 file-2]
  (let [zero-row (vec (repeat (inc (count file-2)) 0))]
    ;; Every row is realized into a vector before the next one is built on
    ;; top of it. Leaving the rows lazy stacks one unrealized seq per element
    ;; of file-1, which can overflow the stack once the last row is finally
    ;; read. Folding over file-1 also avoids a linear `nth` lookup per row.
    (doall
     (reductions (fn [prev-row str-1]
                   (vec (next-diff-matrix str-1 prev-row file-2)))
                 zero-row
                 file-1))))
(defn- find-maximum-matching
  "Backtracks through the LCS table and returns the matched positions.

  `diff-matrix` must be the table for `file-1` and `file-2`, i.e. hold
  `(inc (count file-1))` rows of `(inc (count file-2))` cells each.
  Returns a seq of `{:file1 i :file2 j}` maps in ascending index order;
  the seq is empty when either input is empty."
  [diff-matrix file-1 file-2]
  ;; Vectors give constant-time `nth`; on seqs every lookup below would be
  ;; linear, which makes the whole backtrack quadratic.
  (let [lines-1 (vec file-1)
        lines-2 (vec file-2)
        rows    (mapv vec diff-matrix)]
    (loop [pos-1   (dec (count lines-1))
           pos-2   (dec (count lines-2))
           matches '()]
      (cond
        ;; Ran off the start of either file: nothing left to match.
        (or (neg? pos-1) (neg? pos-2))
        matches

        ;; Equal elements: record the pair and move diagonally.
        (= (nth lines-1 pos-1) (nth lines-2 pos-2))
        (recur (dec pos-1) (dec pos-2)
               (cons {:file1 pos-1 :file2 pos-2} matches))

        ;; No match: step towards the neighbour with the larger LCS length.
        ;; Row (inc pos-1) belongs to the element at pos-1, because row 0 is
        ;; the all-zero border row.
        :else
        (let [left-value (nth (nth rows (inc pos-1)) pos-2)
              up-value   (nth (nth rows pos-1) (inc pos-2))]
          (if (> up-value left-value)
            (recur (dec pos-1) pos-2 matches)
            (recur pos-1 (dec pos-2) matches)))))))
;; Public api | |
(defn diff-api
  "Returns the matching positions of `file-1` and `file-2` as a sequence
  of `{:file1 i :file2 j}` maps, one per element of their longest common
  subsequence."
  [file-1 file-2]
  (let [lcs-table (diff-lcs file-1 file-2)]
    (find-maximum-matching lcs-table file-1 file-2)))
;; test cases diff-api : | |
;; Note -> These test cases run on plain strings (sequences of characters),
;; but the API works the same way on an array of strings or, for that matter,
;; on any sequence whose elements can be compared with =.
;; Uncomment the below code for testing. | |
;; (and (= '({:file1 1, :file2 0} {:file1 3, :file2 1}) | |
;; (diff-api "abcd" "bdfa")) | |
;; (= '() | |
;; (diff-api "" "bdfa")) | |
;; (= '() | |
;; (diff-api "" "")) | |
;; (= '() | |
;; (diff-api "abcd" "")) | |
;; (= '({:file1 1, :file2 3} | |
;; {:file1 2, :file2 4} | |
;; {:file1 3, :file2 5}) | |
;; (diff-api "xabcd" "bdfabcx"))) | |
;;;;;;;;;;;; ---- print.clj ------------------------ | |
(ns diffclj.print) | |
;;** defn- defines a function that is private to this namespace.
(defn- print-insert-updates-nochange
  "Builds the reducing function that prints one step of the diff.

  The returned function takes the previous match and the current match
  (maps with :file1 and :file2 indices) and prints:
    1. the file1 lines strictly between the two matches, prefixed with -
    2. the file2 lines strictly between the two matches, prefixed with +
    3. the matched file1 line itself, but only when its index is below len1
  It returns the current match so it can serve as the next accumulator."
  [file1 file2 len1]
  (fn [last-match match]
    (let [{prev-1 :file1 prev-2 :file2} last-match
          {curr-1 :file1 curr-2 :file2} match]
      (doseq [idx (range (inc prev-1) curr-1)]
        (println "- " (nth file1 idx)))
      (doseq [idx (range (inc prev-2) curr-2)]
        (println "+ " (nth file2 idx)))
      (when (< curr-1 len1)
        (println " " (nth file1 curr-1)))
      match)))
(defn print-diffs
  "Prints the diff of `file1` and `file2` given their `matches`.

  `matches` is a sequence of `{:file1 i :file2 j}` maps in ascending
  index order. Returns the end-of-file marker
  `{:file1 (count file1) :file2 (count file2)}`, which is the last value
  produced by the reduction."
  [file1 file2 matches]
  ;; The printer looks lines up by index; on a lazy line seq each `nth` is
  ;; linear, so both inputs are realized into vectors once up front.
  (let [lines1       (vec file1)
        lines2       (vec file2)
        len1         (count lines1)
        len2         (count lines2)
        ;; Appended as a final pseudo-match so that trailing lines are
        ;; flushed and the reduction still works when there are no matches.
        end-of-files {:file1 len1 :file2 len2}]
    (reduce (print-insert-updates-nochange lines1 lines2 len1)
            ;; Start before the first line of both files.
            {:file1 -1 :file2 -1}
            (conj (vec matches) end-of-files))))
;;;;;;;;;;;; ---- core.clj ------------------------ | |
(ns diffclj.core | |
(:gen-class) | |
(:require [diffclj.diff :as diff]) | |
(:require [clojure.java.io :as io]) | |
(:require [diffclj.print :as print])) | |
(defn printManual
  "Prints the command-line usage text to *out*."
  []
  (let [lead  "Run the program with command:"
        usage "<executable> file1 file2"]
    (println lead usage)))
(defn -main
  "Entry point: expects exactly two file paths, reads both files line by
  line and prints their diff. With any other number of arguments the
  usage text is printed instead. Exceptions raised while reading or
  diffing are caught and reported as a message on *out*."
  [& args]
  (if (not= 2 (count args))
    (printManual)
    (let [[path-1 path-2] args]
      (try
        (with-open [rdr1 (io/reader path-1)
                    rdr2 (io/reader path-2)]
          ;; line-seq is lazy, so the diff and the printing both have to
          ;; finish while the readers are still open.
          (let [lines-1 (line-seq rdr1)
                lines-2 (line-seq rdr2)
                matches (diff/diff-api lines-1 lines-2)]
            (print/print-diffs lines-1 lines-2 matches)))
        (catch Exception e
          (println (str "Some Problem occurred: " (.getMessage e))))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment