(ns assess |
(:require [clojure.tools.analyzer.jvm :as ana.jvm] |
[clojure.java.io :as io] |
[clojure.string :as str] |
[clojure.pprint :as pp]) |
(:import java.io.PushbackReader)) |
;;; ======================================== |
;;; Utility functions |
;;; Apply `f` to every ordered pair drawn from `xs` (the cartesian
;;; product of `xs` with itself), returning a lazy seq of the results.
;;; The first element of the pair varies slowest.
(defn- cross [f xs]
  (mapcat (fn [left]
            (map (fn [right] (f left right)) xs))
          xs))
;;; Build a new map with the same keys as `m`, where each value has
;;; been passed through `f`. The metadata of `m` is carried over to
;;; the result.
(defn- map-vals [f m]
  (let [transformed (into {}
                          (map (fn [[k v]] [k (f v)]))
                          m)]
    (with-meta transformed (meta m))))
;;; For a map whose values are collections, drop from each value the
;;; elements that satisfy `pred`. Keys and metadata are kept (via
;;; `map-vals`); each value becomes a lazy seq.
(defn- remove-vals [pred m]
  (map-vals (fn [vs] (remove pred vs)) m))
;;; Return a map from each distinct `(f x)` to the number of elements
;;; `x` of `coll` that produced it.
(defn- count-by [f coll]
  (frequencies (map f coll)))
;;; Depth-first, post-order walk over an AST shaped like the maps
;;; returned by clojure.tools.analyzer.jvm: a node lists the keys of
;;; its sub-nodes under :children, and each such key holds either a
;;; single node or a vector of nodes.
;;; Every child is rewritten first, then `f` is applied to the node
;;; with its rewritten children in place. Returns whatever `f` returns
;;; for the root.
(defn postwalk [ast f]
  (let [walk       (fn [node] (postwalk node f))
        walk-child (fn [node k]
                     (let [child (get ast k)]
                       (assoc node k (if (vector? child)
                                       (mapv walk child)
                                       (walk child)))))]
    (f (reduce walk-child ast (:children ast)))))
;;; Walk an AST and collect the value under :var of every node that
;;; has one. Returns them as a set.
;;; Meant for use on a top-level form like a def or defn.
(defn var-references [ast]
  (let [found   (volatile! #{})
        collect (fn [node]
                  (when-let [v (:var node)]
                    (vswap! found conj v))
                  ;; hand the node back unchanged; we only walk for the side effect
                  node)]
    (postwalk ast collect)
    @found))
;;; Returns the symbol `s` when a namespace with that name currently
;;; exists in the runtime, otherwise nil.
(defn- ns-loaded? [s]
  (some (fn [loaded]
          (when (= s (ns-name loaded))
            s))
        (all-ns)))
;;; Make sure the namespace named by symbol `s` exists, calling
;;; `require` on it only when it is not already present.
(defn- ns-loaded! [s]
  (if (ns-loaded? s)
    nil
    (require s)))
;;; Does this AST node look like an expanded `ns` form, i.e. a :do
;;; whose first statement is an :invoke of clojure.core/in-ns?
;;; If so, return the namespace symbol taken from the quoted argument
;;; of that call; otherwise return a falsy value.
(defn- nsdecl? [n]
  (and (= :do (:op n))
       (let [head (first (:statements n))]
         (and (= :invoke (:op head))
              (let [call (:form head)]
                (and (= 'clojure.core/in-ns (first call))
                     ;; call is (clojure.core/in-ns (quote the.ns)); dig out the.ns
                     (second (second call))))))))
;;; For a node whose :op is :def, return the var stored under :var.
;;; Any other node yields nil.
(defn top-level-def [n]
  (if (= (:op n) :def)
    (:var n)
    nil))
;;; Locate all var refs inside a sequence of forms.
;;; When the forms change namespaces (an `ns` form), later forms are
;;; analyzed with *ns* bound to that namespace so the right aliases
;;; are in scope. Namespaces that aren't loaded yet are `require`d
;;; first, which means the analysis only works when 100% of the
;;; source modules compile!
;;;
;;; Takes a sequence of forms, as you would get from `read-all-forms`
;;; below. Returns a map from each var defined by a top-level def to
;;; the set of vars referenced inside that def (excluding itself).
(defn- var-refs [forms]
  (loop [current-ns  *ns*
         refs-by-var {}
         ;; `seq` so an empty collection ends the loop right away; an
         ;; empty vector is truthy and used to be analyzed as a lone nil
         remaining   (seq forms)]
    (if-let [[form & more] remaining]
      (let [node    (binding [*ns* current-ns]
                      (ana.jvm/analyze form))
            new-var (top-level-def node)
            new-ns  (nsdecl? node)]
        (when new-ns
          (ns-loaded! new-ns))
        (recur
         ;; *ns* must hold a Namespace object, not a bare symbol;
         ;; the namespace exists by now because of ns-loaded! above
         (if new-ns (the-ns new-ns) current-ns)
         (if new-var
           (assoc refs-by-var new-var (disj (var-references node) new-var))
           refs-by-var)
         more))
      refs-by-var)))
;;; ======================================== |
;;; Analyzing the results |
;;; The name (a symbol) of the namespace that var `v` belongs to.
(defn- var-ns [v]
  (-> v .ns .name))
;;; True when the printed name of `ns` begins with the string `s`.
(defn- ns-starts-with? [s ns]
  (let [ns-name-str (str ns)]
    (str/starts-with? ns-name-str s)))
;;; Namespace-name prefixes that are treated as third-party code.
;;; Matching is a plain string-prefix test (see `ns-starts-with?`).
;;; Customize this list to the specific project.
(def ^:private external-namespaces
  ["clojure" "medley" "gloss" "camel-snake-kebab" "automat" "plumbing"
   "schema." "com.stuartsierra" "cemerick" "byte-streams"
   "ring" "bidi" "compojure" "buddy" "endophile" "cheshire" "honeysql"
   "pantomime" "modular" "org.httpkit"
   "clj-time" "redsys-clj" "clj-uuid" "postal" "selmer" "zeromq"])
;;; Predicate on a namespace name: truthy when the name starts with
;;; any of the prefixes in `external-namespaces`.
(def ^:private external?
  (apply some-fn
         (for [prefix external-namespaces]
           (fn [candidate] (ns-starts-with? prefix candidate)))))
;;; Collect the set of namespace names that take part in at least one
;;; non-external dependency: for every var -> var reference whose
;;; target namespace is not external, both the source and the target
;;; namespace are included.
(defn- all-ns-in-depmaps [depmaps]
  (into #{}
        (for [depmap             depmaps
              [from-var to-vars] depmap
              to-var             to-vars
              :let               [to-ns (var-ns to-var)]
              :when              (not (external? to-ns))
              ns-sym             [(var-ns from-var) to-ns]]
          ns-sym)))
;;; Build the all-zero baseline for `coupling-strength`: a map with a
;;; [from-ns to-ns] key for every ordered pair of known namespaces,
;;; each mapped to 0. Yields nil when there are no known namespaces.
(defn- empty-coupling-list [depmaps]
  (let [nses (all-ns-in-depmaps depmaps)]
    (apply merge
           (for [from-ns nses
                 to-ns   nses]
             (hash-map [from-ns to-ns] 0)))))
;;; Flatten a map of maps into a vector of maps: each inner map is
;;; emitted with its outer key added under :name.
(defn unnest [m]
  (into []
        (map (fn [[outer-key inner]]
               (assoc inner :name outer-key)))
        m))
;; Receives a list of maps from var to set of vars.
;; Emits a map of {[from-ns to-ns] strength}, where strength is the
;; number of var references from from-ns into to-ns. References into
;; external namespaces are ignored, and every pair of known
;; namespaces is present (with 0 when there is no reference).
(defn coupling-strength [depmaps]
  (let [baseline (empty-coupling-list depmaps)
        edges    (for [depmap             depmaps
                       [from-var to-vars] depmap
                       [to-ns strength]   (count-by var-ns to-vars)
                       :when              (not (external? to-ns))]
                   {[(var-ns from-var) to-ns] strength})]
    (apply merge-with + baseline edges)))
;; Receives a list of maps from var to set of vars.
;; Emits a vector of rows {:name from-ns, ns1 int, ns2 int, ns3 int,,,}
;; with one row per known namespace. :name is the "from" namespace and
;; every other key is a "to" namespace whose value is the strength of
;; coupling from "from" to it. References into external namespaces are
;; ignored.
(defn coupling-matrix [depmaps]
  (let [nses        (all-ns-in-depmaps depmaps)
        zero-row    (zipmap nses (repeat 0))
        zero-matrix (zipmap nses (repeat zero-row))
        edges       (for [depmap             depmaps
                          [from-var to-vars] depmap
                          [to-ns strength]   (count-by var-ns to-vars)
                          :when              (not (external? to-ns))]
                      [(var-ns from-var) to-ns strength])
        add-edge    (fn [matrix [from-ns to-ns strength]]
                      (let [row (get matrix from-ns zero-row)]
                        (assoc matrix from-ns (update row to-ns + strength))))]
    (unnest (reduce add-edge zero-matrix edges))))
;;; ======================================== |
;;; Deal with source files |
;;; Read every top-level form from `file` (anything `io/file` accepts)
;;; and return them, in order, as a vector.
;;;
;;; - The reader is closed when reading finishes or fails.
;;; - End of input is detected with a unique sentinel, so top-level
;;;   `nil` and `false` forms are returned instead of ending the read.
;;; - Reader conditionals are allowed, so .cljc files can be read.
;;; - If the reader throws part-way through, a warning goes to *err*
;;;   and the forms read so far are returned.
(defn read-all-forms
  [file]
  (with-open [rdr (PushbackReader. (io/reader (io/file file)))]
    (let [eof       (Object.)
          read-opts {:eof eof :read-cond :allow}]
      (loop [forms []]
        (let [form (try
                     (read read-opts rdr)
                     (catch Exception e
                       ;; *out* may be redirected to a report file by the
                       ;; caller, so diagnostics go to *err*
                       (binding [*out* *err*]
                         (println "WARNING: stopped reading" (str file)
                                  "after" (count forms) "form(s):"
                                  (.getMessage e)))
                       eof))]
          (if (identical? eof form)
            forms
            (recur (conj forms form))))))))
;;; True when `f` (a java.io.File) is a regular file whose name ends
;;; in .clj or .cljc. Directories are rejected even if their name has
;;; one of those suffixes, since `file-seq` yields directories too and
;;; they cannot be opened for reading.
(defn- clojure? [^java.io.File f]
  (let [file-name (.getName f)]
    (and (.isFile f)
         (or (str/ends-with? file-name ".clj")
             (str/ends-with? file-name ".cljc")))))
;;; Lazily list every file under `srcdir` (recursively, via
;;; `file-seq`) that `clojure?` accepts.
(defn- srcs-in-path [srcdir]
  (let [root (io/file srcdir)]
    (filter clojure? (file-seq root))))
;;; ======================================== |
;;; Driver functions |
;;; For each Clojure source file under `srcdir`, read its forms and
;;; compute its var-reference map (see `var-refs`). Returns a lazy
;;; seq with one map per file; each map carries the file's path as
;;; metadata under :src.
(defn all-var-refs-in-path
  [srcdir]
  (map (fn [src]
         (let [refs (var-refs (read-all-forms src))]
           (with-meta refs {:src (.getPath src)})))
       (srcs-in-path srcdir)))
;;; Print `rows` (a seq of maps) to *out* as comma-separated lines.
;;; The first line is the header built from `ks`; each later line
;;; holds the row's values looked up in the order of `ks`. No quoting
;;; or escaping is done.
(defn- print-csv
  [ks rows]
  (letfn [(emit-line [cells]
            (println (str/join "," cells)))]
    (emit-line ks)
    (run! (fn [row]
            (emit-line (map #(get row %) ks)))
          rows)))
;;; Analyze every Clojure source file under `srcdir` and write the
;;; namespace coupling matrix to `outfile` as CSV: one row per "from"
;;; namespace (sorted by name), one column per "to" namespace (sorted).
;;;
;;; The analysis finishes before the output file is opened and before
;;; *out* is rebound. That way a failed analysis does not leave a
;;; truncated file behind, and anything printed while namespaces are
;;; being loaded cannot end up inside the CSV.
(defn analyze-to-csv
  [srcdir outfile]
  (let [mtx  (->> (all-var-refs-in-path srcdir)
                  coupling-matrix
                  (sort-by :name))
        cols (sort (remove #{:name} (keys (first mtx))))]
    (with-open [writer (java.io.PrintWriter. (io/file outfile))]
      (binding [*print-length* nil
                *out* writer]
        (print-csv (list* :name cols) mtx)))))