Skip to content

Instantly share code, notes, and snippets.

@mmzsource
Last active May 6, 2021 23:24
Show Gist options
  • Save mmzsource/2d0fb494a9961e70ec46a650214c5cfb to your computer and use it in GitHub Desktop.
Save mmzsource/2d0fb494a9961e70ec46a650214c5cfb to your computer and use it in GitHub Desktop.
#!/usr/bin/env bb
;;
;; This is a babashka script to calculate the duration a github issue spent in
;; each github project board column. It will output the results in a few
;; different views, where this view is maybe most interesting:
;;
;; | :iss-title | :iss-html | Total | To do | In Progress | Done |
;; |--------------------+--------------+-------+-------+-------------+------+
;; | your issue 1 title | issue 1 html | 100 | 25 | 50 | 25 |
;; | your issue 2 title | issue 2 html | 50 | 40 | 10 | |
;; | etc ... | | | | | |
;; |--------------------+--------------+-------+-------+-------------+------|
;;
;; (the column names are just an example and will be retrieved from the github API)
;; This particular view is also printed as a csv.
;;
;; This script has 3 dependencies:
;;
;; 1. Babashka installed: https://github.com/babashka/babashka#installation
;; (and bb on PATH)
;; 2. GITHUB_TOKEN with 'read:org' and 'repo' permissions available as
;; environment variable, 'authorized for SSO', see:
;; https://docs.github.com/articles/authorizing-a-personal-access-token-for-use-with-a-saml-single-sign-on-organization/
;; 3. your org name and html url to your project pasted in this script
;;
;; Run like this: bb bb-github-issues.clj OR like this: ./bb-github-issues.clj
;;
;; Tested on babashka v0.3.5
(require
'[babashka.curl :as curl]
'[cheshire.core :as cheshire]
'[clojure.string :as str])
;; =============
;; = API CALLS =
;; =============
;; Some additional information about how this script roughly traverses the
;; github resource model:
;;
;; | from | via | to | name in script |
;; |---------------|--------------|----------------|----------------|
;; | org-projects | :html-url | project-api-id | project-id |
;; | project-id | | columns | raw-columns |
;; | column | :cards_url | cards | raw-cards |
;; | card | :content_url | issue | raw-issue |
;; | issue | :events_url | events | raw-events |
;; |---------------|--------------|----------------|----------------|
;;
(def token (System/getenv "GITHUB_TOKEN"))
(def endpoint "https://api.github.com")
(def org "philips-internal")
(def project-html-url "https://github.com/orgs/philips-internal/projects/38")
(defn path [& strs]
(str/join "/" strs))
(defn with-gh-headers [m]
(update m :headers assoc
"Authorization" (str "token " token)
"Accept" "application/vnd.github.v3+json"))
(defn with-gh-preview-headers [m]
(update m :headers assoc
"Authorization" (str "token " token)
"Accept" "application/vnd.github.inertia-preview+json"))
(defn with-gh-starfox-headers [m]
(update m :headers assoc
"Authorization" (str "token " token)
"Accept" "application/vnd.github.starfox-preview+json"))
;; TODO: add paging! https://docs.github.com/en/rest/overview/resources-in-the-rest-api#pagination
(defn get-github-data! [endpoint headers]
(-> (curl/get endpoint headers)
:body
(cheshire/parse-string true)))
;;
;; Get global github project id based on org & project-html-url
;;
(defn list-raw-org-projects [org]
(get-github-data!
(path endpoint "orgs" org "projects?per_page=100")
(with-gh-preview-headers {})))
(defn your-dev-project? [project-json]
(= (:html_url project-json) project-html-url))
(defn your-project-api-id [org-projects]
(-> (filter your-dev-project? org-projects)
first
:id))
;;
;; List columns of project based on global github project-id
;;
(defn columns-endpoint [project-id]
(path endpoint "projects" project-id "columns"))
;;
;; List raw columns based on project id
;; example column url: "https://api.github.com/projects/columns/12685794"
;;
(defn list-raw-columns [project-id]
(get-github-data! (columns-endpoint project-id) (with-gh-preview-headers {})))
;;
;; List cards in a column based on cards_url (also includes 'notes')
;; example cards_url: "https://api.github.com/projects/columns/12685794/cards"
;;
(defn list-raw-cards [cards-url]
(get-github-data! cards-url (with-gh-preview-headers {})))
;;
;; List issue-urls based on a collection of cards
;;
(defn list-issue-urls [raw-cards]
(let [issues (map :content_url raw-cards)]
(remove nil? issues)))
;;
;; Get raw issue based on an issue URL
;; example issue URL: "https://api.github.com/repos/philips-internal/barista-sdr/issues/36"
;;
(defn get-raw-issue [issue-url]
(get-github-data! issue-url (with-gh-headers {})))
;;
;; list events based on an events URL
;; example events URL "https://api.github.com/repos/philips-internal/barista-sdr/issues/36/events"
;;
(defn list-raw-events [events-url]
(get-github-data! events-url (with-gh-starfox-headers {})))
;;
;; get event based on event url
;; example event url: "https://api.github.com/repos/philips-internal/barista-sdr/issues/events/4506477796"
;;
(defn get-raw-event [event-url]
(get-github-data! event-url (with-gh-starfox-headers {})))
;; =======================
;; = DATA TRANSFORMATION =
;; =======================
;;
;; takes a raw-column and strips it down to a column with the relevant info
;; for the task at hand
;;
(defn make-col [{:keys [name id url cards_url]}]
{:col-name name :col-id id :col-url url :col-cards cards_url :col-issues []})
;;
;; takes a raw-issue and strips it down to an issue with the relevant info for
;; the task at hand
;;
(defn make-issue [{:keys [title id html_url events_url]}]
{:iss-title title :iss-id id :iss-html html_url :iss-events-url events_url :iss-events []})
;;
;; takes a raw-event and strips it down to an event with the relevant info for
;; the task at hand
;;
;; an event may or may not include a 'project card' and that project card may or
;; may not include a 'previous column name' field. this function handles all
;; those cases.
;;
(defn make-event [{:keys [event id created_at] :as ev}]
(let [evt {:evt-event event :evt-id id :evt-created-at created_at}]
(if (contains? ev :project_card)
(let [col-name (get-in ev [:project_card :column_name])
prj-id (get-in ev [:project_card :project_id])]
(if-let [pcn (get-in ev [:project_card :previous_column_name])]
(assoc evt :evt-proj-id prj-id :evt-col col-name :evt-prev-col pcn)
(assoc evt :evt-proj-id prj-id :evt-col col-name :evt-prev-col "-")))
(assoc evt :evt-proj-id "-" :evt-col "-" :evt-prev-col "-"))))
;;
;; github event types that are interesting for this script:
;;
(def add-events #{"added_to_project" "converted_note_to_issue"})
(def move-events #{"moved_columns_in_project"})
(def remove-events #{"removed_from_project" "dummy_remove_event"})
(def project-events (clojure.set/union add-events move-events remove-events))
;;
;; takes a list of events and filters out the events that are relevant for the
;; task at hand. Also filters out events that are related to other projects.
;;
(defn filter-interesting-events [project-id events]
(->> events
(filterv #(project-events (:evt-event %)))
(filterv #(= project-id (:evt-proj-id %)))))
;;
;; takes a map of relevant column key value pairs and an issue and merges every
;; event in the issue with the relevant issue - and column key value pairs
;;
(defn merge-events [col-kv issue]
(let [issue-kv (dissoc issue :iss-events)]
(mapv #(merge col-kv issue-kv %) (:iss-events issue))))
;;
;; takes a nested column datastructure and returns a flat collection of events
;; that contain the relevant parent objects (issue & column) information
;;
(defn flatten-column [column]
(let [col-kv (dissoc column :col-issues)]
(mapcat #(merge-events col-kv %) (:col-issues column))))
;; ==================================
;; = DETERMINE TIME SPENT IN COLUMS =
;; ==================================
;; A formatter aligned with the way github sends time information
(def datetime-formatter (java.time.format.DateTimeFormatter/ofPattern "yyyy-MM-dd'T'HH:mm:ss'Z'"))
;;
;; retrieves the current time as a well formatted string. needed for issues
;; that are not removed from the project board yet and don't have an end
;; date-time. to calculate the time spent in the last column, we'll have to
;; create our own 'dummy remove event' with a timestamp of 'now' (when the
;; script is running.)
;;
;; java interop ... ¯\_(ツ)_/¯
;;
(defn now-str []
(.format
(.withZoneSameInstant
(java.time.ZonedDateTime/now)
(java.time.ZoneId/of "GMT"))
datetime-formatter))
;;
;; ensure a list of events has a remove event
;;
(defn ensure-remove-event [events]
(when (not (remove-events (last events)))
(conj events {:evt-event "dummy_remove_event" :evt-created-at (now-str) :evt-col (:evt-col (last events))})))
;;
;; this algorithm works by comparing 2 consecutive events. this function takes
;; a collection of events and returns a collection with pairs of events:
;; ([ev-1 ev-2] [ev-2 ev-3] [ev-3 ev-4]) etc. It's just a helper function to
;; make the task of comparing easier
;;
(defn prepare [events]
(loop [remain events
result []]
(if (< (count remain) 2)
result
(recur (rest remain) (conj result [(first remain) (second remain)])))))
;;
;; takes 2 event times as string and calculates the duration between them
;;
(defn duration-in-hours [event-time1 event-time2]
(let [t1 (java.time.LocalDateTime/parse event-time1 datetime-formatter)
t2 (java.time.LocalDateTime/parse event-time2 datetime-formatter)]
(.toHours (java.time.Duration/between t1 t2))))
;;
;; takes a collection of pairs of consecutive events and builds a collection of
;; durations for each pair.
;;
;; Possibilities I've encountered during working with the API:
;;
;; | event 1 | event 2 | what happened? | time count? | reason |
;; |-----------+-----------+-------------------------------------------------------------------+------------------+---------------------------------|
;; | add a | add a | col a deleted, recreated with same name, card added anew in col a | in a = evt2-evt1 | |
;; | move a->b | add b | col b deleted, recreated with same name, card added anew in col b | in b = evt2-evt1 | |
;; | add a | add b | col a deleted, card added anew in col b | - | no clue when col a was deleted. |
;; | move a->b | add x | col b deleted, card added anew to x | - | no clue when col b was deleted |
;; | removed c | add x | card removed from project, added anew in a | - | |
;; | add a | move a->c | card moved from a->c | in a = evt2-evt1 | |
;; | add a | move b->c | col a renamed to b, card moved from b->c | in b = evt2-evt1 | register on latest name (b) |
;; | move a->b | move b->c | card moved from b->c | in b = evt2-evt1 | |
;; | move a->b | move x->c | col a renamed to x, card moved from x->c | in x = evt2-evt1 | register on latest name (x) |
;; | add a | removed a | card removed from project | in a = evt2-evt1 | |
;; | add a | removed b | col a renamed to b, card removed from project | in b = evt2-evt1 | register on latest name (b) |
;; | move a->b | removed b | card removed from project | in b = evt2-evt1 | |
;; | move a->b | removed c | col b renamed to c, card removed from project | in c = evt2-evt1 | register on latest name (c) |
;; |-----------+-----------+-------------------------------------------------------------------+------------------+---------------------------------|
;;
;; In essence these cases are covered:
;;
;; - When 2nd event of pair is 'add' do nothing; 'reset timer'
;; (corner case that might be covered later: current col name is equal to col
;; name of 1st event; then register on col ... in this case, the )
;; - When 2nd event is 'move', register on prev_col of 2nd event
;; - when 2nd event is 'removed', register on col of 2nd event
(defn time-spent [prepared-events]
(reduce
(fn [durations [event1 event2]]
(let [e2 (:evt-event event2)
t1 (:evt-created-at event1)
t2 (:evt-created-at event2)
c2 (:evt-col event2)
p2 (:evt-prev-col event2)]
(cond
(add-events e2) (conj durations {c2 0})
(move-events e2) (conj durations {p2 (duration-in-hours t1 t2)})
(remove-events e2) (conj durations {c2 (duration-in-hours t1 t2)})
:else (conj durations {"unknown event" 0}))))
[]
prepared-events))
;;
;; during its lifetime, an issue can visit the same column multiple times.
;; this function takes a collection of durations and sums the ones that are in
;; the same column. it returns a map where the keys are column names and the
;; values are the total time spent in that column. in addition it adds a
;; 'Total' amount of hours spent. (edge case I didn't take into account: maybe
;; someone decides to give their column name the name 'Total'. Then the duration
;; for this total column will be overridden. ¯\_(ツ)_/¯ )
;;
(defn sum-time-spent [durations]
(let [m (reduce
(fn [acc m]
(let [col-name (first (keys m))
time (first (vals m))]
(if (contains? acc col-name)
(update acc col-name + time)
(assoc acc col-name time))))
{}
durations)]
(assoc m "Total" (apply + (vals m)))))
;;
;; takes an issue and a map of durations and adds issue information
;;
(defn add-issue-info [{:keys [iss-title iss-html]} durations]
(assoc durations :iss-title iss-title :iss-html iss-html))
;;
;; takes all events from one issue and returns a map that contains the durations
;; this issue visited the project board columns it visited
;;
(defn time-spent-per-column [issue-events]
(->> issue-events
(ensure-remove-event)
(prepare)
(time-spent)
(sum-time-spent)))
;;
;; checks if a collection of events contains at least one 'add-event'
;; (a 'transferred' issue (transferred between repositories) can contain NO add-events)
;;
(defn contains-add-event? [issue-events]
(> (count (clojure.set/intersection add-events (into #{} (map :evt-event issue-events)))) 0))
;;
;; Adds issue information to 'time spent per column' map
;;
(defn time-spent-with-issue-info [issue-events]
(when (contains-add-event? issue-events)
(let [{:keys [iss-title iss-html]} (first issue-events)
row (time-spent-per-column issue-events)]
(assoc row :iss-title iss-title :iss-html iss-html))))
(defn in?
"true if collection contains element"
[coll elm]
(some #(= elm %) coll))
;;
;; The issues may have spent time in columns that no longer exist either because
;; the column was deleted or renamed.
;; This function extracts the column names that are no longer existing
;;
(defn extract-additional-col-names [current all]
(filter #(not (in? current %)) all))
;; ==========
;; = OUTPUT =
;; ==========
(defn print-events [events]
(clojure.pprint/print-table
[:evt-created-at :evt-event :evt-prev-col :evt-col]
events))
(defn print-issue [{:keys [iss-title iss-html iss-events]}]
(println)
(println (str "ISSUE: " iss-title))
(println (str "HTML: " iss-html))
(println "EVENTS:")
(print-events iss-events))
(defn print-issues-per-column [column]
(println)
(println (str "===== " (str/upper-case (:col-name column)) " ====="))
(run! print-issue (:col-issues column)))
(defn print-all-in-one-table [columns]
(let [all-in-one (mapcat flatten-column columns)]
(clojure.pprint/print-table
[:col-name :col-url :iss-title :iss-html :evt-created-at :evt-event :evt-prev-col :evt-col]
all-in-one)))
(defn prepare-col-time-per-issue [columns]
(let [issues (mapcat :col-issues columns)
iss-events (for [{:keys [iss-events iss-title iss-html]} issues]
(into [] (for [event iss-events]
(merge event {:iss-title iss-title :iss-html iss-html}))))
rows (remove nil? (map time-spent-with-issue-info iss-events))
current-col-names (conj (map :col-name columns) "Total" :iss-html :iss-title)
all-col-names (into #{} (mapcat keys rows))
diff-col-names (extract-additional-col-names current-col-names all-col-names)]
{:headers (concat current-col-names diff-col-names)
:rows rows}))
(defn print-column-time-per-issue [columns]
(let [m (prepare-col-time-per-issue columns)]
(clojure.pprint/print-table (:headers m) (:rows m))))
(defn prepare-csv [columns]
(let [csv-map (prepare-col-time-per-issue columns)]
{:headers (map name (:headers csv-map))
:rows (mapv #(mapv % (:headers csv-map)) (:rows csv-map))}))
(defn write-csv [columns]
(let [csv-map (prepare-csv columns)]
(clojure.data.csv/write-csv *out* (cons (:headers csv-map) (:rows csv-map)))))
;; ==============
;; = SUPER GLUE =
;; ==============
(defn run []
(let [project-id (your-project-api-id (list-raw-org-projects org))
raw-columns (list-raw-columns project-id)]
(for [col raw-columns]
(let [column (make-col col)
cards (list-raw-cards (:col-cards column))
iss-urls (list-issue-urls cards)
raw-issues (mapv get-raw-issue iss-urls)
issues (mapv make-issue raw-issues)
issues-with-events (for [issue issues]
(let [events-list (list-raw-events (:iss-events-url issue))
events (map make-event events-list)
filtered (filter-interesting-events project-id events)]
(assoc issue :iss-events filtered)))]
(assoc column :col-issues issues-with-events)))))
(def columns (run))
(run! print-issues-per-column columns)
(print-all-in-one-table columns)
(print-column-time-per-issue columns)
(println)
(write-csv columns)
;; Maybe later:
;; - request the durations 'as-of a certain date'?
;; - include paging when calling the GH API
;; - the composition in the run method might be 'normalized' because the first
;; thing the print functions do is unpack this entity again ...
;; - the 'knowledge' about the internal structure of the entities is spread
;; over the code. I probably should hide that ..
;; - maybe add option to provide input params via cmdline
;; - explore the github graphql API?
;; - explore timeline API?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment