Skip to content

Instantly share code, notes, and snippets.

@zachcp
Created December 21, 2019 17:54
Show Gist options
  • Save zachcp/4726e1ff5acf3e2b66b5fbe39d273127 to your computer and use it in GitHub Desktop.
Save zachcp/4726e1ff5acf3e2b66b5fbe39d273127 to your computer and use it in GitHub Desktop.
np-atlas
(require '[backtick :refer [template syntax-quote]])
(require '[clj-http.client :as client])
(require '[clojure.data.csv :as csv])
(require '[clojure.java.io :as io])
(require '[clojure.string :as string])
(require '[mundaneum.properties :refer [properties]])
(require '[mundaneum.query :refer [describe entity label property query stringify-query *default-language* clojurize-results Clojurizable]])
;; data loading functions
(defn csv-data->maps
  "Turn parsed CSV rows into a lazy sequence of maps keyed by the header row.

  `csv-data` is a sequence of rows (each a sequence of strings) whose first
  row holds the column names. Every space in a column name is replaced by a
  hyphen and the result is turned into a keyword, so \"Compound Name\"
  becomes :Compound-Name. Returns an empty sequence when there are no data
  rows."
  [csv-data]
  (let [header-keys (map (fn [column]
                           ;; keywords with spaces are awkward to destructure,
                           ;; so normalise them before keywordizing
                           (keyword (string/replace column " " "-")))
                         (first csv-data))]
    (map (fn [row] (zipmap header-keys row))
         (rest csv-data))))
(defn load-tsv
  "Read the tab-separated file `file` and return its rows as a sequence of
  maps keyed by the header row (built with `csv-data->maps`).

  `file` is passed to `io/reader`. The reader is closed before this function
  returns, so the returned rows are fully realized in memory."
  [file]
  (with-open [reader (io/reader file)]
    ;; Both csv/read-csv and csv-data->maps produce lazy sequences; doall
    ;; forces every row while the reader is still open. Without it, rows
    ;; consumed after with-open exits would read from a closed stream.
    (doall (csv-data->maps (csv/read-csv reader :separator \tab)))))
(defn basic-natural-product-atlas-entity
  "A simple template that builds the entity data for one compound as a map
  with :labels, :descriptions and :claims, compatible (once serialized to
  JSON) with the excellent wikibase-edit library:
  https://github.com/maxlath/wikibase-edit

  inchikey    - value of the InChIKey claim (P235)
  pubchemid   - value of the PubChem CID claim (P662); also cited in every
                reference
  title       - English label
  description - English description

  Wikidata ids used in the template:
    Q278487    PubChem
    Q43460564  chemical entity
    Q75055586  Natural Product Atlas
    Q1860      value of P407 (presumably English - confirm)
    P31   instance of
    P235  InChIKey
    P248  stated in
    P407  language of work or name
    P662  PubChem CID"
  [inchikey pubchemid title description]
  (let [;; Every reference has the same shape and differs only in its source.
        reference-from (fn [source]
                         {:P248 source :P662 pubchemid :P407 :Q1860})
        pubchem-ref (reference-from :Q278487)
        np-atlas-ref (reference-from :Q75055586)]
    (template
     {:labels {:en ~title}
      :descriptions {:en ~description}
      :claims
      {:P31 [{:value :Q43460564
              :references [~pubchem-ref]}]
       :P662 [{:value ~pubchemid
               :references [~pubchem-ref]}]
       ;; the InChIKey is attributed to both PubChem and the NP Atlas
       :P235 [{:value ~inchikey
               :references [~pubchem-ref ~np-atlas-ref]}]}})))
(defn get-compounds-name
  "Build a query selecting ?compoundID, ?compoundIDLabel and ?InChIKey, with
  `items` spliced in as the :values for ?InChIKey, and pass it to `query`.
  Returns whatever `query` returns."
  [items]
  (-> (template
       [:select ?compoundID ?compoundIDLabel ?InChIKey
        :where [[?compoundID (wdt :InChIKey) ?InChIKey]
                :values ?InChIKey ~items]])
      query))
(defn add-inchi
  "Add an InChIKey (P235) claim with value `inchikey` to `entity` by calling
  `wd` with the \"add-claim\" command. Returns whatever the `wd` call returns."
  [entity inchikey]
  ;; NOTE(review): `programs` is not defined in the visible part of this file;
  ;; presumably it makes the `wd` command-line tool callable as a function -
  ;; confirm where it is required.
  (programs wd)
  (let [inchikey-property "P235"]
    (wd "add-claim" entity inchikey-property inchikey)))
(defn create-new-entity
  "Create a new entity for the compound identified by `inchikey`, labelled
  `name`.

  Looks up the compound with `get-chemdata`, builds the entity data with
  `basic-natural-product-atlas-entity` (falling back to the description
  \"Bioactive Natural Product\" when the lookup has none), prints the
  equivalent `wd create-entity` command line, then calls `wd` with the
  JSON-encoded data. Returns whatever the `wd` call returns."
  [inchikey name]
  (let [{:keys [Description CID]} (get-chemdata inchikey)
        summary (or Description "Bioactive Natural Product")
        entity-data (basic-natural-product-atlas-entity
                     inchikey
                     (str CID)
                     name
                     (shorten-sentence summary))]
    ;; NOTE(review): `programs`, `get-chemdata`, `shorten-sentence` and the
    ;; `json` alias are not defined in the visible part of this file - confirm
    ;; where they come from.
    (programs wd)
    (let [entity-json (json/write-str entity-data)]
      ;; echo the command so a failed call can be replayed by hand
      (println (str "wd" " create-entity '" entity-json "'"))
      (wd "create-entity" entity-json))))
; note: np_atlas_2019_08.tsv is downloadable from the NP-atlas website
(def np-atlas (load-tsv "resources/data/np_atlas_2019_08.tsv"))

; For each selected row:
; 1. check to see if an entity exists for a given INCHIKEY
; 2. if it doesn't exist, create the full entry
; 3. if the record exists, make sure it has the keys of interest (in this case INCHI)
; 4. if it doesn't have the properties of interest, update them. (not implemented)
;
; `doseq` is used instead of `for`: the body is run purely for its side
; effects, and a lazy `for` is never realized when this file is loaded rather
; than evaluated at a REPL. The form therefore returns nil.
;
; NOTE(review): the range starts at 1, so row 0 of np-atlas (the first data
; row, since the header is already consumed by load-tsv) is skipped - confirm
; that this is intended.
(doseq [i (range 1 100 1)] ; note: I was iterating through these one-by-one
  (let [;; The two identifiers I used are the names and InChI keys.
        ;; In retrospect the names are unreliable as identifiers and we should
        ;; only be using PubChem CID + PubChem SID + InChI, see
        ;; https://pubchem.ncbi.nlm.nih.gov/source/The%20Natural%20Products%20Atlas
        {:keys [Names InChIKey]} (nth np-atlas i)
        wikidata-ids (get-compounds-name [InChIKey])
        ent (entity Names)]
    (cond
      ;; an entity with this InChIKey already exists: nothing to do
      (seq wikidata-ids)
      (println (str i " Already Has and ID: " InChIKey, " ", Names))

      ;; no match by InChIKey and none by name: create the full entry
      (nil? ent)
      (try
        (println i InChIKey Names)
        (create-new-entity InChIKey Names)
        (catch Exception e (println " could not create entity")))

      ;; InChIKeys added here: there is a WD entity that matches by name but
      ;; none that carries this InChIKey, so add the claim to it.
      :else
      (try
        (println (str i " Has Entity but not INCHI nil: ", Names, " ", ent))
        (add-inchi ent InChIKey)
        (catch Exception e (println "couldnt add InChiKey"))))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment