Created
December 21, 2019 17:54
-
-
Save zachcp/4726e1ff5acf3e2b66b5fbe39d273127 to your computer and use it in GitHub Desktop.
np-atlas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(require '[backtick :refer [template syntax-quote]])
(require '[clj-http.client :as client])
(require '[clojure.data.csv :as csv])
(require '[clojure.java.io :as io])
(require '[clojure.string])
(require '[mundaneum.properties :refer [properties]])
(require '[mundaneum.query :refer [describe entity label property query stringify-query *default-language* clojurize-results Clojurizable]])
;; data loading functions | |
(defn csv-data->maps
  "Turns parsed CSV rows into a lazy sequence of maps, one per data row.

  The first row of `csv-data` is treated as the header. Each header cell has
  its spaces replaced with hyphens and is then converted to a keyword; those
  keywords are the keys of every returned map. Rows shorter than the header
  simply produce maps with fewer keys (`zipmap` stops at the shorter input)."
  [csv-data]
  (let [header-row  (first csv-data)
        ;; "Column Name" -> :Column-Name
        column-keys (map (fn [column-name]
                           (keyword (clojure.string/replace column-name " " "-")))
                         header-row)]
    (map (fn [row] (zipmap column-keys row))
         (rest csv-data))))
(defn load-tsv
  "Reads the tab-separated `file` and returns its data rows as maps, keyed by
  the header row (see `csv-data->maps`).

  `file` is passed straight to `io/reader`. The reader is opened with
  `with-open` so it is always closed, even if parsing throws. Because
  `csv/read-csv` and `csv-data->maps` are both lazy, the result is forced with
  `doall` while the reader is still open; the returned sequence is therefore
  fully realized."
  [file]
  (with-open [reader (io/reader file)]
    (doall (csv-data->maps (csv/read-csv reader :separator \tab)))))
(defn basic-natural-product-atlas-entity
  "A simple template to create JSON-compatible data for the
  excellent wikibase-edit library:
  https://github.com/maxlath/wikibase-edit

  Returns a map with an English label (`title`), an English description
  (`description`) and three claims:
    :P31  (instance of) -> :Q43460564 (chemical entity)
    :P662 (PubChem CID) -> `pubchemid`
    :P235 (InChIKey)    -> `inchikey`
  Every claim is referenced as stated in (:P248) PubChem (:Q278487); the
  InChIKey claim has a second reference stated in Natural Product Atlas
  (:Q75055586). Each reference also carries the PubChem CID (:P662) and
  :P407 (language of work or name) set to :Q1860."
  [inchikey pubchemid title description]
  ;(entity "PubChem") ; Q278487
  ;(entity "chemical entity") ; Q43460564
  ;(entity "Natural Product Atlas") ;Q75055586
  ;(property :PubChem-CID) ; P662
  ;(property :InChIKey) ; P235
  ;(property :instance-of) ; P31
  ;(property :language-of-work-or-name) ; P407
  ;(property :stated-in) ; P248
  (let [;; Builds the reference map shared by all claims; only the
        ;; "stated in" source differs between references.
        reference-to (fn [source]
                       {:P248 source :P662 pubchemid :P407 :Q1860})
        pubchem-ref  (reference-to :Q278487)
        np-atlas-ref (reference-to :Q75055586)]
    {:labels       {:en title}
     :descriptions {:en description}
     :claims
     {:P31  [{:value      :Q43460564
              :references [pubchem-ref]}]
      :P662 [{:value      pubchemid
              :references [pubchem-ref]}]
      :P235 [{:value      inchikey
              :references [pubchem-ref np-atlas-ref]}]}}))
(defn get-compounds-name
  "Queries for items whose InChIKey property matches one of `items`.

  Builds a query form selecting ?compoundID, ?compoundIDLabel and ?InChIKey,
  splices `items` into the :values clause, and returns whatever `query`
  returns for that form."
  [items]
  (let [inchikey-lookup (template
                          [:select ?compoundID ?compoundIDLabel ?InChIKey
                           :where [[?compoundID (wdt :InChIKey) ?InChIKey]
                                   :values ?InChIKey ~items]])]
    (query inchikey-lookup)))
(defn add-inchi
  "Adds an InChIKey claim (property P235) with value `inchikey` to `entity`
  by invoking `wd add-claim`.

  NOTE(review): `programs` is not defined in the visible source; it
  presumably makes the external `wd` command callable as a function --
  confirm where it comes from."
  [entity inchikey]
  (programs wd)
  (let [inchikey-property "P235"]
    (wd "add-claim" entity inchikey-property inchikey)))
(defn create-new-entity
  "Creates a new entity for the compound `inchikey`, labelled `name`, by
  invoking `wd create-entity` with a JSON payload.

  Chemical data is looked up with `get-chemdata`; its :Description (or
  \"Bioactive Natural Product\" when absent) is passed through
  `shorten-sentence` and used as the description, and its :CID is stringified
  and used as the PubChem id. The equivalent command line is printed before
  the command is run. Returns whatever `wd` returns.

  NOTE(review): `get-chemdata`, `shorten-sentence`, `json` and `programs` are
  not defined in the visible source -- confirm they are in scope."
  [inchikey name]
  (let [{:keys [Description CID]} (get-chemdata inchikey)
        descrip   (or Description "Bioactive Natural Product")
        chem-data (basic-natural-product-atlas-entity
                    inchikey
                    (str CID)
                    name
                    (shorten-sentence descrip))
        ;; Serialize once; the same JSON is both logged and sent.
        payload   (json/write-str chem-data)]
    (programs wd)
    (println (str "wd" " create-entity '" payload "'"))
    (wd "create-entity" payload)))
; note: np_atlas_2019_08.tsv is downloadable from the NP-atlas website
(def np-atlas (load-tsv "resources/data/np_atlas_2019_08.tsv"))

; For each row:
; 1. check to see if an entity exists for a given InChIKey
; 2. if it doesn't exist, create the full entry
; 3. if the record exists, make sure it has the keys of interest (in this case InChIKey)
; 4. if it doesn't have the properties of interest, update them. (not implemented)
;
; note: I was iterating through these one-by-one, adjusting the range by hand.
;
; The two identifiers I used are the names and InChIKeys. In retrospect the
; names are unreliable as identifiers and we should only be using
; PubChem CID + PubChem SID + InChI; see
; https://pubchem.ncbi.nlm.nih.gov/source/The%20Natural%20Products%20Atlas
;
; `doseq` (not `for`) is used because the body is run purely for its side
; effects; a lazy `for` is never realized when this file is loaded as a script.
; Rows are paired with their index instead of calling `nth` repeatedly, so the
; loop simply stops early if the file has fewer rows than the range.
; NOTE(review): the range starts at 1, so row 0 is never processed -- confirm
; that skipping the first data row is intended.
(doseq [[i {:keys [Names InChIKey]}] (map vector (range 1 100) (drop 1 np-atlas))]
  (let [wikidata-ids (get-compounds-name [InChIKey])
        ent          (entity Names)]
    (cond
      ;; Some item already carries this InChIKey: nothing to do.
      (seq wikidata-ids)
      (println (str i " Already Has and ID: " InChIKey " " Names))

      ;; No item with this InChIKey and no entity with this name: create it.
      (nil? ent)
      (try
        (println i InChIKey Names)
        (create-new-entity InChIKey Names)
        (catch Exception e
          (println " could not create entity:" (.getMessage e))))

      ;; An entity with this name exists but no item carries this InChIKey:
      ;; add the InChIKey claim to the existing entity.
      :else
      (try
        (println (str i " Has Entity but not INCHI nil: " Names " " ent))
        (add-inchi ent InChIKey)
        (catch Exception e
          (println "couldnt add InChiKey:" (.getMessage e)))))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment