Last active
January 8, 2022 10:47
-
-
Save jackrusher/c41c979711378bfd5ecc4719849b7965 to your computer and use it in GitHub Desktop.
Quick example of using Clojure with the Wikidata API to grab some facts about the world.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;assumes that this package is available: | |
;;[org.wikidata.wdtk/wdtk-wikibaseapi "0.7.0"] | |
(import org.wikidata.wdtk.datamodel.interfaces.EntityDocument) | |
(import org.wikidata.wdtk.datamodel.interfaces.ItemDocument) | |
(import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue) | |
(import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher) | |
(import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException) | |
;; Trying to automatically build a list of the presidents of the US
;; using the Wikidata Java API wrapped with a bit of Clojure
;; Memoized lookup of an entity document by id (e.g. "Q76").
;; One fetcher is created when this var is defined; its filter is narrowed
;; to the "enwiki" site links and the "en" language before any lookup runs.
(def get-entity-document
  (let [fetcher (WikibaseDataFetcher/getWikidataDataFetcher)]
    ;; only the english wiki just now
    (doto (.getFilter fetcher)
      (.setSiteLinkFilter (java.util.Collections/singleton "enwiki"))
      (.setLanguageFilter (java.util.Collections/singleton "en")))
    ;; cache documents per id to improve performance and reduce network traffic
    (memoize (fn [entity-id] (.getEntityDocument fetcher entity-id)))))
(defn get-property-id
  "Returns the id of the property `thing` refers to, i.e. `.getId` applied
  to the result of `.getPropertyId` on `thing`."
  [thing]
  (-> thing .getPropertyId .getId))
(defn get-value-id
  "Returns the id of `thing`'s value, i.e. `.getId` applied to the result of
  `.getValue` on `thing`.

  Returns nil (instead of throwing a NullPointerException) when `thing` is
  nil or when `.getValue` yields nil, so predicates built on this function
  can safely skip entries that carry no value."
  [thing]
  ;; some-> stops at the first nil, so .getId is never invoked on nil
  (some-> thing .getValue .getId))
(defn find-claim
  "Returns the first qualifier of `statement`'s claim whose property id
  equals `id`, or nil when none of the qualifiers match."
  [id statement]
  (let [qualifiers (iterator-seq (.getAllQualifiers (.getClaim statement)))
        wanted?    (fn [qualifier] (= id (get-property-id qualifier)))]
    (first (filter wanted? qualifiers))))
(defn find-statement
  "Returns the first element of `statements` whose value id equals `id`,
  or nil when there is no such statement."
  [id statements]
  (->> statements
       (filter (fn [statement] (= id (get-value-id statement))))
       first))
(defn find-statement-group
  "Returns the statements of `document`'s statement group for property `id`
  (`.getStatements` on the result of `.findStatementGroup`).

  Returns nil (instead of throwing a NullPointerException) when `document`
  is nil or when no statement group is found for `id`, so that callers
  threading the result through `some->>` stop cleanly."
  [id document]
  ;; some-> stops at the first nil, so .getStatements is never invoked on nil
  (some-> document
          (.findStatementGroup id)
          .getStatements))
;; Look up the id of the entity Barack Obama replaced as POTUS,
;; naming each intermediate step.
(let [obama      (get-entity-document "Q76")          ;; Barack Obama
      positions  (find-statement-group "P39" obama)   ;; position(s) held
      presidency (find-statement "Q11696" positions)  ;; POTUS
      replaces   (find-claim "P1365" presidency)]     ;; "replaces"
  (get-value-id replaces))
;;=> "Q207" | |
;; oy, what's that then? | |
(defn get-label
  "Returns the text of the first label of `document`: the first entry of
  `.getLabels`, then `.getValue`, then `.getText`.

  Returns nil (instead of throwing a NullPointerException) when `document`
  is nil or has no labels."
  [document]
  ;; `first` on an empty label map yields nil, which ends the some-> chain
  (some-> document
          .getLabels
          first
          .getValue
          .getText))
;; fetch the document for Q207 and read its label
(-> "Q207" get-entity-document get-label)
;;=> "George W. Bush" | |
;; Ok, this looks like a job for recursion! | |
;; Starting from Q22686, collect {:id .. :name ..} for each office holder and
;; keep following the "P1365" qualifier on the "Q11696" position statement
;; until no predecessor id is found.
((fn walk [current-id collected]
   (let [document    (get-entity-document current-id)
         collected   (conj collected {:id current-id :name (get-label document)})
         ;; some->> stops as soon as one step of the chain returns nil
         predecessor (some->> document
                              (find-statement-group "P39")
                              (find-statement "Q11696")
                              (find-claim "P1365")
                              get-value-id)]
     (if predecessor
       (recur predecessor collected)
       collected)))
 "Q22686" [])
;;=> | |
[{:id "Q22686", :name "Donald Trump"} | |
{:id "Q76", :name "Barack Obama"} | |
{:id "Q207", :name "George W. Bush"} | |
{:id "Q1124", :name "Bill Clinton"} | |
{:id "Q23505", :name "George H. W. Bush"} | |
{:id "Q9960", :name "Ronald Reagan"} | |
{:id "Q23685", :name "Jimmy Carter"} | |
{:id "Q9582", :name "Gerald Ford"} | |
{:id "Q9588", :name "Richard Nixon"} | |
{:id "Q9640", :name "Lyndon B. Johnson"} | |
{:id "Q9696", :name "John F. Kennedy"}] | |
;; I'm not sure, but I suspect there were more presidents before JFK.
;; Unfortunately, it turns out the "replaces" (P1365) qualifier hasn't
;; been set on Kennedy's record, so the walk stops there. It has been
;; set for Eisenhower, but not for Truman, and so on.
;; Alternatively, one can bring in RDF4J (formerly Sesame) and query the SPARQL endpoint directly:
;; [org.eclipse.rdf4j/rdf4j-query "2.1.2"] | |
;; [org.eclipse.rdf4j/rdf4j-repository-api "2.1.2"] | |
;; [org.eclipse.rdf4j/rdf4j-runtime "2.1.2"] | |
;; Connection to the SPARQL endpoint at https://query.wikidata.org/sparql.
;; The repository is initialized and the connection obtained when this var
;; is defined.
(def wikidata
  (let [repository (org.eclipse.rdf4j.repository.sparql.SPARQLRepository.
                    "https://query.wikidata.org/sparql")]
    (.initialize repository)
    (.getConnection repository)))
(defn query
  "Prepares `sparql-string` as a SPARQL tuple query on `conn`, evaluates it
  and returns the evaluation passed through `QueryResults/asList`."
  [conn sparql-string]
  (let [tuple-query (.prepareTupleQuery conn
                                        org.eclipse.rdf4j.query.QueryLanguage/SPARQL
                                        sparql-string)]
    (-> tuple-query
        .evaluate
        org.eclipse.rdf4j.query.QueryResults/asList)))
;; NB a two-clause SPARQL query: | |
;; | |
;; items that have held the position of POTUS | |
;; ?item wdt:P39 wd:Q11696. | |
;; | |
;; items that are human beings | |
;; ?item wdt:P31 wd:Q5. | |
;; | |
;; without the second one, we get loads of fictional POTUS | |
;; Result rows of the two-clause query, evaluated when this var is defined:
;; items that are instances of Q5 (P31) and that have held position Q11696
;; (P39), with labels requested in "en" from the label service.
;; The stray `| |` tokens that had crept into the query text are removed;
;; they are not valid at those positions in a SPARQL group pattern.
(def results
  (query wikidata
         "SELECT ?item ?itemLabel WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P39 wd:Q11696.
SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". }
}"))
;; Turn every result row into a vector holding the string form of each
;; bound value, in the order the row reports its binding names.
(->> results
     (mapv (fn [row]
             (->> (.getBindingNames row)
                  (mapv (fn [binding-name]
                          (.toString (.getValue row binding-name))))))))
;;=> | |
[["http://www.wikidata.org/entity/Q23" "\"George Washington\"@en"] | |
["http://www.wikidata.org/entity/Q76" "\"Barack Obama\"@en"] | |
["http://www.wikidata.org/entity/Q91" "\"Abraham Lincoln\"@en"] | |
["http://www.wikidata.org/entity/Q207" "\"George W. Bush\"@en"] | |
["http://www.wikidata.org/entity/Q1124" "\"Bill Clinton\"@en"] | |
["http://www.wikidata.org/entity/Q8007" "\"Franklin Delano Roosevelt\"@en"] | |
["http://www.wikidata.org/entity/Q8612" "\"Andrew Johnson\"@en"] | |
["http://www.wikidata.org/entity/Q9582" "\"Gerald Ford\"@en"] | |
["http://www.wikidata.org/entity/Q9588" "\"Richard Nixon\"@en"] | |
["http://www.wikidata.org/entity/Q9640" "\"Lyndon B. Johnson\"@en"] | |
["http://www.wikidata.org/entity/Q9696" "\"John F. Kennedy\"@en"] | |
["http://www.wikidata.org/entity/Q9916" "\"Dwight D. Eisenhower\"@en"] | |
["http://www.wikidata.org/entity/Q9960" "\"Ronald Reagan\"@en"] | |
["http://www.wikidata.org/entity/Q11613" "\"Harry S. Truman\"@en"] | |
["http://www.wikidata.org/entity/Q11806" "\"John Adams\"@en"] | |
["http://www.wikidata.org/entity/Q11812" "\"Thomas Jefferson\"@en"] | |
["http://www.wikidata.org/entity/Q11813" "\"James Madison\"@en"] | |
["http://www.wikidata.org/entity/Q11815" "\"James Monroe\"@en"] | |
["http://www.wikidata.org/entity/Q11816" "\"John Quincy Adams\"@en"] | |
["http://www.wikidata.org/entity/Q11817" "\"Andrew Jackson\"@en"] | |
["http://www.wikidata.org/entity/Q11820" "\"Martin Van Buren\"@en"] | |
["http://www.wikidata.org/entity/Q11869" "\"William Henry Harrison\"@en"] | |
["http://www.wikidata.org/entity/Q11881" "\"John Tyler\"@en"] | |
["http://www.wikidata.org/entity/Q11891" "\"James K. Polk\"@en"] | |
["http://www.wikidata.org/entity/Q11896" "\"Zachary Taylor\"@en"] | |
["http://www.wikidata.org/entity/Q12306" "\"Millard Fillmore\"@en"] | |
["http://www.wikidata.org/entity/Q12312" "\"Franklin Pierce\"@en"] | |
["http://www.wikidata.org/entity/Q12325" "\"James Buchanan\"@en"] | |
["http://www.wikidata.org/entity/Q23505" "\"George H. W. Bush\"@en"] | |
["http://www.wikidata.org/entity/Q23685" "\"Jimmy Carter\"@en"] | |
["http://www.wikidata.org/entity/Q33866" "\"Theodore Roosevelt\"@en"] | |
["http://www.wikidata.org/entity/Q34296" "\"Woodrow Wilson\"@en"] | |
["http://www.wikidata.org/entity/Q34597" "\"James A. Garfield\"@en"] | |
["http://www.wikidata.org/entity/Q34836" "\"Ulysses S. Grant\"@en"] | |
["http://www.wikidata.org/entity/Q35041" "\"William McKinley\"@en"] | |
["http://www.wikidata.org/entity/Q35171" "\"Grover Cleveland\"@en"] | |
["http://www.wikidata.org/entity/Q35236" "\"Herbert Hoover\"@en"] | |
["http://www.wikidata.org/entity/Q35286" "\"Warren G. Harding\"@en"] | |
["http://www.wikidata.org/entity/Q35498" "\"Chester A. Arthur\"@en"] | |
["http://www.wikidata.org/entity/Q35648" "\"William Howard Taft\"@en"] | |
["http://www.wikidata.org/entity/Q35678" "\"Benjamin Harrison\"@en"] | |
["http://www.wikidata.org/entity/Q35686" "\"Rutherford B. Hayes\"@en"] | |
["http://www.wikidata.org/entity/Q36023" "\"Calvin Coolidge\"@en"]] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment