Skip to content

Instantly share code, notes, and snippets.

@62mkv
Last active December 14, 2023 13:14
Show Gist options
  • Save 62mkv/ff7efbecc50bd66954d75ef9aefda9ec to your computer and use it in GitHub Desktop.
Save 62mkv/ff7efbecc50bd66954d75ef9aefda9ec to your computer and use it in GitHub Desktop.
Wikidata SPARQL queries

Examples of SPARQL Wikidata queries:

List of hills in Estonia without defined elevation

# Hills in Estonia that have coordinates but no elevation statement.
# ?elevation is kept in the projection so the result columns stay the same;
# it is never bound, because only items lacking wdt:P2044 pass the filter.
SELECT ?item ?itemLabel ?geo ?elevation
WHERE {
  ?item wdt:P31 wd:Q54050 .
  ?item wdt:P17 wd:Q191 .
  ?item wdt:P625 ?geo .
  # Negation: keep only items with no wdt:P2044 value at all.
  FILTER NOT EXISTS { ?item wdt:P2044 ?elevation }
  # Labels are requested in Estonian.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "et" }
}

Estonian settlements with coordinates and population

WARNING! The population is not aggregated correctly by this query; e.g. Tallinn should show about 450k.

# Settlements whose class is a direct subclass of wd:Q618299, with their
# coordinates and the largest population value found on the item.
# NOTE(review): MAX returns the highest of the item's wdt:P1082 values, which
# is not necessarily the most recent figure - verify against the data.
SELECT ?item ?itemLabel ?location (MAX(?population) AS ?pop)
WHERE {
  # instance of (P31) some class that is a subclass of (P279) wd:Q618299
  ?item wdt:P31/wdt:P279 wd:Q618299 ;
        wdt:P625 ?location .
  # Population is optional: settlements without it are still listed.
  OPTIONAL { ?item wdt:P1082 ?population . }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "et" . }
}
GROUP BY ?item ?itemLabel ?location

Number of lexemes per language:

# Lexeme count per language, largest first.
SELECT ?languageLabel ?lexemeCount
WHERE {
  {
    # Counting happens in this subquery; the label service below then runs
    # on the grouped rows.
    SELECT ?language (COUNT(?lexeme) AS ?lexemeCount)
    WHERE {
      ?lexeme dct:language ?language .
    }
    GROUP BY ?language
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY DESC(?lexemeCount)

Number of lexemes per lexical category:

# Lexeme count per lexical category for lexemes whose language is wd:Q9072,
# largest category first (same ordering as the per-language count queries).
SELECT ?lexCatLabel ?lexCount
WHERE {
  {
    # Counting happens in this subquery; the label service below then runs
    # on the grouped rows.
    SELECT ?lexCat (COUNT(?lexeme) AS ?lexCount)
    WHERE {
      ?lexeme dct:language wd:Q9072 ;
              wikibase:lexicalCategory ?lexCat .
    }
    GROUP BY ?lexCat
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY DESC(?lexCount)

Number of lexeme forms per language:

#defaultView:BubbleChart
# Number of lexeme forms per language, largest first.
SELECT ?languageLabel ?formCount
WHERE {
  {
    # One row per (lexeme, form) pair is counted for each language.
    SELECT ?language (COUNT(?form) AS ?formCount)
    WHERE {
      ?lexeme dct:language ?language ;
              ontolex:lexicalForm ?form .
    }
    GROUP BY ?language
  }
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}
ORDER BY DESC(?formCount)

List of Estonian language lexemes

# Every lexeme whose language is wd:Q9072, together with its lemma.
SELECT ?lexeme ?lemma
WHERE {
  ?lexeme dct:language wd:Q9072 .
  ?lexeme wikibase:lemma ?lemma .
}

Same, but with human-readable lexical category

# Lexemes whose language is wd:Q9072, with lemma and the English label of
# the lexical category.
SELECT ?lexeme ?lemma ?categoryLabel
WHERE {
  ?lexeme dct:language wd:Q9072 ;
          wikibase:lemma ?lemma ;
          wikibase:lexicalCategory ?category .
  # The label service derives ?categoryLabel from ?category.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}

Find an Estonian lexeme matching a given lemma and lexical category

# Look up a lexeme by language (wd:Q9072), lexical category (wd:Q1084) and
# the exact text of its lemma.
SELECT ?lexeme ?lemma
WHERE {
  ?lexeme dct:language wd:Q9072 ;
          wikibase:lexicalCategory wd:Q1084 ;
          wikibase:lemma ?lemma .
  # STR() compares only the lexical text of the lemma.
  FILTER (STR(?lemma) = "aprill")
}

Find count of forms for a given lexeme

# Number of forms attached to lexeme L8287.
# Each form is counted exactly once. Joining in representations and
# grammatical features and counting rows would instead yield one row per
# (form, representation, feature) combination and would skip forms that
# have no grammatical feature.
SELECT (COUNT(DISTINCT ?form) AS ?count)
WHERE {
  wd:L8287 ontolex:lexicalForm ?form .
}

Find all forms for a given lexeme

# All forms of lexeme L8287 with their representation and grammatical
# features. A form appears once per (representation, feature) combination.
SELECT ?form ?representation ?feature
WHERE {
  wd:L8287 ontolex:lexicalForm ?form .
  ?form ontolex:representation ?representation ;
        wikibase:grammaticalFeature ?feature .
}

Same, but with human-readable features:

# Forms of lexeme L8287 with their representation and the English label of
# each grammatical feature.
SELECT ?form ?representation ?featureLabel
WHERE {
  wd:L8287 ontolex:lexicalForm ?form .
  ?form ontolex:representation ?representation ;
        wikibase:grammaticalFeature ?feature .
  # The label service derives ?featureLabel from ?feature.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}

A query that sums up most of the above:

# Everything at once for lexemes in language wd:Q9072 whose lemma text is
# "vesi": category label, every form, its representation and its
# grammatical features (raw and labeled).
# All patterns are required, so a lexeme without forms yields no rows.
SELECT ?lexeme ?lemma ?categoryLabel ?form ?representation ?feature ?featureLabel
WHERE {
  ?lexeme dct:language wd:Q9072 ;
          wikibase:lemma ?lemma ;
          wikibase:lexicalCategory ?category ;
          ontolex:lexicalForm ?form .
  ?form ontolex:representation ?representation ;
        wikibase:grammaticalFeature ?feature .
  FILTER (STR(?lemma) = "vesi")
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
}

WARNING! If the lexeme in question exists but has no forms, the query returns nothing.

Show all items that are instances of "word list":

# Items that are an instance of (P31) wd:Q91828006, with English labels.
SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P31 wd:Q91828006 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}

List of five-letter English lexemes that start with "a" and have "e" in the 4th position

Yes, it's for Wordle.

# Five-character lemmas of lexemes in language wd:Q1860 that start with "a"
# and have "e" as the fourth character.
SELECT ?lemma
WHERE {
  ?lexeme dct:language wd:Q1860 .
  ?lexeme wikibase:lemma ?lemma .
  # SUBSTR(?lemma, 4, 1) is the single character at position 4 (1-based).
  # NOTE(review): STRSTARTS is presumably used instead of "=" because the
  # substring keeps the lemma's language tag - confirm before simplifying.
  FILTER (
    STRSTARTS(?lemma, "a")
    && STRLEN(?lemma) = 5
    && STRSTARTS(SUBSTR(?lemma, 4, 1), "e")
  )
}

Another Wordle example, this time for Russian Wordle

# Five-character lemmas of lexemes in language wd:Q7737 that end with "ь",
# contain "с" and contain none of "а", "е", "и", "о".
SELECT ?lemma
WHERE {
  ?lexeme dct:language wd:Q7737 .
  ?lexeme wikibase:lemma ?lemma .
  # Letters that must be present / shape of the word.
  FILTER (
    STRENDS(?lemma, "ь")
    && STRLEN(?lemma) = 5
    && CONTAINS(?lemma, "с")
  )
  # Letters already ruled out: reject the lemma if any of them occurs.
  FILTER (!(
    CONTAINS(?lemma, "а")
    || CONTAINS(?lemma, "е")
    || CONTAINS(?lemma, "и")
    || CONTAINS(?lemma, "о")
  ))
}

Living NL/BE journalists born after 1970

# Journalists with citizenship of one of the listed countries, born after
# 1970-01-01 and with no date-of-death statement.
SELECT ?item ?itemLabel ?countryLabel
WHERE
{
  ?item wdt:P106 wd:Q1930187;  # occupation: journalist
        wdt:P27 ?country;      # country of citizenship
        wdt:P569 ?dob.         # date of birth
  # Labels in the user's interface language, falling back to Dutch (nl).
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],nl". }
  # Full xsd:dateTime lexical form: a bare date such as "1970-01-01" is not
  # a valid xsd:dateTime literal and relies on lenient parsing by the engine.
  FILTER (?dob > "1970-01-01T00:00:00Z"^^xsd:dateTime)
  # "Living" means there is no date of death (P570) on the item.
  FILTER NOT EXISTS { ?item wdt:P570 ?death }
  # Restrict ?country to the two countries of interest (NL / BE).
  VALUES ?country {
     wd:Q29999
     wd:Q31
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment