Skip to content

Instantly share code, notes, and snippets.

View salgo60's full-sized avatar
😴

Magnus Sälgö salgo60

😴
View GitHub Profile
# --- Script header: Europeana place-ID consistency check ---
import json
import requests
import time
# Base URI for Europeana place entities; an identifier is appended to this prefix.
urlbase = 'http://data.europeana.eu/place/base/'
import sys
# Debug aid: show the interpreter's default text encoding before processing.
print (sys.getdefaultencoding())
def check(Qnumber):
https://tinyurl.com/s84pyc8
@salgo60
salgo60 / WikidataObjectsConnectedToEuropeanaMergedLastWeek
Last active December 20, 2019 00:21
Wikidata objects connected to Europeana merged in the last week -
# see https://phabricator.wikimedia.org/T240809
# query https://w.wiki/E3V
# youtube https://youtu.be/NnoIhdaU6SQ
from SPARQLWrapper import SPARQLWrapper, JSON
endpoint_url = "https://query.wikidata.org/sparql"
query = """SELECT ?EuropeanaEntity ?tgt ?change (REPLACE(STR(?item), ".*Q", "Q") AS ?qid) (REPLACE(STR(?tgt), ".*Q", "Q") AS ?tgtQid) ?tgtLabel WHERE {
?tgt wdt:P7704 ?EuropeanaEntityID.
?item owl:sameAs ?tgt;
@salgo60
salgo60 / CheckEuropeanaDatainWikidata.py
Created December 29, 2019 20:46
Reads a log file I have with records in Europeana and checks that every one is added to Wikidata. Problems can be a deleted WD record and/or a redirected WD record (merged)
"""Small utility that reads the CheckConsistency log
and checks consistency that everything is in Wikidata
1) Read the logs in INFILE
2) read Wikidata
3) loop everything
3-1) Log missing to file
See also Constraints report in Wikidata
https://www.wikidata.org/wiki/Wikidata:Database_reports/Constraint_violations/P7704
"""
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
from SPARQLWrapper import SPARQLWrapper, JSON
endpoint_url = "https://query.wikidata.org/sparql"
query = """SELECT ?sculptors (REPLACE(STR(?sculptors),".*Q","Q") AS ?WD) ?sculptorsLabel ?birth ?died ?sculptorsDescription WHERE {
VALUES ?sculptors { wd:Q4936564 wd:Q5908201 wd:Q4938072 wd:Q15711231 wd:Q4966391 wd:Q4939597 wd:Q16596588
wd:Q4952238 wd:Q2827030 wd:Q4970016 wd:Q4970058 wd:Q4935445 wd:Q20156198 wd:Q4981273
@salgo60
salgo60 / ScrapeAdelsVapen.py
Created February 26, 2020 05:49
AdelsVapen
import requests
import urllib.request
import time
from bs4 import BeautifulSoup
def cleanupstring(s):
    """Squeeze runs of double spaces in *s* down to single spaces.

    Returns a tuple ``(cleaned, removed)`` where *removed* is the number of
    characters dropped relative to the original string.

    NOTE(review): the pasted snippet showed ``replace(' ', ' ')`` — a no-op
    that makes ``while ' ' in out_s`` loop forever on any string containing
    a space.  The HTML rendering evidently collapsed the intended double
    spaces; restored as a double-space squeeze.
    """
    out_s = s
    while '  ' in out_s:
        # Strip ends and collapse one level of doubled spaces per pass;
        # the loop repeats until no double space remains.
        out_s = out_s.strip().replace('  ', ' ')
    return out_s, len(s) - len(out_s)
@salgo60
salgo60 / Bygdebandwebscraping.py
Created April 5, 2020 08:06
Funkar inte hundra skall nog ha timeout eller kolla att sidan laddats
# --- Script header: Bygdeband web scraping with Selenium ---
from builtins import type
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time
# Side effect: launches a local Firefox instance (requires geckodriver on PATH).
driver = webdriver.Firefox()
# Range of record ids to iterate over; presumably Bygdeband page numbers — TODO confirm.
start = 1
endrange = 10000
with open('breadfilecounty.txt','w') as filehandlecounty:
@salgo60
salgo60 / Kulturnav_Wikidata.py
Last active April 16, 2020 09:27
Get Kulturnav dataset for objects in Wikidata missing it see GITHUB Issue https://github.com/salgo60/WikidataBygdeband/issues/12#issuecomment-614522095
#Get dataset for Wikidata objects in kulturnav
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
import json,requests
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
# Wikidata SPARQL endpoint queried by the rest of the script.
endpoint_url = "https://query.wikidata.org/sparql"
@salgo60
salgo60 / WDKulturnavSameas.py
Last active April 17, 2020 15:05
A quick check whether Kulturnav has a same-as link when Wikidata has a same-as link to Kulturnav, for humans
# SPARQL getting all humans with P1248 and check if Kulturnav has Same as for those items...
# SPARQL https://w.wiki/N6k
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
import sys
import json,requests
import datetime
import csv
from SPARQLWrapper import SPARQLWrapper, JSON
@salgo60
salgo60 / Find Wikidata duplicates linking SBL
Last active April 21, 2020 10:18
If we have a sv:Wikipedia article linking SBL, compare with Wikidata records that have P3217 but no sv:Wikipedia article
#Find duplicate SBL WD records .....
# SBL but no sv article https://w.wiki/NUT
#Version 0.2 add blacklist
__version__ = "0.2"
__author__ = "Magnus Sälgö"
# Report the script version at startup.
print ("version: ",__version__)
import requests
from bs4 import BeautifulSoup