This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import os | |
import xml.parsers.expat | |
from xml.sax.saxutils import escape | |
from optparse import OptionParser | |
from math import log10 | |
# How much data we process at a time |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
api = "https://www.wikidata.org/w/api.php" | |
query = "some search" | |
params = { | |
'action': 'query', | |
'list': 'search', | |
'format': 'json', | |
'srsearch': query, | |
'srprop': 'titlesnippet|snippet', |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Decompress the latest VIAF links dump
# (http://viaf.org/viaf/data/viaf-20190107-links.txt.gz) and count the number
# of unique VIAF ids that have at least one link to a Wikipedia edition.
# awk prints the first tab-separated field of every line containing
# "wikipedia"; sort -u removes duplicates before wc counts the lines.
gunzip -kc viaf-20190107-links.txt.gz | awk -F '\t' '/wikipedia/ {print $1}' | sort -u | wc -l
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from SPARQLWrapper import SPARQLWrapper, JSON | |
sparql = SPARQLWrapper("https://linkeddata1.calcul.u-psud.fr/sparql") | |
sparql.setQuery(""" | |
select * | |
where { | |
<http://yago-knowledge.org/resource/Elvis_Presley> ?property ?valueOrObject . | |
} | |
LIMIT 100""") | |
sparql.setReturnFormat(JSON) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<a href="javascript:function replaceText(ot,nt,n){n=n||document.body;var cs=n.childNodes,i=0;while(n=cs[i]){if(n.nodeType==Node.TEXT_NODE){n.textContent=n.textContent.replace(ot,nt);}else{replaceText(ot,nt,n);};i++;}};replaceText('surréalisme','romantisme');">Surréaliste!</a>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
import requests | |
import json | |
import unicodecsv as csv | |
url = "https://lod-cloud.net/lod-data.json" | |
r = requests.get(url) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import urllib2 | |
langs = ["fr", "en", "de", "nl"] # ordered list of languages you want to try until there is a match | |
value = cell.recon.match.id | |
for lang in langs: | |
wiki = lang + "wiki" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import csv | |
import sys | |
# parseur Python pour les fichiers jsonLD de Tropy | |
# Renvoie le fichier avec son nom d'origine, mais en CSV. | |
# à utiliser en lignes de commande. | |
# exemple d'usage avec un fichier Json de Tropy nommé resultats.json : | |
#> python tropy_parser.py resultats.json |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import subprocess

# Run the openrefine-client Windows executable with its --list option and
# print whatever it writes to standard output.
# The command is given as an argument list, so no intermediate shell is
# spawned and no shell quoting rules apply to the arguments.
command_line = ["openrefine-client_0-3-4_windows.exe", "--list"]
# stdout is captured as bytes and decoded explicitly as UTF-8.
result = subprocess.run(command_line, stdout=subprocess.PIPE).stdout.decode('utf8')
print(result)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def camel_case_split(identifier):
    """Split a camelCase or PascalCase identifier into its component words.

    A boundary is placed between a lowercase letter and a following
    uppercase letter, and before the last capital of an uppercase run that
    is followed by a lowercase letter, so runs of capitals stay together
    (``"HTTPResponse"`` -> ``["HTTP", "Response"]``).

    Args:
        identifier: The string to split.

    Returns:
        A list of the matched fragments in their original order. An empty
        string yields an empty list.
    """
    # Imported locally so the function works on its own; nothing in the
    # visible snippet brings ``finditer`` into scope.
    from re import finditer

    # Lazily consume characters up to the nearest boundary (or end of input).
    matches = finditer(
        r'.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)',
        identifier,
    )
    return [m.group(0) for m in matches]