Skip to content

Instantly share code, notes, and snippets.

@argahsuknesib
Created March 14, 2022 20:13
Show Gist options
  • Save argahsuknesib/a41aa029a99d0d8c4af58d3774093f26 to your computer and use it in GitHub Desktop.
Save argahsuknesib/a41aa029a99d0d8c4af58d3774093f26 to your computer and use it in GitHub Desktop.
CSVLabel2Wikibase
from cgitb import text
import csv
import configparser
import ntpath
from pydoc import doc
from urllib import request
from xml.dom.minidom import Document
import pywikibot
from SPARQLWrapper import SPARQLWrapper, JSON
from configWikibaseID import ProductionConfig
# Load application settings. The 'wikibase' section must provide 'sparqlEndPoint',
# which is read below. ConfigParser.read() silently skips a missing file, so a
# wrong path surfaces later as a NoSectionError from config.get().
config = configparser.ConfigParser()
config.read('config/application.config.ini')
# Site object handed to UploadLabels in main().
# NOTE(review): "my"/"my" is presumably a custom pywikibot family/code pair
# defined in the local pywikibot configuration - confirm.
wikibase = pywikibot.Site("my", "my")
# NOTE(review): UploadLabels builds its own SPARQLWrapper from the same setting;
# nothing in the code visible here reads this module-level instance.
sparql = SPARQLWrapper(config.get('wikibase', 'sparqlEndPoint'))
# NOTE(review): `site` and `wikidata` are not referenced anywhere in the code
# visible here - confirm whether another module imports them before removing.
site = pywikibot.Site()
wikidata = pywikibot.Site("wikidata", "wikidata")
class UploadLabels():
    """Upload a document, its paragraphs and their topic labels to a Wikibase.

    The input is a CSV file with a ``Paragraph`` column and up to
    ``MAX_LABEL_COLUMNS`` columns named ``Label 1`` .. ``Label N``. Every row
    becomes a paragraph item linked to one document item and to one topic
    item per non-empty label.

    Entity and property ids come from ``ProductionConfig``; the SPARQL
    endpoint comes from the module-level ``config``.
    """

    # Number of "Label N" columns read from each CSV row (Label 1 .. Label 14).
    MAX_LABEL_COLUMNS = 14

    # Upper bound on search hits scanned when looking for an exact label.
    EXACT_SEARCH_LIMIT = 50

    def __init__(self, wikibase):
        """Bind the uploader to *wikibase* (a pywikibot site object)."""
        self.wikibase = wikibase
        self.wikibase_repo = wikibase.data_repository()
        self.sparql = SPARQLWrapper(config.get('wikibase', 'sparqlEndPoint'))
        self.class_entities = {}
        self.properties = {}
        self.pywikibot = pywikibot

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _sparql_literal(value):
        """Return *value* as a double-quoted, escaped SPARQL string literal."""
        escaped = (
            str(value)
            .replace('\\', '\\\\')  # must run first so later escapes survive
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
        )
        return f'"{escaped}"'

    @staticmethod
    def _document_name(filePath):
        """Return the file name of *filePath* without directory or extension."""
        return ntpath.splitext(ntpath.basename(filePath))[0]

    @staticmethod
    def _topic_labels(sub_topics, lang):
        """Normalise *sub_topics* to a list of non-empty, stripped label strings.

        Accepts a ``{lang: [labels]}`` dict, a single string, or an iterable
        whose elements are either strings or objects with a ``label``
        attribute.
        """
        if not sub_topics:
            return []
        if isinstance(sub_topics, dict):
            sub_topics = sub_topics.get(lang) or []
        if isinstance(sub_topics, str):
            sub_topics = [sub_topics]
        labels = []
        for sub_topic in sub_topics:
            label = getattr(sub_topic, 'label', sub_topic)
            if isinstance(label, str) and label.strip():
                labels.append(label.strip())
        return labels

    def capitaliseFirstLetter(self, word):
        """Return *word* with its first character upper-cased and the rest lower-cased."""
        return word.capitalize()

    # ------------------------------------------------------------------
    # Lookups
    # ------------------------------------------------------------------

    def _search_entities(self, label, limit):
        """Run ``wbsearchentities`` for English item labels and return the raw result."""
        params = {'action': 'wbsearchentities', 'format': 'json',
                  'language': 'en', 'type': 'item', 'limit': limit,
                  'search': label}
        # Newer pywikibot releases expose the request factory publicly; fall
        # back to the private spelling the original code relied on.
        make_request = getattr(self.wikibase, 'simple_request', None)
        if make_request is None:
            make_request = self.wikibase._simple_request
        return make_request(**params).submit()

    def searchItem(self, label):
        """Return the raw ``wbsearchentities`` result for *label*, or False if it is None."""
        if label is None:
            return False
        return self._search_entities(label, limit=100)

    def searchExactWikiItem(self, label):
        """Return True when an item whose English label equals *label* exists.

        A ``None`` label also yields True so that callers, which create an
        item only when this returns False, never create an unlabeled item.
        """
        if label is None:
            return True
        result = self._search_entities(label, limit=self.EXACT_SEARCH_LIMIT)
        hits = result.get('search') or []
        return any(hit.get('label') == label for hit in hits)

    def _query_entity_ids(self, predicate, label):
        """Return ids of entities whose *predicate* value equals *label* (en/fr).

        *predicate* is a prefixed name such as ``rdfs:label`` or
        ``skos:altLabel``. Ids are the last path segment of each subject URI.
        """
        # STR() strips the language tag: a language-tagged literal is never
        # equal to a plain string literal in SPARQL.
        query = (
            'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n'
            'PREFIX skos: <http://www.w3.org/2004/02/skos/core#>\n'
            'SELECT DISTINCT ?s WHERE {\n'
            f'  ?s {predicate} ?label .\n'
            "  FILTER(lang(?label) = 'fr' || lang(?label) = 'en')\n"
            f'  FILTER(STR(?label) = {self._sparql_literal(label)})\n'
            '}'
        )
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)
        results = self.sparql.query().convert()
        bindings = (results.get('results') or {}).get('bindings') or []
        entity_ids = []
        for binding in bindings:
            uri = ((binding or {}).get('s') or {}).get('value')
            if uri:
                entity_ids.append(uri.split('/')[-1])
        return entity_ids

    def _first_item(self, entity_ids):
        """Return an ItemPage for the first item id (``Q...``) in *entity_ids*, else False."""
        for entity_id in entity_ids:
            # Skip properties and other non-item entities.
            if entity_id.startswith('Q'):
                return self.pywikibot.ItemPage(self.wikibase_repo, entity_id)
        return False

    def searchWikiItem(self, label):
        """Return True when any entity has *label* as an English or French label."""
        if label is None:
            return False
        return bool(self._query_entity_ids('rdfs:label', label))

    def getItemByAlias(self, label):
        """Return the ItemPage having *label* as an en/fr alias, or False."""
        if label is None:
            return False
        return self._first_item(self._query_entity_ids('skos:altLabel', label))

    def _find_existing_item(self, label):
        """Resolve *label* to an existing ItemPage, or return False.

        Tries an exact English label via the search API, then an en/fr label
        via SPARQL, then an en/fr alias.
        """
        if label is None:
            return False
        result = self.searchItem(label)
        if result:
            for hit in result.get('search') or []:
                if hit.get('label') == label and hit.get('id'):
                    return self.pywikibot.ItemPage(self.wikibase_repo, hit['id'])
        item = self._first_item(self._query_entity_ids('rdfs:label', label))
        if item:
            return item
        return self.getItemByAlias(label)

    # ------------------------------------------------------------------
    # Claim construction
    # ------------------------------------------------------------------

    def _load_item(self, item_id):
        """Return the loaded ItemPage for *item_id*."""
        item = self.pywikibot.ItemPage(self.wikibase_repo, f'{item_id}')
        item.get()
        return item

    def _load_property(self, property_id):
        """Return the loaded PropertyPage for *property_id*."""
        prop = self.pywikibot.PropertyPage(self.wikibase_repo, f'{property_id}')
        prop.get()
        return prop

    def _build_claim(self, property_id, target):
        """Return an unsaved Claim of *property_id* whose value is *target*."""
        prop = self._load_property(property_id)
        claim = self.pywikibot.Claim(
            self.wikibase_repo, prop.id, datatype=prop.type)
        claim.setTarget(target)
        return claim

    def _build_text_claim(self, property_id, text, lang):
        """Return an unsaved Claim of *property_id* holding *text*.

        *text* may be a plain string or a ``{language: text}`` dict. The
        value is wrapped in ``WbMonolingualText`` when the property datatype
        is ``monolingualtext``; otherwise the plain string is used.
        """
        language, value = lang, text
        if isinstance(text, dict):
            if lang in text:
                value = text[lang]
            elif text:
                language, value = next(iter(text.items()))
            else:
                value = None
        prop = self._load_property(property_id)
        if prop.type == 'monolingualtext' and isinstance(value, str):
            value = self.pywikibot.WbMonolingualText(
                text=value, language=language)
        claim = self.pywikibot.Claim(
            self.wikibase_repo, prop.id, datatype=prop.type)
        claim.setTarget(value)
        return claim

    # ------------------------------------------------------------------
    # Entity creation
    # ------------------------------------------------------------------

    def createDocumentEntity(self, label, description, key):
        """Return the document item for *key*, creating it when it does not exist.

        Args:
            label: ``{language: label}`` dict for a newly created item.
            description: ``{language: description}`` dict for a new item.
            key: Document name; its capitalised form is the lookup label.

        Returns:
            The existing or newly created ItemPage, or False when a matching
            label was found but could not be resolved to an item.
        """
        lookup_label = self.capitaliseFirstLetter(key.rstrip())
        if self.searchWikiItem(lookup_label) or self.searchExactWikiItem(lookup_label):
            return self._find_existing_item(lookup_label)

        print(f'inserting document entity {key.rstrip()}')
        new_item = self.pywikibot.ItemPage(self.wikibase_repo)
        new_item.editEntity(
            {'labels': label, 'descriptions': description},
            summary='Creating new item')

        # NOTE: no document-reference-URI claim is added; only "instance of".
        instance_claim = self._build_claim(
            ProductionConfig.INSTACE_OF_PROPERTY_PID,
            self._load_item(ProductionConfig.DOCUMENT_CLASS_QID))
        new_item.editEntity(
            {'claims': [instance_claim.toJSON()]},
            summary='Adding new claims')
        return new_item

    def create_sub_topic(self, topic, paragraph_entity, document_entity, lang):
        """Return the topic item for *topic*, linked to *paragraph_entity*.

        An existing item is looked up by label, then by alias; a new item is
        created only when neither exists. A "mentioned in" claim pointing at
        *paragraph_entity* is then added to the topic.

        Args:
            topic: Topic text.
            paragraph_entity: ItemPage of the paragraph mentioning the topic.
            document_entity: Unused; kept for interface compatibility.
            lang: Language code for the label/description of a new item.

        Returns:
            The topic ItemPage, or False when a matching label exists but
            cannot be resolved to an item.
        """
        topic_label = self.capitaliseFirstLetter(topic.strip())

        if self.searchWikiItem(topic_label) or self.searchExactWikiItem(topic_label):
            topic_entity = self._find_existing_item(topic_label)
            if not topic_entity:
                # Something carries this label but it is not a resolvable
                # item; creating another one would duplicate the label.
                return False
            topic_entity.get()
        else:
            topic_entity = self.getItemByAlias(topic_label)
            if topic_entity:
                topic_entity.get()
            else:
                topic_entity = self.pywikibot.ItemPage(self.wikibase_repo)
                topic_entity.editEntity(
                    {'labels': {lang: topic_label},
                     'descriptions': {lang: topic_label + " entity"}},
                    summary='Creating new item')

        # "mentioned in" -> paragraph
        paragraph_entity.get()
        mentioned_in_claim = self._build_claim(
            ProductionConfig.MENTIONED_IN_PROPERTY_PID, paragraph_entity)
        topic_entity.addClaim(mentioned_in_claim, summary='Adding new claim')
        return topic_entity

    def createParagraphEntity(self, label, description, text, document_entity, sub_topics, lang):
        """Create a paragraph item and link it to its document and topics.

        Args:
            label: ``{language: label}`` dict for the paragraph item.
            description: ``{language: description}`` dict.
            text: Paragraph text, as a string or ``{language: text}`` dict.
            document_entity: ItemPage of the owning document.
            sub_topics: Topic labels; see ``_topic_labels`` for accepted shapes.
            lang: Language code used for topics and monolingual text.

        Returns:
            The created paragraph ItemPage.

        Raises:
            ValueError: If *document_entity* is falsy; raised before anything
                is written so no orphan paragraph is created.
        """
        if not document_entity:
            raise ValueError('document_entity is required to create a paragraph')

        print('inserting paragraph entity')
        paragraph_item = self.pywikibot.ItemPage(self.wikibase_repo)
        paragraph_item.editEntity(
            {'labels': label, 'descriptions': description},
            summary='Creating new item')

        paragraph_summary = 'Adding claim to the paragraph'

        # instance of
        paragraph_item.addClaim(
            self._build_claim(
                ProductionConfig.INSTACE_OF_PROPERTY_PID,
                self._load_item(ProductionConfig.PARAGRAPH_CLASS_QID)),
            summary=paragraph_summary)

        # part of
        paragraph_item.addClaim(
            self._build_claim(
                ProductionConfig.PART_OF_PROPERTY_PID, document_entity),
            summary=paragraph_summary)

        # has text
        paragraph_item.addClaim(
            self._build_text_claim(
                ProductionConfig.HAS_TEXT_PROPERTY_PID, text, lang),
            summary=paragraph_summary)

        # has topic (one claim per resolvable topic)
        for topic_label in self._topic_labels(sub_topics, lang):
            topic_entity = self.create_sub_topic(
                topic_label, paragraph_item, document_entity, lang)
            if not topic_entity:
                continue
            paragraph_item.addClaim(
                self._build_claim(
                    ProductionConfig.HAS_TOPIC_PROPERTY_PID, topic_entity),
                summary=paragraph_summary)

        # document -> has paragraph
        document_entity.addClaim(
            self._build_claim(
                ProductionConfig.HAS_PARAGRAPH_PROPERTY_PID, paragraph_item),
            summary='Adding caim to the document')
        return paragraph_item

    # ------------------------------------------------------------------
    # CSV entry points
    # ------------------------------------------------------------------

    def Upload2Wikibase(self, filePath):
        """Upload the CSV at *filePath* as one document with one paragraph per row.

        The document is named after the file (without extension). Each row
        needs a ``Paragraph`` column; non-empty ``Label 1`` ..
        ``Label MAX_LABEL_COLUMNS`` columns become its topics. A failing row
        is reported and skipped so the remaining rows are still uploaded.
        """
        document_name = self._document_name(filePath)
        language_code = 'en'
        document_title = document_name.capitalize()
        label = {language_code: document_title}
        description_text = "This document titled " + document_name + " and is added to the disability wikibase"
        description = {language_code: description_text}

        wiki_doc_item = self.createDocumentEntity(
            label=label, description=description, key=document_name)
        if not wiki_doc_item:
            print(f'could not create or resolve document entity {document_name}')
            return

        # utf-8-sig drops a leading BOM that would otherwise corrupt the
        # first header name; newline='' is what the csv module expects.
        with open(filePath, 'r', encoding='utf-8-sig', newline='') as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            for line_count, line in enumerate(csv_reader):
                print(f'currently on the line {line_count}')
                try:
                    paragraph_label = {
                        language_code: f"{document_title} paragraph number {line_count}"}
                    paragraph_description = {
                        language_code: f"Paragraph from {document_title} document"}
                    paragraph_topics = []
                    for i in range(1, self.MAX_LABEL_COLUMNS + 1):
                        # .get(): a CSV with fewer label columns is valid.
                        topic = (line.get(f'Label {i}') or '').strip()
                        if topic:
                            paragraph_topics.append(topic.capitalize())
                    self.createParagraphEntity(
                        label=paragraph_label,
                        description=paragraph_description,
                        text=line['Paragraph'],
                        document_entity=wiki_doc_item,
                        sub_topics=paragraph_topics,
                        lang=language_code)
                except Exception as e:
                    # Deliberately broad: one bad row must not abort the upload.
                    print('The exception encountered is ', e)

    def UploadCSV2Wikibase(self, filePath):
        """Create (or resolve) the document item for the CSV at *filePath*.

        Rows are only counted and reported; no per-row upload is implemented
        here. Use ``Upload2Wikibase`` to upload paragraphs.
        """
        fileName = self._document_name(filePath)
        language_code = 'en'
        label = {language_code: fileName.capitalize()}
        description = {language_code: "This file has origin from Black Disability"}

        with open(filePath, 'r', encoding='utf-8-sig', newline='') as csv_file:
            csv_reader = csv.DictReader(csv_file, delimiter=',')
            doc_item = self.createDocumentEntity(
                label=label, description=description, key=fileName)
            if not doc_item:
                print(f'could not create or resolve document entity {fileName}')
                return
            doc_item.get()
            for line_count, _line in enumerate(csv_reader):
                print(f'currently on the line {line_count}')
def main(file_path="data/Black-Disability/CSV/(1977) The Combahee River Collective Statement.csv"):
    """Upload the CSV at *file_path* to the configured Wikibase.

    Args:
        file_path: CSV file to upload. Defaults to the Combahee River
            Collective Statement file the script was written for.
    """
    uploadingLabels = UploadLabels(wikibase)
    uploadingLabels.Upload2Wikibase(file_path)


if __name__ == '__main__':
    import sys

    # An optional first command-line argument overrides the default CSV path.
    main(*sys.argv[1:2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment