Last active
August 30, 2017 11:23
-
-
Save mattfield11/c7ed565b299028986e6d1be4c8c63ca4 to your computer and use it in GitHub Desktop.
Pywikibot to elasticsearch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json

import pywikibot
from elasticsearch import Elasticsearch
from pywikibot import pagegenerators
def build_document(page):
    """Build the Elasticsearch document for one wiki page.

    Args:
        page: An object exposing ``text``, ``title()``, ``full_url()`` and
            ``categories()`` (here, the pages yielded by the pywikibot
            category generator).

    Returns:
        A new dict with the keys ``text``, ``title``, ``fullurl``,
        ``categories`` and ``suggest``, in that order.
    """
    text = page.text
    title = page.title()
    full_url = page.full_url()
    # str.replace returns a new string, so its result must be kept; the
    # original call discarded it and the colons were never replaced.
    categories = [
        str(category).replace(":", ";") for category in page.categories()
    ]
    return {
        "text": text,
        "title": title,
        # 'extlinks' (page.extlinks()) was disabled in the original script.
        "fullurl": full_url,
        "categories": categories,
        "suggest": [{"input": title, "weight": 1}],
    }


def was_created(res):
    """Return whether an index response reports a newly created document.

    Prefers the ``created`` flag when the response has one, and otherwise
    falls back to comparing ``result`` with ``"created"``.
    NOTE(review): presumably newer Elasticsearch servers send only
    ``result`` -- confirm against the deployed version.
    """
    if "created" in res:
        return res["created"]
    return res.get("result") == "created"


def main():
    """Index every page of 'Category:Living people' into 'wikipeople'.

    For each page yielded by the category generator, the document is
    serialized to JSON, printed, sent to Elasticsearch, and the creation
    status from the response is printed.
    """
    es = Elasticsearch()
    site = pywikibot.Site()
    cat = pywikibot.Category(site, 'Category:Living people')
    gen = pagegenerators.CategorizedPageGenerator(cat)
    for page in gen:
        # A fresh dict per page avoids carrying state between iterations.
        doc = json.dumps(build_document(page))
        print(doc)
        res = es.index(index="wikipeople", doc_type='wiki_page', body=doc)
        print(was_created(res))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment