Last active
June 24, 2016 14:01
-
-
Save ettorerizza/ac91ab4e23e5bcf884862b9a9d8547eb to your computer and use it in GitHub Desktop.
# Ce script récupère une liste de noms et vérifie d'abord s'ils existent dans Wikipedia.fr, puis dans Wikipedia.nl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
######################################################
#
# This script reads a list of names and checks
# whether each one exists on Wikipedia.fr first,
# then on Wikipedia.nl
#
######################################################
import codecs | |
import wikipedia | |
# voir https://pypi.python.org/pypi/wikipedia/ | |
def find_article(name, languages=("fr", "nl")):
    """Return a one-line description of the Wikipedia article for *name*.

    Each language in *languages* is tried in order. The first language for
    which both lookups succeed yields ``"<url> ||| <summary>"``. If every
    language fails, the result is ``"notFind <name>"``.

    Args:
        name: Article title to look up.
        languages: Wikipedia language codes to try, in priority order.

    Returns:
        The formatted result string described above.
    """
    for lang in languages:
        try:
            wikipedia.set_lang(lang)
            # The URL is taken from a lookup with auto-suggest enabled and
            # the summary from one with auto-suggest disabled, exactly as
            # the original script did.
            url = wikipedia.page(name, auto_suggest=True).url
            summary = wikipedia.page(name, auto_suggest=False).summary
        except Exception:
            # Best effort: any lookup error counts as "not found in this
            # language". Unlike a bare ``except``, this still lets
            # KeyboardInterrupt / SystemExit stop the script.
            continue
        return url + " ||| " + summary
    return "notFind " + name


def main(input_path='fichier.txt', output_path="resultats.txt"):
    """Look up every name listed in *input_path* and save the results.

    The input file is read as UTF-8, one name per line. Each result is
    printed as soon as it is known, then all results are written to
    *output_path* (UTF-8), one result followed by a newline per name.

    Args:
        input_path: Text file holding the names to look up.
        output_path: Text file that receives the results.

    Returns:
        The list of result strings, in input order.
    """
    with codecs.open(input_path, 'r', encoding='utf-8') as f:
        names = f.read().splitlines()

    # Accumulate one entry per name. The original rebound a single variable
    # on every iteration, so the output loop walked the characters of one
    # result string instead of the list of results.
    results = []
    for name in names:
        entry = find_article(name)
        print(entry)
        results.append(entry)

    with codecs.open(output_path, "w", encoding='utf-8') as fp:
        for entry in results:
            fp.write(entry + "\n")
    return results


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment