Created
March 1, 2018 13:33
-
-
Save zardoru/592e13fb4c2140bf77a1fae8f3549bec to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.error import HTTPError, URLError
from urllib.parse import quote, unquote, urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup
# Crawl the Yo-kai Medallium tribe index pages, follow every medallium icon
# link, and write each yo-kai's description as one line of "lines-yokai.txt".

# Tribe index pages; each one is scanned for "medallium-icon" elements.
pages = [
    "http://mtwildwood.net/yokaiMedallium/tribeBrave.html",
    "http://mtwildwood.net/yokaiMedallium/tribeMysterious.html",
    "http://mtwildwood.net/yokaiMedallium/tribeTough.html",
    "http://mtwildwood.net/yokaiMedallium/tribeShady.html",
    "http://mtwildwood.net/yokaiMedallium/tribeHeartful.html",
    "http://mtwildwood.net/yokaiMedallium/tribeCharming.html",
    "http://mtwildwood.net/yokaiMedallium/tribeEerie.html",
    "http://mtwildwood.net/yokaiMedallium/tribeSlippery.html",
    "http://mtwildwood.net/yokaiMedallium/tribeKaima.html",
]


def _fetch_soup(url):
    """Download *url* and return it parsed with lxml, or None on failure.

    A failed request is reported on stdout and turned into None so that one
    unreachable page does not abort the whole crawl.
    """
    try:
        # The context manager closes the HTTP response as soon as it is read.
        with urlopen(url) as response:
            html = response.read()
    except URLError as e:  # HTTPError is a subclass of URLError
        print("Couldn't read - {}".format(e))
        return None
    return BeautifulSoup(html, "lxml")


# Pass 1: collect the detail-page URL behind every medallium icon.
links = []
for page in pages:
    print("Reading {}".format(page))
    soup = _fetch_soup(page)
    if soup is None:
        continue
    for med in soup.find_all(class_="medallium-icon"):
        # An icon without an <a>, or an <a> without href, is skipped instead
        # of crashing on None.
        anchor = med.a
        href = anchor.get("href") if anchor is not None else None
        if href:
            # Resolve relative to the index page the link was found on.
            links.append(urljoin(page, href))

print("Links acquired. Going through descriptions...")

# Pass 2: fetch each detail page and write its description, one per line.
# UTF-8 is explicit so non-ASCII descriptions do not depend on the platform's
# default encoding; `with` guarantees the file is closed even on interruption.
with open("lines-yokai.txt", "w", encoding="utf-8") as out:
    for link in links:
        print("Reading {}".format(link))
        soup = _fetch_soup(link)
        if soup is None:
            continue
        d = soup.find(class_="description")
        if d is None:
            # Page has no element with class "description"; nothing to record.
            continue
        # Collapse the description to a single line and drop the game-title tag.
        s = d.get_text().replace("\n", "").replace("Yo-kai Watch 2", "").strip()
        out.write(s + "\n")
        # Flush per entry so partial results are on disk during a long crawl.
        out.flush()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment