Last active
June 5, 2016 09:54
-
-
Save hungtatai/8febc655de29c796955c9a7711b367f2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os.path
from urllib.parse import quote_plus
from urllib.request import urlopen

import bs4
import openpyxl
# openpyxl: http://liyangliang.me/posts/2013/02/using-openpyxl-to-read-and-write-xlsx-files/ | |
def yahoo_dict(word):
    """Look up *word* on Yahoo Taiwan's dictionary and return the result as text.

    The search result page is downloaded and parsed with BeautifulSoup.
    Inside the first ``.explain.DictionaryResults`` element, every
    ``.compTitle`` element is paired with the ``.compArticleList`` element
    at the same position. The title text is written on its own line,
    followed by the text of each ``h4`` found in the paired list, each
    prefixed with a single space.

    Args:
        word: The term to search for. It is percent-encoded before being
            placed in the query string, so phrases containing spaces and
            non-ASCII terms produce a valid URL.

    Returns:
        The formatted text (every line terminated by a newline), or an
        empty string when the page contains no explanation area.

    Raises:
        Whatever ``urlopen`` raises on a failed request; the error is not
        caught here.
    """
    url = "https://tw.dictionary.search.yahoo.com/search?p=%s" % quote_plus(word)
    # The context manager closes the HTTP response even if decoding fails.
    with urlopen(url) as response:
        html = response.read().decode("utf-8")

    soup = bs4.BeautifulSoup(html, "html.parser")
    areas = soup.select(".explain.DictionaryResults")
    if not areas:
        # No explanation area on the page: report "no meaning" instead of
        # failing with IndexError.
        return ""

    explain_area = areas[0]
    titles = explain_area.select(".compTitle")
    article_lists = explain_area.select(".compArticleList")

    lines = []
    # zip() stops at the shorter list, so a title without a matching
    # article list is skipped rather than raising IndexError.
    for title, articles in zip(titles, article_lists):
        lines.append("%s\n" % title.getText())
        lines.extend(" %s\n" % h4.getText() for h4 in articles.select("h4"))
    return "".join(lines)
### main
# Interactive vocabulary collector: asks for a season and an episode, then
# repeatedly asks for a word, looks it up with yahoo_dict() and stores the
# word and its meaning in a per-episode sheet of the workbook below.
FILE_NAME = '2brokegirls.xlsx'

# Reuse the existing workbook when there is one; otherwise start a new one.
if os.path.isfile(FILE_NAME):
    wb = openpyxl.load_workbook(FILE_NAME)
else:
    wb = openpyxl.Workbook()
    # A new Workbook starts with one default sheet; drop it so the file
    # only contains the per-episode sheets created below.
    wb.remove(wb.active)

season = input('這是第幾季?: ')   # prompt: "Which season is this?"
episode = input('這是第幾集?: ')  # prompt: "Which episode is this?"
# Sheet names look like "S01EP03". int() raises ValueError on non-numeric input.
sheet_name = "S%02dEP%02d" % (int(season), int(episode))

if sheet_name in wb.sheetnames:
    sh = wb[sheet_name]
else:
    sh = wb.create_sheet(sheet_name)

while True:
    try:
        word = input("請輸入單字: ").strip()  # prompt: "Please enter a word"
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt ends the session without a traceback.
        # Nothing is lost: the workbook is saved after every word.
        break
    if not word:
        # Ignore blank input instead of sending an empty query.
        continue
    # append() writes to the row after the last used one. The previous
    # len(sh.rows) approach fails on openpyxl versions where `rows` is a
    # generator.
    sh.append([word, yahoo_dict(word)])
    # Save after each word so an interrupted session keeps what was entered.
    wb.save(filename=FILE_NAME)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment