Skip to content

Instantly share code, notes, and snippets.

@hungtatai
Last active June 5, 2016 09:54
Show Gist options
  • Save hungtatai/8febc655de29c796955c9a7711b367f2 to your computer and use it in GitHub Desktop.
Save hungtatai/8febc655de29c796955c9a7711b367f2 to your computer and use it in GitHub Desktop.
from urllib.request import urlopen
import bs4
import os.path
import openpyxl
# openpyxl: http://liyangliang.me/posts/2013/02/using-openpyxl-to-read-and-write-xlsx-files/
def yahoo_dict(word):
    """Look up *word* in the Yahoo Taiwan online dictionary.

    The search results page is fetched and parsed with BeautifulSoup. Inside
    the first ``.explain.DictionaryResults`` element, every ``.compTitle``
    element (presumably a part-of-speech heading -- confirm against the live
    page) is paired with the ``.compArticleList`` element at the same
    position, and the text of each ``h4`` inside that list is collected.

    Args:
        word: The term to search for. It is percent-encoded before being
            placed in the query string.

    Returns:
        A string with one line per heading followed by one space-indented
        line per explanation, each terminated by a newline. An empty string
        is returned when the page contains no dictionary result area.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
    """
    # Function-scope import keeps this block self-contained; the file's
    # top-level imports only bring in urlopen.
    from urllib.parse import quote

    # safe="" so characters such as "/", "&" and "#" in the word cannot
    # change the structure of the URL.
    url = "https://tw.dictionary.search.yahoo.com/search?p=%s" % quote(word, safe="")
    with urlopen(url) as response:
        html = response.read().decode("utf-8")

    bs = bs4.BeautifulSoup(html, "html.parser")
    result_areas = bs.select(".explain.DictionaryResults")
    if not result_areas:
        # No dictionary entry on the page (unknown word, layout change, ...).
        return ""
    explain_area = result_areas[0]

    word_classes = explain_area.select(".compTitle")
    explain_lists = explain_area.select(".compArticleList")

    parts = []
    # zip() stops at the shorter list, so a heading without a matching
    # explanation list is skipped instead of raising IndexError.
    for word_class, explain_list in zip(word_classes, explain_lists):
        parts.append("%s\n" % word_class.getText())
        parts.extend(" %s\n" % h4.getText() for h4 in explain_list.select("h4"))
    return "".join(parts)
### main
# Interactive vocabulary logger: asks for a season/episode, then repeatedly
# asks for a word, looks it up with yahoo_dict() and stores the word and its
# meaning in the worksheet for that episode. The workbook is saved after every
# word so an interrupted session loses nothing.
FILE_NAME = '2brokegirls.xlsx'

if os.path.isfile(FILE_NAME):
    wb = openpyxl.load_workbook(FILE_NAME)
else:
    wb = openpyxl.Workbook()
    # A new Workbook starts with one default sheet; drop it so the file only
    # contains the per-episode sheets created below.
    wb.remove(wb.active)

season = input('這是第幾季?: ')
episode = input('這是第幾集?: ')
# Sheet names look like "S01EP03"; int() raises ValueError on non-numeric input.
sheet_name = "S%02dEP%02d" % (int(season), int(episode))

# Reuse the episode's sheet when it already exists, otherwise create it.
if sheet_name in wb.sheetnames:
    sh = wb[sheet_name]
else:
    sh = wb.create_sheet(sheet_name)

while True:
    try:
        word = input("請輸入單字: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C ends the session without a traceback.
        break
    if not word:
        # A blank entry ends the session.
        break

    try:
        meaning = yahoo_dict(word)
    except (IndexError, OSError) as err:
        # IndexError: the result page had no dictionary entry.
        # OSError: covers urllib's URLError/HTTPError and socket failures.
        # Either way, keep the session alive and move on to the next word.
        print("查詢失敗: %s (%s)" % (word, err))
        continue

    # append() writes to the row after the last used one; `len(sh.rows)`
    # fails on openpyxl versions where `rows` is a generator.
    sh.append([word, meaning])
    wb.save(filename=FILE_NAME)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment