Skip to content

Instantly share code, notes, and snippets.

@retrage
Created October 5, 2016 17:47
Show Gist options
  • Select an option

  • Save retrage/e2c503bd107ca73577b2d6cdf4f8d8bb to your computer and use it in GitHub Desktop.

Select an option

Save retrage/e2c503bd107ca73577b2d6cdf4f8d8bb to your computer and use it in GitHub Desktop.
Generating JSON from Kokyo Higashi-Gyoen Hanadayori
#!/usr/bin/python
# -*- coding: utf-8 -*-
from urllib import request
from urllib import error
from bs4 import BeautifulSoup
import json
from datetime import datetime
def fetch_data(url):
    """Download *url* and return its body decoded as UTF-8 text.

    The request is sent with a desktop Safari ``User-agent`` header.

    Args:
        url: URL string of the resource to fetch.

    Returns:
        The decoded response body as ``str`` on success.  If opening the URL
        fails with ``urllib.error.URLError``, the exception instance is
        returned (not raised), so callers must check the type of the result.

    Raises:
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Safari/602.1.50"
    encode = "utf-8"
    header = {"User-agent": user_agent}
    req = request.Request(url, None, header)
    try:
        res = request.urlopen(req)
    except error.URLError as e:
        return e
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body has been read, even if read/decode fails.
    with res:
        return res.read().decode(encode)
def jsonize_data(html_data):
    """Parse the page HTML into a dict ready for JSON serialization.

    Args:
        html_data: HTML source of the page as a string.

    Returns:
        A dict with four keys:
          * ``"date"``        - text of the first ``<p class="mb0">``.
          * ``"table"``       - list of rows; the first row holds the text of
            every ``<th>``, the following rows hold the cell texts of each
            ``<tr>`` from the third one onward.
          * ``"description"`` - text of the third ``<p>`` in the document.
          * ``"waka"``        - text of the first ``<span>`` inside the
            ``<div class="md-waka">``.
    """
    soup = BeautifulSoup(html_data, "html.parser")

    date = soup.find("p", class_="mb0").string

    # Header row first, built from every <th> in the document.
    table = [[th.string for th in soup.find_all("th")]]
    # The first two <tr> elements are skipped (presumably header rows --
    # their text is already captured through the <th> scan above).
    for tr in soup.find_all("tr")[2:]:
        # Bare newline text nodes between cells are not data; drop them.
        cells = [node.string for node in tr.children if node != "\n"]
        table.append(cells)

    paragraphs = soup.find_all("p")
    description = paragraphs[2].string

    waka_box = soup.find("div", class_="md-waka")
    waka = waka_box.find("span").string

    return {
        "date": date,
        "table": table,
        "description": description,
        "waka": waka,
    }
def main():
    """Fetch the Hanadayori page and save its parsed contents as JSON.

    The output file is created in the current working directory and named
    after today's local date (``YYYYMMDD.json``).

    Returns:
        0 on success, -1 if the page could not be fetched.
    """
    import sys  # local import: only needed to report errors on stderr

    url = "http://www.kunaicho.go.jp/event/hanadayori/hanadayori.html"
    data = fetch_data(url)
    if not isinstance(data, str):
        # fetch_data returns the exception object instead of raising it;
        # include it in the message so the cause of the failure is visible.
        print("Error: {0}".format(data), file=sys.stderr)
        return -1
    json_data = jsonize_data(data)
    today = datetime.now()
    # Write UTF-8 explicitly (the locale default may not be able to encode
    # Japanese) and keep non-ASCII text readable rather than \uXXXX-escaped.
    with open("{0:%Y%m%d}.json".format(today), "w", encoding="utf-8") as fp:
        json.dump(json_data, fp, sort_keys=False, indent=4,
                  ensure_ascii=False)
    return 0
if __name__ == "__main__":
    # Propagate main()'s status (0 on success, -1 on failure) as the process
    # exit code instead of silently discarding it.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment