# encoding: utf-8

import urllib.request
import urllib.parse
import json
import os
import ftplib
import dotenv
import git
import shutil
import requests
import re
import datetime
from bs4 import BeautifulSoup

dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
dotenv.load_dotenv(dotenv_path)
|
# URL of the LinkData datapackage listing the school lunch menu data
HANDA_ALL_TABLES_URL = 'http://linkdata.org/api/1/rdf1s4907i/datapackage.json'

# Local directory where the CSV files downloaded from LinkData are saved
MENUES_DIR = 'menues'

# FTPS connection settings
HOST_NAME = os.environ.get("HOST_NAME")
USER_NAME = os.environ.get("USER_NAME")
PASSWORD = os.environ.get("PASSWORD")
FTPS_DATA_DIR = os.environ.get("FTPS_DATA_DIR")
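
# The four FTPS settings above are read from the .env file loaded at the top of
# this script. A minimal sketch of that file, with placeholder values only (the
# real host, credentials and remote directory are not part of this script):
#
#   HOST_NAME=ftp.example.com
#   USER_NAME=example-user
#   PASSWORD=example-password
#   FTPS_DATA_DIR=/data/lunch-pictures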
|
|
|
# Name of the GitHub repository
REPO_NAME = 'shocknine'
# GitHub repository to clone
GITHUB = 'git@github.com:shinyaoguri/shocknine.git'
# Directory inside the repository that holds the CSV data
GH_DATA_DIR = 'data'
# Base URL of the Handa city website
HANDA_BASE_URL = 'http://www.city.handa.lg.jp'
# URL of Handa city's "today's lunch menu" page
HANDA_TODAY_MENU_URL = 'http://www.city.handa.lg.jp/kyushoku/kosodate/kyoiku/kyushoku/kyonokondate.html'
# Temporary directory for images downloaded from the Handa city page
MENU_PIC_TMP_DIR = 'tmp_pic'
|
def get_all_tables(url):
    req = urllib.request.Request(url)
    all_tables = {}
    with urllib.request.urlopen(req) as response:
        body = response.read().decode("utf-8")
        all_tables = json.loads(body)

    print('[LOG] get all table data.')
    return all_tables
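

# The datapackage JSON fetched by get_all_tables() is assumed to look roughly
# like the sketch below; only the two fields this script actually reads are
# shown, and the URL is a placeholder rather than a real LinkData entry:
#
#   {
#     "resources": [
#       {"url": "http://linkdata.org/.../some_month.csv", ...},
#       ...
#     ]
#   }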
|
|
|
def get_all_csvs_url(data):
    csv_list = []
    for d in data["resources"]:
        csv_list.append(d["url"])
    print('[LOG] get ' + str(len(csv_list)) + ' csv urls')
    return csv_list
|
def download_csv(url, name):
    if not os.path.exists(MENUES_DIR):
        os.mkdir(MENUES_DIR)
    file_exists = os.path.isfile(os.path.join(MENUES_DIR, name))
    if not file_exists:
        print('[LOG] ' + name + ' does not exist yet, so download it.')
        urllib.request.urlretrieve(url, os.path.join(MENUES_DIR, name))
    else:
        print('[LOG] ' + name + ' already exists, so do nothing.')
|
def put_ftp_pictures(pic_name, pic_path):
    with ftplib.FTP_TLS(HOST_NAME) as ftps:
        ftps.set_pasv(True)
        ftps.login(USER_NAME, PASSWORD)
        ftps.prot_p()
        with open(pic_path, 'rb') as fp:
            log = ftps.storbinary('STOR ' + FTPS_DATA_DIR + '/' + pic_name, fp)
            print('[LOG] uploaded ' + pic_name + ' to the ftp server: ' + log)
|
def get_ftp_pictures():
    with ftplib.FTP_TLS(HOST_NAME) as ftps:
        ftps.login(USER_NAME, PASSWORD)
        ftps.prot_p()  # switch the data connection to TLS before listing
        pic_list = ftps.nlst(FTPS_DATA_DIR)
        print('[LOG] get ftp picture list. there are ' + str(len(pic_list)) + ' files')
        return pic_list
|
def get_github_resource():
    if os.path.exists(REPO_NAME):
        print('[LOG] Repository already exists, so delete it.')
        shutil.rmtree(REPO_NAME)
    print('[LOG] git clone ' + GITHUB)
    git.Git().clone(GITHUB)
    gh_csv_res_list = os.listdir(os.path.join(REPO_NAME, GH_DATA_DIR))
    print('[LOG] github repository has ' + str(len(gh_csv_res_list)) + ' files')
    return gh_csv_res_list
|
def get_handa_lunch_picture():
    picture_list = dict()
    response = requests.get(HANDA_TODAY_MENU_URL)
    soup = BeautifulSoup(response.content, 'html5lib')
    tmp_html = soup.find('div', id='tmp_contents')

    img_tag_list = tmp_html.find_all("img")
    for c in img_tag_list:
        img_path = c['src']
        img_alt = c['alt']
        menu_txt = re.search(r"(?P<month>[0-9]+)月(?P<date>[0-9]+)日(?P<option>.*)", img_alt)
        if menu_txt is None:
            # skip images whose alt text does not contain a menu date
            continue
        print('[LOG] find picture src=' + img_path + ' alt=' + img_alt
              + ' month=' + menu_txt.group('month')
              + ' date=' + menu_txt.group('date')
              + ' option=' + menu_txt.group('option'))
        pic_name = str(datetime.date.today().year) + menu_txt.group('month').zfill(2) + menu_txt.group('date').zfill(2)
        pic_name = pic_name + '.jpg'

        if not os.path.exists(MENU_PIC_TMP_DIR):
            os.mkdir(MENU_PIC_TMP_DIR)
        urllib.request.urlretrieve(HANDA_BASE_URL + img_path, os.path.join(MENU_PIC_TMP_DIR, pic_name))
        print('[LOG] download picture from ' + HANDA_BASE_URL + img_path + ' as ' + pic_name)
        picture_list[pic_name] = os.path.join(MENU_PIC_TMP_DIR, pic_name)
    return picture_list
|
def remove_tmp_dir():
    if os.path.exists(REPO_NAME):
        shutil.rmtree(REPO_NAME)
        print('[LOG] remove ' + REPO_NAME)
    if os.path.exists(MENU_PIC_TMP_DIR):
        shutil.rmtree(MENU_PIC_TMP_DIR)
        print('[LOG] remove ' + MENU_PIC_TMP_DIR)
|
|
def main():
    # Fetch the catalogue of school lunch menu data from LinkData
    #data = get_all_tables(HANDA_ALL_TABLES_URL)

    # Collect the URL of each month's menu CSV
    #all_csv_url = get_all_csvs_url(data)

    # Download each CSV file
    # for csv in all_csv_url:
    #     tmp = csv.split('/')
    #     file_name = tmp[-1]
    #     download_csv(csv, file_name)

    # Clone the GitHub repository and get the list of CSV files it contains
    #gh_csv_list = get_github_resource()

    # Upload any menu CSV that is missing from the repository
    # (not implemented yet; a sketch follows below)
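    #
    # A minimal sketch of that upload step, assuming the commented-out calls
    # above are enabled, that the downloaded CSVs sit in MENUES_DIR, and that
    # the clone has push access configured (an illustration, not the original
    # author's implementation):
    #
    # repo = git.Repo(REPO_NAME)
    # for csv_name in os.listdir(MENUES_DIR):
    #     if csv_name not in gh_csv_list:
    #         shutil.copy(os.path.join(MENUES_DIR, csv_name),
    #                     os.path.join(REPO_NAME, GH_DATA_DIR, csv_name))
    #         repo.git.add(os.path.join(GH_DATA_DIR, csv_name))
    # if repo.is_dirty():
    #     repo.git.commit('-m', 'Add new menu CSVs')
    #     repo.git.push()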
|
|
|
    # Fetch the latest list of menu pictures from the Handa city site
    handa_pic_list = get_handa_lunch_picture()

    # Fetch the list of pictures already on the FTP server
    ftp_pic_list = get_ftp_pictures()

    # Upload any picture that is not yet on the FTP server
    ftp_pic_names = [os.path.basename(ftp_pic) for ftp_pic in ftp_pic_list]
    for pic_name, pic_path in handa_pic_list.items():
        if pic_name not in ftp_pic_names:
            print('[LOG] ' + pic_name)
            put_ftp_pictures(pic_name, pic_path)

    # Remove temporary files and directories
    remove_tmp_dir()


if __name__ == "__main__":
    main()