# encoding: utf-8
import urllib.request
import urllib.parse
import json
import os
import ftplib
import dotenv
import git
import shutil
import requests
import re
import datetime
from bs4 import BeautifulSoup

# load FTPS credentials and settings from a .env file next to this script
dotenv_path = os.path.join(os.path.dirname(__file__), '.env')
dotenv.load_dotenv(dotenv_path)
# URL of the LinkData datapackage for the school lunch menu data
HANDA_ALL_TABLES_URL = 'http://linkdata.org/api/1/rdf1s4907i/datapackage.json'
# local directory for the CSVs downloaded from LinkData
MENUES_DIR = 'menues'
# FTPS connection settings (loaded from .env)
HOST_NAME = os.environ.get("HOST_NAME")
USER_NAME = os.environ.get("USER_NAME")
PASSWORD = os.environ.get("PASSWORD")
FTPS_DATA_DIR = os.environ.get("FTPS_DATA_DIR")
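# The .env file is expected to define the four keys above; the values below
# are placeholders, not real credentials:
#   HOST_NAME=ftp.example.com
#   USER_NAME=your-user
#   PASSWORD=your-password
#   FTPS_DATA_DIR=pictures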
# name of the GitHub repository
REPO_NAME = 'shocknine'
# SSH URL of the GitHub repository
GITHUB = 'git@github.com:shinyaoguri/shocknine.git'
# directory inside the repository that holds the CSV data
GH_DATA_DIR = 'data'
# base URL of the Handa city website
HANDA_BASE_URL = 'http://www.city.handa.lg.jp'
# URL of Handa city's "today's school lunch menu" page
HANDA_TODAY_MENU_URL = 'http://www.city.handa.lg.jp/kyushoku/kosodate/kyoiku/kyushoku/kyonokondate.html'
# directory where pictures from the Handa city page are downloaded temporarily
MENU_PIC_TMP_DIR = 'tmp_pic'

def get_all_tables(url):
    # fetch the datapackage JSON that lists every menu table on LinkData
    req = urllib.request.Request(url)
    all_tables = {}
    with urllib.request.urlopen(req) as response:
        body = response.read().decode("utf-8")
        all_tables = json.loads(body)
    print('[LOG] got all table data.')
    return all_tables

def get_all_csvs_url(data):
    # collect the download URL of every CSV resource in the datapackage
    csv_list = []
    for d in data["resources"]:
        csv_list.append(d["url"])
    print('[LOG] got ' + str(len(csv_list)) + ' csv urls')
    return csv_list
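
# For reference, get_all_csvs_url assumes the datapackage JSON carries a
# top-level "resources" list whose entries each have a "url" key, roughly
# (an illustrative sketch, not actual data):
#   {"resources": [{"url": "http://linkdata.org/.../201801.csv"}, ...]}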

def download_csv(url, name):
    # download one CSV into MENUES_DIR, skipping files that already exist
    if not os.path.exists(MENUES_DIR):
        os.mkdir(MENUES_DIR)
    file_exists = os.path.isfile(os.path.join(MENUES_DIR, name))
    if not file_exists:
        print('[LOG] ' + name + ' does not exist yet, downloading it.')
        urllib.request.urlretrieve(url, os.path.join(MENUES_DIR, name))
    else:
        print('[LOG] ' + name + ' already exists, doing nothing.')

def put_ftp_pictures(pic_name, pic_path):
    # upload one picture to the FTPS server
    with ftplib.FTP_TLS(HOST_NAME) as ftps:
        ftps.set_pasv(True)  # set_pasv expects a bool, not the string "true"
        ftps.login(USER_NAME, PASSWORD)
        ftps.prot_p()  # switch the data channel to TLS
        with open(pic_path, 'rb') as fp:
            log = ftps.storbinary('STOR ' + FTPS_DATA_DIR + '/' + pic_name, fp)
            print('[LOG] uploaded ' + pic_name + ' to the ftp server: ' + log)

def get_ftp_pictures():
    # list the pictures already stored on the FTPS server
    with ftplib.FTP_TLS(HOST_NAME) as ftps:
        ftps.login(USER_NAME, PASSWORD)
        ftps.prot_p()  # protect the data channel, as in put_ftp_pictures
        pic_list = ftps.nlst(FTPS_DATA_DIR)
        print('[LOG] got ftp picture list: there are ' + str(len(pic_list)) + ' files')
        return pic_list

def get_github_resource():
    # clone the repository fresh and list the CSV files it contains
    if os.path.exists(REPO_NAME):
        print('[LOG] Repository already exists, deleting it first.')
        shutil.rmtree(REPO_NAME)
    print('[LOG] git clone ' + GITHUB)
    git.Git().clone(GITHUB)
    gh_csv_res_list = os.listdir(os.path.join(REPO_NAME, GH_DATA_DIR))
    print('[LOG] github repository has ' + str(len(gh_csv_res_list)) + ' files')
    return gh_csv_res_list

def get_handa_lunch_picture():
    # scrape today's menu page, download every menu picture it links to,
    # and return a dict of {file name: local path}
    picture_list = dict()
    response = requests.get(HANDA_TODAY_MENU_URL)
    soup = BeautifulSoup(response.content, 'html5lib')
    tmp_html = soup.find('div', id='tmp_contents')
    img_tag_list = tmp_html.find_all("img")
    for c in img_tag_list:
        img_path = c['src']
        img_alt = c['alt']
        menu_txt = re.search(r"(?P<month>[0-9]+)月(?P<date>[0-9]+)日(?P<option>.*)", img_alt)
        if menu_txt is None:
            continue  # skip images whose alt text carries no menu date
        print('[LOG] found picture src=' + img_path + ' alt=' + img_alt + ' month=' + menu_txt.group('month') + ' date=' + menu_txt.group('date') + ' option=' + menu_txt.group('option'))
        # build a YYYYMMDD file name; this assumes the menu month falls in
        # the current calendar year
        pic_name = str(datetime.date.today().year) + menu_txt.group('month').zfill(2) + menu_txt.group('date').zfill(2)
        if menu_txt.group('option') == '献立':  # '献立' means "menu"
            pic_name = pic_name + '.jpg'
        else:
            pic_name = pic_name + '_alt.jpg'
        if not os.path.exists(MENU_PIC_TMP_DIR):
            os.mkdir(MENU_PIC_TMP_DIR)
        urllib.request.urlretrieve(HANDA_BASE_URL + img_path, os.path.join(MENU_PIC_TMP_DIR, pic_name))
        print('[LOG] downloaded picture from ' + HANDA_BASE_URL + img_path + ' as ' + pic_name)
        picture_list[pic_name] = os.path.join(MENU_PIC_TMP_DIR, pic_name)
    return picture_list
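
# Example of the naming scheme above (the dates are illustrative): on
# 2021-01-12, an image with alt="1月12日献立" is saved as "20210112.jpg",
# while any other text after the date yields "20210112_alt.jpg".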

def remove_tmp_dir():
    # delete the cloned repository and the temporary picture directory
    if os.path.exists(REPO_NAME):
        shutil.rmtree(REPO_NAME)
        print('[LOG] removed ' + REPO_NAME)
    if os.path.exists(MENU_PIC_TMP_DIR):
        shutil.rmtree(MENU_PIC_TMP_DIR)
        print('[LOG] removed ' + MENU_PIC_TMP_DIR)

def main():
    # fetch the list of school lunch menu datasets from LinkData
    #data = get_all_tables(HANDA_ALL_TABLES_URL)
    # get the URL of each month's menu CSV
    #all_csv_url = get_all_csvs_url(data)
    # download each CSV file
    # for csv in all_csv_url:
    #     tmp = csv.split('/')
    #     file_name = tmp[-1]
    #     download_csv(csv, file_name)
    # clone the GitHub repository and list the CSV files it holds
    #gh_csv_list = get_github_resource()
    # upload any menu CSV that is missing from the repository
    # fetch the latest picture list from the Handa city site
    handa_pic_list = get_handa_lunch_picture()
    # get the list of pictures already on the FTP server
    ftp_pic_list = get_ftp_pictures()
    # upload each picture that is not yet on the FTP server
    ftp_basenames = [os.path.basename(ftp_pic) for ftp_pic in ftp_pic_list]
    for pic_name, pic_path in handa_pic_list.items():
        if pic_name not in ftp_basenames:
            print('[LOG] ' + pic_name)
            put_ftp_pictures(pic_name, pic_path)
    # remove the temporary files and directories
    remove_tmp_dir()


if __name__ == "__main__":
    main()
beautifulsoup4
GitPython
html5lib
python-dotenv
requests
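
To run the script, install these packages first, e.g. with pip install -r requirements.txt (assuming the list above is saved as requirements.txt); everything else the script imports comes from the Python standard library.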