Last active
September 12, 2017 07:35
-
-
Save aoirint/01d2f9e34586d47a86d67e3135971658 to your computer and use it in GitHub Desktop.
気象庁HPからPython, requests, BSで天気予報、週間天気予報を取得、適当に(再)整形して出力するスクリプト
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys | |
import re | |
import requests | |
from bs4 import BeautifulSoup | |
def as_int(tpl):
    """Convert every leaf of a nested tuple/list structure to ``int``.

    Args:
        tpl: An iterable whose items are either values accepted by ``int()``
            or (possibly nested) tuples/lists of such values.

    Returns:
        A new list mirroring ``tpl``. Nested tuples stay tuples and nested
        lists stay lists; the top level is always returned as a list, even
        when ``tpl`` itself is a tuple.

    Raises:
        ValueError, TypeError: Propagated from ``int()`` when a leaf cannot
            be converted.
    """
    def convert(item):
        # Keep the container type of nested sequences; as_int() itself
        # always yields a list, so tuples have to be rebuilt explicitly.
        if isinstance(item, tuple):
            return tuple(as_int(item))
        if isinstance(item, list):
            return as_int(item)
        return int(item)

    return [convert(item) for item in tpl]
def get_week_forecast(fukenNo=319, timeout=30):
    """Download and parse the JMA weekly-forecast page for one area.

    Args:
        fukenNo: Numeric part of the page file name
            (``http://www.jma.go.jp/jp/week/<fukenNo>.html``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys:
            ``fukenNo``  the argument as given,
            ``fuken``    second space-separated word of the page title text,
            ``caption``  full caption text of the forecast table,
            ``updated``  caption text before the first full-width space,
            ``source``   the URL that was fetched,
            ``day``      list of ``(day_number, last_character)`` tuples read
                         from the table's header row,
            ``region``   list of per-region dicts with ``name``, ``weather``,
                         ``rain``, ``reliability``, ``cityname``,
                         ``maxtemp`` and ``mintemp``.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout or
            a non-success HTTP status.
        AttributeError, IndexError, ValueError: If the page does not have
            the expected structure.
    """
    url = 'http://www.jma.go.jp/jp/week/' + str(fukenNo) + '.html'
    # Without a timeout a stalled server would hang the script forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    html = response.text

    # The page markup is malformed, so patch it before parsing.
    html = html.replace('</form>\n<tr>', '</form>\n')
    html = html.replace('<th colspan="2" class="normal">', '<tr><th colspan="2" class="normal">')

    bs = BeautifulSoup(html, 'html.parser')
    table = bs.find(id='infotablefont')

    fuken = bs.find(class_='titleText').text.split(' ')[1]
    caption = table.find('caption').text
    updated = caption.split('\u3000')[0]

    rows = table.find_all('tr')
    # Header cells: everything but the last character is the day number.
    dayRow = [(int(th.text[:-1]), th.text[-1:]) for th in rows[0].find_all('th')[1:8]]
    rows = rows[1:]

    regions = []
    # Each region occupies five consecutive rows after the header row.
    for i in range(len(rows) // 5):
        batch = rows[5*i:5*i + 5]
        region = {}
        region['name'] = batch[0].find(class_='normal').text[:-1]
        region['weather'] = [td.text[:-1] for td in batch[0].find_all('td')[0:7]]
        region['rain'] = as_int([tuple(td.text.split('/')) for td in batch[1].find_all('td')[1:8]])
        region['reliability'] = [td.text if td.text != '/' else None for td in batch[2].find_all('td')[1:8]]
        region['cityname'] = batch[3].find(class_='cityname').text
        region['maxtemp'] = [_signed_ints(td.text) for td in batch[3].find_all('td')[1:]]
        region['mintemp'] = [_signed_ints(td.text) for td in batch[4].find_all('td')[1:]]
        regions.append(region)

    return {
        'fukenNo': fukenNo,
        'fuken': fuken,
        'caption': caption,
        'updated': updated,
        'source': url,
        'day': dayRow,
        'region': regions
    }


def _signed_ints(text):
    """Return all integers found in *text* as a tuple, keeping minus signs.

    A ``-`` counts as a sign only when it is not directly preceded by a
    digit, so a range written as ``8-12`` still yields ``(8, 12)`` while
    ``-3`` yields ``(-3,)``. Splitting on ``\\W+`` (the previous approach)
    dropped the sign and reported sub-zero temperatures as positive.
    """
    return tuple(int(token) for token in re.findall(r'(?<!\d)-?\d+', text))
def get_forecast(fukenNo=319, timeout=30):
    """Download and parse the JMA short-range forecast page for one area.

    Args:
        fukenNo: Numeric part of the page file name
            (``http://www.jma.go.jp/jp/yoho/<fukenNo>.html``).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A dict with the keys:
            ``fuken``    second space-separated word of the page title text,
            ``fukenNo``  the argument as given,
            ``caption``  full caption text of the forecast table,
            ``updated``  caption text before the first '気象庁',
            ``source``   the URL that was fetched,
            ``region``   list of per-region dicts (``name``, ``cityname``,
                         ``day``); each entry of ``day`` has ``date``,
                         ``weather`` and ``info`` and, where present,
                         ``rain``, ``mintemp`` and ``maxtemp``,
            ``abstract`` text of the ``<pre class="textframe">`` element.

    Raises:
        requests.exceptions.RequestException: On network failure, timeout or
            a non-success HTTP status.
        AttributeError, IndexError, ValueError: If the page does not have
            the expected structure.
    """
    url = 'http://www.jma.go.jp/jp/yoho/' + str(fukenNo) + '.html'
    # Without a timeout a stalled server would hang the script forever.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    bs = BeautifulSoup(response.text, 'html.parser')
    table = bs.find(id='forecasttablefont')

    fuken = bs.find(class_='titleText').text.split(' ')[1]
    caption = table.find('caption').text
    updated = caption.split('気象庁')[0]

    rows = table.find_all('tr')
    regions = []
    # Each region occupies 20 consecutive rows: one header row plus 19 rows
    # of day data.
    for i in range(len(rows) // 20):
        hi = 20*i
        batch = rows[hi:hi + 20]
        region = {}
        region['name'] = batch[0].find('th').text.strip()
        batch = batch[1:]

        days = []
        for j in range(3):
            # Day blocks start at rows 1, 8, 15 -> 0, 7, 14 once the
            # region header row has been dropped.
            dhi = 7*j
            head = batch[dhi].find('th', class_='weather')
            day = {}
            # Drop the last character of the heading, then take the first
            # run of digits as the date.
            day['date'] = int(re.match(r'.*?(\d+)', head.text.strip()[:-1]).group(1))
            day['weather'] = head.find('img')['alt']
            day['info'] = batch[dhi].find('td', class_='info').text

            # Only 19 rows remain after the header, so the third block
            # (offset 14) has no rain rows and no row at offset 14 + 6.
            if j != 2:
                rain = []
                for k in range(1, 5):
                    cells = batch[dhi + k].find_all('td')
                    # Second cell minus its last character; '--' means
                    # no value.
                    amount = cells[1].text[:-1]
                    rain.append((cells[0].text, int(amount) if amount != '--' else None))
                day['rain'] = rain

                temp_row = batch[dhi + 6]
                if 'cityname' not in region:
                    region['cityname'] = temp_row.find('td', class_='city').text

                for key, css_class in (('mintemp', 'min'), ('maxtemp', 'max')):
                    # Strip before testing for emptiness so that a
                    # whitespace-only cell is treated as "no value" instead
                    # of crashing in int('').
                    s = temp_row.find('td', class_=css_class).text.strip()
                    if s:
                        # The last character is a unit suffix, not a digit.
                        day[key] = int(s[:-1])

            days.append(day)

        region['day'] = days
        regions.append(region)

    abstract = bs.find('pre', class_='textframe').text

    return {
        'fuken': fuken,
        'fukenNo': fukenNo,
        'caption': caption,
        'updated': updated,
        'source': url,
        'region': regions,
        'abstract': abstract
    }
# Command-line options
#   -fn   area code (prefecture number, i.e. the numeric part of the JMA
#         page file name); 319 (Tokyo) when omitted
#   -fc   print the forecast for the selected area
#   -abs  print the weather overview (used together with -fc)
#   -wc   print the weekly forecast for the selected area
#   -a    print every region of the selected area; only the first region
#         is printed when omitted

# Usage text shown when neither -fc nor -wc is requested.
_USAGE = '''オプション
-fn 地域コード(府県番号、気象庁ページのファイル名数値部)、指定しない場合319(東京)
-fc 指定地域の天気予報を出力
-abs 天気概況を出力(-fcと併用)
-wc 指定地域の週間天気予報を出力
-a 指定地域のすべての天気予報を出力、指定しない場合先頭の1つのみ'''

# Short option name -> key in the options dict.
_OPTION_ALIASES = {
    'fn': 'fukenNo',
    'fc': 'forecast',
    'abs': 'abstract',
    'wc': 'weekcast',
    'a': 'all',
}
_FLAG_OPTIONS = ('forecast', 'abstract', 'weekcast', 'all')
_TRUE_WORDS = ('1', 't', 'true', 'on')


def parse_options(argv):
    """Parse ``-key`` / ``-key=value`` arguments into an options dict.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        A dict pre-filled with the defaults (``fukenNo`` 319, all flags
        False) and updated from ``argv``. Arguments that do not start with
        ``-`` are ignored; unknown keys are stored as given.

    Raises:
        ValueError: If the value given for ``-fn`` is not an integer.
    """
    options = {
        'fukenNo': 319,
        'forecast': False,
        'abstract': False,
        'weekcast': False,
        'all': False
    }
    for option in argv:
        # startswith() also copes with an empty argument string.
        if not option.startswith('-'):
            continue
        # partition() keeps any further '=' inside the value.
        key, has_value, value = option[1:].partition('=')
        if not has_value:
            value = True
        key = _OPTION_ALIASES.get(key, key)

        # Exact comparison: "key in ('fukenNo')" is a substring test on a
        # plain string and sent unrelated keys such as 'e' through int().
        if key == 'fukenNo':
            value = int(value)
        elif key in _FLAG_OPTIONS and not isinstance(value, bool):
            value = value.lower() in _TRUE_WORDS
        options[key] = value
    return options


def _selected_regions(regions, show_all):
    """Return every region, or just the first one (if any)."""
    return regions if show_all else regions[:1]


def _format_temp(values):
    """Format ``(value,)`` as 'v' and ``(value, low, high)`` as 'v(low-high)'."""
    s = str(values[0]) if values else ''
    if len(values) == 3:
        s += '(' + str(values[1]) + '-' + str(values[2]) + ')'
    return s


def _print_forecast(fc, options):
    """Print the short-range forecast returned by get_forecast()."""
    print('天気予報 - ' + fc['fuken'] + '(' + str(fc['fukenNo']) + ') ' + fc['updated'])
    for region in _selected_regions(fc['region'], options['all']):
        print('#', region['name'] + '(' + region['cityname'] + ')')

        for day in region['day']:
            print(str(day['date']).ljust(2), end=' ')
            # Pad with a full-width space; the weather text is Japanese.
            print(day['weather'].ljust(6, '\u3000'), end=' ')
            if 'rain' in day:
                ls = [str(r[1]) if r[1] is not None else '--' for r in day['rain']]
                print('/'.join(ls).ljust(11), end=' ')
            else:
                print(' '*11, end=' ')
            print(str(day['mintemp']).ljust(2) if 'mintemp' in day else '--', end=' ')
            print(str(day['maxtemp']).ljust(2) if 'maxtemp' in day else '--', end=' ')
            print()

    if options['abstract']:
        print()
        print(fc['abstract'])


def _print_week_forecast(wf, options):
    """Print the weekly forecast returned by get_week_forecast()."""
    print('週間天気予報 - ' + wf['fuken'] + '(' + str(wf['fukenNo']) + ') ' + wf['updated'])
    for region in _selected_regions(wf['region'], options['all']):
        print('#', region['name'] + '(' + region['cityname'] + ')')

        for j, (date, suffix) in enumerate(wf['day']):
            print((str(date) + '(' + suffix + ')').ljust(5), end=' ')
            # Pad with a full-width space; the weather text is Japanese.
            print(region['weather'][j].ljust(4, '\u3000'), end=' ')
            print('/'.join(str(v) for v in region['rain'][j]).ljust(11), end=' ')
            print(_format_temp(region['mintemp'][j]).ljust(9), end=' ')
            print(_format_temp(region['maxtemp'][j]).ljust(9))


def main(argv):
    """Run the command-line tool with the given arguments."""
    options = parse_options(argv)

    if not options['forecast'] and not options['weekcast']:
        print(_USAGE)
        sys.exit()

    sources = []
    if options['forecast']:
        fc = get_forecast(options['fukenNo'])
        sources.append(fc['source'])
        _print_forecast(fc, options)

    # Blank line between the two reports when both are requested.
    if options['forecast'] and options['weekcast']:
        print()

    if options['weekcast']:
        wf = get_week_forecast(options['fukenNo'])
        sources.append(wf['source'])
        _print_week_forecast(wf, options)

    print('-----')
    print('このアプリケーションは、気象庁ホームページから取得したデータを加工表示しています')
    print('ソース:' + ', '.join(sources))


if __name__ == '__main__':
    main(sys.argv[1:])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
rev3