Last active
September 7, 2019 14:11
-
-
Save foriequal0/0415f544778c4f76f8bf7bcba3880d9a to your computer and use it in GitHub Desktop.
www.weather.go.kr radar prediction crawler
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
from datetime import datetime, timezone | |
import logging | |
import urllib.request | |
# getLogger() with no name returns the root logger; its threshold is set to
# INFO, so DEBUG records emitted by this module are filtered out.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

# Shared boto3 S3 resource, created once at import time and reused by crawl().
s3 = boto3.resource('s3')

# Name of the bucket that crawl() uploads the downloaded images into.
BUCKET_NAME = "weather-go-kr-radar-predict-crawler-images"
def get_image(offset, timeout=30):
    """Download one radar prediction image from www.weather.go.kr.

    Args:
        offset: Value substituted into the ``ef`` query parameter of the
            image URL (presumably the forecast offset in minutes -- confirm
            against the upstream service).
        timeout: Seconds to wait on the connection before giving up. Without
            a timeout, ``urlopen`` blocks indefinitely on a stalled server.

    Returns:
        The raw response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails.
        OSError: If the connection times out while connecting or reading.
    """
    url = f"http://www.weather.go.kr/cgi-bin/rdr_new/nph-qpf_web_img?tm=0&ef={offset}&option=0&size=640&zoom_level=0&zoom_x=0000000&zoom_y=0000000"
    # Bound the wait so one unresponsive request cannot consume the whole run.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def crawl(timestamp, offset):
    """Fetch the image for ``offset`` and store it in the S3 bucket.

    The object key is ``<timestamp ISO string>_<offset padded to width 3>.png``.
    The object is uploaded as a publicly readable PNG with a one-year
    cache lifetime, and the timestamp and offset are recorded as metadata.

    Args:
        timestamp: ``datetime`` used for the object key and metadata.
        offset: Integer offset passed to ``get_image`` and embedded in the key.
    """
    payload = get_image(offset)
    logging.debug({ "message": "downloaded", "image": offset })

    stamp = timestamp.isoformat()
    target = s3.Object(BUCKET_NAME, f"{stamp}_{offset:03}.png")
    upload_args = dict(
        ACL="public-read",
        Body=payload,
        CacheControl="max-age=31536000",
        ContentType="image/png",
        Metadata={
            "timestamp": stamp,
            "offset": str(offset),
        },
    )
    target.put(**upload_args)
    logging.debug({ "message": "uploaded", "image": offset })
def lambda_handler(event, context):
    """Lambda entry point: crawl every image offset for the current slot.

    The current UTC time is floored to a 10-minute boundary and used as the
    shared timestamp for all images fetched in this invocation. Offsets from
    -10 through 350 in steps of 10 are crawled in order.

    Args:
        event: Invocation event; not used.
        context: Invocation context; not used.

    Returns:
        ``True`` once every offset has been crawled.
    """
    now = datetime.now(timezone.utc)
    # Floor the minute to a multiple of 10 and zero the finer fields. The
    # result is deliberately naive (no tzinfo), so its ISO string carries no
    # UTC offset suffix.
    timestamp = now.replace(
        minute=now.minute - now.minute % 10,
        second=0,
        microsecond=0,
        tzinfo=None,
    )
    logging.info({ "message": "started", "now": now, "timestamp": timestamp })

    for step in range(-1, 36):
        crawl(timestamp, step * 10)

    logging.info({ "message": "finished" })
    return True
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment