Skip to content

Instantly share code, notes, and snippets.

@foriequal0
Last active September 7, 2019 14:11
Show Gist options
  • Save foriequal0/0415f544778c4f76f8bf7bcba3880d9a to your computer and use it in GitHub Desktop.
Save foriequal0/0415f544778c4f76f8bf7bcba3880d9a to your computer and use it in GitHub Desktop.
www.weather.go.kr radar prediction crawler
import boto3
from datetime import datetime, timezone
import logging
import urllib.request
# getLogger() with no name returns the root logger; let INFO and above through.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
# S3 resource handle created once at import time and reused by crawl().
s3 = boto3.resource('s3')
# Name of the S3 bucket that crawl() uploads the downloaded images into.
BUCKET_NAME = "weather-go-kr-radar-predict-crawler-images"
def get_image(offset, timeout=30):
    """Download one radar-prediction image from www.weather.go.kr.

    Args:
        offset: Value substituted into the ``ef`` query parameter of the
            image endpoint (presumably the forecast offset in minutes --
            confirm against the site).
        timeout: Seconds to wait on blocking network operations before
            giving up. Without a timeout a stalled server would block the
            caller indefinitely; pass ``None`` to restore that behaviour.

    Returns:
        The raw response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success HTTP statuses).
        TimeoutError: May be raised directly (as ``socket.timeout`` on older
            Pythons) when reading the response times out.
    """
    url = f"http://www.weather.go.kr/cgi-bin/rdr_new/nph-qpf_web_img?tm=0&ef={offset}&option=0&size=640&zoom_level=0&zoom_x=0000000&zoom_y=0000000"
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read()
def crawl(timestamp, offset):
    """Fetch the image for ``offset`` and upload it to the S3 bucket.

    The object key is ``<timestamp.isoformat()>_<offset>.png`` with the
    offset formatted to a minimum width of three characters. The same two
    values are also stored as object metadata.

    Args:
        timestamp: ``datetime`` identifying the crawl run; used in the key
            and in the ``timestamp`` metadata entry.
        offset: Integer offset passed through to ``get_image``.
    """
    payload = get_image(offset)
    logging.debug({ "message": "downloaded", "image": offset })

    iso_stamp = timestamp.isoformat()
    target = s3.Object(BUCKET_NAME, f"{iso_stamp}_{offset:03}.png")
    upload_args = {
        "ACL": "public-read",
        "Body": payload,
        # One year, in seconds.
        "CacheControl": "max-age=31536000",
        "ContentType": "image/png",
        "Metadata": {
            "timestamp": iso_stamp,
            "offset": str(offset),
        },
    }
    target.put(**upload_args)
    logging.debug({ "message": "uploaded", "image": offset })
def lambda_handler(event, context):
    """Handler entry point: crawl every image offset for the current slot.

    The current UTC time is rounded down to the start of its 10-minute
    window, and ``crawl`` is called with that timestamp for each offset
    from -10 up to 350 in steps of 10.

    Args:
        event: Unused.
        context: Unused.

    Returns:
        ``True`` once every offset has been crawled.
    """
    now = datetime.now(timezone.utc)
    # Round the minute down to a multiple of 10 and zero the smaller fields.
    # tzinfo is dropped so the timestamp is a naive datetime holding UTC
    # wall-clock values; its isoformat() therefore carries no UTC offset.
    slot_minute = now.minute // 10 * 10
    timestamp = now.replace(
        minute=slot_minute,
        second=0,
        microsecond=0,
        tzinfo=None,
    )
    logging.info({ "message": "started", "now": now, "timestamp": timestamp })

    offsets = range(-10, 360, 10)
    for step in offsets:
        crawl(timestamp, step)

    logging.info({ "message": "finished" })
    return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment