Created
November 28, 2022 23:39
-
-
Save sdtaylor/5bad00693dd023e813b15b623b8d241b to your computer and use it in GitHub Desktop.
CAP Stuff
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Various tests with Common Alerting Protocol (CAP) feeds at https://severeweather.wmo.int/v2/
import re
from datetime import datetime, timedelta

#import xmltodict
import feedparser
import pandas as pd
import requests
from capparselib.parsers import CAPParser
from shapely import wkt
from shapely.geometry import shape, Point
from tqdm import tqdm
# Mapping of a short, human-readable source label to the URL of that
# source's CAP RSS feed. The labels are used as the `source` value of the
# per-alert records collected further down in this script.
# NOTE(review): the 'Germany-GMO' label points at the "de-dwd-de" feed slug;
# confirm the intended label before relying on it.
cap_feeds = {
    "USA-NOAA": "http://alert-feed.worldweather.org/us-noaa-nws-en/rss.xml",
    "Brazil-INMET": "http://alert-feed.worldweather.org/br-inmet-pt/rss.xml",
    "China-CMA": "https://alert-feed-worldweather-org.s3.amazonaws.com/cn-cma-xx/rss.xml",
    "Germany-GMO": "http://alert-feed.worldweather.org/de-dwd-de/rss.xml",
    "Indonesia-INATEWS": "http://alert-feed.worldweather.org/id-inatews-id/rss.xml",
    "Russia": "http://alert-feed.worldweather.org/ru-roshydromet-en/rss.xml",
    "SaudiArabia": "http://alert-feed.worldweather.org/sa-ncm-en/rss.xml",
}
#cap_feed_url = 'http://alert-feed.worldweather.org/id-inatews-id/rss.xml' | |
#cap_feed = 'https://s3-us-west-2.amazonaws.com/alert-feeds-hko/ca-msc-xx/rss.xml' | |
#------------------------------------------------- | |
# Matches one CAP "lat,lon" pair at the start of a token, e.g.
# "-0.175781,-61.831055" or "10,-61". The fractional part is optional so
# whole-degree coordinates are accepted as well.
_CAP_COORD_RE = re.compile(
    r"^(?P<lat>-?\d+(?:\.\d+)?),(?P<lon>-?\d+(?:\.\d+)?)"
)


def parse_cap_polygon_to_wkt(polygon_str):
    """
    Convert a CAP style polygon string to WKT.

    A CAP polygon string is a list of lat,lon pairs. Specifically:

    "The geographic polygon is represented by a whitespace-delimited
    list of [WGS 84] coordinate pairs."

    "The term “coordinate pair” is used in this document to refer to a
    comma-delimited pair of decimal values describing a geospatial location
    in degrees, unprojected, in the form “[latitude],[longitude]”. Latitudes in
    the Southern Hemisphere and longitudes in the Western Hemisphere are signed
    negative by means of a leading dash."

    From: Common Alerting Protocol Version 1.2, OASIS Standard, 01 July 2010

    Parameters
    ----------
    polygon_str : str
        The CAP polygon string. Pairs may be separated by any run of
        whitespace (spaces, tabs or newlines).

    Returns
    -------
    str:
        A WKT str of the polygon. Coordinate text is copied verbatim (no
        float round-trip), only the order is swapped. Tokens that do not
        start with a "lat,lon" pair are skipped. If no pair is found at all
        the valid empty geometry 'POLYGON EMPTY' is returned.
    """
    # Destination format. Note the WKT is "lon lat", while CAP is "lat,lon"
    wkt_coord_template = '{lon} {lat}'
    full_wkt_template = 'POLYGON (({all_coords}))'

    coord_list = []
    # split() with no argument splits on any whitespace run, which is what
    # the CAP spec allows, and never yields empty tokens.
    for coord_pair in polygon_str.split():
        coord_match = _CAP_COORD_RE.match(coord_pair)
        if coord_match is not None:
            coord_list.append(
                wkt_coord_template.format(**coord_match.groupdict())
            )

    # 'POLYGON (())' is not valid WKT, so emit the canonical empty polygon
    # when nothing could be parsed.
    if not coord_list:
        return 'POLYGON EMPTY'

    # WKT separates X and Y with a space and successive coordinates with
    # a comma.
    return full_wkt_template.format(all_coords=', '.join(coord_list))
def get_xml_file(url, timeout=30):
    """
    Download a document over HTTP and return its body as text.

    Parameters
    ----------
    url : str
        The address to fetch.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. Without a timeout a stalled
        server would block the script forever. Defaults to 30.

    Returns
    -------
    str:
        The decoded response body (``Response.text``).

    Raises
    ------
    requests.HTTPError
        Raised by ``raise_for_status()`` for an unsuccessful response.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()
    return resp.text
#------------------------------------------ | |
# Survey every configured feed: for up to the first 30 entries of each feed
# that were published within the last 30 days, download the linked CAP
# document and record whether it parsed and whether its first area of its
# first info block carries polygons.
all_entries = []
thirty_days_ago = datetime.today() - timedelta(days=30)

for feed_source, feed_url in tqdm(cap_feeds.items()):
    feed_contents = feedparser.parse(feed_url)

    for feed_item in tqdm(feed_contents['entries'][:30], leave=False):
        # Only the calendar date of publication is used for the age filter.
        published = feed_item['published_parsed']
        item_date = datetime(year=published.tm_year,
                             month=published.tm_mon,
                             day=published.tm_mday)
        if item_date < thirty_days_ago:
            continue

        try:
            cap_dict = CAPParser(get_xml_file(feed_item['link'])).as_dict()
            date_sent = datetime.fromisoformat(cap_dict[0]['cap_sent'].text)

            # Only the first area of the first info block is inspected.
            first_area = cap_dict[0]['cap_info'][0]['cap_area'][0]
            if 'polygons' in first_area:
                has_polygon = True
                polygon_count = len(first_area['polygons'])
            else:
                has_polygon = False
                polygon_count = 0
            parse_success = True
        except Exception:
            # Best effort: any download or parse failure is recorded as an
            # unsuccessful entry rather than aborting the survey. Catching
            # Exception (not a bare except) keeps Ctrl-C and SystemExit
            # working during the long-running network loop.
            date_sent = None
            has_polygon = False
            polygon_count = 0
            parse_success = False

        all_entries.append(dict(
            source=feed_source,
            date_sent=date_sent,
            parse_success=parse_success,
            has_polygon=has_polygon,
            polygon_count=polygon_count,
        ))

# One row per inspected alert.
x = pd.DataFrame(all_entries)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment