Created
August 30, 2025 05:03
-
-
Save sasasin/2b02a5668b5df32979489032a48ae75f to your computer and use it in GitHub Desktop.
AWS Health Dashboard の Service health から Playwright で RSS の URL を入手するやつ
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env -S uv run --python 3.13 --with playwright | |
| # https://health.aws.amazon.com/health/status から RSS の URL を入手する。 | |
| # 昔は curl で取れたが、JSでレンダリングを要するようになったので Playwright を使う。 | |
| # npx playwright codegen でポチポチして python library で生成したものをベースにした。 | |
| from playwright.sync_api import sync_playwright | |
| import time | |
| import re | |
def _extract_rss_urls_from_page(page):
    """Return the RSS URLs found in the table rows currently rendered on *page*.

    At most one link per row is taken: the first anchor whose href ends in
    ".rss". Hrefs that start with "/" are prefixed with the
    https://status.aws.amazon.com origin; other hrefs are returned unchanged.
    """
    found = []
    for row in page.query_selector_all('table tbody tr'):
        rss_link = row.query_selector('a[href$=".rss"]')
        if rss_link is None:
            continue
        href = rss_link.get_attribute('href')
        if href.startswith('/'):
            href = "https://status.aws.amazon.com" + href
        found.append(href)
    return found


def get_all_rss_urls():
    """Collect the RSS feed URLs listed on the AWS Health Dashboard.

    Opens https://health.aws.amazon.com/health/status in headless Chromium,
    switches the locale filter to "All locales", and then walks the paginated
    table page by page, gathering every link whose href ends in ".rss".

    Returns:
        A list of RSS URLs in the order they were encountered. Duplicates are
        not removed.

    Raises:
        Whatever Playwright raises (e.g. a timeout) when the page does not
        load or an expected control is missing. The browser is closed before
        the exception propagates.
    """
    rss_urls = []
    with sync_playwright() as playwright:
        browser = playwright.chromium.launch(headless=True)
        try:
            context = browser.new_context()
            try:
                page = context.new_page()
                page.goto("https://health.aws.amazon.com/health/status")
                page.wait_for_load_state('networkidle')
                # Extra grace period after network idle before interacting.
                time.sleep(2)

                # Switch the locale filter to "All locales".
                page.get_by_test_id("services").click()
                page.get_by_role("button", name="Locales North America").click()
                page.get_by_text("All locales").first.click()

                # Text-less buttons inside the pagination control; index 1 is
                # treated as the "next page" arrow. Locators are resolved
                # lazily on every use, so building this once is enough.
                next_button = (
                    page.get_by_test_id("status-history-pagination")
                    .get_by_role("button")
                    .filter(has_text=re.compile(r"^$"))
                    .nth(1)
                )

                while True:
                    # Extract the RSS URLs from the page currently shown.
                    rss_urls.extend(_extract_rss_urls_from_page(page))

                    # An enabled "next" button means another page exists.
                    # (A Locator object is always truthy, so only
                    # is_enabled() carries information here.)
                    if not next_button.is_enabled():
                        break
                    next_button.click()
                    page.wait_for_load_state('networkidle')
                    time.sleep(1)
            finally:
                context.close()
        finally:
            # Runs on failure too, so no headless browser is left behind.
            browser.close()
    return rss_urls
if __name__ == "__main__":
    # Script entry point: write each discovered RSS URL on its own line.
    for rss_url in get_all_rss_urls():
        print(rss_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment