Created
December 10, 2020 12:22
-
-
Save amotl/0a2eb63708b8a0cf4dc457b1e6a87455 to your computer and use it in GitHub Desktop.
Demo for accessing https://opendata.dwd.de/ using fsspec, optionally using caching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Demo for accessing https://opendata.dwd.de/ using fsspec, optionally using caching. | |
| Setup | |
| ===== | |
| :: | |
| pip install fsspec requests aiohttp | |
| Resources | |
| ========= | |
| - https://opendata.dwd.de/ | |
| - https://github.com/intake/filesystem_spec | |
| """ | |
| import sys | |
| from fsspec import AbstractFileSystem | |
| from fsspec.implementations.cached import WholeFileCacheFileSystem | |
| from fsspec.implementations.http import HTTPFileSystem | |
| from fsspec.implementations.zip import ZipFileSystem | |
# Glob pattern matching the "recent" 10-minute air temperature Zip archives
# on the DWD open data server.
http_resource_pattern = (
    "https://opendata.dwd.de/climate_environment/CDC/observations_germany/climate/"
    "10_minutes/air_temperature/recent/*.zip"
)
def run_demo(fs: AbstractFileSystem):
    """
    Run the demo against the given filesystem and report progress on stdout.

    The steps are:

    1. Expand ``http_resource_pattern`` with ``fs.glob`` and print the matches.
    2. Open the first match through ``fs``.
    3. Wrap the opened file in a ``ZipFileSystem`` and list its ``*.txt`` members.
    4. Read the first text member with ``cat`` and print its payload length.

    :param fs: Filesystem used for globbing and opening the remote resource.
    :raises IndexError: If the glob yields no remote resource, or the
        archive contains no ``*.txt`` member.
    """
    print("Reading HTTP directory index")
    remote_urls = fs.glob(http_resource_pattern)
    print(remote_urls)
    print()

    print("Reading first Zip archive")
    if not remote_urls:
        # Same exception type as plain indexing into an empty list would
        # raise, but with a message that names the actual problem.
        raise IndexError(f"No remote resource matches {http_resource_pattern!r}")
    http_resource = remote_urls[0]
    print(http_resource)
    print()

    with fs.open(http_resource) as resource:
        print("Reading Zip archive index")
        # Use a dedicated name for the archive filesystem instead of
        # rebinding the `fs` parameter, so both stay distinguishable.
        zip_fs = ZipFileSystem(resource)
        text_files = zip_fs.glob("*.txt")
        print(text_files)
        print()

        print("Reading first text file")
        if not text_files:
            raise IndexError(f"No *.txt member found in archive {http_resource!r}")
        # The archive is read from `resource`, so this has to happen while
        # the `with` block still holds the file open.
        payload = zip_fs.cat(text_files[0])
        print(f"Payload length: {len(payload)}")
        print()
if __name__ == "__main__":
    # The access mode is the first command line argument; without one,
    # the demo talks to the remote server directly.
    mode = sys.argv[1] if len(sys.argv) > 1 else "direct"

    # Reject anything other than the two supported modes up front.
    if mode not in ("direct", "cached"):
        raise ValueError("Unknown access mode")

    if mode == "cached":
        print("Accessing remote filesystem with caching")
        print()
        # Whole files fetched over HTTP are cached in the ".dwd-cache" directory.
        filesystem = WholeFileCacheFileSystem(fs=HTTPFileSystem(), cache_storage=".dwd-cache")
    else:
        print("Accessing remote filesystem directly")
        print()
        filesystem = HTTPFileSystem()

    run_demo(filesystem)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is related to the discussion at earthobservations/wetterdienst#243. Thanks for bringing that to our attention, @kmuehlbauer. Thanks also to @martindurant, @TomAugspurger, @intake and contributors for conceiving and maintaining
fsspec. This is truly golden. cc @gutzbenj