Last active
March 27, 2024 18:44
-
-
Save andersy005/b83592c90eb12f8d18ab3405ef562cd1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"title": "CMIP6 Downscaled products", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Oriana Chegwidden", | |
"github": "orianac" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://carbonplan.org/" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.pr.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.pr.zarr" | |
}, | |
{ | |
"name": "ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.tasmax.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.tasmax.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
}, | |
{ | |
"title": "Dry Spell Corn", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Oriana Chegwidden", | |
"github": "orianac" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://carbonplan.org/" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "CanESM5-ssp370-full-time-extent.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/dryspells_corn/CanESM5-ssp370-full-time-extent.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
}, | |
{ | |
"title": "Sample Australia Cordex Data", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Anderson Banihirwe", | |
"github": "andersy005" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://carbonplan.org/" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "sample_australia_cordex_data.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/single_timestep/sample_australia_cordex_data.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
}, | |
{ | |
"title": "dataset_1", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Raphael Hagen", | |
"github": "norlandrhagen" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "test_dataset1.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset1.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
}, | |
{ | |
"title": "dataset_2", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Raphael Hagen", | |
"github": "norlandrhagen" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "test_dataset2.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset2.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
}, | |
{ | |
"title": "dataset_3", | |
"description": "dataset_description.", | |
"maintainers": [ | |
{ | |
"name": "Raphael Hagen", | |
"github": "norlandrhagen" | |
} | |
], | |
"provenance": { | |
"providers": [ | |
{ | |
"name": "carbonplan", | |
"description": "carbonplan_test_dataset", | |
"roles": [ | |
"producer", | |
"licensor" | |
], | |
"url": "https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html" | |
} | |
], | |
"license": "CC-BY-NC v.4.0", | |
"license_link": null | |
}, | |
"thumbnail": null, | |
"tags": [ | |
"zarr", | |
"climate" | |
], | |
"links": null, | |
"stores": [ | |
{ | |
"name": "test_dataset3.zarr", | |
"url": "s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset3.zarr" | |
} | |
], | |
"doi_citation": null, | |
"demo": true | |
} | |
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
title: "Dry Spell Corn" | |
description: "dataset_description." | |
recipes: | |
- id: recipe | |
object: "recipe:recipe" | |
provenance: | |
providers: | |
- name: "carbonplan" | |
description: "carbonplan_test_dataset" | |
roles: | |
- producer | |
- licensor | |
url: https://carbonplan.org/ | |
license: "CC-BY-NC v.4.0" | |
maintainers: | |
- name: "Oriana Chegwidden" | |
github: orianac |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
title: "CMIP6 Downscaled products" | |
description: "dataset_description." | |
recipes: | |
- id: recipe | |
object: "recipe:recipe" | |
provenance: | |
providers: | |
- name: "carbonplan" | |
description: "carbonplan_test_dataset" | |
roles: | |
- producer | |
- licensor | |
url: https://carbonplan.org/ | |
license: "CC-BY-NC v.4.0" | |
maintainers: | |
- name: "Oriana Chegwidden" | |
github: orianac |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
title: "Sample Australia Cordex Data" | |
description: "dataset_description." | |
recipes: | |
- id: recipe | |
object: "recipe:recipe" | |
provenance: | |
providers: | |
- name: "carbonplan" | |
description: "carbonplan_test_dataset" | |
roles: | |
- producer | |
- licensor | |
url: https://carbonplan.org/ | |
license: "CC-BY-NC v.4.0" | |
maintainers: | |
- name: "Anderson Banihirwe" | |
github: andersy005 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "90275cba-9286-4cca-8dcf-62dbae62da09", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import fsspec\n", | |
"import xarray as xr\n", | |
"import upath\n", | |
"import yaml\n", | |
"from collections import defaultdict\n", | |
"import json" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "b9e1934e-3f3c-4dee-985c-e94f82282ca0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"bucket = 's3://carbonplan-data-viewer/demo/ncview-2.0'\n", | |
"fs = fsspec.filesystem('s3')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "cf60cff6-a71d-430d-9d66-80f29f1e50e4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def find_zarr_stores(fs, bucket):\n", | |
"    \"\"\"Return 's3://' URLs of Zarr stores found one or two levels below `bucket`.\n", | |
"\n", | |
"    A directory is treated as a store when its last path segment ends with\n", | |
"    '.zarr'. Testing the whole path for the substring 'zarr' would flag every\n", | |
"    directory whenever the bucket or a parent prefix contains 'zarr'.\n", | |
"\n", | |
"    `fs` must provide `ls(path, detail=True)` returning dicts with 'name' and\n", | |
"    'type' keys. Directories that are not stores are searched one level deeper;\n", | |
"    stores themselves are never descended into.\n", | |
"    \"\"\"\n", | |
"\n", | |
"    def is_store(entry):\n", | |
"        # Decide on the directory's own name, not on the path leading to it.\n", | |
"        basename = entry['name'].rstrip('/').rsplit('/', 1)[-1]\n", | |
"        return entry['type'] == 'directory' and basename.endswith('.zarr')\n", | |
"\n", | |
"    stores = []\n", | |
"    for p in fs.ls(bucket, detail=True):\n", | |
"        if is_store(p):\n", | |
"            stores.append('s3://' + p['name'])\n", | |
"        elif p['type'] == 'directory':\n", | |
"            # Not a store itself: look for stores directly inside it.\n", | |
"            stores.extend(\n", | |
"                's3://' + d['name'] for d in fs.ls(p['name'], detail=True) if is_store(d)\n", | |
"            )\n", | |
"\n", | |
"    return stores" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "f6f73778-1f6d-4bbc-8bba-bae7a482920f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['s3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.pr.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.tasmax.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/dryspells_corn/CanESM5-ssp370-full-time-extent.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/single_timestep/sample_australia_cordex_data.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset1.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset2.zarr',\n", | |
" 's3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset3.zarr']" | |
] | |
}, | |
"execution_count": 19, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"stores = find_zarr_stores(fs, bucket)\n", | |
"stores" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"id": "924e572b-d1ec-4440-be3c-5bd7111af94d", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pydantic\n", | |
"\n", | |
"\n", | |
"class Store(pydantic.BaseModel):\n", | |
"    # One Zarr store listed under a feedstock.\n", | |
"    name: str = pydantic.Field(description='Name of the store')\n", | |
"    url: str = pydantic.Field(description='URL of the store')\n", | |
"\n", | |
"\n", | |
"class Link(pydantic.BaseModel):\n", | |
"    # A labelled hyperlink attached to a feedstock.\n", | |
"    label: str = pydantic.Field(description='Label of the link')\n", | |
"    url: str = pydantic.Field(description='URL of the link')\n", | |
"\n", | |
"\n", | |
"class LicenseLink(pydantic.BaseModel):\n", | |
"    # License title with an optional URL.\n", | |
"    title: str = pydantic.Field(description='Name of the license')\n", | |
"    url: str | None = pydantic.Field(default=None, description='URL of the license')\n", | |
"\n", | |
"\n", | |
"class Maintainer(pydantic.BaseModel):\n", | |
"    # Person responsible for a feedstock; the GitHub handle is optional.\n", | |
"    name: str = pydantic.Field(description='Name of the maintainer')\n", | |
"    github: str | None = pydantic.Field(default=None, description='GitHub username of the maintainer')\n", | |
"\n", | |
"\n", | |
"class Provider(pydantic.BaseModel):\n", | |
"    # Organisation named in the provenance section.\n", | |
"    name: str = pydantic.Field(description='Name of the provider')\n", | |
"    description: str = pydantic.Field(description='Description of the provider')\n", | |
"    roles: list[str] | None = pydantic.Field(default=None, description='Roles of the provider')\n", | |
"    url: str | None = pydantic.Field(default=None, description='URL of the provider')\n", | |
"\n", | |
"\n", | |
"class Provenance(pydantic.BaseModel):\n", | |
"    # Providers and licensing information of a feedstock.\n", | |
"    providers: list[Provider]\n", | |
"    license: str\n", | |
"    license_link: LicenseLink | None = None\n", | |
"\n", | |
"\n", | |
"class Feedstock(pydantic.BaseModel):\n", | |
"    # Fields are validated again whenever they are assigned after construction.\n", | |
"    model_config = pydantic.ConfigDict(validate_assignment=True)\n", | |
"\n", | |
"    title: str = pydantic.Field(description='Title of the feedstock')\n", | |
"    description: str = pydantic.Field(description='Description of the feedstock')\n", | |
"    maintainers: list[Maintainer]\n", | |
"    provenance: Provenance\n", | |
"    thumbnail: pydantic.HttpUrl | None = pydantic.Field(default=None, description='Thumbnail of the feedstock')\n", | |
"    tags: list[str] | None = pydantic.Field(default=None, description='Tags of the dataset')\n", | |
"    links: list[Link] | None = None\n", | |
"    stores: list[Store] | None = None\n", | |
"    doi_citation: pydantic.HttpUrl | None = None\n", | |
"    demo: bool = pydantic.Field(default=False, description='Whether the dataset is a demo dataset')\n", | |
"\n", | |
"\n", | |
"\n", | |
"def convert_to_raw_github_url(github_url):\n", | |
"    \"\"\"Turn a github.com 'blob' page URL into its raw.githubusercontent.com form.\n", | |
"\n", | |
"    URLs that already point at raw.githubusercontent.com are returned unchanged.\n", | |
"    Only the first 'github.com' and the first '/blob/' path segment are\n", | |
"    rewritten, so a branch, directory or file name that merely starts with\n", | |
"    'blob' (e.g. '/blobs/') is left intact.\n", | |
"    \"\"\"\n", | |
"    if 'raw.githubusercontent.com' in github_url:\n", | |
"        return github_url\n", | |
"\n", | |
"    raw_url = github_url.replace('github.com', 'raw.githubusercontent.com', 1)\n", | |
"\n", | |
"    # Match the whole '/blob/' segment and replace it once; stripping every\n", | |
"    # '/blob' substring would also mangle later path components.\n", | |
"    return raw_url.replace('/blob/', '/', 1)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"id": "fad6f60e-e3fd-4c99-a6ff-ba114c6cb1e9", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build one Feedstock entry per store from the meta.yaml referenced in its attributes.\n", | |
"catalog = []\n", | |
"for store in stores:\n", | |
"    # Only the global attributes are read, so release the dataset right away.\n", | |
"    with xr.open_dataset(store, engine='zarr', chunks={}) as ds:\n", | |
"        meta_url = convert_to_raw_github_url(ds.attrs['pangeo-forge:meta_yaml_url'])\n", | |
"    # meta.yaml is fetched from a remote URL: safe_load only builds plain\n", | |
"    # Python types and never instantiates objects from YAML tags.\n", | |
"    meta = yaml.safe_load(upath.UPath(meta_url).read_text())\n", | |
"    data = Feedstock.model_validate(meta)\n", | |
"    # The store name is the last path segment of its URL.\n", | |
"    data.stores = [{'name': store.split('/')[-1], 'url': store}]\n", | |
"    catalog.append(data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"id": "020495b0-2a68-4485-94b8-ad0c5f9cec6d", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[Feedstock(title='CMIP6 Downscaled products', description='dataset_description.', maintainers=[Maintainer(name='Oriana Chegwidden', github='orianac')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://carbonplan.org/')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.pr.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.pr.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='CMIP6 Downscaled products', description='dataset_description.', maintainers=[Maintainer(name='Oriana Chegwidden', github='orianac')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://carbonplan.org/')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.tasmax.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/ScenarioMIP.CCCma.CanESM5.ssp245.r1i1p1f1.annual.GARD-SV.tasmax.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='Dry Spell Corn', description='dataset_description.', maintainers=[Maintainer(name='Oriana Chegwidden', github='orianac')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://carbonplan.org/')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='CanESM5-ssp370-full-time-extent.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/dryspells_corn/CanESM5-ssp370-full-time-extent.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='Sample Australia Cordex Data', description='dataset_description.', maintainers=[Maintainer(name='Anderson Banihirwe', github='andersy005')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://carbonplan.org/')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='sample_australia_cordex_data.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/single_timestep/sample_australia_cordex_data.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='dataset_1', description='dataset_description.', maintainers=[Maintainer(name='Raphael Hagen', github='norlandrhagen')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='test_dataset1.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset1.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='dataset_2', description='dataset_description.', maintainers=[Maintainer(name='Raphael Hagen', github='norlandrhagen')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='test_dataset2.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset2.zarr')], doi_citation=None, demo=False),\n", | |
" Feedstock(title='dataset_3', description='dataset_description.', maintainers=[Maintainer(name='Raphael Hagen', github='norlandrhagen')], provenance=Provenance(providers=[Provider(name='carbonplan', description='carbonplan_test_dataset', roles=['producer', 'licensor'], url='https://dapds00.nci.org.au/thredds/catalogs/zv2/catalog.html')], license='CC-BY-NC v.4.0', license_link=None), thumbnail=None, tags=None, links=None, stores=[Store(name='test_dataset3.zarr', url='s3://carbonplan-data-viewer/demo/ncview-2.0/test_dataset3.zarr')], doi_citation=None, demo=False)]" | |
] | |
}, | |
"execution_count": 38, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"catalog" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"id": "7436cadd-bc26-4017-a4dd-5337c85ade57", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Merge catalog entries that share a title into one entry listing all their stores.\n", | |
"consolidated_data = {}\n", | |
"for item in catalog:\n", | |
"    existing = consolidated_data.get(item.title)\n", | |
"    if existing is None:\n", | |
"        # Deep copy: the merged entry must not share its 'stores' list with the\n", | |
"        # entry kept in `catalog`, otherwise merging would alter `catalog` too.\n", | |
"        consolidated_data[item.title] = item.model_copy(deep=True)\n", | |
"    else:\n", | |
"        # Assign a new list instead of extending in place; `stores` may be None.\n", | |
"        existing.stores = [*(existing.stores or []), *(item.stores or [])]\n", | |
"consolidated_web_catalog = list(consolidated_data.values())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 43, | |
"id": "3061afa0-9848-4909-bf51-18dfb032abe2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# write catalog to JSON file for use in the website\n", | |
"# model_dump(mode='json') turns each model into JSON-compatible builtins, so no\n", | |
"# custom `default=` encoder (and no extra import) is needed.\n", | |
"with open('consolidated-web-catalog.json', 'w') as f:\n", | |
"    json.dump(\n", | |
"        [item.model_dump(mode='json') for item in consolidated_web_catalog],\n", | |
"        f,\n", | |
"        indent=2,\n", | |
"    )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "8e9bdfd8-51e7-47e6-b170-c3595cd734c6", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment