Last active
July 10, 2024 01:04
-
-
Save rbeucher/f8e8f832720dd97c6bfc8f61088453d4 to your computer and use it in GitHub Desktop.
find_cmip6_model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import glob | |
import sys | |
import re | |
def return_datasets(activity="*",
                    institute="*",
                    dataset="*",
                    exp="*",
                    ensemble="*",
                    mip="*",
                    short_name="*",
                    grid="*",
                    version="*"):
    """Print and return the datasets matching the given path components.

    Each argument fills one directory level of the glob query
    ``{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}``
    which is evaluated under every hard-coded CMIP6 root directory. The
    default ``"*"`` matches any directory name at that level; any pattern
    understood by ``glob.glob`` may be used.

    Every matching path is parsed with ``extract_and_parse_path`` against the
    root it was found under, and the parsed entry is printed.

    Returns:
        list: The parsed entries, in the order they were printed. Empty when
        nothing matches (for instance when none of the roots exist).
    """
    rootpath_cmip6 = [
        "/g/data/oi10/replicas/CMIP6/",
        "/g/data/fs38/publications/CMIP6/",
        "/g/data/xp65/public/apps/esmvaltool/replicas/CMIP6/",
    ]
    suffix = f'{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}'

    parsed = []
    for path in rootpath_cmip6:
        # Parse each hit against the root it was found under, so that the
        # prefix check in extract_and_parse_path always succeeds.
        for result in glob.glob(path + suffix):
            entry = extract_and_parse_path(result, path)
            print(entry)
            parsed.append(entry)
    return parsed
def extract_and_parse_path(full_path, root_value):
    """Parse the part of ``full_path`` that follows ``root_value``.

    After the root is stripped, the remainder must consist of exactly nine
    non-empty ``/``-separated components, in this order: activity, institute,
    dataset, exp, ensemble, mip, shortname, grid, version.

    Args:
        full_path: Path to parse; must start with ``root_value``.
        root_value: Root prefix. A trailing ``/`` is appended when missing.

    Returns:
        dict: The ``dataset`` and ``institute`` components. An entry whose
        value is exactly a single space is omitted.

    Raises:
        ValueError: If ``full_path`` does not start with ``root_value``, or if
            the remainder does not have exactly nine non-empty components.
    """
    # Ensure the root ends with a slash for accurate comparison
    if not root_value.endswith('/'):
        root_value += '/'

    # Check if the full path starts with the given root value
    if not full_path.startswith(root_value):
        raise ValueError("The provided root value does not match the beginning of the full path.")

    # Remove the root part from the path
    remaining_path = full_path[len(root_value):]

    # One named group per directory level below the root
    pattern = r'^(?P<activity>[^/]+)/(?P<institute>[^/]+)/(?P<dataset>[^/]+)/(?P<exp>[^/]+)/(?P<ensemble>[^/]+)/(?P<mip>[^/]+)/(?P<shortname>[^/]+)/(?P<grid>[^/]+)/(?P<version>[^/]+)$'
    match = re.match(pattern, remaining_path)
    if match is None:
        # re.match returns None on failure; report it as a ValueError rather
        # than letting an AttributeError surface from match.group().
        raise ValueError(
            f"Path {remaining_path!r} does not have the expected nine components below the root."
        )

    # Only the dataset and institute are reported; the other groups are
    # matched solely to validate the layout of the path.
    result = {
        'dataset': match.group('dataset'),
        'institute': match.group('institute'),
    }
    return {key: val for key, val in result.items() if val != " "}
if __name__ == "__main__":
    # Command-line usage: every argument is a key=value pair that is passed
    # as a keyword argument to return_datasets, for example:
    #   grid=gn ensemble=r1i1p1f1 dataset=NorESM2-MM institute=NCC
    #   ensemble=r1i1p1f1 dataset=ACCESS-ESM1-5 institute=CSIRO
    kwargs = {}
    for arg in sys.argv[1:]:  # Exclude the script name
        # Split on the first '=' only, so the value itself may contain '='.
        key, sep, value = arg.partition('=')
        if not sep or not key:
            sys.exit(f"Invalid argument {arg!r}: expected key=value")
        kwargs[key] = value
    return_datasets(**kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment