Skip to content

Instantly share code, notes, and snippets.

@rbeucher
Last active July 10, 2024 01:04
Show Gist options
  • Save rbeucher/f8e8f832720dd97c6bfc8f61088453d4 to your computer and use it in GitHub Desktop.
Save rbeucher/f8e8f832720dd97c6bfc8f61088453d4 to your computer and use it in GitHub Desktop.
find_cmip6_model
#!/usr/bin/env python3
import glob
import sys
import re
def return_datasets(activity="*",
                    institute="*",
                    dataset="*",
                    exp="*",
                    ensemble="*",
                    mip="*",
                    short_name="*",
                    grid="*",
                    version="*"):
    """Search the CMIP6 root directories and print every matching dataset.

    Each argument is a glob pattern for one directory level below a root,
    in the order ``activity/institute/dataset/exp/ensemble/mip/short_name/
    grid/version``. The default ``"*"`` matches any name at that level.

    For every directory matched under every root, the parsed result from
    ``extract_and_parse_path`` is printed on its own line.

    Returns:
        A list of the parsed dicts, in the order they were printed. The list
        is empty when nothing matches.
    """
    rootpath_cmip6 = [
        "/g/data/oi10/replicas/CMIP6/",
        "/g/data/fs38/publications/CMIP6/",
        "/g/data/xp65/public/apps/esmvaltool/replicas/CMIP6/",
    ]
    # The part of the query below the root is the same for every root, so
    # build it once instead of once per iteration.
    query_tail = (
        f'{activity}/{institute}/{dataset}/{exp}/{ensemble}/'
        f'{mip}/{short_name}/{grid}/{version}'
    )
    found = []
    for path in rootpath_cmip6:
        for result in glob.glob(path + query_tail):
            parsed = extract_and_parse_path(result, path)
            print(parsed)
            found.append(parsed)
    return found
def extract_and_parse_path(full_path, root_value):
    """Parse a dataset directory path located below ``root_value``.

    The part of ``full_path`` after the root must consist of exactly nine
    non-empty, slash-separated components, in this order: activity,
    institute, dataset, exp, ensemble, mip, shortname, grid, version.

    Args:
        full_path: Path of the dataset directory, starting with the root.
        root_value: Root directory; a trailing slash is added if missing.

    Returns:
        A dict with the ``'dataset'`` and ``'institute'`` components. An entry
        whose value is exactly a single space is omitted.

    Raises:
        ValueError: If ``full_path`` does not start with ``root_value``, or
            if the remainder does not have exactly nine non-empty components.
    """
    # Ensure the root ends with a slash for accurate comparison
    if not root_value.endswith('/'):
        root_value += '/'

    # Check if the full path starts with the given root value
    if not full_path.startswith(root_value):
        raise ValueError("The provided root value does not match the beginning of the full path.")

    facet_names = (
        'activity', 'institute', 'dataset', 'exp', 'ensemble',
        'mip', 'shortname', 'grid', 'version',
    )
    facet_values = full_path[len(root_value):].split('/')

    # Fail with a clear error instead of dereferencing a missing match when
    # the path is too short, too long, or contains an empty component.
    if len(facet_values) != len(facet_names) or not all(facet_values):
        raise ValueError(
            "The path below the root must have exactly "
            f"{len(facet_names)} non-empty components "
            f"({'/'.join(facet_names)}), got: {full_path[len(root_value):]!r}"
        )

    facets = dict(zip(facet_names, facet_values))

    # Only these two components are reported; key order fixes print order.
    result = {
        'dataset': facets['dataset'],
        'institute': facets['institute'],
    }
    return {key: val for key, val in result.items() if val != " "}
if __name__ == "__main__":
    # Command-line entry point: every argument must have the form key=value,
    # where key is a keyword argument of return_datasets, e.g.
    #   ensemble=r1i1p1f1 dataset=ACCESS-ESM1-5 institute=CSIRO
    kwargs = {}
    for arg in sys.argv[1:]:  # Exclude the script name
        # Split on the first '=' only, so a value may itself contain '='.
        key, separator, value = arg.partition('=')
        if not separator or not key:
            # Exit with a readable message rather than an unpacking traceback.
            sys.exit(f"Invalid argument {arg!r}: expected key=value")
        kwargs[key] = value
    return_datasets(**kwargs)
    # Example direct calls:
    #return_datasets(grid="gn", ensemble="r1i1p1f1", dataset="NorESM2-MM", institute="NCC")
    #return_datasets(ensemble="r1i1p1f1", dataset="ACCESS-ESM1-5", institute="CSIRO")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment