rbeucher · July 10, 2024 01:04
diff --git a/find_cmip6_model.py b/find_cmip6_model.py
 #!/usr/bin/env python3

 import glob
 import sys
 import re

 def return_datasets(activity="*",
                     institute="*",
                     dataset="*",
                     exp="*",
                     ensemble="*",
                     mip="*",
                     short_name="*",
                     grid="*",
                     version="*"):
    """Search the hard-coded CMIP6 root directories and report the matches.

    For every root directory a glob pattern of the form
    ``activity/institute/dataset/exp/ensemble/mip/short_name/grid/version``
    is built from the arguments. Each argument defaults to ``"*"``, i.e. it
    matches any single path component.

    Every matching path is parsed with ``extract_and_parse_path`` and the
    resulting dict is printed, one per line.

    Returns:
        list: the parsed dicts, in the order they were printed. The list is
        empty when nothing matches (for example when the root directories
        do not exist on this machine).
    """
    rootpath_cmip6 = [
        "/g/data/oi10/replicas/CMIP6/",
        "/g/data/fs38/publications/CMIP6/",
        "/g/data/xp65/public/apps/esmvaltool/replicas/CMIP6/",
    ]

    # Accumulate across *all* roots; previously the list was overwritten on
    # every iteration and never returned.
    results = []

    for path in rootpath_cmip6:
        query = path + f'{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}'
        for found in glob.glob(query):
            parsed = extract_and_parse_path(found, path)
            print(parsed)
            results.append(parsed)

    return results


 def extract_and_parse_path(full_path, root_value):
    """Parse the part of *full_path* below *root_value* into named components.

    The remainder of the path must consist of exactly nine ``/``-separated,
    non-empty components, matched in this order: activity, institute,
    dataset, exp, ensemble, mip, shortname, grid, version.

    Args:
        full_path: path string that starts with *root_value*.
        root_value: root directory prefix; a trailing ``/`` is added when
            missing.

    Returns:
        dict: the ``'dataset'`` and ``'institute'`` components. Only these
        two are reported; an entry whose value is a single space is omitted.

    Raises:
        ValueError: if *full_path* does not start with *root_value*, or if
            the remainder does not have exactly nine components.
    """
    # Ensure the root ends with a slash so that "/a/b" cannot match "/a/bc/...".
    if not root_value.endswith('/'):
        root_value += '/'

    if not full_path.startswith(root_value):
        raise ValueError("The provided root value does not match the beginning of the full path.")

    remaining_path = full_path[len(root_value):]

    pattern = r'^(?P<activity>[^/]+)/(?P<institute>[^/]+)/(?P<dataset>[^/]+)/(?P<exp>[^/]+)/(?P<ensemble>[^/]+)/(?P<mip>[^/]+)/(?P<shortname>[^/]+)/(?P<grid>[^/]+)/(?P<version>[^/]+)$'

    match = re.match(pattern, remaining_path)
    if match is None:
        # re.match returns None on failure; fail with a clear message instead
        # of an AttributeError on ``None.group``.
        raise ValueError(
            f"Expected nine '/'-separated components below {root_value!r}, "
            f"got {remaining_path!r}."
        )

    result = {
        'dataset': match.group('dataset'),
        'institute': match.group('institute'),
    }

    return {key: val for key, val in result.items() if val != " "}

 if __name__ == "__main__":
    # Command-line usage: every argument after the script name is a
    # ``key=value`` pair that is forwarded to return_datasets() as a keyword
    # argument, e.g. ``dataset=ACCESS-ESM1-5 institute=CSIRO``.
    kwargs = {}

    for arg in sys.argv[1:]:  # Exclude the script name
        # Split on the first '=' only, so values may themselves contain '='.
        key, sep, value = arg.partition('=')
        if not sep or not key:
            sys.exit(f"Invalid argument {arg!r}: expected key=value")
        kwargs[key] = value

    return_datasets(**kwargs)

 #return_datasets(grid="gn", ensemble="r1i1p1f1", dataset="NorESM2-MM", institute="NCC")
 #return_datasets(ensemble="r1i1p1f1", dataset="ACCESS-ESM1-5", institute="CSIRO")
	#!/usr/bin/env python3

	import glob
	import sys
	import re

	def return_datasets(activity="*",
	                    institute="*",
	                    dataset="*",
	                    exp="*",
	                    ensemble="*",
	                    mip="*",
	                    short_name="*",
	                    grid="*",
	                    version="*"):
	    """Search the hard-coded CMIP6 root directories and report the matches.

	    For every root directory a glob pattern of the form
	    ``activity/institute/dataset/exp/ensemble/mip/short_name/grid/version``
	    is built from the arguments. Each argument defaults to ``"*"``, i.e. it
	    matches any single path component.

	    Every matching path is parsed with ``extract_and_parse_path`` and the
	    resulting dict is printed, one per line.

	    Returns:
	        list: the parsed dicts, in the order they were printed. The list is
	        empty when nothing matches (for example when the root directories
	        do not exist on this machine).
	    """
	    rootpath_cmip6 = [
	        "/g/data/oi10/replicas/CMIP6/",
	        "/g/data/fs38/publications/CMIP6/",
	        "/g/data/xp65/public/apps/esmvaltool/replicas/CMIP6/",
	    ]

	    # Accumulate across *all* roots; previously the list was overwritten on
	    # every iteration and never returned.
	    results = []

	    for path in rootpath_cmip6:
	        query = path + f'{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}'
	        for found in glob.glob(query):
	            parsed = extract_and_parse_path(found, path)
	            print(parsed)
	            results.append(parsed)

	    return results


	def extract_and_parse_path(full_path, root_value):
	    """Parse the part of *full_path* below *root_value* into named components.

	    The remainder of the path must consist of exactly nine ``/``-separated,
	    non-empty components, matched in this order: activity, institute,
	    dataset, exp, ensemble, mip, shortname, grid, version.

	    Args:
	        full_path: path string that starts with *root_value*.
	        root_value: root directory prefix; a trailing ``/`` is added when
	            missing.

	    Returns:
	        dict: the ``'dataset'`` and ``'institute'`` components. Only these
	        two are reported; an entry whose value is a single space is omitted.

	    Raises:
	        ValueError: if *full_path* does not start with *root_value*, or if
	            the remainder does not have exactly nine components.
	    """
	    # Ensure the root ends with a slash so that "/a/b" cannot match "/a/bc/...".
	    if not root_value.endswith('/'):
	        root_value += '/'

	    if not full_path.startswith(root_value):
	        raise ValueError("The provided root value does not match the beginning of the full path.")

	    remaining_path = full_path[len(root_value):]

	    pattern = r'^(?P<activity>[^/]+)/(?P<institute>[^/]+)/(?P<dataset>[^/]+)/(?P<exp>[^/]+)/(?P<ensemble>[^/]+)/(?P<mip>[^/]+)/(?P<shortname>[^/]+)/(?P<grid>[^/]+)/(?P<version>[^/]+)$'

	    match = re.match(pattern, remaining_path)
	    if match is None:
	        # re.match returns None on failure; fail with a clear message instead
	        # of an AttributeError on ``None.group``.
	        raise ValueError(
	            f"Expected nine '/'-separated components below {root_value!r}, "
	            f"got {remaining_path!r}."
	        )

	    result = {
	        'dataset': match.group('dataset'),
	        'institute': match.group('institute'),
	    }

	    return {key: val for key, val in result.items() if val != " "}

	if __name__ == "__main__":
	    # Command-line usage: every argument after the script name is a
	    # ``key=value`` pair that is forwarded to return_datasets() as a keyword
	    # argument, e.g. ``dataset=ACCESS-ESM1-5 institute=CSIRO``.
	    kwargs = {}

	    for arg in sys.argv[1:]:  # Exclude the script name
	        # Split on the first '=' only, so values may themselves contain '='.
	        key, sep, value = arg.partition('=')
	        if not sep or not key:
	            sys.exit(f"Invalid argument {arg!r}: expected key=value")
	        kwargs[key] = value

	    return_datasets(**kwargs)

	#return_datasets(grid="gn", ensemble="r1i1p1f1", dataset="NorESM2-MM", institute="NCC")
	#return_datasets(ensemble="r1i1p1f1", dataset="ACCESS-ESM1-5", institute="CSIRO")