Skip to content

Instantly share code, notes, and snippets.

@emlys
Last active February 24, 2021 22:10
Show Gist options
  • Select an option

  • Save emlys/1964db5790aa0566d2c08b1e2d70aef2 to your computer and use it in GitHub Desktop.

Select an option

Save emlys/1964db5790aa0566d2c08b1e2d70aef2 to your computer and use it in GitHub Desktop.

Nested ARGS_SPEC data types examples and specification

Real model examples

leaving out all arg-level properties other than "type" (plus the type-specific keys such as "structure", "units", "bands" and "fields") to save space

Coastal Blue Carbon selected args

"landcover_snapshot_csv": {
    "type": "csv",
    "structure": {
        "columns": {
            "snapshot_year": {
                "cells": {
                    "description": (
                        "The year that the raster in this row represents. Each "
                        "snapshot year must be unique in this table; the same "
                        "snapshot year cannot be provided twice."),
                    "type": "number",
                    "units": "year"
                }
            },
            "raster_path": {
                "cells": {
                    "description": (
                        "the path to a landcover raster on disk. May be an absolute "
                        "path, or relative to the location of this CSV file on disk. "
                        "The raster located at this path must be a land-use / land "
                        "cover raster with integer codes matching those in the "
                        "biophysical table."),
                    "type": "raster",
                    "bands": {
                        1: {
                            "type": "number",
                            "units": "unitless"
                        }
                    }
                }
            }
        }
    }
},
"analysis_year": {
    "type": "number",
    "units": "year"
},
"biophysical_table_path": {
    "type": "csv",
    "structure": {
        "columns": {
            "code": {
                "cells": {
                    "type": "number",
                    "units": "unitless"
                }
            },
            "lulc-class": {"cells": { "type": "string" }},
            "biomass-initial": {
                "cells": {
                    "type": "number",
                    "units": "megatonnes/hectare"
                }
            },
            'soil-initial': {
                "cells": {
                    "type": "number",
                    "units": "megatonnes/hectare"
                }
            },
            'litter-initial': {
                "cells": {
                    "type": "number",
                    "units": "megatonnes/hectare"
                }
            },
            "biomass-half-life": {
                "cells": {
                    "type": "number",
                    "units": "years"
                }
            },
            "biomass-low-impact-disturb": {"cells": { "type": "ratio" }},
            "biomass-med-impact-disturb": {"cells": { "type": "ratio" }},
            "biomass-high-impact-disturb": {"cells": { "type": "ratio" }},
            "soil-low-impact-disturb": {"cells": { "type": "ratio" }},
            "soil-med-impact-disturb": {"cells": { "type": "ratio" }},
            "soil-high-impact-disturb": {"cells": { "type": "ratio" }},
            "soil-yearly-accumulation": {
                "cells": {
                    "type": "number",
                    "units": "megatonnes/hectare"
                }
            },
            "litter-yearly-accumulation": {
                "cells": {
                    "type": "number",
                    "units": "megatonnes/hectare"
                }
            }
        }
    }
}

Pollination Guild Table

https://storage.googleapis.com/releases.naturalcapitalproject.org/invest-userguide/latest/croppollination.html#data-needs

"guild_table_path": {
    "type": "csv",
    "structure": {
        "columns": {
            "species": {
                "cells": {
                    "description": "species or guild name",
                    "type": "freestyle_string"
                }
            },
            "nesting_suitability_[SUBSTRATE]_index": {
                "description": "nesting_suitability_[SUBSTRATE]_index for each user-defined substrate",
                "cells": {
                    "description": "nesting suitability ratio for this species and substrate",
                    "type": "ratio"
                }
            },
            "foraging_activity_[SEASON]_index": {
                "description": "foraging_activity_[SEASON]_index for each user-defined season",
                "cells": {
                    "description": "foraging activity ratio for this species and season",
                    "type": "ratio"
                }
            },
            "alpha": {
                "cells": {
                    "description": "average distance each species or guild travels to forage on flowers",
                    "type": "number",
                    "units": "meters"
                }
            },
            "relative_abundance": {
                "cells": {
                    "description": "weighted relative abundance of the species’ contribution to pollinator abundance",
                    "type": "ratio"
                }
            }
        }
    }
}

Wave Energy selected args

"aoi_path": {
    "type": "vector",
    "fields": {}
},
'machine_perf_path': {
    'type': 'csv',
    'structure': {
        'rows': {
            '[WAVE HEIGHT]': {
                'description': 'user-defined wave height bins',
                'type': 'number',
                'units': 'meters',
                'columns': {
                    '[WAVE PERIOD]': {
                        'description': 'user-defined wave period bins',
                        'type': 'number',
                        'units': 'seconds',
                        'cells': {
                            'description': 'machine performance',
                            'type': 'number',
                            'units': 'kilowatts'
                        }
                    } 
                }
            } 
        }
    }
},
"dem_path": {
    "type": "raster",
    "bands": {
        1: {
            "type": "number",
            "units": "meters"
        }
    }
},
"machine_param_path": {
    "type": "csv",
    "structure": {
        "rows": {
            "CapMax": {
                "cells": {
                    "description": "machine maximum capacity",
                    "type": "number",
                    "units": "kilowatts"
                }
            },
            "HsMax": {
                "cells": {
                    "description": "machine maximum wave height",
                    "type": "number",
                    "units": "meters"
                }
            },
            "TpMax": {
                "cells": {
                    "description": "machine maximum wave period",
                    "type": "number",
                    "units": "kilowatts"
                }
            }
        }
    }
}          

Detailed specification for csv type structure key

let an index be a key:value pair in this format:

    "rows|columns": {
        "row/column name or pattern": {
            "description": "description of this row/column",
            "type": "number|ratio|percent|boolean|freestyle_string|option_string|raster|vector|csv|file|directory",
            "units": "units" (only if "type" == "number"),
            (either another index or
            "cells": {
                "description": "description of this cell type",
                "type": "number",
                "units": "unit"
            })
        },
        ... for every header in the row or column
    }

then the new key added to csv args will look like

    "structure": {
        index
    }

note: it's important to distinguish between the "description", "type", and "units" of the column/row, and the "description", "type", and "units" of the cells. the distinction is clearest in the wave energy machine performance table. its rows might have properties like

    "description": "user-defined wave height bins",
    "type": "number",
    "units": "meters"

while its cells might have properties like

    "description": "machine performance at this height/wave period combination",
    "type": "number",
    "units": "kilowatts"

the distinction is less obvious but still exists for all other tables. basically, the row/column-level properties describe the contents of the index/header cell itself, while the cell-level properties describe the cells under it.

for user-defined row/column names that are strings, it's still helpful to include a "description" and "type". for instance for the pollination guild table, its columns might have properties like

    "description": "nesting_suitability_[SUBSTRATE]_index for each user-defined substrate",
    "type": "string"

while its cells might have properties like

    "description": "nesting suitability ratio for this guild and substrate",
    "type": "ratio"

in the majority of cases, all row/column names are static. for these it makes sense to omit the row/column-level "description", "type", and "units" because the name is self-describing and the type is always a string (open question: option_string or freestyle_string?).

Simplified CSV specification

Reworking the specification so that a table is described by either columns or rows, which cannot be nested.

Wave Energy machine performance table:

# Machine performance table in the simplified (non-nested) form: wave heights
# become the cells of a static 'Hs' column instead of a row index.
'machine_perf_path': {
    'type': 'csv',
    'structure': {
        'columns': {
            # Static column name: only its cells are described.
            'Hs': {
                'cells': {
                    'description': 'user-defined wave height bins',
                    'type': 'number',
                    'units': 'meters',
                }
            },
            # Pattern column: the header itself is a user-defined value, so it
            # carries its own description/type/units alongside those of its cells.
            '[WAVE PERIOD]': {
                'description': 'user-defined wave period bins',
                'type': 'number',
                'units': 'seconds',
                'cells': {
                    'description': 'machine performance',
                    'type': 'number',
                    'units': 'kilowatts'
                }
            } 
        }
    }
}

Fisheries migration table:

'migration_table_path': {
    'type': 'csv',
    'structure': {
        'columns': {
            'Migration': {
                'cells': {
                    'description': 'Sink subregion names',
                    'type': 'string'
                }
            },
            '[source subregion names]': {
                'description': 'Source subregion names',
                'type': 'string',
                'cells': {
                    'description': 'proportion of migration from source to sink',
                    'type': 'ratio'
                }
            } 
        }
    }
}

so the new specification would be

    "rows|columns": {
    
        # For static names:
        "row/column name": {
            "cells": {
                "description": "description of this cell type",
                "type": "number|ratio|percent|boolean|freestyle_string|option_string|raster|vector|csv|file|directory",
                "units": "unit" (only if "type" == "number")
            }
        },
        
        # For user-defined names:
        "row/column pattern": {
            "description": "description of this row/column",
            "type": "number|ratio|percent|boolean|freestyle_string|option_string|raster|vector|csv|file|directory",
            "units": "units" (only if "type" == "number"),
            "cells": {
                "description": "description of this cell type",
                "type": "number|ratio|percent|boolean|freestyle_string|option_string|raster|vector|csv|file|directory",
                "units": "unit" (only if "type" == "number")
            }
        },
        ... for every header in the row or column
    }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment