Skip to content

Instantly share code, notes, and snippets.

@mturley
Forked from pboyd/catalog.json
Last active January 29, 2025 02:10
Show Gist options
  • Save mturley/33aa2a28dfba3736388c54c71fd48e04 to your computer and use it in GitHub Desktop.
Save mturley/33aa2a28dfba3736388c54c71fd48e04 to your computer and use it in GitHub Desktop.
Model Registry Catalog (YAML converted to JSON for use with https://json-schema.app/start)
{
"$id": "https://kubeflow.org/model-registry/catalog.yaml",
"$schema": "https://json-schema.org/draft/2020-12/schema",
"title": "Model Catalog",
"type": "object",
"properties": {
"source": {
"type": "string",
"description": "The name of the catalog provider.",
"example": "Red Hat"
},
"models": {
"type": "array",
"description": "List of models available in the catalog. `repository` and `name` are used\nto uniquely identify a model, and should be unique within the catalog.",
"items": {
"type": "object",
"required": [
"repository",
"name"
],
"properties": {
"repository": {
"type": "string",
"description": "Name of the repository in the catalog.",
"example": "ibm-granite"
},
"name": {
"type": "string",
"description": "Code name of the model.",
"example": "granite-3.1-8b-base"
},
"provider": {
"type": "string",
"description": "Name of the organization or entity that provides the model.",
"example": "IBM"
},
"description": {
"type": "string",
"description": "Short description of the model."
},
"longDescription": {
"type": "string",
"description": "Longer description of the model."
},
"logo": {
"type": "string",
"format": "uri",
"description": "URL to the model's logo. A [data\nURL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)\nis recommended."
},
"readme": {
"type": "string",
"description": "Model documentation in Markdown."
},
"language": {
"type": "array",
"description": "List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).",
"items": {
"type": "string"
},
"example": [
"en",
"es",
"cs"
]
},
"license": {
"type": "string",
"description": "Short name of the model's license.",
"example": "apache-2.0"
},
"licenseLink": {
"type": "string",
"format": "uri",
"description": "URL to the license text."
},
"maturity": {
"type": "string",
"description": "Maturity level of the model.",
"example": "Generally Available"
},
"libraryName": {
"type": "string",
"example": "transformers"
},
"baseModel": {
"type": "array",
"description": "Reference to the base model (if any).",
"items": {
"type": "object",
"properties": {
"catalog": {
"type": "string",
"description": "Name of the catalog for an external base model. Omit for\nmodels in the same catalog.",
"example": "huggingface.io"
},
"repository": {
"type": "string",
"description": "Name of the repository in an external catalog where the base\nmodel exists. Omit for models in the same catalog.",
"example": "ibm-granite"
},
"name": {
"type": "string",
"example": "granite-3.1-8b-base"
}
}
}
},
"labels": {
"type": "array",
"description": "List of labels for categorization.",
"example": [
"language"
],
"items": {
"type": "string"
}
},
"tasks": {
"type": "array",
"description": "List of tasks the model is designed for.",
"items": {
"type": "string"
},
"example": [
"text-generation"
]
},
"createTimeSinceEpoch": {
"description": "Creation time in milliseconds since epoch.",
"type": "integer"
},
"lastUpdateTimeSinceEpoch": {
"description": "Last update time in milliseconds since epoch.",
"type": "integer"
},
"artifacts": {
"type": "array",
"description": "If a model has multiple versions, each version should have a\nseparate artifact.",
"items": {
"type": "object",
"properties": {
"protocol": {
"type": "string",
"description": "The protocol used to access the artifact (only `oci` for now).",
"enum": [
"oci"
]
},
"tags": {
"type": "array",
"description": "List of tags for the artifact. These are for information\nonly, and may include ephemeral tags that will be removed in\nthe future.",
"example": [
"2.1.2",
"2.1",
"2"
],
"items": {
"type": "string"
}
},
"uri": {
"type": "string",
"description": "Artifact URI."
}
}
}
}
}
}
}
}
}
$id: https://kubeflow.org/model-registry/catalog.yaml
$schema: https://json-schema.org/draft/2020-12/schema
title: Model Catalog
type: object
properties:
source:
type: string
description: The name of the catalog provider.
example: Red Hat
models:
type: array
description: |-
List of models available in the catalog. `repository` and `name` are used
to uniquely identify a model, and should be unique within the catalog.
items:
type: object
required:
- repository
- name
properties:
repository:
type: string
description: Name of the repository in the catalog.
example: ibm-granite
name:
type: string
description: Code name of the model.
example: granite-3.1-8b-base
provider:
type: string
description: Name of the organization or entity that provides the model.
example: IBM
description:
type: string
description: Short description of the model.
longDescription:
type: string
description: Longer description of the model.
logo:
type: string
format: uri
description: |-
URL to the model's logo. A [data
URL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)
is recommended.
readme:
type: string
description: Model documentation in Markdown.
language:
type: array
description: List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).
items:
type: string
example:
- en
- es
- cs
license:
type: string
description: Short name of the model's license.
example: apache-2.0
licenseLink:
type: string
format: uri
description: URL to the license text.
maturity:
type: string
description: Maturity level of the model.
example: Generally Available
libraryName:
type: string
example: transformers
baseModel:
type: array
description: Reference to the base model (if any).
items:
type: object
properties:
catalog:
type: string
description: |-
Name of the catalog for an external base model. Omit for
models in the same catalog.
example: huggingface.io
repository:
type: string
description: |-
Name of the repository in an external catalog where the base
model exists. Omit for models in the same catalog.
example: ibm-granite
name:
type: string
example: granite-3.1-8b-base
labels:
type: array
description: List of labels for categorization.
example:
- language
items:
type: string
tasks:
type: array
description: List of tasks the model is designed for.
items:
type: string
example:
- text-generation
createTimeSinceEpoch:
description: Creation time in milliseconds since epoch.
type: integer
lastUpdateTimeSinceEpoch:
description: Last update time in milliseconds since epoch.
type: integer
artifacts:
type: array
description: |-
If a model has multiple versions, each version should have a
separate artifact.
items:
type: object
properties:
protocol:
type: string
description: The protocol used to access the artifact (only `oci` for now).
enum:
- oci
tags:
type: array
description: |-
List of tags for the artifact. These are for information
only, and may include ephemeral tags that will be removed in
the future.
example: ["2.1.2", "2.1", "2"]
items:
type: string
uri:
type: string
description: Artifact URI.
source: Red Hat
models:
- repository: rhelai1
name: granite-8b-code-base
provider: IBM
description: A decoder-only code model designed for code generative tasks
longDescription: |-
Granite-8B-Code-Base is a decoder-only code model designed for code
generative tasks (e.g., code generation, code explanation, code fixing,
etc.). It is trained from scratch with a two-phase training strategy. In
phase 1, our model is trained on 4 trillion tokens sourced from 116
programming languages, ensuring a comprehensive understanding of
programming languages and syntax. In phase 2, our model is trained on 500
billion tokens with a carefully designed mixture of high-quality data from
code and natural language domains to improve the models’ ability to reason
and follow instructions.
readme: |-
# Granite-3.1-8B-Base
**Model Summary:**
Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens.
...
logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
license: apache-2.0
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
maturity: Generally Available
libraryName: transformers
labels:
- language
- granite-3.1
tasks:
- text-generation
createTimeSinceEpoch: 1733514949000
lastUpdateTimeSinceEpoch: 1734637721000
artifacts:
- protocol: oci
tags: ["1.3-1732870892", "1.3"]
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892
- repository: rhelai1
name: granite-8b-code-instruct
provider: IBM
description: A fine-tuned model based on Granite 8B Code Base
longDescription: |-
Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from
Granite-8B-Code-Base on a combination of permissively licensed instruction
data to enhance instruction following capabilities including logical
reasoning and problem-solving skills.
logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
readme: |-
# Granite-3.1-8B-Instruct
**Model Summary:**
Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging.
...
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
license: apache-2.0
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
maturity: Generally Available
libraryName: transformers
baseModel:
- repository: rhelai1
name: granite-8b-code-base
labels:
- language
- granite-3.1
tasks:
- text-generation
createTimeSinceEpoch: 1733514949000
lastUpdateTimeSinceEpoch: 1734637721000
artifacts:
- protocol: oci
tags: ["1.3-1732870892", "1.3"]
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment