Model Registry Catalog
{ | |
"$id": "https://kubeflow.org/model-registry/catalog.yaml", | |
"$schema": "https://json-schema.org/draft/2020-12/schema", | |
"title": "Model Catalog", | |
"type": "object", | |
"properties": { | |
"source": { | |
"type": "string", | |
"description": "The name of the catalog provider.", | |
"example": "Red Hat" | |
}, | |
"models": { | |
"type": "array", | |
"description": "List of models available in the catalog. `repository` and `name` are used\nto uniquely identify a model, and should be unique within the catalog.", | |
"items": { | |
"type": "object", | |
"required": [ | |
"repository", | |
"name" | |
], | |
"properties": { | |
"repository": { | |
"type": "string", | |
"description": "Name of the repository in the catalog.", | |
"example": "ibm-granite" | |
}, | |
"name": { | |
"type": "string", | |
"description": "Code name of the model.", | |
"example": "granite-3.1-8b-base" | |
}, | |
"provider": { | |
"type": "string", | |
"description": "Name of the organization or entity that provides the model.", | |
"example": "IBM" | |
}, | |
"description": { | |
"type": "string", | |
"description": "Short description of the model." | |
}, | |
"longDescription": { | |
"type": "string", | |
"description": "Longer description of the model." | |
}, | |
"logo": { | |
"type": "string", | |
"format": "uri", | |
"description": "URL to the model's logo. A [data\nURL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)\nis recommended." | |
}, | |
"readme": { | |
"type": "string", | |
"description": "Model documentation in Markdown." | |
}, | |
"language": { | |
"type": "array", | |
"description": "List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).", | |
"items": { | |
"type": "string" | |
}, | |
"example": [ | |
"en", | |
"es", | |
"cz" | |
] | |
}, | |
"license": { | |
"type": "string", | |
"description": "Short name of the model's license.", | |
"example": "apache-2.0" | |
}, | |
"licenseLink": { | |
"type": "string", | |
"format": "uri", | |
"description": "URL to the license text." | |
}, | |
"maturity": { | |
"type": "string", | |
"description": "Maturity level of the model.", | |
"example": "Generally Available" | |
}, | |
"libraryName": { | |
"type": "string", | |
"example": "transformers" | |
}, | |
"baseModel": { | |
"type": "array", | |
"description": "Reference to the base model (if any).", | |
"items": { | |
"type": "object", | |
"properties": { | |
"catalog": { | |
"type": "string", | |
"description": "Name of the catalog for an external base model. Omit for\nmodels in the same catalog.", | |
"example": "huggingface.io" | |
}, | |
"repository": { | |
"type": "string", | |
"description": "Name of the repository in an external catalog where the base\nmodel exists. Omit for models in the same catalog.", | |
"example": "ibm-granite" | |
}, | |
"name": { | |
"type": "string", | |
"example": "granite-3.1-8b-base" | |
} | |
} | |
} | |
}, | |
"labels": { | |
"type": "array", | |
"description": "List of labels for categorization.", | |
"example": [ | |
"language" | |
], | |
"items": { | |
"type": "string" | |
} | |
}, | |
"tasks": { | |
"type": "array", | |
"description": "List of tasks the model is designed for.", | |
"items": { | |
"type": "string" | |
}, | |
"example": [ | |
"text-generation" | |
] | |
}, | |
"createTimeSinceEpoch": { | |
"description": "Creation time in milliseconds since epoch.", | |
"type": "integer" | |
}, | |
"lastUpdateTimeSinceEpoch": { | |
"description": "Last update time in milliseconds since epoch.", | |
"type": "integer" | |
}, | |
"artifacts": { | |
"type": "array", | |
"description": "If a model has multiple versions, each version should have a\nseparate artifact.", | |
"items": { | |
"type": "object", | |
"properties": { | |
"createTimeSinceEpoch": { | |
"description": "Creation time in milliseconds since epoch.", | |
"type": "integer" | |
}, | |
"protocol": { | |
"type": "string", | |
"description": "The protocol used to access the artifact (only `oci` for now).", | |
"enum": [ | |
"oci" | |
] | |
}, | |
"tags": { | |
"type": "array", | |
"description": "List of tags for the artifact. These are for information\nonly. It is recommended that this list only include immutable\ntags (e.g. `1.2.3` instead of ephemeral/floating tags such as\n`1`, `1.2`, or `latest`).", | |
"example": [ | |
"2.1.2" | |
], | |
"items": { | |
"type": "string" | |
} | |
}, | |
"uri": { | |
"type": "string", | |
"description": "Artifact URI." | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} |
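The same schema follows in YAML form. If the YAML file is treated as the source of truth, the JSON form above can be regenerated from it mechanically; a minimal sketch, assuming PyYAML is installed (the file names are illustrative, not part of the gist):

```python
import json

import yaml  # PyYAML

# Load the YAML form of the schema and re-serialize it as JSON.
# "catalog.schema.yaml" and "catalog.schema.json" are assumed names.
with open("catalog.schema.yaml") as f:
    schema = yaml.safe_load(f)

with open("catalog.schema.json", "w") as f:
    json.dump(schema, f, indent=2)
```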
$id: https://kubeflow.org/model-registry/catalog.yaml | |
$schema: https://json-schema.org/draft/2020-12/schema | |
title: Model Catalog | |
type: object | |
properties: | |
source: | |
type: string | |
description: The name of the catalog provider. | |
example: Red Hat | |
models: | |
type: array | |
description: |- | |
List of models available in the catalog. `repository` and `name` are used | |
to uniquely identify a model, and should be unique within the catalog. | |
items: | |
type: object | |
required: | |
- repository | |
- name | |
properties: | |
repository: | |
type: string | |
description: Name of the repository in the catalog. | |
example: ibm-granite | |
name: | |
type: string | |
description: Code name of the model. | |
example: granite-3.1-8b-base | |
provider: | |
type: string | |
description: Name of the organization or entity that provides the model. | |
example: IBM | |
description: | |
type: string | |
description: Short description of the model. | |
longDescription: | |
type: string | |
description: Longer description of the model. | |
logo: | |
type: string | |
format: uri | |
description: |- | |
URL to the model's logo. A [data | |
URL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data) | |
is recommended. | |
readme: | |
type: string | |
description: Model documentation in Markdown. | |
language: | |
type: array | |
description: List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes). | |
items: | |
type: string | |
example: | |
- en | |
- es | |
- cs
license: | |
type: string | |
description: Short name of the model's license. | |
example: apache-2.0 | |
licenseLink: | |
type: string | |
format: uri | |
description: URL to the license text. | |
maturity: | |
type: string | |
description: Maturity level of the model. | |
example: Generally Available | |
libraryName: | |
type: string | |
example: transformers | |
baseModel: | |
type: array | |
description: Reference to the base model (if any). | |
items: | |
type: object | |
properties: | |
catalog: | |
type: string | |
description: |- | |
Name of the catalog for an external base model. Omit for | |
models in the same catalog. | |
example: huggingface.io | |
repository: | |
type: string | |
description: |- | |
Name of the repository in an external catalog where the base | |
model exists. Omit for models in the same catalog. | |
example: ibm-granite | |
name: | |
type: string | |
example: granite-3.1-8b-base | |
labels: | |
type: array | |
description: List of labels for categorization. | |
example: | |
- language | |
items: | |
type: string | |
tasks: | |
type: array | |
description: List of tasks the model is designed for. | |
items: | |
type: string | |
example: | |
- text-generation | |
createTimeSinceEpoch: | |
description: Creation time in milliseconds since epoch. | |
type: integer | |
lastUpdateTimeSinceEpoch: | |
description: Last update time in milliseconds since epoch. | |
type: integer | |
artifacts: | |
type: array | |
description: |- | |
If a model has multiple versions, each version should have a | |
separate artifact. | |
items: | |
type: object | |
properties: | |
createTimeSinceEpoch: | |
description: Creation time in milliseconds since epoch. | |
type: integer | |
protocol: | |
type: string | |
description: The protocol used to access the artifact (only `oci` for now). | |
enum: | |
- oci | |
tags: | |
type: array | |
description: |- | |
List of tags for the artifact. These are for information | |
only. It is recommended that this list only include immutable | |
tags (e.g. `1.2.3` instead of ephemeral/floating tags such as | |
`1`, `1.2`, or `latest`). | |
example: ["2.1.2"] | |
items: | |
type: string | |
uri: | |
type: string | |
description: Artifact URI. |
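Either form of the schema can be used to check a catalog document before publishing it. A minimal validation sketch, assuming the `jsonschema` and PyYAML packages are installed and that the schema and a catalog file are saved under the illustrative names below:

```python
import yaml
from jsonschema import Draft202012Validator  # matches the $schema draft above

# Illustrative file names; adjust to wherever the files actually live.
with open("catalog.schema.yaml") as f:
    schema = yaml.safe_load(f)
with open("sample-catalog.yaml") as f:
    catalog = yaml.safe_load(f)

# Collect every validation error instead of stopping at the first one.
validator = Draft202012Validator(schema)
errors = list(validator.iter_errors(catalog))
for err in errors:
    path = "/".join(str(p) for p in err.path) or "<root>"
    print(f"{path}: {err.message}")
if not errors:
    print("catalog is valid")
```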
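Once a catalog validates, a client can treat it as plain data. Against the sample catalog that follows, a minimal sketch (again assuming PyYAML and an illustrative file name) of looking a model up by `repository` and `name` and resolving its OCI artifact URIs:

```python
import yaml

# Assumed file name for the sample catalog reproduced below.
with open("sample-catalog.yaml") as f:
    catalog = yaml.safe_load(f)


def find_model(catalog, repository, name):
    # `repository` and `name` together uniquely identify a model.
    for model in catalog.get("models", []):
        if model["repository"] == repository and model["name"] == name:
            return model
    return None


model = find_model(catalog, "rhelai1", "granite-8b-code-base")
if model is not None:
    for artifact in model.get("artifacts", []):
        if artifact.get("protocol") == "oci":
            print(artifact["uri"], artifact.get("tags", []))
```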
source: Red Hat | |
models: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
provider: Red Hat | |
description: A decoder-only code model designed for code generative tasks | |
longDescription: |- | |
Granite-8B-Code-Base is a decoder-only code model designed for code | |
generative tasks (e.g., code generation, code explanation, code fixing, | |
etc.). It is trained from scratch with a two-phase training strategy. In | |
phase 1, our model is trained on 4 trillion tokens sourced from 116 | |
programming languages, ensuring a comprehensive understanding of | |
programming languages and syntax. In phase 2, our model is trained on 500 | |
billion tokens with a carefully designed mixture of high-quality data from | |
code and natural language domains to improve the models’ ability to reason | |
and follow instructions. | |
readme: |- | |
# Granite-3.1-8B-Base | |
**Model Summary:** | |
Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to the desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens.
- **Developers:** Granite Team, IBM | |
- **GitHub Repository:** [ibm-granite/granite-3.1-language-models](https://github.com/ibm-granite/granite-3.1-language-models) | |
- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) | |
- **Paper:** [Granite 3.1 Language Models (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d) | |
- **Release Date**: December 18th, 2024 | |
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) | |
**Supported Languages:** | |
English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 3.1 models for languages beyond these 12 languages. | |
**Intended Use:** | |
Prominent use cases of LLMs in text-to-text generation include summarization, text classification, extraction, question-answering, and other long-context tasks. All Granite Base models are able to handle these tasks as they were trained on a large amount of data from various domains. Moreover, they can serve as a baseline to create specialized models for specific application scenarios.
**Generation:** | |
This is a simple example of how to use the Granite-3.1-8B-Base model.
Install the following libraries: | |
```shell | |
pip install torch torchvision torchaudio | |
pip install accelerate | |
pip install transformers | |
``` | |
Then, copy the code snippet below to run the example. | |
```python | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
device = "auto" | |
model_path = "ibm-granite/granite-3.1-8B-base" | |
tokenizer = AutoTokenizer.from_pretrained(model_path) | |
# drop device_map if running on CPU | |
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device) | |
model.eval() | |
# change input text as desired | |
input_text = "Where is the Thomas J. Watson Research Center located?" | |
# tokenize the text and move it to the model's device
input_tokens = tokenizer(input_text, return_tensors="pt").to(model.device)
# generate output tokens | |
output = model.generate(**input_tokens, | |
max_length=4000) | |
# decode output tokens into text | |
output = tokenizer.batch_decode(output) | |
# print output | |
print(output) | |
``` | |
**Evaluation Results:** | |
<table> | |
<caption><b>HuggingFace Open LLM Leaderboard V1</b></caption> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Models</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">ARC-Challenge</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Hellaswag</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Avg</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">63.99</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">83.27</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">63.45</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">51.29</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">78.92</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">60.19</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">66.85</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.58</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">77.67</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.86</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.02</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.84</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.99</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.32</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.76</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">74.45</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">48.31</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.91</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">69.29</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">40.56</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.88</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.42</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.13</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">26.53</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">37.67</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.03</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.87</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">31.78</td> | |
</tr> | |
</tbody></table> | |
<table> | |
<caption><b>HuggingFace Open LLM Leaderboard V2</b></caption> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Models</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">BBH</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MATH Lvl 5</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">GPQA</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MUSR</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MMLU-Pro</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Avg</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">42.21</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">26.02</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">9.52</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">9.51</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.36</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">24.8</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">20.07</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">35.22</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">16.84</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.59</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.69</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.9</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.9</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.19</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">29.96</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">11.91</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.69</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.11</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">8.81</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">9.91</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Base</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">25.19</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.43</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.19</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0.22</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.76</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.55</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.22</td> | |
</tr> | |
</tbody></table> | |
**Model Architecture:** | |
Granite-3.1-8B-Base is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings. | |
<table> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Model</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">2B Dense</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">8B Dense</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">1B MoE</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">3B MoE</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Embedding size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2048</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">4096</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1024</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1536</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of layers</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">40</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">40</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">24</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Attention head size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">128</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of attention heads</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">32</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">16</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">24</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of KV heads</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MLP hidden size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8192</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">12800</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">512</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">512</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MLP activation</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of experts</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">—</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">—</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">40</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MoE TopK</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">—</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">—</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Initialization std</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Sequence length</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Position embedding</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Parameters</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1.3B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">3.3B</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Active parameters</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">400M</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">800M</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Training tokens</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">12T</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">12T</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td> | |
</tr> | |
</tbody></table> | |
**Training Data:** | |
This model is trained on a mix of open source and proprietary data following a three-stage training strategy. | |
* Stage 1 data: The data for stage 1 is sourced from diverse domains, such as: web, code, academic sources, books, and math data. | |
* Stage 2 data: The data for stage 2 comprises a curated mix of high-quality data from the same domains, plus multilingual and instruction data. The goal of this second training phase is to enhance the model’s performance on specific tasks. | |
* Stage 3 data: The data for stage 3 consists of original stage-2 pretraining data with additional synthetic long-context data in the form of QA/summary pairs where the answer
contains a recitation of the related paragraph before the answer. | |
A detailed attribution of datasets can be found in the [Granite 3.0 Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf), [Granite 3.1 Technical Report (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d), and [Accompanying Author List](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/author-ack.pdf). | |
**Infrastructure:** | |
We train Granite 3.1 Language Models using IBM's super computing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs. | |
**Ethical Considerations and Limitations:** | |
The use of Large Language Models involves risks and ethical considerations that people must be aware of, including but not limited to: bias and fairness, misinformation, and autonomous decision-making. The Granite-3.1-8B-Base model is no exception in this regard. Even though this model is suited for multiple generative AI tasks, it has not undergone any safety alignment, so it may produce problematic outputs. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in generation scenarios by copying text verbatim from the training dataset due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. Regarding ethics, a latent risk associated with all Large Language Models is their malicious utilization. We urge the community to use the Granite-3.1-8B-Base model with ethical intentions and in a responsible way.
**Resources** | |
- ⭐️ Learn about the latest updates with Granite: https://www.ibm.com/granite | |
- 📄 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/ | |
- 💡 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
labels: | |
- language | |
- granite-3.1 | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1733514949000 | |
lastUpdateTimeSinceEpoch: 1734637721000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1733514949000 | |
tags: ["1.3.0"] | |
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-8b-code-instruct | |
provider: Red Hat | |
description: A fine-tuned model based on Granite 8B Code Base | |
longDescription: |- | |
Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from
Granite-8B-Code-Base on a combination of permissively licensed instruction | |
data to enhance instruction following capabilities including logical | |
reasoning and problem-solving skills. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
readme: |- | |
# Granite-3.1-8B-Instruct | |
**Model Summary:** | |
Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long-context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging.
- **Developers:** Granite Team, IBM | |
- **GitHub Repository:** [ibm-granite/granite-3.1-language-models](https://github.com/ibm-granite/granite-3.1-language-models) | |
- **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) | |
- **Paper:** [Granite 3.1 Language Models (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d) | |
- **Release Date**: December 18th, 2024 | |
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) | |
**Supported Languages:** | |
English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 3.1 models for languages beyond these 12 languages. | |
**Intended Use:** | |
The model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, including business applications. | |
*Capabilities* | |
* Summarization | |
* Text classification | |
* Text extraction | |
* Question-answering | |
* Retrieval Augmented Generation (RAG) | |
* Code related tasks | |
* Function-calling tasks | |
* Multilingual dialog use cases | |
* Long-context tasks including long document/meeting summarization, long document QA, etc. | |
**Generation:** | |
This is a simple example of how to use the Granite-3.1-8B-Instruct model.
Install the following libraries: | |
```shell | |
pip install torch torchvision torchaudio | |
pip install accelerate | |
pip install transformers | |
``` | |
Then, copy the snippet from the section that is relevant for your use case. | |
```python | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
device = "auto" | |
model_path = "ibm-granite/granite-3.1-8b-instruct" | |
tokenizer = AutoTokenizer.from_pretrained(model_path) | |
# drop device_map if running on CPU | |
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device) | |
model.eval() | |
# change input text as desired | |
chat = [ | |
{ "role": "user", "content": "Please list one IBM Research laboratory located in the United States. You should only output its name and location." }, | |
] | |
chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True) | |
# tokenize the text and move it to the model's device
input_tokens = tokenizer(chat, return_tensors="pt").to(model.device)
# generate output tokens | |
output = model.generate(**input_tokens, | |
max_new_tokens=100) | |
# decode output tokens into text | |
output = tokenizer.batch_decode(output) | |
# print output | |
print(output) | |
``` | |
**Evaluation Results:** | |
<table> | |
<caption><b>HuggingFace Open LLM Leaderboard V1</b></caption> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Models</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">ARC-Challenge</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Hellaswag</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Avg</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">62.62</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">84.48</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">65.34</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">66.23</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">75.37</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">73.84</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">71.31</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.61</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">75.14</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">55.31</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.42</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.48</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.76</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">60.79</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.42</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">73.01</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.19</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">49.71</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">64.87</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">48.97</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">56.53</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">42.66</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.97</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">26.13</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.77</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.35</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">33.88</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.29</td> | |
</tr> | |
</tbody></table> | |
<table> | |
<caption><b>HuggingFace Open LLM Leaderboard V2</b></caption> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Models</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">BBH</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MATH Lvl 5</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">GPQA</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MUSR</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">MMLU-Pro</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">Avg</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">72.08</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">34.09</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">21.68</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.28</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">19.01</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">28.19</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">30.55</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.86</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.82</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">11.33</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.26</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4.87</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.21</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.06</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">55.16</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">16.69</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">10.35</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.15</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.51</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">12.75</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">17.1</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Instruct</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.86</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.18</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4.08</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0.78</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.41</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">10.05</td> | |
</tr> | |
</tbody></table> | |
**Model Architecture:** | |
Granite-3.1-8B-Instruct is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings. | |
<table> | |
<thead> | |
<tr> | |
<th style="text-align:left; background-color: #001d6c; color: white;">Model</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">2B Dense</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">8B Dense</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">1B MoE</th> | |
<th style="text-align:center; background-color: #001d6c; color: white;">3B MoE</th> | |
</tr></thead> | |
<tbody> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Embedding size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2048</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">4096</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1024</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1536</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of layers</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">40</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">40</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">24</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Attention head size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">128</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">64</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of attention heads</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">32</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">16</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">24</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of KV heads</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MLP hidden size</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8192</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">12800</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">512</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">512</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MLP activation</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Number of experts</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">—</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">—</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">32</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">40</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">MoE TopK</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">—</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">—</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">8</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Initialization std</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Sequence length</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;">Position embedding</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Parameters</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">1.3B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">3.3B</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Active parameters</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">400M</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">800M</td> | |
</tr> | |
<tr> | |
<td style="text-align:left; background-color: #FFFFFF; color: black;"># Training tokens</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">12T</td> | |
<td style="text-align:center; background-color: #DAE8FF; color: black;">12T</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td> | |
<td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td> | |
</tr> | |
</tbody></table> | |
**Training Data:** | |
Overall, our SFT data is largely comprised of three key sources: (1) publicly available datasets with permissive license, (2) internal synthetic data targeting specific capabilities including long-context tasks, and (3) very small amounts of human-curated data. A detailed attribution of datasets can be found in the [Granite 3.0 Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf), [Granite 3.1 Technical Report (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d), and [Accompanying Author List](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/author-ack.pdf). | |
**Infrastructure:** | |
We train Granite 3.1 Language Models using IBM's super computing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs. | |
**Ethical Considerations and Limitations:** | |
Granite 3.1 Instruct Models are primarily finetuned using instruction-response pairs, mostly in English, but also multilingual data covering eleven languages. Although this model can handle multilingual dialog use cases, its performance might not match that on English tasks. In such cases, introducing a small number of examples (few-shot) can help the model generate more accurate outputs. While this model has been aligned with safety in mind, it may in some cases produce inaccurate, biased, or unsafe responses to user prompts. We therefore urge the community to use this model with proper safety testing and tuning tailored for their specific tasks.
**Resources** | |
- ⭐️ Learn about the latest updates with Granite: https://www.ibm.com/granite | |
- 📄 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/ | |
- 💡 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources | |
<!-- ## Citation | |
``` | |
@misc{granite-models, | |
author = {author 1, author2, ...}, | |
title = {}, | |
journal = {}, | |
volume = {}, | |
year = {2024}, | |
url = {https://arxiv.org/abs/0000.00000}, | |
} | |
``` --> | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
baseModel: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
labels: | |
- language | |
- granite-3.1 | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1733514949000 | |
lastUpdateTimeSinceEpoch: 1734637721000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1733514949000 | |
tags: ["1.3.0"] | |
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-8b-lab-v2-preview | |
provider: Red Hat | |
description: A derivative of granite-8b-base trained with the LAB methodology | |
longDescription: |- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based | |
alignment tuning method for LLMs from IBM Research. | |
Granite-8b-lab-v2-preview is a Granite-8b-base derivative model trained | |
with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
baseModel: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
labels: | |
- language | |
- granite-3.1 | |
- lab-base | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1732870892000 | |
lastUpdateTimeSinceEpoch: 1732870892000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1732870892000 | |
tags: ["1.3.0"] | |
uri: registry.redhat.io/rhelai1/granite-8b-lab-v2-preview:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-8b-starter-v1 | |
provider: Red Hat | |
description: Custom Red Hat base model tuned only for phase 00. | |
longDescription: |- | |
A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
labels: | |
- language | |
- granite-3.1 | |
- lab-base | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1732870892000 | |
lastUpdateTimeSinceEpoch: 1732870892000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1732870892000 | |
tags: ["1.3.0"] | |
uri: registry.redhat.io/rhelai1/granite-8b-starter-v1:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-8b-lab-v1 | |
provider: Red Hat | |
description: A derivative of granite-8b-base trained with the LAB methodology | |
longDescription: |- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based | |
alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is | |
a Granite-8b-base derivative model trained with the LAB methodology, | |
using Mixtral-8x7b-Instruct as a teacher model. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
baseModel: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
labels: | |
- language | |
- granite-3.1 | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1732870892000 | |
lastUpdateTimeSinceEpoch: 1732870892000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1732870892000 | |
tags: ["1.3.0"] | |
uri: registry.redhat.io/rhelai1/granite-8b-lab-v1:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-7b-starter | |
provider: Red Hat | |
description: Custom Red Hat base model tuned only for phase 00. | |
longDescription: |- | |
A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["en"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
labels: | |
- lab-base | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1732870892000 | |
lastUpdateTimeSinceEpoch: 1732870892000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1732870892000 | |
tags: ["1.3.0"] | |
uri: registry.redhat.io/rhelai1/granite-7b-starter:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-7b-redhat | |
provider: Red Hat | |
description: A derivative of granite-7b-base trained with the LAB methodology | |
longDescription: |- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based | |
alignment tuning method for LLMs from IBM Research. Granite-7b-lab is a | |
Granite-7b-base derivative model trained with the LAB methodology, using | |
Mixtral-8x7b-Instruct as a teacher model. | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
language: ["en"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
labels: | |
- language | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1732870892000 | |
lastUpdateTimeSinceEpoch: 1732870892000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1732870892000 | |
tags: ["1.3.0"] | |
uri: registry.redhat.io/rhelai1/granite-7b-redhat-lab:1.3-1732870892 | |
- repository: rhelai1 | |
name: mixtral-8x7b-instruct-v0-1 | |
description: Teacher and critic model for running synthetic data generation (SDG) | |
maturity: Generally Available | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-teacher | |
artifacts: | |
- protocol: oci | |
tags: ["1.4"] | |
uri: registry.redhat.io/rhelai1/mixtral-8x7b-instruct-v0-1:1.4 | |
- repository: rhelai1 | |
name: prometheus-8x7b-v2-0 | |
description: Judge model for multi-phase training and evaluation | |
maturity: Generally Available | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-judge | |
artifacts: | |
- protocol: oci | |
tags: ["1.4"] | |
uri: registry.redhat.io/rhelai1/prometheus-8x7b-v2-0:1.4
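The catalog above is plain YAML, so it can be consumed directly by tooling. As a minimal sketch (assuming the document is saved locally as `sample-catalog.yaml`; the file name and helper function are illustrative, not part of the catalog format), the artifact URIs for a model can be looked up by repository and name:
```python
# Sketch only: load a catalog file and look up artifact URIs for one model.
# "sample-catalog.yaml" and find_artifact_uris are illustrative names.
import yaml  # PyYAML

def find_artifact_uris(catalog_path, repository, name):
    with open(catalog_path) as f:
        catalog = yaml.safe_load(f)
    for model in catalog.get("models", []):
        if model.get("repository") == repository and model.get("name") == name:
            return [artifact["uri"] for artifact in model.get("artifacts", [])]
    return []

print(find_artifact_uris("sample-catalog.yaml", "rhelai1", "granite-8b-lab-v1"))
# e.g. ['registry.redhat.io/rhelai1/granite-8b-lab-v1:1.3-1732870892']
```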
source: Red Hat | |
models: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
provider: Red Hat | |
description: A decoder-only code model designed for code generative tasks | |
longDescription: |- | |
Granite-8B-Code-Base is a decoder-only code model designed for code | |
generative tasks (e.g., code generation, code explanation, code fixing, | |
etc.). It is trained from scratch with a two-phase training strategy. In | |
phase 1, our model is trained on 4 trillion tokens sourced from 116 | |
programming languages, ensuring a comprehensive understanding of | |
programming languages and syntax. In phase 2, our model is trained on 500 | |
billion tokens with a carefully designed mixture of high-quality data from | |
code and natural language domains to improve the model's ability to reason | |
and follow instructions. | |
readme: |- | |
# Granite-3.1-8B-Base | |
**Model Summary:** | |
Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy: the supported context length is increased in increments, adjusting RoPE theta at each step, until the model has successfully adapted to the desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens. | |
... | |
logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7 | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
labels: | |
- language | |
- granite-3.1 | |
- lab-base | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1733514949000 | |
lastUpdateTimeSinceEpoch: 1734637721000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1733514949000 | |
tags: ["1.3.0"] | |
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892 | |
- repository: rhelai1 | |
name: granite-8b-code-instruct | |
provider: Red Hat | |
description: A fine-tuned model based on Granite 8B Code Base | |
longDescription: |- | |
Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from | |
Granite-8B-Code-Base on a combination of permissively licensed instruction | |
data to enhance instruction-following capabilities, including logical | |
reasoning and problem-solving skills. | |
logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7 | |
readme: |- | |
# Granite-3.1-8B-Instruct | |
**Model Summary:** | |
Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open-source instruction datasets with permissive licenses and internally collected synthetic datasets tailored for solving long-context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging. | |
... | |
language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"] | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Generally Available | |
libraryName: transformers | |
baseModel: | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
labels: | |
- language | |
- granite-3.1 | |
tasks: | |
- text-generation | |
createTimeSinceEpoch: 1733514949000 | |
lastUpdateTimeSinceEpoch: 1734637721000 | |
artifacts: | |
- protocol: oci | |
createTimeSinceEpoch: 1733514949000 | |
tags: ["1.3.0"] | |
uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892 |
source: Red Hat | |
models: | |
- repository: rhelai1 | |
name: granite-7b-starter | |
description: Base model for customizing and fine-tuning | |
longDescription: >- | |
A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-base | |
tasks: | |
- text-generation | |
languages: | |
- en | |
readme: |- | |
# Model Card for Granite-7b-starter [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-7b-starter is a starting student model built for InstructLab, based on Granite-7b-base. This model can be used to create LAB models via InstructLab. | |
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-starter is a Granite-7b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows new knowledge and skills to be added, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting. | |
Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples. | |
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, while the diversity in the taxonomy itself ensures that the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows Mixtral 8x7B to be used as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4. | |
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step. | |
Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used. | |
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler. | |
## Model description | |
- **Model Name**: Granite-7b-starter | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm/granite-7b-base](https://huggingface.co/ibm/granite-7b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am, Red Hat® Starter Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. | |
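As a minimal illustration (an assumption on our part, not part of the original model card), the template above could be used for local inference with the transformers library. The local path is hypothetical, and `sys_prompt` and `stop_token` are the values defined in the prompt template above:
```python
# Sketch only: local inference with transformers. The model path is a
# hypothetical location of the downloaded weights; sys_prompt and stop_token
# come from the prompt template above.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./granite-7b-starter"  # hypothetical local path
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)

inputs = "Write one sentence about the LAB methodology."
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'

encoded = tokenizer(prompt, return_tensors="pt")
generated = model.generate(
    **encoded,
    max_new_tokens=128,
    eos_token_id=tokenizer.convert_tokens_to_ids(stop_token),
)
# Decode only the tokens generated after the prompt.
print(tokenizer.decode(generated[0][encoded["input_ids"].shape[1]:], skip_special_tokens=True))
```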
**Bias, Risks, and Limitations** | |
Granite-7b-starter is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. | |
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-7b-starter:1.4.0 | |
- repository: rhelai1 | |
name: granite-7b-redhat-lab | |
description: Granite model for inference serving | |
longDescription: >- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-lab is a Granite-7b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
tasks: | |
- text-generation | |
languages: | |
- en | |
readme: |- | |
# Model Card for Granite-7b-redhat-lab [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-7b-redhat-lab is an instruction-tuned LAB model built via InstructLab, based on Granite-7b-base | |
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-redhat-lab is a Granite-7b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows new knowledge and skills to be added, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting. | |
Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples. | |
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, while the diversity in the taxonomy itself ensures that the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows Mixtral 8x7B to be used as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4. | |
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step. | |
Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used. | |
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler. | |
## Model description | |
- **Model Name**: Granite-7b-redhat-lab | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm/granite-7b-base](https://huggingface.co/ibm/granite-7b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am, Red Hat® Instruct Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model. My primary function is to be a chat assistant." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. | |
**Bias, Risks, and Limitations** | |
Granite-7b-redhat-lab is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-7b-redhat-lab:1.4.0 | |
- repository: rhelai1 | |
name: granite-8b-starter-v1 | |
description: Base model for customizing and fine-tuning | |
longDescription: >- | |
A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-base | |
tasks: | |
- text-generation | |
languages: | |
- en | |
readme: |- | |
# Model Card for Granite-8b-starter-v1 [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-8b-starter-v1 is a starting student model built for InstructLab, based on Granite-3.0-8b-base. This model can be used to create LAB models via InstructLab. | |
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-starter-v1 is a Granite-3.0-8b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows new knowledge and skills to be added, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting. | |
Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples. | |
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, while the diversity in the taxonomy itself ensures that the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows Mixtral 8x7B to be used as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4. | |
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step. | |
Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used. | |
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler. | |
## Model description | |
- **Model Name**: Granite-8b-starter-v1 | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am a Red Hat® Starter Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. | |
**Bias, Risks, and Limitations** | |
Granite-8b-starter-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. | |
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-starter-v1:1.4.0 | |
- repository: rhelai1 | |
name: granite-8b-lab-v1 | |
description: Granite model for inference serving | |
longDescription: >- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
tasks: | |
- text-generation | |
languages: | |
- en | |
readme: |- | |
# Model Card for Granite-8b-lab-v1 [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-8b-lab-v1 is an instruction-tuned LAB model built via InstructLab, based on Granite-3.0-8b-base | |
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-3.0-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows new knowledge and skills to be added, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting. | |
Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples. | |
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, while the diversity in the taxonomy itself ensures that the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows Mixtral 8x7B to be used as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4. | |
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step. | |
Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used. | |
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler. | |
## Model description | |
- **Model Name**: Granite-8b-lab-v1 | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary role is to serve as a chat assistant." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. | |
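For illustration only (not from the original model card), the same template could be sent to an OpenAI-compatible completions endpoint such as a vLLM server. The URL and served model name below are assumptions, and `sys_prompt` and `stop_token` are the values defined above:
```python
# Sketch only: call an OpenAI-compatible completions endpoint (e.g. vLLM).
# The endpoint URL and served model name are illustrative assumptions;
# sys_prompt and stop_token come from the prompt template above.
import requests

inputs = "Summarize what the LAB methodology does."
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'

response = requests.post(
    "http://localhost:8000/v1/completions",  # hypothetical local server
    json={
        "model": "granite-8b-lab-v1",
        "prompt": prompt,
        "max_tokens": 256,
        "stop": [stop_token],
    },
    timeout=60,
)
response.raise_for_status()
print(response.json()["choices"][0]["text"])
```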
**Bias, Risks, and Limitations** | |
Granite-8b-lab-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. | |
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-lab-v1:1.4.0 | |
- repository: rhelai1 | |
name: granite-8b-lab-v2-preview | |
description: Preview of the version 2 8b Granite model for inference serving | |
longDescription: >- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v2-preview is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-lab-v2-preview:1.4.0 | |
- repository: rhelai1 | |
name: granite-3.1-8b-starter-v1 | |
description: Version 1 of the Granite 3.1 base model for customizing and fine-tuning | |
longDescription: >- | |
A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
tasks: | |
- text-generation | |
languages: | |
- en | |
readme: |- | |
# Model Card for Granite-8b-starter-v1 [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-8b-starter-v1 is a starting student model built for InstructLab, based on Granite-3.0-8b-base. This model can be used to create LAB models via InstructLab. | |
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-starter-v1 is a Granite-3.0-8b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows new knowledge and skills to be added, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting. | |
Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples. | |
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, while the diversity in the taxonomy itself ensures that the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows Mixtral 8x7B to be used as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4. | |
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step. | |
Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used. | |
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler. | |
## Model description | |
- **Model Name**: Granite-8b-starter-v1 | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am a Red Hat® Starter Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise using the system prompt employed during the model's training for optimal inference performance, as performance can vary depending on the provided instructions.
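As an illustration only, the templated prompt could be sent to an OpenAI-compatible completions endpoint, for example one exposed by a vLLM server; the endpoint URL and served model name below are placeholders, not part of this catalog entry.
```python
# Hedged sketch: POST the templated prompt to a hypothetical OpenAI-compatible
# completions endpoint. Adjust the URL and model name to your deployment.
import requests

sys_prompt = "I am a Red Hat® Starter Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology."
inputs = "What is InstructLab?"
prompt = f"<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n"

resp = requests.post(
    "http://localhost:8000/v1/completions",  # placeholder endpoint
    json={
        "model": "granite-8b-starter-v1",    # placeholder served model name
        "prompt": prompt,
        "max_tokens": 256,
        "stop": ["<|endoftext|>"],           # stop token shown above
    },
    timeout=60,
)
print(resp.json()["choices"][0]["text"])
```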
**Bias, Risks, and Limitations** | |
Granite-8b-starter-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-3-1-8b-starter-v1:1.4.0 | |
- repository: rhelai1 | |
name: granite-3.1-8b-lab-v1 | |
description: Version 1 of the Granite 3.1 model for inference serving | |
longDescription: >- | |
LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
tasks: | |
- text-generation | |
language:
- en | |
readme: |- | |
# Model Card for Granite-8b-lab-v1 [Paper](https://arxiv.org/abs/2403.01081) | |
### Overview | |
Granite-8b-lab-v1 is an instruction-tuned LAB model built via InstructLab, based on Granite-3.0-8b-base.
### Method | |
LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-3.0-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model. | |
LAB consists of three key components: | |
1. Taxonomy-driven data curation process | |
2. Large-scale synthetic data generator | |
3. Two-phased-training with replay buffers | |
The LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.
The taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. The taxonomy allows the data curator or the model designer to easily specify a diverse set of knowledge domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three bins: knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.
During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model. | |
This makes the teacher model better exploit the task distributions defined by the local examples of each node, and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and more capable models like GPT-4.
For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document. | |
Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. | |
Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. | |
Our training consists of two major phases: knowledge tuning and skills tuning. | |
There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples). | |
The second step uses a replay buffer with data from the first step.
Both foundational skills and compositional skills are learned during the skills tuning phase, where a replay buffer of data from the knowledge phase is used.
Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: a larger batch size and a carefully optimized learning rate and scheduler.
## Model description | |
- **Model Name**: Granite-8b-lab-v1 | |
- **Language(s):** Primarily English | |
- **License:** Apache 2.0 | |
- **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base) | |
- **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) | |
## Prompt Template | |
```python | |
sys_prompt = "I am a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary role is to serve as a chat assistant." | |
prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n' | |
stop_token = '<|endoftext|>' | |
``` | |
We advise using the system prompt employed during the model's training for optimal inference performance, as performance can vary depending on the provided instructions.
**Bias, Risks, and Limitations** | |
Granite-8b-lab-v1 has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-3-1-8b-lab-v1:1.4.0 | |
- repository: rhelai1 | |
name: granite-8b-code-instruct | |
description: LAB fine-tuned granite code model for inference serving | |
longDescription: >- | |
Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from Granite-8B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Technology preview | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
readme: |- | |
 | |
# Granite-8B-Code-Instruct-4K | |
## Model Summary | |
**Granite-8B-Code-Instruct-4K** is an 8B parameter model fine-tuned from *Granite-8B-Code-Base-4K* on a combination of **permissively licensed** instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
- **Developers:** IBM Research | |
- **GitHub Repository:** [ibm-granite/granite-code-models](https://github.com/ibm-granite/granite-code-models)
- **Paper:** [Granite Code Models: A Family of Open Foundation Models for Code Intelligence](https://arxiv.org/abs/2405.04324) | |
- **Release Date**: May 6th, 2024 | |
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0). | |
## Usage | |
### Intended use | |
The model is designed to respond to coding related instructions and can be used to build coding assistants. | |
<!-- TO DO: Check starcoder2 instruct code example that includes the template https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1 --> | |
### Generation | |
This is a simple example of how to use **Granite-8B-Code-Instruct-4K** model. | |
```python | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
device = "cuda" # or "cpu" | |
model_path = "ibm-granite/granite-8b-code-instruct-4k" | |
tokenizer = AutoTokenizer.from_pretrained(model_path) | |
# drop device_map if running on CPU | |
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device) | |
model.eval() | |
# change input text as desired | |
chat = [ | |
{ "role": "user", "content": "Write a code to find the maximum value in a list of numbers." }, | |
] | |
chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True) | |
# tokenize the text | |
input_tokens = tokenizer(chat, return_tensors="pt") | |
# transfer tokenized inputs to the device | |
for i in input_tokens: | |
input_tokens[i] = input_tokens[i].to(device) | |
# generate output tokens | |
output = model.generate(**input_tokens, max_new_tokens=100) | |
# decode output tokens into text | |
output = tokenizer.batch_decode(output) | |
# loop over the batch to print, in this example the batch size is 1 | |
for i in output: | |
print(i) | |
``` | |
<!-- TO DO: Check this part --> | |
## Training Data | |
Granite Code Instruct models are trained on the following types of data. | |
* Code Commits Datasets: we sourced code commits data from the [CommitPackFT](https://huggingface.co/datasets/bigcode/commitpackft) dataset, a filtered version of the full CommitPack dataset. From the CommitPackFT dataset, we only consider data for 92 programming languages. Our inclusion criterion boils down to selecting programming languages common across CommitPackFT and the 116 languages that we considered to pretrain the code-base model (*Granite-8B-Code-Base*).
* Math Datasets: We consider two high-quality math datasets, [MathInstruct](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) and [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA). Due to license issues, we filtered out GSM8K-RFT and Camel-Math from the MathInstruct dataset.
* Code Instruction Datasets: We use [Glaive-Code-Assistant-v3](https://huggingface.co/datasets/glaiveai/glaive-code-assistant-v3), [Glaive-Function-Calling-v2](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2), [NL2SQL11](https://huggingface.co/datasets/bugdaryan/sql-create-context-instruction) and a small collection of synthetic API calling datasets. | |
* Language Instruction Datasets: We include high-quality datasets such as [HelpSteer](https://huggingface.co/datasets/nvidia/HelpSteer) and an open license-filtered version of [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus). We also include a collection of hardcoded prompts to ensure our model generates correct outputs given inquiries about its name or developers. | |
## Infrastructure | |
We train the Granite Code models using two of IBM's supercomputing clusters, Vela and Blue Vela, outfitted with NVIDIA A100 and H100 GPUs, respectively. These clusters provide a scalable and efficient infrastructure for training our models over thousands of GPUs.
## Ethical Considerations and Limitations | |
Granite code instruct models are primarily finetuned using instruction-response pairs across a specific set of programming languages. Thus, their performance may be limited with out-of-domain programming languages. In this situation, it is beneficial to provide few-shot examples to steer the model's output. Moreover, developers should perform safety testing and target-specific tuning before deploying these models in critical applications. The model also inherits ethical considerations and limitations from its base model. For more information, please refer to the *[Granite-8B-Code-Base-4K](https://huggingface.co/ibm-granite/granite-8b-code-base-4k)* model card.
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-code-instruct:1.4.0 | |
- repository: rhelai1 | |
name: granite-8b-code-base | |
description: Granite code model for inference serving | |
longDescription: >- | |
Granite-8B-Code-Base is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 4 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions. | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: Technology preview | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
readme: |- | |
 | |
# Granite-8B-Code-Base-4K | |
## Model Summary | |
**Granite-8B-Code-Base-4K** is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 4 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions. | |
- **Developers:** IBM Research | |
- **GitHub Repository:** [ibm-granite/granite-code-models](https://github.com/ibm-granite/granite-code-models)
- **Paper:** [Granite Code Models: A Family of Open Foundation Models for Code Intelligence](https://arxiv.org/abs/2405.04324) | |
- **Release Date**: May 6th, 2024 | |
- **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0). | |
## Usage | |
### Intended use | |
Prominent enterprise use cases of LLMs in software engineering productivity include code generation, code explanation, code fixing, generating unit tests, generating documentation, addressing technical debt issues, vulnerability detection, code translation, and more. All Granite Code Base models, including the **8B parameter model**, are able to handle these tasks as they were trained on a large amount of code data from 116 programming languages. | |
### Generation | |
This is a simple example of how to use **Granite-8B-Code-Base-4K** model. | |
```python | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
device = "cuda" # or "cpu" | |
model_path = "ibm-granite/granite-8b-code-base-4k" | |
tokenizer = AutoTokenizer.from_pretrained(model_path) | |
# drop device_map if running on CPU | |
model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device) | |
model.eval() | |
# change input text as desired | |
input_text = "def generate():" | |
# tokenize the text | |
input_tokens = tokenizer(input_text, return_tensors="pt") | |
# transfer tokenized inputs to the device | |
for i in input_tokens: | |
input_tokens[i] = input_tokens[i].to(device) | |
# generate output tokens | |
output = model.generate(**input_tokens) | |
# decode output tokens into text | |
output = tokenizer.batch_decode(output) | |
# loop over the batch to print, in this example the batch size is 1 | |
for i in output: | |
print(i) | |
``` | |
## Training Data | |
- **Data Collection and Filtering:** Pretraining code data is sourced from a combination of publicly available datasets (e.g., [GitHub Code Clean](https://huggingface.co/datasets/codeparrot/github-code-clean), [Starcoder data](https://huggingface.co/datasets/bigcode/starcoderdata)), and additional public code repositories and issues from GitHub. We filter raw data to retain a list of 116 programming languages. After language filtering, we also filter out low-quality code. | |
- **Exact and Fuzzy Deduplication:** We adopt an aggressive deduplication strategy that includes both exact and fuzzy deduplication to remove documents having (near) identical code content. | |
- **HAP, PII, Malware Filtering:** We apply a HAP content filter that reduces models' likelihood of generating hateful, abusive, or profane language. We also make sure to redact Personally Identifiable Information (PII) by replacing PII content (e.g., names, email addresses, keys, passwords) with corresponding tokens (e.g., ⟨NAME⟩, ⟨EMAIL⟩, ⟨KEY⟩, ⟨PASSWORD⟩). Moreover, we scan all datasets using [ClamAV](https://www.clamav.net/) to identify and remove instances of malware in the source code. | |
- **Natural Language Datasets:** In addition to collecting code data for model training, we curate several publicly available high-quality natural language datasets to improve models' proficiency in language understanding and mathematical reasoning. Unlike the code data, we do not deduplicate these datasets. | |
## Infrastructure | |
We train the Granite Code models using two of IBM's supercomputing clusters, Vela and Blue Vela, outfitted with NVIDIA A100 and H100 GPUs, respectively. These clusters provide a scalable and efficient infrastructure for training our models over thousands of GPUs.
## Ethical Considerations and Limitations | |
The use of Large Language Models involves risks and ethical considerations people must be aware of. Regarding code generation, caution is urged against complete reliance on specific code models for crucial decisions or impactful information as the generated code is not guaranteed to work as intended. The **Granite-8B-Code-Base-4K** model is no exception in this regard. Even though this model is suited for multiple code-related tasks, it has not undergone any safety alignment; therefore, it may produce problematic outputs. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in generation scenarios by copying source code verbatim from the training dataset due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. Regarding ethics, a latent risk associated with all Large Language Models is their malicious utilization. We urge the community to use the **Granite-8B-Code-Base-4K** model with ethical intentions and in a responsible way.
createTimeSinceEpoch: 1739210683000 | |
lastUpdateTimeSinceEpoch: 1739210683000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4.0"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-code-base:1.4.0 | |
- repository: rhelai1 | |
name: mixtral-8x7b-instruct-v0-1 | |
description: Teacher and critic model for running Synthetic data generation (SDG) | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-teacher | |
readme: |- | |
# Model Card for Mixtral-8x7B | |
### Tokenization with `mistral-common` | |
```py | |
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer | |
from mistral_common.protocol.instruct.messages import UserMessage | |
from mistral_common.protocol.instruct.request import ChatCompletionRequest | |
mistral_models_path = "MISTRAL_MODELS_PATH" | |
tokenizer = MistralTokenizer.v1() | |
completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")]) | |
tokens = tokenizer.encode_chat_completion(completion_request).tokens | |
``` | |
## Inference with `mistral_inference` | |
```py | |
from mistral_inference.transformer import Transformer | |
from mistral_inference.generate import generate | |
model = Transformer.from_folder(mistral_models_path) | |
out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id) | |
result = tokenizer.decode(out_tokens[0]) | |
print(result) | |
``` | |
## Inference with Hugging Face `transformers`
```py | |
import torch
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
model.to("cuda")
# `tokens` is the list of input ids produced by the mistral-common tokenizer above
input_ids = torch.tensor([tokens], device="cuda")
generated_ids = model.generate(input_ids, max_new_tokens=1000, do_sample=True)
# decode with mistral tokenizer | |
result = tokenizer.decode(generated_ids[0].tolist()) | |
print(result) | |
``` | |
> [!TIP] | |
> PRs to correct the transformers tokenizer so that it gives 1-to-1 the same results as the mistral-common reference implementation are very welcome! | |
--- | |
The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested. | |
For full details of this model please read our [release blog post](https://mistral.ai/news/mixtral-of-experts/). | |
## Warning | |
This repo contains weights that are compatible with [vLLM](https://github.com/vllm-project/vllm) serving of the model as well as the Hugging Face [transformers](https://github.com/huggingface/transformers) library. It is based on the original Mixtral [torrent release](magnet:?xt=urn:btih:5546272da9065eddeb6fcd7ffddeef5b75be79a7&dn=mixtral-8x7b-32kseqlen&tr=udp%3A%2F%2Fopentracker.i2p.rocks%3A6969%2Fannounce&tr=http%3A%2F%2Ftracker.openbittorrent.com%3A80%2Fannounce), but the file format and parameter names are different. Please note that the model cannot (yet) be instantiated with HF.
## Instruction format | |
This format must be strictly respected, otherwise the model will generate sub-optimal outputs. | |
The template used to build a prompt for the Instruct model is defined as follows: | |
``` | |
<s> [INST] Instruction [/INST] Model answer</s> [INST] Follow-up instruction [/INST] | |
``` | |
Note that `<s>` and `</s>` are special tokens for beginning of string (BOS) and end of string (EOS) while [INST] and [/INST] are regular strings. | |
As reference, here is the pseudo-code used to tokenize instructions during fine-tuning: | |
```python | |
def tokenize(text): | |
return tok.encode(text, add_special_tokens=False) | |
[BOS_ID] + | |
tokenize("[INST]") + tokenize(USER_MESSAGE_1) + tokenize("[/INST]") + | |
tokenize(BOT_MESSAGE_1) + [EOS_ID] + | |
… | |
tokenize("[INST]") + tokenize(USER_MESSAGE_N) + tokenize("[/INST]") + | |
tokenize(BOT_MESSAGE_N) + [EOS_ID] | |
``` | |
In the pseudo-code above, note that the `tokenize` method should not add a BOS or EOS token automatically, but should add a prefix space. | |
In the Transformers library, one can use [chat templates](https://huggingface.co/docs/transformers/main/en/chat_templating) which make sure the right format is applied. | |
## Run the model | |
```python | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto") | |
messages = [ | |
{"role": "user", "content": "What is your favourite condiment?"}, | |
{"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"}, | |
{"role": "user", "content": "Do you have mayonnaise recipes?"} | |
] | |
inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda") | |
outputs = model.generate(inputs, max_new_tokens=20) | |
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) | |
``` | |
By default, transformers loads the model in full precision. Therefore you might want to further reduce the memory requirements to run the model through the optimizations offered in the HF ecosystem:
### In half-precision | |
Note that `float16` precision only works on GPU devices.
<details> | |
<summary> Click to expand </summary> | |
```diff | |
+ import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
+ model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto") | |
messages = [ | |
{"role": "user", "content": "What is your favourite condiment?"}, | |
{"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"}, | |
{"role": "user", "content": "Do you have mayonnaise recipes?"} | |
] | |
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda") | |
outputs = model.generate(input_ids, max_new_tokens=20) | |
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) | |
``` | |
</details> | |
### Lower precision (8-bit & 4-bit) using `bitsandbytes`
<details> | |
<summary> Click to expand </summary> | |
```diff | |
+ import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
+ model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto") | |
text = "Hello my name is" | |
messages = [ | |
{"role": "user", "content": "What is your favourite condiment?"}, | |
{"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"}, | |
{"role": "user", "content": "Do you have mayonnaise recipes?"} | |
] | |
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda") | |
outputs = model.generate(input_ids, max_new_tokens=20) | |
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) | |
``` | |
</details> | |
### Load the model with Flash Attention 2 | |
<details> | |
<summary> Click to expand </summary> | |
```diff | |
+ import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1" | |
tokenizer = AutoTokenizer.from_pretrained(model_id) | |
+ model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True, device_map="auto") | |
messages = [ | |
{"role": "user", "content": "What is your favourite condiment?"}, | |
{"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"}, | |
{"role": "user", "content": "Do you have mayonnaise recipes?"} | |
] | |
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda") | |
outputs = model.generate(input_ids, max_new_tokens=20) | |
print(tokenizer.decode(outputs[0], skip_special_tokens=True)) | |
``` | |
</details> | |
## Limitations | |
The Mixtral-8x7B Instruct model is a quick demonstration that the base model can be easily fine-tuned to achieve compelling performance. | |
It does not have any moderation mechanisms. We're looking forward to engaging with the community on ways to | |
make the model finely respect guardrails, allowing for deployment in environments requiring moderated outputs. | |
# The Mistral AI Team | |
Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Emma Bou Hanna, Florian Bressand, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Lélio Renard Lavaud, Louis Ternon, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Théophile Gervet, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed. | |
createTimeSinceEpoch: 1743078742000 | |
lastUpdateTimeSinceEpoch: 1743078742000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-mixtral-8x7b-instruct-v0-1:1.4 | |
- repository: rhelai1 | |
name: prometheus-8x7b-v2-0 | |
description: Judge model for multi-phase training and evaluation | |
longDescription: >- | |
Prometheus 2 is a language model using Mistral-Instruct as a base model. It is fine-tuned on 100K feedback within the Feedback Collection and 200K feedback within the Preference Collection. It is also made by weight merging to support both absolute grading (direct assessment) and relative grading (pairwise ranking). | |
license: apache-2.0 | |
licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt | |
maturity: General availability | |
provider: Red Hat | |
logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4= | |
labels: | |
- lab-judge | |
readme: |- | |
## Links for Reference | |
- **Homepage:** In Progress
- **Repository:** https://github.com/prometheus-eval/prometheus-eval
- **Paper:** https://arxiv.org/abs/2405.01535
- **Point of Contact:** [email protected]
# TL;DR | |
Prometheus 2 is an alternative to GPT-4 evaluation for fine-grained evaluation of an underlying LLM and to a reward model for Reinforcement Learning from Human Feedback (RLHF).
 | |
Prometheus 2 is a language model using [Mistral-Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) as a base model. | |
It is fine-tuned on 100K feedback within the [Feedback Collection](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection) and 200K feedback within the [Preference Collection](https://huggingface.co/datasets/prometheus-eval/Preference-Collection). | |
It is also made by weight merging to support both absolute grading (direct assessment) and relative grading (pairwise ranking). | |
The surprising thing is that we find weight merging also improves performance on each format. | |
# Model Details | |
## Model Description | |
- **Model type:** Language model | |
- **Language(s) (NLP):** English | |
- **License:** Apache 2.0 | |
- **Related Models:** [All Prometheus Checkpoints](https://huggingface.co/models?search=prometheus-eval/Prometheus) | |
- **Resources for more information:** | |
- [Research paper](https://arxiv.org/abs/2405.01535) | |
- [GitHub Repo](https://github.com/prometheus-eval/prometheus-eval)
Prometheus is trained with two different sizes (7B and 8x7B). | |
You could check the 7B sized LM on [this page](https://huggingface.co/prometheus-eval/prometheus-2-7b-v2.0). | |
Also, check out our dataset as well on [this page](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection) and [this page](https://huggingface.co/datasets/prometheus-eval/Preference-Collection). | |
## Prompt Format | |
We have made wrapper functions and classes to conveniently use Prometheus 2 at [our GitHub repository](https://github.com/prometheus-eval/prometheus-eval).
We highly recommend you use it! | |
However, if you just want to use the model for your use case, please refer to the prompt format below. | |
Note that absolute grading and relative grading require different prompt templates and system prompts.
### Absolute Grading (Direct Assessment) | |
Prometheus requires four components in the input: an instruction, a response to evaluate, a score rubric, and a reference answer. You can refer to the prompt format below.
You should fill in the instruction, response, reference answer, criteria description, and score descriptions for scores ranging from 1 to 5.
Replace the components marked with \{text\}.
``` | |
###Task Description: | |
An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given. | |
1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general. | |
2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric. | |
3. The output format should look as follows: \"Feedback: (write a feedback for criteria) [RESULT] (an integer number between 1 and 5)\" | |
4. Please do not generate any other opening, closing, and explanations. | |
###The instruction to evaluate: | |
{orig_instruction} | |
###Response to evaluate: | |
{orig_response} | |
###Reference Answer (Score 5): | |
{orig_reference_answer} | |
###Score Rubrics: | |
[{orig_criteria}] | |
Score 1: {orig_score1_description} | |
Score 2: {orig_score2_description} | |
Score 3: {orig_score3_description} | |
Score 4: {orig_score4_description} | |
Score 5: {orig_score5_description} | |
###Feedback: | |
``` | |
After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors). | |
You can find the conversation class at this [link](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
```python
from fastchat.conversation import get_conv_template  # conversation helpers from the FastChat library
conv = get_conv_template("mistral")
conv.set_system_message("You are a fair judge assistant tasked with providing clear, objective feedback based on specific criteria, ensuring each assessment reflects the absolute standards set for performance.")
conv.append_message(conv.roles[0], dialogs['instruction'])  # dialogs['instruction'] holds the filled-in prompt above
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()
x = tokenizer(prompt, truncation=False)
``` | |
As a result, feedback and a score decision will be generated, divided by the separating phrase ```[RESULT]```.
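For example, the generated text can be split on that phrase to recover the feedback and the integer score; this is a minimal sketch and the sample output string is made up.
```python
# Split the judge output into feedback text and an integer score.
output = "Feedback: The response follows the rubric closely and cites the reference answer. [RESULT] 4"

feedback, _, result = output.partition("[RESULT]")
feedback = feedback.replace("Feedback:", "", 1).strip()
score = int(result.strip())
print(score, feedback)
```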
### Relative Grading (Pairwise Ranking) | |
Prometheus requires four components in the input: an instruction, two responses to evaluate, a score rubric, and a reference answer. You can refer to the prompt format below.
You should fill in the instruction, the two responses, the reference answer, and the criteria description.
Replace the components marked with \{text\}.
``` | |
###Task Description: | |
An instruction (might include an Input inside it), two responses to evaluate (denoted as Response A and Response B), a reference answer, and an evaluation criteria are given. | |
1. Write a detailed feedback that assess the quality of the two responses strictly based on the given evaluation criteria, not evaluating in general. | |
2. Make comparisons between Response A, Response B, and the Reference Answer. Instead of examining Response A and Response B separately, go straight to the point and mention about the commonalities and differences between them. | |
3. After writing the feedback, indicate the better response, either "A" or "B". | |
4. The output format should look as follows: "Feedback: (write a feedback for criteria) [RESULT] (Either "A" or "B")" | |
5. Please do not generate any other opening, closing, and explanations. | |
###Instruction: | |
{orig_instruction} | |
###Response A: | |
{orig_response_A} | |
###Response B: | |
{orig_response_B} | |
###Reference Answer: | |
{orig_reference_answer} | |
###Score Rubric: | |
{orig_criteria} | |
###Feedback: | |
``` | |
After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors). | |
You can find the conversation class at this [link](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
```python
from fastchat.conversation import get_conv_template  # conversation helpers from the FastChat library
conv = get_conv_template("mistral")
conv.set_system_message("You are a fair judge assistant assigned to deliver insightful feedback that compares individual performances, highlighting how each stands relative to others within the same cohort.")
conv.append_message(conv.roles[0], dialogs['instruction'])  # dialogs['instruction'] holds the filled-in prompt above
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()
x = tokenizer(prompt, truncation=False)
``` | |
As a result, feedback and a score decision will be generated, divided by the separating phrase ```[RESULT]```.
## License | |
Feedback Collection, Preference Collection, and Prometheus 2 are subject to OpenAI's Terms of Use for the generated data. If you suspect any violations, please reach out to us. | |
# Citation | |
If you find the following model helpful, please consider citing our paper! | |
**BibTeX:** | |
```bibtex | |
@misc{kim2023prometheus, | |
title={Prometheus: Inducing Fine-grained Evaluation Capability in Language Models}, | |
author={Seungone Kim and Jamin Shin and Yejin Cho and Joel Jang and Shayne Longpre and Hwaran Lee and Sangdoo Yun and Seongjin Shin and Sungdong Kim and James Thorne and Minjoon Seo}, | |
year={2023}, | |
eprint={2310.08491}, | |
archivePrefix={arXiv}, | |
primaryClass={cs.CL} | |
} | |
``` | |
```bibtex | |
@misc{kim2024prometheus, | |
title={Prometheus 2: An Open Source Language Model Specialized in Evaluating Other Language Models}, | |
author={Seungone Kim and Juyoung Suk and Shayne Longpre and Bill Yuchen Lin and Jamin Shin and Sean Welleck and Graham Neubig and Moontae Lee and Kyungjae Lee and Minjoon Seo}, | |
year={2024}, | |
eprint={2405.01535}, | |
archivePrefix={arXiv}, | |
primaryClass={cs.CL} | |
} | |
``` | |
createTimeSinceEpoch: 1743078706000 | |
lastUpdateTimeSinceEpoch: 1743078706000 | |
artifacts: | |
- protocol: oci | |
tags: ["1.4"] | |
uri: oci://registry.redhat.io/rhelai1/modelcar-prometheus-8x7b-v2-0:1.4 |