pboyd · March 27, 2025 17:15
diff --git a/catalog.json b/catalog.json
 {
  "$id": "https://kubeflow.org/model-registry/catalog.yaml",
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Model Catalog",
  "type": "object",
  "properties": {
    "source": {
      "type": "string",
      "description": "The name of the catalog provider.",
      "example": "Red Hat"
    },
    "models": {
      "type": "array",
      "description": "List of models available in the catalog. `repository` and `name` are used\nto uniquely identify a model, and should be unique within the catalog.",
      "items": {
        "type": "object",
        "required": [
          "repository",
          "name"
        ],
        "properties": {
          "repository": {
            "type": "string",
            "description": "Name of the repository in the catalog.",
            "example": "ibm-granite"
          },
          "name": {
            "type": "string",
            "description": "Code name of the model.",
            "example": "granite-3.1-8b-base"
          },
          "provider": {
            "type": "string",
            "description": "Name of the organization or entity that provides the model.",
            "example": "IBM"
          },
          "description": {
            "type": "string",
            "description": "Short description of the model."
          },
          "longDescription": {
            "type": "string",
            "description": "Longer description of the model."
          },
          "logo": {
            "type": "string",
            "format": "uri",
            "description": "URL to the model's logo. A [data\nURL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)\nis recommended."
          },
          "readme": {
            "type": "string",
            "description": "Model documentation in Markdown."
          },
          "language": {
            "type": "array",
            "description": "List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).",
            "items": {
              "type": "string"
            },
            "example": [
              "en",
              "es",
              "cs"
            ]
          },
          "license": {
            "type": "string",
            "description": "Short name of the model's license.",
            "example": "apache-2.0"
          },
          "licenseLink": {
            "type": "string",
            "format": "uri",
            "description": "URL to the license text."
          },
          "maturity": {
            "type": "string",
            "description": "Maturity level of the model.",
            "example": "Generally Available"
          },
          "libraryName": {
            "type": "string",
            "example": "transformers"
          },
          "baseModel": {
            "type": "array",
            "description": "Reference to the base model (if any).",
            "items": {
              "type": "object",
              "properties": {
                "catalog": {
                  "type": "string",
                  "description": "Name of the catalog for an external base model. Omit for\nmodels in the same catalog.",
                  "example": "huggingface.co"
                },
                "repository": {
                  "type": "string",
                  "description": "Name of the repository in an external catalog where the base\nmodel exists. Omit for models in the same catalog.",
                  "example": "ibm-granite"
                },
                "name": {
                  "type": "string",
                  "example": "granite-3.1-8b-base"
                }
              }
            }
          },
          "labels": {
            "type": "array",
            "description": "List of labels for categorization.",
            "example": [
              "language"
            ],
            "items": {
              "type": "string"
            }
          },
          "tasks": {
            "type": "array",
            "description": "List of tasks the model is designed for.",
            "items": {
              "type": "string"
            },
            "example": [
              "text-generation"
            ]
          },
          "createTimeSinceEpoch": {
            "description": "Creation time in milliseconds since epoch.",
            "type": "integer"
          },
          "lastUpdateTimeSinceEpoch": {
            "description": "Last update time in milliseconds since epoch.",
            "type": "integer"
          },
          "artifacts": {
            "type": "array",
            "description": "If a model has multiple versions, each version should have a\nseparate artifact.",
            "items": {
              "type": "object",
              "properties": {
                "createTimeSinceEpoch": {
                  "description": "Creation time in milliseconds since epoch.",
                  "type": "integer"
                },
                "protocol": {
                  "type": "string",
                  "description": "The protocol used to access the artifact (only `oci` for now).",
                  "enum": [
                    "oci"
                  ]
                },
                "tags": {
                  "type": "array",
                  "description": "List of tags for the artifact. These are for information\nonly. It is recommended that this list only include immutable\ntags (e.g. `1.2.3` instead of ephemeral/floating tags such as\n`1`, `1.2`, or `latest`).",
                  "example": [
                    "2.1.2"
                  ],
                  "items": {
                    "type": "string"
                  }
                },
                "uri": {
                  "type": "string",
                  "description": "Artifact URI."
                }
              }
            }
          }
        }
      }
    }
  }
 }
diff --git a/catalog.yaml b/catalog.yaml
 $id: https://kubeflow.org/model-registry/catalog.yaml
 $schema: https://json-schema.org/draft/2020-12/schema
 title: Model Catalog
 type: object
 properties:
  source:
    type: string
    description: The name of the catalog provider.
    example: Red Hat
  models:
    type: array
    description: |-
      List of models available in the catalog. `repository` and `name` are used
      to uniquely identify a model, and should be unique within the catalog.
    items:
      type: object
      required:
        - repository
        - name
      properties:
        repository:
          type: string
          description: Name of the repository in the catalog.
          example: ibm-granite
        name:
          type: string
          description: Code name of the model.
          example: granite-3.1-8b-base
        provider:
          type: string
          description: Name of the organization or entity that provides the model.
          example: IBM
        description:
          type: string
          description: Short description of the model.
        longDescription:
          type: string
          description: Longer description of the model.
        logo:
          type: string
          format: uri
          description: |-
            URL to the model's logo. A [data
            URL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)
            is recommended.
        readme:
          type: string
          description: Model documentation in Markdown.
        language:
          type: array
          description: List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).
          items:
            type: string
          example:
            - en
            - es
            - cs
        license:
          type: string
          description: Short name of the model's license.
          example: apache-2.0
        licenseLink:
          type: string
          format: uri
          description: URL to the license text.
        maturity:
          type: string
          description: Maturity level of the model.
          example: Generally Available
        libraryName:
          type: string
          example: transformers
        baseModel:
          type: array
          description: Reference to the base model (if any).
          items:
            type: object
            properties:
              catalog:
                type: string
                description: |-
                  Name of the catalog for an external base model. Omit for
                  models in the same catalog.
                example: huggingface.co
              repository:
                type: string
                description: |-
                  Name of the repository in an external catalog where the base
                  model exists. Omit for models in the same catalog.
                example: ibm-granite
              name:
                type: string
                example: granite-3.1-8b-base
        labels:
          type: array
          description: List of labels for categorization.
          example:
            - language
          items:
            type: string
        tasks:
          type: array
          description: List of tasks the model is designed for.
          items:
            type: string
          example:
            - text-generation
        createTimeSinceEpoch:
          description: Creation time in milliseconds since epoch.
          type: integer
        lastUpdateTimeSinceEpoch:
          description: Last update time in milliseconds since epoch.
          type: integer
        artifacts:
          type: array
          description: |-
            If a model has multiple versions, each version should have a
            separate artifact.
          items:
            type: object
            properties:
              createTimeSinceEpoch:
                description: Creation time in milliseconds since epoch.
                type: integer
              protocol:
                type: string
                description: The protocol used to access the artifact (only `oci` for now).
                enum:
                  - oci
              tags:
                type: array
                description: |-
                  List of tags for the artifact. These are for information
                  only. It is recommended that this list only include immutable
                  tags (e.g. `1.2.3` instead of ephemeral/floating tags such as
                  `1`, `1.2`, or `latest`).
                example: ["2.1.2"]
                items:
                  type: string
              uri:
                type: string
                description: Artifact URI.
diff --git a/granite-rh-catalog.yaml b/granite-rh-catalog.yaml
 source: Red Hat
 models:
 - repository: rhelai1
  name: granite-8b-code-base
  provider: Red Hat
  description: A decoder-only code model designed for code generative tasks
  longDescription: |-
    Granite-8B-Code-Base is a decoder-only code model designed for code
    generative tasks (e.g., code generation, code explanation, code fixing,
    etc.). It is trained from scratch with a two-phase training strategy. In
    phase 1, our model is trained on 4 trillion tokens sourced from 116
    programming languages, ensuring a comprehensive understanding of
    programming languages and syntax. In phase 2, our model is trained on 500
    billion tokens with a carefully designed mixture of high-quality data from
    code and natural language domains to improve the models’ ability to reason
    and follow instructions.
  readme: |-
    # Granite-3.1-8B-Base

    **Model Summary:** 
    Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens.

    - **Developers:** Granite Team, IBM
    - **GitHub Repository:** [ibm-granite/granite-3.1-language-models](https://github.com/ibm-granite/granite-3.1-language-models)
    - **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)
    - **Paper:** [Granite 3.1 Language Models (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d) 
    - **Release Date**: December 18th, 2024
    - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)

    **Supported Languages:** 
    English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 3.1 models for languages beyond these 12 languages.

    **Intended Use:**
    Prominent use cases of LLMs in text-to-text generation include summarization, text classification, extraction, question-answering, and other long-context tasks. All Granite Base models are able to handle these tasks as they were trained on a large amount of data from various domains. Moreover, they can serve as baseline to create specialized models for specific application scenarios.

    **Generation:** 
    This is a simple example of how to use Granite-3.1-8B-Base model.

    Install the following libraries:

    ```shell
    pip install torch torchvision torchaudio
    pip install accelerate
    pip install transformers
    ```
    Then, copy the code snippet below to run the example.

    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer
    device = "auto"
    model_path = "ibm-granite/granite-3.1-8B-base"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # drop device_map if running on CPU
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
    model.eval()
    # change input text as desired
    input_text = "Where is the Thomas J. Watson Research Center located?"
    # tokenize the text
    input_tokens = tokenizer(input_text, return_tensors="pt").to(device)
    # generate output tokens
    output = model.generate(**input_tokens,
                            max_length=4000)
    # decode output tokens into text
    output = tokenizer.batch_decode(output)
    # print output
    print(output)
    ```

    **Evaluation Results:** 
    <table>
      <caption><b>HuggingFace Open LLM Leaderboard V1</b></caption>
    <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">ARC-Challenge</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Hellaswag</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
      </tr></thead>
      <tbody>
      <tr>
        <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">63.99</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">83.27</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">63.45</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">51.29</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">78.92</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">60.19</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">66.85</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.58</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">77.67</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.86</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.02</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">72.84</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">47.99</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">57.32</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.76</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">74.45</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">48.31</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.91</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">69.29</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">40.56</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">53.88</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">39.42</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">66.13</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">26.53</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">37.67</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.03</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">18.87</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">31.78</td>
      </tr>
    </tbody></table>

    <table>
      <caption><b>HuggingFace Open LLM Leaderboard V2</b></caption>
      <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">BBH</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MATH Lvl 5</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">GPQA</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MUSR</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MMLU-Pro</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
      </tr></thead>
      <tbody>
      <tr>
        <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Base</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">42.21</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">26.02</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">9.52</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">9.51</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.36</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">24.8</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">20.07</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">35.22</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">16.84</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.59</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.69</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.9</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.9</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">13.19</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">29.96</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">11.91</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">3.69</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.11</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">8.81</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">9.91</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Base</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">25.19</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.43</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.19</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0.22</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.76</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">1.55</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.22</td>
      </tr>
    </tbody></table>

    **Model Architecture:** 
    Granite-3.1-8B-Base is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings.
    <table>
    <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Model</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">2B Dense</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">8B Dense</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">1B MoE</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">3B MoE</th>
      </tr></thead>
    <tbody>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Embedding size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2048</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">4096</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1024</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1536</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of layers</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">40</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">40</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">24</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Attention head size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">128</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of attention heads</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">32</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">16</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">24</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of KV heads</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MLP hidden size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8192</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">12800</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">512</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">512</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MLP activation</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of experts</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">—</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">—</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">40</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MoE TopK</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">—</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">—</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Initialization std</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Sequence length</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">128K</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Position embedding</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Parameters</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1.3B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">3.3B</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Active parameters</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">400M</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">800M</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Training tokens</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">12T</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">12T</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td>
      </tr>
    </tbody></table>

    **Training Data:** 
    This model is trained on a mix of open source and proprietary data following a three-stage training strategy.
    * Stage 1 data: The data for stage 1 is sourced from diverse domains, such as: web, code, academic sources, books, and math data.
    * Stage 2 data: The data for stage 2 comprises a curated mix of high-quality data from the same domains, plus multilingual and instruction data. The goal of this second training phase is to enhance the model’s performance on specific tasks. 
    * Stage 3 data: The data for stage 3 consists of original stage-2 pretraining data with additional synthetic long-context data in form of QA/summary pairs where the answer
    contains a recitation of the related paragraph before the answer.

    A detailed attribution of datasets can be found in the [Granite 3.0 Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf), [Granite 3.1 Technical Report (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d), and [Accompanying Author List](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/author-ack.pdf).

    **Infrastructure:**
    We train Granite 3.1 Language Models using IBM's super computing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.

    **Ethical Considerations and Limitations:** 
    The use of Large Language Models involves risks and ethical considerations people must be aware of, including but not limited to: bias and fairness, misinformation, and autonomous decision-making. Granite-3.1-8B-Base model is no exception in this regard. Even though this model is suited for multiple generative AI tasks, it has not undergone any safety alignment; therefore it may produce problematic outputs. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in generation scenarios by copying text verbatim from the training dataset due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. Regarding ethics, a latent risk associated with all Large Language Models is their malicious utilization. We urge the community to use Granite-3.1-8B-Base model with ethical intentions and in a responsible way.

    **Resources**
    - ⭐️ Learn about the latest updates with Granite: https://www.ibm.com/granite
    - 📄 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/
    - 💡 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources

  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  labels:
    - language
    - granite-3.1
  tasks:
    - text-generation
  createTimeSinceEpoch: 1733514949000
  lastUpdateTimeSinceEpoch: 1734637721000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1733514949000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892
 - repository: rhelai1
  name: granite-8b-code-instruct
  provider: Red Hat
  description: A fine-tuned model based on Granite 8B Code Base
  longDescription: |-
    Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from
    Granite-8B-Code-Base on a combination of permissively licensed instruction
    data to enhance instruction following capabilities including logical
    reasoning and problem-solving skills.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  readme: |-
    # Granite-3.1-8B-Instruct

    **Model Summary:**
    Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long-context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging.

    - **Developers:** Granite Team, IBM
    - **GitHub Repository:** [ibm-granite/granite-3.1-language-models](https://github.com/ibm-granite/granite-3.1-language-models)
    - **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)
    - **Paper:** [Granite 3.1 Language Models (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d) 
    - **Release Date**: December 18th, 2024
    - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)

    **Supported Languages:** 
    English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 3.1 models for languages beyond these 12 languages.

    **Intended Use:** 
    The model is designed to respond to general instructions and can be used to build AI assistants for multiple domains, including business applications.

    *Capabilities*
    * Summarization
    * Text classification
    * Text extraction
    * Question-answering
    * Retrieval Augmented Generation (RAG)
    * Code related tasks
    * Function-calling tasks
    * Multilingual dialog use cases
    * Long-context tasks including long document/meeting summarization, long document QA, etc.

    **Generation:** 
    This is a simple example of how to use Granite-3.1-8B-Instruct model.

    Install the following libraries:

    ```shell
    pip install torch torchvision torchaudio
    pip install accelerate
    pip install transformers
    ```
    Then, copy the snippet from the section that is relevant for your use case.

    ```python
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # "auto" is a device_map placement policy, not a torch device string
    device = "auto"
    model_path = "ibm-granite/granite-3.1-8b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # drop device_map if running on CPU
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
    model.eval()
    # change input text as desired
    chat = [
        { "role": "user", "content": "Please list one IBM Research laboratory located in the United States. You should only output its name and location." },
    ]
    chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    # tokenize the text and move it to the device the model was placed on
    # (passing "auto" to .to() would fail: it is not a valid torch device)
    input_tokens = tokenizer(chat, return_tensors="pt").to(model.device)
    # generate output tokens
    output = model.generate(**input_tokens,
                            max_new_tokens=100)
    # decode output tokens into text
    output = tokenizer.batch_decode(output)
    # print output
    print(output)
    ```
    **Evaluation Results:**
    <table>
      <caption><b>HuggingFace Open LLM Leaderboard V1</b></caption>
    <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">ARC-Challenge</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Hellaswag</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MMLU</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">TruthfulQA</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Winogrande</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">GSM8K</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
      </tr></thead>
      <tbody>
      <tr>
        <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">62.62</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">84.48</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">65.34</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">66.23</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">75.37</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">73.84</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">71.31</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">54.61</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">75.14</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">55.31</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">59.42</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">67.48</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.76</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">60.79</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">50.42</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">73.01</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">52.19</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">49.71</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">64.87</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">48.97</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">56.53</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">42.66</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">65.97</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">26.13</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.77</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.35</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">33.88</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.29</td>
      </tr>
    </tbody></table>

    <table>
      <caption><b>HuggingFace Open LLM Leaderboard V2</b></caption>
    <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Models</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">IFEval</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">BBH</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MATH Lvl 5</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">GPQA</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MUSR</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">MMLU-Pro</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">Avg</th>
      </tr></thead>
      <tbody>
      <tr>
        <td style="text-align:left; background-color: #DAE8FF; color: black;">Granite-3.1-8B-Instruct</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">72.08</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">34.09</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">21.68</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.28</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">19.01</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">28.19</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">30.55</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-2B-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">62.86</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.82</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">11.33</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.26</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4.87</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">20.21</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">21.06</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-3B-A800M-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">55.16</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">16.69</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">10.35</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">5.15</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.51</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">12.75</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">17.1</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: #2D2D2D;">Granite-3.1-1B-A400M-Instruct</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">46.86</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">6.18</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">4.08</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">0.78</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">2.41</td>
        <td style="text-align:center; background-color: #FFFFFF; color: #2D2D2D;">10.05</td>
      </tr>
    </tbody></table>

    **Model Architecture:**
    Granite-3.1-8B-Instruct is based on a decoder-only dense transformer architecture. Core components of this architecture are: GQA and RoPE, MLP with SwiGLU, RMSNorm, and shared input/output embeddings.

    <table>
    <thead>
      <tr>
        <th style="text-align:left; background-color: #001d6c; color: white;">Model</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">2B Dense</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">8B Dense</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">1B MoE</th>
        <th style="text-align:center; background-color: #001d6c; color: white;">3B MoE</th>
      </tr></thead>
    <tbody>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Embedding size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2048</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">4096</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1024</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1536</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of layers</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">40</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">40</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">24</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Attention head size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">128</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">64</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of attention heads</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">32</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">16</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">24</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of KV heads</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MLP hidden size</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8192</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">12800</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">512</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">512</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MLP activation</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">SwiGLU</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Number of experts</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">—</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">—</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">32</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">40</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">MoE TopK</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">—</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">—</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">8</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Initialization std</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">0.1</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Sequence length</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">128K</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">128K</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;">Position embedding</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">RoPE</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Parameters</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">1.3B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">3.3B</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Active parameters</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">2.5B</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">8.1B</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">400M</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">800M</td>
      </tr>
      <tr>
        <td style="text-align:left; background-color: #FFFFFF; color: black;"># Training tokens</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">12T</td>
        <td style="text-align:center; background-color: #DAE8FF; color: black;">12T</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td>
        <td style="text-align:center; background-color: #FFFFFF; color: black;">10T</td>
      </tr>
    </tbody></table>

    **Training Data:** 
    Overall, our SFT data is largely comprised of three key sources: (1) publicly available datasets with permissive license, (2) internal synthetic data targeting specific capabilities including long-context tasks, and (3) very small amounts of human-curated data. A detailed attribution of datasets can be found in the [Granite 3.0 Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf), [Granite 3.1 Technical Report (coming soon)](https://huggingface.co/collections/ibm-granite/granite-31-language-models-6751dbbf2f3389bec5c6f02d), and [Accompanying Author List](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/author-ack.pdf).

    **Infrastructure:**
    We train Granite 3.1 Language Models using IBM's super computing cluster, Blue Vela, which is outfitted with NVIDIA H100 GPUs. This cluster provides a scalable and efficient infrastructure for training our models over thousands of GPUs.

    **Ethical Considerations and Limitations:** 
    Granite 3.1 Instruct Models are primarily finetuned using instruction-response pairs mostly in English, but also multilingual data covering eleven languages. Although this model can handle multilingual dialog use cases, its performance on them might not match its performance on English tasks. In such cases, introducing a small number of examples (few-shot) can help the model in generating more accurate outputs. While this model has been aligned by keeping safety in consideration, the model may in some cases produce inaccurate, biased, or unsafe responses to user prompts. So we urge the community to use this model with proper safety testing and tuning tailored for their specific tasks.

    **Resources**
    - ⭐️ Learn about the latest updates with Granite: https://www.ibm.com/granite
    - 📄 Get started with tutorials, best practices, and prompt engineering advice: https://www.ibm.com/granite/docs/
    - 💡 Learn about the latest Granite learning resources: https://ibm.biz/granite-learning-resources

    <!-- ## Citation
    ```
    @misc{granite-models,
      author = {author 1, author2, ...},
      title = {},
      journal = {},
      volume = {},
      year = {2024},
      url = {https://arxiv.org/abs/0000.00000},
    }
    ``` -->
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  baseModel:
    - repository: rhelai1
      name: granite-8b-code-base
  labels:
    - language
    - granite-3.1
  tasks:
    - text-generation
  createTimeSinceEpoch: 1733514949000
  lastUpdateTimeSinceEpoch: 1734637721000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1733514949000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892
 - repository: rhelai1
  name: granite-8b-lab-v2-preview
  provider: Red Hat
  description: A derivative of granite-8b-base trained with the LAB methodology
  # Literal block scalar: all lines share one indent so the text carries no stray leading spaces.
  longDescription: |-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based
    alignment tuning method for LLMs from IBM Research.
    Granite-8b-lab-v2-preview is a Granite-8b-base derivative model trained
    with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  baseModel:
    - repository: rhelai1
      name: granite-8b-code-base
  labels:
    - language
    - granite-3.1
    - lab-base
  tasks:
    - text-generation
  createTimeSinceEpoch: 1732870892000
  lastUpdateTimeSinceEpoch: 1732870892000
  # The URI carries the oci:// scheme so it agrees with `protocol: oci`.
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1732870892000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-lab-v2-preview:1.3-1732870892
 - repository: rhelai1
  name: granite-8b-starter-v1
  provider: Red Hat
  description: Custom Red Hat phase00 tuned base model.
  longDescription: |-
    A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  labels:
    - language
    - granite-3.1
    - lab-base
  tasks:
    - text-generation
  createTimeSinceEpoch: 1732870892000
  lastUpdateTimeSinceEpoch: 1732870892000
  # The URI carries the oci:// scheme so it agrees with `protocol: oci`.
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1732870892000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-starter-v1:1.3-1732870892
 - repository: rhelai1
  name: granite-8b-lab-v1
  provider: Red Hat
  description: A derivative of granite-8b-base trained with the LAB methodology
  # Literal block scalar: all lines share one indent so the text carries no stray leading spaces.
  longDescription: |-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based
    alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is
    a Granite-8b-base derivative model trained with the LAB methodology,
    using Mixtral-8x7b-Instruct as a teacher model.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  baseModel:
    - repository: rhelai1
      name: granite-8b-code-base
  labels:
    - language
    - granite-3.1
  tasks:
    - text-generation
  createTimeSinceEpoch: 1732870892000
  lastUpdateTimeSinceEpoch: 1732870892000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1732870892000
      tags: ["1.3.0"]
      uri: registry.redhat.io/rhelai1/granite-8b-lab-v1:1.3-1732870892
 - repository: rhelai1
  name: granite-7b-starter
  provider: Red Hat
  description: Custom Red Hat phase00 tuned base model.
  longDescription: |-
    A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["en"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  labels:
    - lab-base
  tasks:
    - text-generation
  createTimeSinceEpoch: 1732870892000
  lastUpdateTimeSinceEpoch: 1732870892000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1732870892000
      tags: ["1.3.0"]
      uri: registry.redhat.io/rhelai1/granite-7b-starter:1.3-1732870892
 - repository: rhelai1
  name: granite-7b-redhat
  provider: Red Hat
  description: A derivative of granite-7b-base trained with the LAB methodology
  longDescription: |-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based
    alignment tuning method for LLMs from IBM Research. Granite-7b-lab is a
    Granite-7b-base derivative model trained with the LAB methodology, using
    Mixtral-8x7b-Instruct as a teacher model.
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  language: ["en"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  labels:
    - language
  tasks:
    - text-generation
  createTimeSinceEpoch: 1732870892000
  lastUpdateTimeSinceEpoch: 1732870892000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1732870892000
      tags: ["1.3.0"]
      uri: registry.redhat.io/rhelai1/granite-7b-redhat-lab:1.3-1732870892
 - repository: rhelai1
  name: mixtral-8x7b-instruct-v0-1
  description: Teacher and critic model for running Synthetic data generation (SDG)
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-teacher
  artifacts:
    - protocol: oci
      tags: ["1.4"]
      uri: registry.redhat.io/rhelai1/mixtral-8x7b-instruct-v0-1:1.4

 - repository: rhelai1
  name: prometheus-8x7b-v2-0
  description: Judge model for multi-phase training and evaluation
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-judge
  artifacts:
    - protocol: oci
      tags: ["1.4"]
      uri: registry.redhat.io/rhelai1/prometheus-8x7b-v2-0:1.4
diff --git a/granite.yaml b/granite.yaml
 source: Red Hat
 models:
 - repository: rhelai1
  name: granite-8b-code-base
  provider: Red Hat
  description: A decoder-only code model designed for code generative tasks
  longDescription: |-
    Granite-8B-Code-Base is a decoder-only code model designed for code
    generative tasks (e.g., code generation, code explanation, code fixing,
    etc.). It is trained from scratch with a two-phase training strategy. In
    phase 1, our model is trained on 4 trillion tokens sourced from 116
    programming languages, ensuring a comprehensive understanding of
    programming languages and syntax. In phase 2, our model is trained on 500
    billion tokens with a carefully designed mixture of high-quality data from
    code and natural language domains to improve the models’ ability to reason
    and follow instructions.
  readme: |-
    # Granite-3.1-8B-Base

    **Model Summary:**
    Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens.

    ...

  logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  labels:
    - language
    - granite-3.1
    - lab-base
  tasks:
    - text-generation
  createTimeSinceEpoch: 1733514949000
  lastUpdateTimeSinceEpoch: 1734637721000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1733514949000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892
 - repository: rhelai1
  name: granite-8b-code-instruct
  provider: Red Hat
  description: A fine-tuned model based on Granite 8B Code Base
  longDescription: |-
    Granite-8B-Code-Instruct is an 8B parameter model fine tuned from
    Granite-8B-Code-Base on a combination of permissively licensed instruction
    data to enhance instruction following capabilities including logical
    reasoning and problem-solving skills.
  logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
  readme: |-
    # Granite-3.1-8B-Instruct

    **Model Summary:**
    Granite-3.1-8B-Instruct is an 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging.

    ...

  language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Generally Available
  libraryName: transformers
  baseModel:
    - repository: rhelai1
      name: granite-8b-code-base
  labels:
    - language
    - granite-3.1
  tasks:
    - text-generation
  createTimeSinceEpoch: 1733514949000
  lastUpdateTimeSinceEpoch: 1734637721000
  artifacts:
    - protocol: oci
      createTimeSinceEpoch: 1733514949000
      tags: ["1.3.0"]
      uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892
diff --git a/rhelai.yaml b/rhelai.yaml
 source: Red Hat
 models:
 - repository: rhelai1
  name: granite-7b-starter
  description: Base model for customizing and fine-tuning
  longDescription: >-
    A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-base
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-7b-starter [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-7b-starter is a starting student model built for InstructLab, based on Granite-7b-base. This model can be used to create LAB models via InstructLab.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-starter is a Granite-7b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-7b-starter
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm/granite-7b-base](https://huggingface.co/ibm/granite-7b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am, Red Hat® Starter Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model.  My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-7b-starter is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.

  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-7b-starter:1.4.0

 - repository: rhelai1
  name: granite-7b-redhat-lab
  description: Granite model for inference serving
  longDescription: >-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-lab is a Granite-7b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-7b-redhat-lab [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-7b-redhat-lab is an instruction-tuned LAB model built via InstructLab, based on Granite-7b-base.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-7b-redhat-lab is a Granite-7b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-7b-redhat-lab
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm/granite-7b-base](https://huggingface.co/ibm/granite-7b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am, Red Hat® Instruct Model based on Granite 7B, an AI language model developed by Red Hat and IBM Research, based on the Granite-7b-base language model.  My primary function is to be a chat assistant."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-7b-redhat-lab is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.

  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-7b-redhat-lab:1.4.0

 - repository: rhelai1
  name: granite-8b-starter-v1
  description: Base model for customizing and fine-tuning
  longDescription: >-
    A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-base
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-8b-starter-v1 [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-8b-starter-v1 is a starting student model built for InstructLab, based on Granite-3.0-8b-base. This model can be used to create LAB models via InstructLab.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-starter-v1 is a Granite-3.0-8b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-8b-starter-v1
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am a Red Hat® Starter Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-8b-starter-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-starter-v1:1.4.0

 - repository: rhelai1
  name: granite-8b-lab-v1
  description: Granite model for inference serving
  longDescription: >-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-8b-lab-v1 [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-8b-lab-v1 is an instruction-tuned LAB model built via InstructLab, based on Granite-3.0-8b-base.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-3.0-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-8b-lab-v1
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary role is to serve as a chat assistant."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-8b-lab-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-lab-v1:1.4.0

 - repository: rhelai1
  name: granite-8b-lab-v2-preview
  description: Preview of the version 2 8b Granite model for inference serving
  longDescription: >-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v2-preview is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-lab-v2-preview:1.4.0

 - repository: rhelai1
  name: granite-3.1-8b-starter-v1
  description: Version 1 of the Granite 3.1 base model for customizing and fine-tuning
  longDescription: >-
    A custom Red Hat base model instruct tuned only for phase 00, produced by IBM Research specifically for RHEL AI.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-8b-starter-v1 [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-8b-starter-v1 is a starting student model built for InstructLab, based on Granite-3.0-8b-base. This model can be used to create LAB models via InstructLab.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-starter-v1 is a Granite-3.0-8b-base derivative model for training with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-8b-starter-v1
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am a Red Hat® Starter Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary function is to provide a base model for InstructLab model alignment based on the LAB (Large-scale Alignment for chatBots) methodology."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-8b-starter-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-3-1-8b-starter-v1:1.4.0

 - repository: rhelai1
  name: granite-3.1-8b-lab-v1
  description: Version 1 of the Granite 3.1 model for inference serving
  longDescription: >-
    LAB: Large-scale Alignment for chatBots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  tasks:
    - text-generation
  languages:
    - en
  readme: |-
    # Model Card for Granite-8b-lab-v1 [Paper](https://arxiv.org/abs/2403.01081) 

    ### Overview

    Granite-8b-lab-v1 is an instruction-tuned LAB model built via InstructLab, based on Granite-3.0-8b-base.

    ### Method

    LAB: **L**arge-scale **A**lignment for chat**B**ots is a novel synthetic data-based alignment tuning method for LLMs from IBM Research. Granite-8b-lab-v1 is a Granite-3.0-8b-base derivative model trained with the LAB methodology, using Mixtral-8x7b-Instruct as a teacher model.

    LAB consists of three key components:

    1. Taxonomy-driven data curation process
    2. Large-scale synthetic data generator
    3. Two-phased-training with replay buffers

    LAB approach allows for adding new knowledge and skills, in an incremental fashion, to an already pre-trained model without suffering from catastrophic forgetting.

    Taxonomy is a tree of seed examples that are used to prompt a teacher model to generate synthetic data. Taxonomy allows the data curator or the model designer to easily specify a diverse set of the knowledge-domains and skills that they would like to include in their LLM. At a high level, these can be categorized into three high-level bins - knowledge, foundational skills, and compositional skills. The leaf nodes of the taxonomy are tasks associated with one or more seed examples.

    During the synthetic data generation, **unlike previous approaches where seed examples are uniformly drawn from the entire pool (i.e. self-instruct), we use the taxonomy to drive the sampling process**: For each knowledge/skill, we only use the local examples within the leaf node as seeds to prompt the teacher model.
    This makes the teacher model better exploit the task distributions defined by the local examples of each node and the diversity in the taxonomy itself ensures the entire generation covers a wide range of tasks, as illustrated below. In turn, this allows for using Mixtral 8x7B as the teacher model for generation while performing very competitively with models such as ORCA-2, WizardLM, and Zephyr Beta that rely on synthetic data generated by much larger and capable models like GPT-4.

    For adding new domain-specific knowledge, we provide an external knowledge source (document) and prompt the model to generate questions and answers based on the document.
    Foundational skills such as reasoning and compositional skills such as creative writing are generated through in-context learning using the seed examples from the taxonomy. 

    Additionally, to ensure the data is high-quality and safe, we employ steps to check the questions and answers to ensure that they are grounded and safe. This is done using the same teacher model that generated the data. 

    Our training consists of two major phases: knowledge tuning and skills tuning. 
    There are two steps in knowledge tuning where the first step learns simple knowledge (short samples) and the second step learns complicated knowledge (longer samples).
    The second step uses a replay buffer with data from the first step.
    Both foundational skills and compositional skills are learned during the skills tuning phases, where a replay buffer of data from the knowledge phase is used.
    Importantly, we use a set of hyper-parameters for training that are very different from standard small-scale supervised fine-tuning: larger batch size and carefully optimized learning rate and scheduler.

    ## Model description
    - **Model Name**: Granite-8b-lab-v1
    - **Language(s):** Primarily English
    - **License:** Apache 2.0
    - **Base model:** [ibm-granite/granite-3.0-8b-base](https://huggingface.co/ibm-granite/granite-3.0-8b-base)
    - **Teacher Model:** [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)

    ## Prompt Template

    ```python
    sys_prompt = "I am a Red Hat® Instruct Model, an AI language model developed by Red Hat and IBM Research based on the granite-3.0-8b-base model. My primary role is to serve as a chat assistant."

    prompt = f'<|system|>\n{sys_prompt}\n<|user|>\n{inputs}\n<|assistant|>\n'
    stop_token = '<|endoftext|>'
    ```

    We advise utilizing the system prompt employed during the model's training for optimal inference performance, as there could be performance variations based on the provided instructions. 


    **Bias, Risks, and Limitations**

    Granite-8b-lab-v1 is a base model and has not undergone any safety alignment; therefore, it may produce problematic outputs. In the absence of adequate safeguards and RLHF, there exists a risk of malicious utilization of these models for generating disinformation or harmful content. Caution is urged against complete reliance on a specific language model for crucial decisions or impactful information, as preventing these models from fabricating content is not straightforward. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in ungrounded generation scenarios due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-3-1-8b-lab-v1:1.4.0

 - repository: rhelai1
  name: granite-8b-code-instruct
  description: LAB fine-tuned granite code model for inference serving
  longDescription: >-
    Granite-8B-Code-Instruct is an 8B parameter model fine-tuned from Granite-8B-Code-Base on a combination of permissively licensed instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Technology preview
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  readme: |-
    ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png)

    # Granite-8B-Code-Instruct-4K

    ## Model Summary
    **Granite-8B-Code-Instruct-4K** is an 8B parameter model fine-tuned from *Granite-8B-Code-Base-4K* on a combination of **permissively licensed** instruction data to enhance instruction following capabilities including logical reasoning and problem-solving skills.

    - **Developers:** IBM Research
    - **GitHub Repository:** [ibm-granite/granite-code-models](https://github.com/ibm-granite/granite-code-models)
    - **Paper:** [Granite Code Models: A Family of Open Foundation Models for Code Intelligence](https://arxiv.org/abs/2405.04324)
    - **Release Date**: May 6th, 2024
    - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0).

    ## Usage
    ### Intended use
    The model is designed to respond to coding related instructions and can be used to build coding assistants.

    <!-- TO DO: Check starcoder2 instruct code example that includes the template https://huggingface.co/bigcode/starcoder2-15b-instruct-v0.1 -->

    ### Generation
    This is a simple example of how to use **Granite-8B-Code-Instruct-4K** model.

    ```python
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    device = "cuda" # or "cpu"
    model_path = "ibm-granite/granite-8b-code-instruct-4k"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # drop device_map if running on CPU
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
    model.eval()
    # change input text as desired
    chat = [
        { "role": "user", "content": "Write a code to find the maximum value in a list of numbers." },
    ]
    chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    # tokenize the text
    input_tokens = tokenizer(chat, return_tensors="pt")
    # transfer tokenized inputs to the device
    for i in input_tokens:
        input_tokens[i] = input_tokens[i].to(device)
    # generate output tokens
    output = model.generate(**input_tokens, max_new_tokens=100)
    # decode output tokens into text
    output = tokenizer.batch_decode(output)
    # loop over the batch to print, in this example the batch size is 1
    for i in output:
        print(i)
    ```

    <!-- TO DO: Check this part -->
    ## Training Data
    Granite Code Instruct models are trained on the following types of data.
    * Code Commits Datasets: we sourced code commits data from the [CommitPackFT](https://huggingface.co/datasets/bigcode/commitpackft) dataset, a filtered version of the full CommitPack dataset. From CommitPackFT dataset, we only consider data for 92 programming languages. Our inclusion criteria boils down to selecting programming languages common across CommitPackFT and the 116 languages that we considered to pretrain the code-base model (*Granite-8B-Code-Base*).
    * Math Datasets: We consider two high-quality math datasets, [MathInstruct](https://huggingface.co/datasets/TIGER-Lab/MathInstruct) and [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA). Due to license issues, we filtered out GSM8K-RFT and Camel-Math from MathInstruct dataset.
    * Code Instruction Datasets: We use [Glaive-Code-Assistant-v3](https://huggingface.co/datasets/glaiveai/glaive-code-assistant-v3), [Glaive-Function-Calling-v2](https://huggingface.co/datasets/glaiveai/glaive-function-calling-v2), [NL2SQL11](https://huggingface.co/datasets/bugdaryan/sql-create-context-instruction) and a small collection of synthetic API calling datasets.
    * Language Instruction Datasets: We include high-quality datasets such as [HelpSteer](https://huggingface.co/datasets/nvidia/HelpSteer) and an open license-filtered version of [Platypus](https://huggingface.co/datasets/garage-bAInd/Open-Platypus). We also include a collection of hardcoded prompts to ensure our model generates correct outputs given inquiries about its name or developers.

    ## Infrastructure
    We train the Granite Code models using two of IBM's super computing clusters, namely Vela and Blue Vela, both outfitted with NVIDIA A100 and H100 GPUs respectively. These clusters provide a scalable and efficient infrastructure for training our models over thousands of GPUs.

    ## Ethical Considerations and Limitations
    Granite code instruct models are primarily finetuned using instruction-response pairs across a specific set of programming languages. Thus, their performance may be limited with out-of-domain programming languages. In this situation, it is beneficial to provide few-shot examples to steer the model's output. Moreover, developers should perform safety testing and target-specific tuning before deploying these models on critical applications. The model also inherits ethical considerations and limitations from its base model. For more information, please refer to the *[Granite-8B-Code-Base-4K](https://huggingface.co/ibm-granite/granite-8b-code-base-4k)* model card.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-code-instruct:1.4.0

- repository: rhelai1
  name: granite-8b-code-base
  description: Granite code model for inference serving
  longDescription: >-
    Granite-8B-Code-Base is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 4 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: Technology preview
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  readme: |-
    ![image/png](https://cdn-uploads.huggingface.co/production/uploads/62cd5057674cdb524450093d/1hzxoPwqkBJXshKVVe6_9.png)

    # Granite-8B-Code-Base-4K

    ## Model Summary
    **Granite-8B-Code-Base-4K** is a decoder-only code model designed for code generative tasks (e.g., code generation, code explanation, code fixing, etc.). It is trained from scratch with a two-phase training strategy. In phase 1, our model is trained on 4 trillion tokens sourced from 116 programming languages, ensuring a comprehensive understanding of programming languages and syntax. In phase 2, our model is trained on 500 billion tokens with a carefully designed mixture of high-quality data from code and natural language domains to improve the models’ ability to reason and follow instructions.

    - **Developers:** IBM Research
    - **GitHub Repository:** [ibm-granite/granite-code-models](https://github.com/ibm-granite/granite-code-models)
    - **Paper:** [Granite Code Models: A Family of Open Foundation Models for Code Intelligence](https://arxiv.org/abs/2405.04324)
    - **Release Date**: May 6th, 2024
    - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0).

    ## Usage
    ### Intended use
    Prominent enterprise use cases of LLMs in software engineering productivity include code generation, code explanation, code fixing, generating unit tests, generating documentation, addressing technical debt issues, vulnerability detection, code translation, and more. All Granite Code Base models, including the **8B parameter model**, are able to handle these tasks as they were trained on a large amount of code data from 116 programming languages.

    ### Generation
    This is a simple example of how to use **Granite-8B-Code-Base-4K** model.

    ```python
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    device = "cuda" # or "cpu"
    model_path = "ibm-granite/granite-8b-code-base-4k"
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # drop device_map if running on CPU
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map=device)
    model.eval()
    # change input text as desired
    input_text = "def generate():"
    # tokenize the text
    input_tokens = tokenizer(input_text, return_tensors="pt")
    # transfer tokenized inputs to the device
    for i in input_tokens:
        input_tokens[i] = input_tokens[i].to(device)
    # generate output tokens
    output = model.generate(**input_tokens)
    # decode output tokens into text
    output = tokenizer.batch_decode(output)
    # loop over the batch to print, in this example the batch size is 1
    for i in output:
        print(i)
    ```

    ## Training Data
    - **Data Collection and Filtering:** Pretraining code data is sourced from a combination of publicly available datasets (e.g., [GitHub Code Clean](https://huggingface.co/datasets/codeparrot/github-code-clean), [Starcoder data](https://huggingface.co/datasets/bigcode/starcoderdata)), and additional public code repositories and issues from GitHub. We filter raw data to retain a list of 116 programming languages. After language filtering, we also filter out low-quality code.
    - **Exact and Fuzzy Deduplication:** We adopt an aggressive deduplication strategy that includes both exact and fuzzy deduplication to remove documents having (near) identical code content.
    - **HAP, PII, Malware Filtering:** We apply a HAP content filter that reduces models' likelihood of generating hateful, abusive, or profane language. We also make sure to redact Personally Identifiable Information (PII) by replacing PII content (e.g., names, email addresses, keys, passwords) with corresponding tokens (e.g., ⟨NAME⟩, ⟨EMAIL⟩, ⟨KEY⟩, ⟨PASSWORD⟩). Moreover, we scan all datasets using [ClamAV](https://www.clamav.net/) to identify and remove instances of malware in the source code.
    - **Natural Language Datasets:** In addition to collecting code data for model training, we curate several publicly available high-quality natural language datasets to improve models' proficiency in language understanding and mathematical reasoning. Unlike the code data, we do not deduplicate these datasets.

    ## Infrastructure
    We train the Granite Code models using two of IBM's super computing clusters, namely Vela and Blue Vela, both outfitted with NVIDIA A100 and H100 GPUs respectively. These clusters provide a scalable and efficient infrastructure for training our models over thousands of GPUs.

    ## Ethical Considerations and Limitations
    The use of Large Language Models involves risks and ethical considerations people must be aware of. Regarding code generation, caution is urged against complete reliance on specific code models for crucial decisions or impactful information as the generated code is not guaranteed to work as intended. **Granite-8B-Code-Base-4K** model is no exception in this regard. Even though this model is suited for multiple code-related tasks, it has not undergone any safety alignment; therefore, it may produce problematic outputs. Additionally, it remains uncertain whether smaller models might exhibit increased susceptibility to hallucination in generation scenarios by copying source code verbatim from the training dataset due to their reduced sizes and memorization capacities. This aspect is currently an active area of research, and we anticipate more rigorous exploration, comprehension, and mitigations in this domain. Regarding ethics, a latent risk associated with all Large Language Models is their malicious utilization. We urge the community to use **Granite-8B-Code-Base-4K** model with ethical intentions and in a responsible way.
  createTimeSinceEpoch: 1739210683000
  lastUpdateTimeSinceEpoch: 1739210683000
  artifacts:
    - protocol: oci
      tags: ["1.4.0"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-granite-8b-code-base:1.4.0

- repository: rhelai1
  name: mixtral-8x7b-instruct-v0-1
  description: Teacher and critic model for running Synthetic data generation (SDG)
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-teacher
  readme: |-
    # Model Card for Mixtral-8x7B

    ### Tokenization with `mistral-common`

    ```py
    from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
    from mistral_common.protocol.instruct.messages import UserMessage
    from mistral_common.protocol.instruct.request import ChatCompletionRequest

    mistral_models_path = "MISTRAL_MODELS_PATH"

    tokenizer = MistralTokenizer.v1()

    completion_request = ChatCompletionRequest(messages=[UserMessage(content="Explain Machine Learning to me in a nutshell.")])

    tokens = tokenizer.encode_chat_completion(completion_request).tokens
    ```

    ## Inference with `mistral_inference`

    ```py
    from mistral_inference.transformer import Transformer
    from mistral_inference.generate import generate

    model = Transformer.from_folder(mistral_models_path)
    out_tokens, _ = generate([tokens], model, max_tokens=64, temperature=0.0, eos_id=tokenizer.instruct_tokenizer.tokenizer.eos_id)

    result = tokenizer.decode(out_tokens[0])
    print(result)
    ```
    ## Inference with hugging face `transformers`

    ```py
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
    model.to("cuda")

    generated_ids = model.generate(tokens, max_new_tokens=1000, do_sample=True)

    # decode with mistral tokenizer
    result = tokenizer.decode(generated_ids[0].tolist())
    print(result)
    ```
    > [!TIP]
    > PRs to correct the transformers tokenizer so that it gives 1-to-1 the same results as the mistral-common reference implementation are very welcome!


    ---
    The Mixtral-8x7B Large Language Model (LLM) is a pretrained generative Sparse Mixture of Experts. The Mixtral-8x7B outperforms Llama 2 70B on most benchmarks we tested.
    For full details of this model please read our [release blog post](https://mistral.ai/news/mixtral-of-experts/).
    ## Warning
    This repo contains weights that are compatible with [vLLM](https://github.com/vllm-project/vllm) serving of the model as well as Hugging Face [transformers](https://github.com/huggingface/transformers) library. It is based on the original Mixtral [torrent release](magnet:?xt=urn:btih:5546272da9065eddeb6fcd7ffddeef5b75be79a7&dn=mixtral-8x7b-32kseqlen&tr=udp%3A%2F%2Fopentracker.i2p.rocks%3A6969%2Fannounce&tr=http%3A%2F%2Ftracker.openbittorrent.com%3A80%2Fannounce), but the file format and parameter names are different. Please note that the model cannot (yet) be instantiated with HF.
    ## Instruction format
    This format must be strictly respected, otherwise the model will generate sub-optimal outputs.
    The template used to build a prompt for the Instruct model is defined as follows:
    ```
    <s> [INST] Instruction [/INST] Model answer</s> [INST] Follow-up instruction [/INST]
    ```
    Note that `<s>` and `</s>` are special tokens for beginning of string (BOS) and end of string (EOS) while [INST] and [/INST] are regular strings.
    As reference, here is the pseudo-code used to tokenize instructions during fine-tuning:
    ```python
    def tokenize(text):
        return tok.encode(text, add_special_tokens=False)
    [BOS_ID] +
    tokenize("[INST]") + tokenize(USER_MESSAGE_1) + tokenize("[/INST]") +
    tokenize(BOT_MESSAGE_1) + [EOS_ID] +
    …
    tokenize("[INST]") + tokenize(USER_MESSAGE_N) + tokenize("[/INST]") +
    tokenize(BOT_MESSAGE_N) + [EOS_ID]
    ```
    In the pseudo-code above, note that the `tokenize` method should not add a BOS or EOS token automatically, but should add a prefix space.
    In the Transformers library, one can use [chat templates](https://huggingface.co/docs/transformers/main/en/chat_templating) which make sure the right format is applied.
    ## Run the model
    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

    messages = [
        {"role": "user", "content": "What is your favourite condiment?"},
        {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
        {"role": "user", "content": "Do you have mayonnaise recipes?"}
    ]
    inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    outputs = model.generate(inputs, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    ```
    By default, transformers will load the model in full precision. Therefore you might be interested to further reduce down the memory requirements to run the model through the optimizations we offer in HF ecosystem:
    ### In half-precision
    Note `float16` precision only works on GPU devices
    <details>
    <summary> Click to expand </summary>

    ```diff
    + import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    + model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

    messages = [
        {"role": "user", "content": "What is your favourite condiment?"},
        {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
        {"role": "user", "content": "Do you have mayonnaise recipes?"}
    ]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")

    outputs = model.generate(input_ids, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    ```
    </details>

    ### Lower precision (8-bit & 4-bit) using `bitsandbytes`

    <details>
    <summary> Click to expand </summary>

    ```diff
    + import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    + model = AutoModelForCausalLM.from_pretrained(model_id, load_in_4bit=True, device_map="auto")
    text = "Hello my name is"
    messages = [
        {"role": "user", "content": "What is your favourite condiment?"},
        {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
        {"role": "user", "content": "Do you have mayonnaise recipes?"}
    ]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    outputs = model.generate(input_ids, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    ```
    </details>

    ### Load the model with Flash Attention 2

    <details>
    <summary> Click to expand </summary>

    ```diff
    + import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    + model = AutoModelForCausalLM.from_pretrained(model_id, use_flash_attention_2=True, device_map="auto")
    messages = [
        {"role": "user", "content": "What is your favourite condiment?"},
        {"role": "assistant", "content": "Well, I'm quite partial to a good squeeze of fresh lemon juice. It adds just the right amount of zesty flavour to whatever I'm cooking up in the kitchen!"},
        {"role": "user", "content": "Do you have mayonnaise recipes?"}
    ]
    input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to("cuda")
    outputs = model.generate(input_ids, max_new_tokens=20)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
    ```
    </details>

    ## Limitations
    The Mixtral-8x7B Instruct model is a quick demonstration that the base model can be easily fine-tuned to achieve compelling performance.
    It does not have any moderation mechanisms. We're looking forward to engaging with the community on ways to
    make the model finely respect guardrails, allowing for deployment in environments requiring moderated outputs.
    # The Mistral AI Team
    Albert Jiang, Alexandre Sablayrolles, Arthur Mensch, Blanche Savary, Chris Bamford, Devendra Singh Chaplot, Diego de las Casas, Emma Bou Hanna, Florian Bressand, Gianna Lengyel, Guillaume Bour, Guillaume Lample, Lélio Renard Lavaud, Louis Ternon, Lucile Saulnier, Marie-Anne Lachaux, Pierre Stock, Teven Le Scao, Théophile Gervet, Thibaut Lavril, Thomas Wang, Timothée Lacroix, William El Sayed.

  createTimeSinceEpoch: 1743078742000
  lastUpdateTimeSinceEpoch: 1743078742000
  artifacts:
    - protocol: oci
      tags: ["1.4"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-mixtral-8x7b-instruct-v0-1:1.4

- repository: rhelai1
  name: prometheus-8x7b-v2-0
  description: Judge model for multi-phase training and evaluation
  longDescription: >-
    Prometheus 2 is a language model using Mistral-Instruct as a base model. It is fine-tuned on 100K feedback within the Feedback Collection and 200K feedback within the Preference Collection. It is also made by weight merging to support both absolute grading (direct assessment) and relative grading (pairwise ranking).
  license: apache-2.0
  licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
  maturity: General availability
  provider: Red Hat
  logo: data:image/svg+xml;base64,PHN2ZyBpZD0iTGF5ZXJfMSIgZGF0YS1uYW1lPSJMYXllciAxIiB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAxOTIgMTQ1Ij48ZGVmcz48c3R5bGU+LmNscy0xe2ZpbGw6I2UwMDt9PC9zdHlsZT48L2RlZnM+PHRpdGxlPlJlZEhhdC1Mb2dvLUhhdC1Db2xvcjwvdGl0bGU+PHBhdGggZD0iTTE1Ny43Nyw2Mi42MWExNCwxNCwwLDAsMSwuMzEsMy40MmMwLDE0Ljg4LTE4LjEsMTcuNDYtMzAuNjEsMTcuNDZDNzguODMsODMuNDksNDIuNTMsNTMuMjYsNDIuNTMsNDRhNi40Myw2LjQzLDAsMCwxLC4yMi0xLjk0bC0zLjY2LDkuMDZhMTguNDUsMTguNDUsMCwwLDAtMS41MSw3LjMzYzAsMTguMTEsNDEsNDUuNDgsODcuNzQsNDUuNDgsMjAuNjksMCwzNi40My03Ljc2LDM2LjQzLTIxLjc3LDAtMS4wOCwwLTEuOTQtMS43My0xMC4xM1oiLz48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik0xMjcuNDcsODMuNDljMTIuNTEsMCwzMC42MS0yLjU4LDMwLjYxLTE3LjQ2YTE0LDE0LDAsMCwwLS4zMS0zLjQybC03LjQ1LTMyLjM2Yy0xLjcyLTcuMTItMy4yMy0xMC4zNS0xNS43My0xNi42QzEyNC44OSw4LjY5LDEwMy43Ni41LDk3LjUxLjUsOTEuNjkuNSw5MCw4LDgzLjA2LDhjLTYuNjgsMC0xMS42NC01LjYtMTcuODktNS42LTYsMC05LjkxLDQuMDktMTIuOTMsMTIuNSwwLDAtOC40MSwyMy43Mi05LjQ5LDI3LjE2QTYuNDMsNi40MywwLDAsMCw0Mi41Myw0NGMwLDkuMjIsMzYuMywzOS40NSw4NC45NCwzOS40NU0xNjAsNzIuMDdjMS43Myw4LjE5LDEuNzMsOS4wNSwxLjczLDEwLjEzLDAsMTQtMTUuNzQsMjEuNzctMzYuNDMsMjEuNzdDNzguNTQsMTA0LDM3LjU4LDc2LjYsMzcuNTgsNTguNDlhMTguNDUsMTguNDUsMCwwLDEsMS41MS03LjMzQzIyLjI3LDUyLC41LDU1LC41LDc0LjIyYzAsMzEuNDgsNzQuNTksNzAuMjgsMTMzLjY1LDcwLjI4LDQ1LjI4LDAsNTYuNy0yMC40OCw1Ni43LTM2LjY1LDAtMTIuNzItMTEtMjcuMTYtMzAuODMtMzUuNzgiLz48L3N2Zz4=
  labels:
    - lab-judge
  readme: |-
    ## Links for Reference

    - **Homepage: In Progress**
    - **Repository:https://github.com/prometheus-eval/prometheus-eval**
    - **Paper:https://arxiv.org/abs/2405.01535**
    - **Point of Contact:[email protected]**

    # TL;DR
    Prometheus 2 is an alternative to GPT-4 evaluation when doing fine-grained evaluation of an underlying LLM & a Reward model for Reinforcement Learning from Human Feedback (RLHF).
    ![plot](./finegrained_eval.JPG)

    Prometheus 2 is a language model using [Mistral-Instruct](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) as a base model.
    It is fine-tuned on 100K feedback within the [Feedback Collection](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection) and 200K feedback within the [Preference Collection](https://huggingface.co/datasets/prometheus-eval/Preference-Collection).
    It is also made by weight merging to support both absolute grading (direct assessment) and relative grading (pairwise ranking).
    The surprising thing is that we find weight merging also improves performance on each format.

    # Model Details

    ## Model Description

    - **Model type:** Language model
    - **Language(s) (NLP):** English
    - **License:** Apache 2.0
    - **Related Models:** [All Prometheus Checkpoints](https://huggingface.co/models?search=prometheus-eval/Prometheus)
    - **Resources for more information:**
      - [Research paper](https://arxiv.org/abs/2405.01535)
      - [GitHub Repo](https://github.com/prometheus-eval/prometheus-eval)


    Prometheus is trained with two different sizes (7B and 8x7B).
    You could check the 7B sized LM on [this page](https://huggingface.co/prometheus-eval/prometheus-2-7b-v2.0).
    Also, check out our dataset as well on [this page](https://huggingface.co/datasets/prometheus-eval/Feedback-Collection) and [this page](https://huggingface.co/datasets/prometheus-eval/Preference-Collection).

    ## Prompt Format

    We have made wrapper functions and classes to conveniently use Prometheus 2 at [our github repository](https://github.com/prometheus-eval/prometheus-eval).
    We highly recommend you use it!

    However, if you just want to use the model for your use case, please refer to the prompt format below.
    Note that absolute grading and relative grading require different prompt templates and system prompts.

    ### Absolute Grading (Direct Assessment)
    Prometheus requires 4 components in the input: An instruction, a response to evaluate, a score rubric, and a reference answer. You could refer to the prompt format below.
    You should fill in the instruction, response, reference answer, criteria description, and score description for score in range of 1 to 5.

    Fix the components with \{text\} inside.
    ```
    ###Task Description:
    An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.
    1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.
    2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
    3. The output format should look as follows: \"Feedback: (write a feedback for criteria) [RESULT] (an integer number between 1 and 5)\"
    4. Please do not generate any other opening, closing, and explanations.
    ###The instruction to evaluate:
    {orig_instruction}
    ###Response to evaluate:
    {orig_response}
    ###Reference Answer (Score 5):
    {orig_reference_answer}
    ###Score Rubrics:
    [{orig_criteria}]
    Score 1: {orig_score1_description}
    Score 2: {orig_score2_description}
    Score 3: {orig_score3_description}
    Score 4: {orig_score4_description}
    Score 5: {orig_score5_description}
    ###Feedback:
    ```

    After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors).
    You can find the conversation class at this [link](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
    ```
    conv = get_conv_template("mistral")
    conv.set_system_message("You are a fair judge assistant tasked with providing clear, objective feedback based on specific criteria, ensuring each assessment reflects the absolute standards set for performance.")
    conv.append_message(conv.roles[0], dialogs['instruction'])
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()
    x = tokenizer(prompt,truncation=False)
    ```

    As a result, a feedback and score decision will be generated, divided by a separating phrase ```[RESULT]```

    ### Relative Grading (Pairwise Ranking)
    Prometheus requires 4 components in the input: An instruction, 2 responses to evaluate, a score rubric, and a reference answer. You could refer to the prompt format below.
    You should fill in the instruction, 2 responses, reference answer, and criteria description.

    Fix the components with \{text\} inside.
    ```
    ###Task Description:
    An instruction (might include an Input inside it), two responses to evaluate (denoted as Response A and Response B), a reference answer, and an evaluation criteria are given.
    1. Write a detailed feedback that assess the quality of the two responses strictly based on the given evaluation criteria, not evaluating in general.
    2. Make comparisons between Response A, Response B, and the Reference Answer. Instead of examining Response A and Response B separately, go straight to the point and mention about the commonalities and differences between them.
    3. After writing the feedback, indicate the better response, either "A" or "B".
    4. The output format should look as follows: "Feedback: (write a feedback for criteria) [RESULT] (Either "A" or "B")"
    5. Please do not generate any other opening, closing, and explanations.
    ###Instruction:
    {orig_instruction}
    ###Response A:
    {orig_response_A}
    ###Response B:
    {orig_response_B}
    ###Reference Answer:
    {orig_reference_answer}
    ###Score Rubric:
    {orig_criteria}
    ###Feedback:
    ```

    After this, you should apply the conversation template of Mistral (not applying it might lead to unexpected behaviors).
    You can find the conversation class at this [link](https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py).
    ```
    conv = get_conv_template("mistral")
    conv.set_system_message("You are a fair judge assistant assigned to deliver insightful feedback that compares individual performances, highlighting how each stands relative to others within the same cohort.")
    conv.append_message(conv.roles[0], dialogs['instruction'])
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()
    x = tokenizer(prompt,truncation=False)
    ```

    As a result, a feedback and score decision will be generated, divided by a separating phrase ```[RESULT]```

    ## License
    Feedback Collection, Preference Collection, and Prometheus 2 are subject to OpenAI's Terms of Use for the generated data. If you suspect any violations, please reach out to us.


    # Citation


    If you find this model helpful, please consider citing our paper!

    **BibTeX:**

    ```bibtex
    @misc{kim2023prometheus,
        title={Prometheus: Inducing Fine-grained Evaluation Capability in Language Models},
        author={Seungone Kim and Jamin Shin and Yejin Cho and Joel Jang and Shayne Longpre and Hwaran Lee and Sangdoo Yun and Seongjin Shin and Sungdong Kim and James Thorne and Minjoon Seo},
        year={2023},
        eprint={2310.08491},
        archivePrefix={arXiv},
        primaryClass={cs.CL}
    }
    ```
    ```bibtex
    @misc{kim2024prometheus,
        title={Prometheus 2: An Open Source Language Model Specialized in Evaluating Other Language Models},
        author={Seungone Kim and Juyoung Suk and Shayne Longpre and Bill Yuchen Lin and Jamin Shin and Sean Welleck and Graham Neubig and Moontae Lee and Kyungjae Lee and Minjoon Seo},
        year={2024},
        eprint={2405.01535},
        archivePrefix={arXiv},
        primaryClass={cs.CL}
    }
    ```
  createTimeSinceEpoch: 1743078706000
  lastUpdateTimeSinceEpoch: 1743078706000
  artifacts:
    - protocol: oci
      tags: ["1.4"]
      uri: oci://registry.redhat.io/rhelai1/modelcar-prometheus-8x7b-v2-0:1.4
	{
	"$id": "https://kubeflow.org/model-registry/catalog.yaml",
	"$schema": "https://json-schema.org/draft/2020-12/schema",
	"title": "Model Catalog",
	"type": "object",
	"properties": {
	"source": {
	"type": "string",
	"description": "The name of the catalog provider.",
	"example": "Red Hat"
	},
	"models": {
	"type": "array",
	"description": "List of models available in the catalog. `repository` and `name` are used\nto uniquely identify a model, and should be unique within the catalog.",
	"items": {
	"type": "object",
	"required": [
	"repository",
	"name"
	],
	"properties": {
	"repository": {
	"type": "string",
	"description": "Name of the repository in the catalog.",
	"example": "ibm-granite"
	},
	"name": {
	"type": "string",
	"description": "Code name of the model.",
	"example": "granite-3.1-8b-base"
	},
	"provider": {
	"type": "string",
	"description": "Name of the organization or entity that provides the model.",
	"example": "IBM"
	},
	"description": {
	"type": "string",
	"description": "Short description of the model."
	},
	"longDescription": {
	"type": "string",
	"description": "Longer description of the model."
	},
	"logo": {
	"type": "string",
	"format": "uri",
	"description": "URL to the model's logo. A [data\nURL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)\nis recommended."
	},
	"readme": {
	"type": "string",
	"description": "Model documentation in Markdown."
	},
	"language": {
	"type": "array",
	"description": "List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).",
	"items": {
	"type": "string"
	},
	"example": [
	"en",
	"es",
	"cs"
	]
	},
	"license": {
	"type": "string",
	"description": "Short name of the model's license.",
	"example": "apache-2.0"
	},
	"licenseLink": {
	"type": "string",
	"format": "uri",
	"description": "URL to the license text."
	},
	"maturity": {
	"type": "string",
	"description": "Maturity level of the model.",
	"example": "Generally Available"
	},
	"libraryName": {
	"type": "string",
	"example": "transformers"
	},
	"baseModel": {
	"type": "array",
	"description": "Reference to the base model (if any).",
	"items": {
	"type": "object",
	"properties": {
	"catalog": {
	"type": "string",
	"description": "Name of the catalog for an external base model. Omit for\nmodels in the same catalog.",
	"example": "huggingface.co"
	},
	"repository": {
	"type": "string",
	"description": "Name of the repository in an external catalog where the base\nmodel exists. Omit for models in the same catalog.",
	"example": "ibm-granite"
	},
	"name": {
	"type": "string",
	"example": "granite-3.1-8b-base"
	}
	}
	}
	},
	"labels": {
	"type": "array",
	"description": "List of labels for categorization.",
	"example": [
	"language"
	],
	"items": {
	"type": "string"
	}
	},
	"tasks": {
	"type": "array",
	"description": "List of tasks the model is designed for.",
	"items": {
	"type": "string"
	},
	"example": [
	"text-generation"
	]
	},
	"createTimeSinceEpoch": {
	"description": "Creation time in milliseconds since epoch.",
	"type": "integer"
	},
	"lastUpdateTimeSinceEpoch": {
	"description": "Last update time in milliseconds since epoch.",
	"type": "integer"
	},
	"artifacts": {
	"type": "array",
	"description": "If a model has multiple versions, each version should have a\nseparate artifact.",
	"items": {
	"type": "object",
	"properties": {
	"createTimeSinceEpoch": {
	"description": "Creation time in milliseconds since epoch.",
	"type": "integer"
	},
	"protocol": {
	"type": "string",
	"description": "The protocol used to access the artifact (only `oci` for now).",
	"enum": [
	"oci"
	]
	},
	"tags": {
	"type": "array",
	"description": "List of tags for the artifact. These are for information\nonly. It is recommended that this list only include immutable\ntags (e.g. `1.2.3` instead of ephemeral/floating tags such as\n`1`, `1.2`, or `latest`).",
	"example": [
	"2.1.2"
	],
	"items": {
	"type": "string"
	}
	},
	"uri": {
	"type": "string",
	"description": "Artifact URI."
	}
	}
	}
	}
	}
	}
	}
	}
	}
	$id: https://kubeflow.org/model-registry/catalog.yaml
	$schema: https://json-schema.org/draft/2020-12/schema
	title: Model Catalog
	type: object
	properties:
	source:
	type: string
	description: The name of the catalog provider.
	example: Red Hat
	models:
	type: array
	description: |-
	List of models available in the catalog. `repository` and `name` are used
	to uniquely identify a model, and should be unique within the catalog.
	items:
	type: object
	required:
	- repository
	- name
	properties:
	repository:
	type: string
	description: Name of the repository in the catalog.
	example: ibm-granite
	name:
	type: string
	description: Code name of the model.
	example: granite-3.1-8b-base
	provider:
	type: string
	description: Name of the organization or entity that provides the model.
	example: IBM
	description:
	type: string
	description: Short description of the model.
	longDescription:
	type: string
	description: Longer description of the model.
	logo:
	type: string
	format: uri
	description: |-
	URL to the model's logo. A [data
	URL](https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data)
	is recommended.
	readme:
	type: string
	description: Model documentation in Markdown.
	language:
	type: array
	description: List of supported languages (https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes).
	items:
	type: string
	example:
	- en
	- es
	- cs
	license:
	type: string
	description: Short name of the model's license.
	example: apache-2.0
	licenseLink:
	type: string
	format: uri
	description: URL to the license text.
	maturity:
	type: string
	description: Maturity level of the model.
	example: Generally Available
	libraryName:
	type: string
	example: transformers
	baseModel:
	type: array
	description: Reference to the base model (if any).
	items:
	type: object
	properties:
	catalog:
	type: string
	description: |-
	Name of the catalog for an external base model. Omit for
	models in the same catalog.
	example: huggingface.co
	repository:
	type: string
	description: |-
	Name of the repository in an external catalog where the base
	model exists. Omit for models in the same catalog.
	example: ibm-granite
	name:
	type: string
	example: granite-3.1-8b-base
	labels:
	type: array
	description: List of labels for categorization.
	example:
	- language
	items:
	type: string
	tasks:
	type: array
	description: List of tasks the model is designed for.
	items:
	type: string
	example:
	- text-generation
	createTimeSinceEpoch:
	description: Creation time in milliseconds since epoch.
	type: integer
	lastUpdateTimeSinceEpoch:
	description: Last update time in milliseconds since epoch.
	type: integer
	artifacts:
	type: array
	description: |-
	If a model has multiple versions, each version should have a
	separate artifact.
	items:
	type: object
	properties:
	createTimeSinceEpoch:
	description: Creation time in milliseconds since epoch.
	type: integer
	protocol:
	type: string
	description: The protocol used to access the artifact (only `oci` for now).
	enum:
	- oci
	tags:
	type: array
	description: |-
	List of tags for the artifact. These are for information
	only. It is recommended that this list only include immutable
	tags (e.g. `1.2.3` instead of ephemeral/floating tags such as
	`1`, `1.2`, or `latest`).
	example: ["2.1.2"]
	items:
	type: string
	uri:
	type: string
	description: Artifact URI.
	source: Red Hat
	models:
	- repository: rhelai1
	name: granite-8b-code-base
	provider: Red Hat
	description: A decoder-only code model designed for code generative tasks
	longDescription: |-
	Granite-8B-Code-Base is a decoder-only code model designed for code
	generative tasks (e.g., code generation, code explanation, code fixing,
	etc.). It is trained from scratch with a two-phase training strategy. In
	phase 1, our model is trained on 4 trillion tokens sourced from 116
	programming languages, ensuring a comprehensive understanding of
	programming languages and syntax. In phase 2, our model is trained on 500
	billion tokens with a carefully designed mixture of high-quality data from
	code and natural language domains to improve the models’ ability to reason
	and follow instructions.
	readme: |-
	# Granite-3.1-8B-Base

	Model Summary:
	Granite-3.1-8B-Base extends the context length of Granite-3.0-8B-Base from 4K to 128K using a progressive training strategy by increasing the supported context length in increments while adjusting RoPE theta until the model has successfully adapted to desired length of 128K. This long-context pre-training stage was performed using approximately 500B tokens.

	...

	logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
	language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
	license: apache-2.0
	licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
	maturity: Generally Available
	libraryName: transformers
	labels:
	- language
	- granite-3.1
	- lab-base
	tasks:
	- text-generation
	createTimeSinceEpoch: 1733514949000
	lastUpdateTimeSinceEpoch: 1734637721000
	artifacts:
	- protocol: oci
	createTimeSinceEpoch: 1733514949000
	tags: ["1.3.0"]
	uri: oci://registry.redhat.io/rhelai1/granite-8b-code-base:1.3-1732870892
	- repository: rhelai1
	name: granite-8b-code-instruct
	provider: Red Hat
	description: A fine-tuned model based on Granite 8B Code Base
	longDescription: |-
	Granite-8B-Code-Instruct is a 8B parameter model fine tuned from
	Granite-8B-Code-Base on a combination of permissively licensed instruction
	data to enhance instruction following capabilities including logical
	reasoning and problem-solving skills.
	logo: data:image/gif;base64,R0lGODlhIAAgAMZLAAAAAAEAAAIAAAQAAAYAAAcAAAgAAAoAAAsAAA0AAA4AAA8AABEAABMAABwAAB4AAB8AACAAACQAACYAACkAAC8AADAAADsAAEIAAEMAAEkAAEoAAE4AAFAAAFYAAFkAAF8AAHAAAHMAAHQAAHsAAHwAAIcAAJMAAJcAAJoAAKYAAKwAAK0AAK4AALAAALYAALcAALkAALoAAL4AAMAAAMUAAMgAAMsAANAAANEAANcAANgAANoAANwAAOEAAOUAAOcAAOgAAOkAAOoAAOwAAO0AAO4AAO8AAPAAAPEAAPIAAP///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////yH5BAEKAH8ALAAAAAAgACAAAAf3gH+Cg4SFhoeIiYqLjI2Oj5CCRkaDk5SRhZaam5iSm5+TkaCjl46ko4+noKaqn441Sa2cixobPbGyoYeWRB0AHTm4sruWRyEAABYqQ8KkRYabSDYTyAkfMUJKSKNHQCaZ0SwRyAAMHCk4QklKSknrSkQpGj6EoEgyFwLkAw8aIygsYMRwccKDAgo9jtT7lIRHCQgByCELQAABAgMRAWD4caQUtyM3SFQwILEkAAcrtun6QwoJkh4sRGSQ0OBAAQMLJoCY0VFTJVJH3AXRUeNFCxc0dhRpttLTqaDvkvR0BS7XKURWUS3KaimV1U4sSYEdS7as2bKBAAA7
	readme: |-
	# Granite-3.1-8B-Instruct

	Model Summary:
	Granite-3.1-8B-Instruct is a 8B parameter long-context instruct model finetuned from Granite-3.1-8B-Base using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets tailored for solving long context problems. This model is developed using a diverse set of techniques with a structured chat format, including supervised finetuning, model alignment using reinforcement learning, and model merging.

	...

	language: ["ar", "cs", "de", "en", "es", "fr", "it", "ja", "ko", "nl", "pt", "zh"]
	license: apache-2.0
	licenseLink: https://www.apache.org/licenses/LICENSE-2.0.txt
	maturity: Generally Available
	libraryName: transformers
	baseModel:
	- repository: rhelai1
	name: granite-8b-code-base
	labels:
	- language
	- granite-3.1
	tasks:
	- text-generation
	createTimeSinceEpoch: 1733514949000
	lastUpdateTimeSinceEpoch: 1734637721000
	artifacts:
	- protocol: oci
	createTimeSinceEpoch: 1733514949000
	tags: ["1.3.0"]
	uri: oci://registry.redhat.io/rhelai1/granite-8b-code-instruct:1.3-1732870892