Skip to content

Instantly share code, notes, and snippets.

@justinchuby
Created October 8, 2024 22:08
Show Gist options
  • Save justinchuby/8b058086615d95e51b88ea1b46c0c99c to your computer and use it in GitHub Desktop.
Olive export of meta-llama/Llama-3.2-11B-Vision
{
  "input_model": {
    "type": "PyTorchModel",
    "model_script": "model_loader.py",
    "model_loader": "load_model",
    "model_path": "meta-llama/Llama-3.2-11B-Vision",
    "io_config": {
      "input_names": [
        "input_ids",
        "attention_mask",
        "pixel_values",
        "aspect_ratio_ids",
        "aspect_ratio_mask",
        "cross_attention_mask"
      ],
      "output_names": [
        "logits"
      ],
      "input_shapes": [
        [1, 14],
        [1, 14],
        [1, 1, 4, 3, 448, 448],
        [1, 1],
        [1, 1, 4],
        [1, 14, 1, 4]
      ],
      "input_types": [
        "int32",
        "int32",
        "float32",
        "int32",
        "int32",
        "int32"
      ]
    }
  },
  "passes": {
    "onnx_conversion": {
      "type": "OnnxConversion",
      "use_dynamo_exporter": true,
      "target_opset": 18
    }
  },
  "engine": {
    "log_severity_level": 0
  }
}
from transformers import MllamaForConditionalGeneration


def load_model(model_path):
    """Olive model-loader entry point (referenced by the config's "model_loader").

    Loads the Mllama vision-language model weights from `model_path`
    (a Hugging Face repo id or local directory) and returns the model.
    """
    model = MllamaForConditionalGeneration.from_pretrained(model_path)
    return model
# Run the Olive export workflow; assumes the JSON config above is saved as llama.json
olive run --config llama.json
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment