Skip to content

Instantly share code, notes, and snippets.

@alexanderhenne
Last active June 15, 2025 18:32
Show Gist options
  • Save alexanderhenne/efd0c03717e0d4657c3e5db01c8075e8 to your computer and use it in GitHub Desktop.
Save alexanderhenne/efd0c03717e0d4657c3e5db01c8075e8 to your computer and use it in GitHub Desktop.
DINOv2 ViT-S/14 conversion to Core ML .mlpackage (CLS token)
import torch
import coremltools as ct
# Load the 'dinov2_vits14_reg' entry point from the DINOv2 torch hub repo.
model = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14_reg')

img_size = 518  # only 518 supported due to Core ML ops limitation
input_shape = (1, 3, img_size, img_size)
example_input = torch.randn(*input_shape)

# Module.eval() returns the module itself, so `wrapper` aliases `model`
# in inference mode.
wrapper = model.eval()

# Trace with gradient tracking disabled; the traced graph is what gets
# handed to the Core ML converter further down.
with torch.no_grad():
    wrapper.eval()
    traced_model = torch.jit.trace(wrapper, example_input)
from coremltools.converters.mil import Builder as mb
from coremltools.converters.mil import register_torch_op
@register_torch_op
def _upsample_bicubic2d_aa(context, node):
    """Lower the torch ``_upsample_bicubic2d_aa`` op to a MIL upsample.

    The op is emitted as ``mb.upsample_bilinear``, so the converted result is
    a bilinear approximation of the original bicubic resize rather than an
    exact match.

    Args:
        context: Conversion context mapping torch value names to MIL vars.
            An input that is ``None`` in the graph is looked up as ``None``.
        node: Torch graph node. Its inputs are read positionally as
            ``(input, output_size, align_corners, scale_factors)``.

    Raises:
        ValueError: If the node provides neither an output size nor scale
            factors, so no resize ratio can be derived.
    """
    a = context[node.inputs[0]]
    size_var = context[node.inputs[1]]
    align_corners = context[node.inputs[2]].val
    scale = context[node.inputs[3]]

    if scale is not None:
        # Explicit scale factors win; output_size may legitimately be None
        # here, so it must not be dereferenced.
        scale_h = scale.val[0]
        scale_w = scale.val[1]
    elif size_var is not None:
        # Derive the ratios from the requested output size and the static
        # spatial dimensions (last two axes) of the input.
        output_size = size_var.val
        input_height = a.shape[-2]
        input_width = a.shape[-1]
        scale_h = output_size[0] / input_height
        scale_w = output_size[1] / input_width
    else:
        raise ValueError(
            "_upsample_bicubic2d_aa requires either output_size or scale factors"
        )

    x = mb.upsample_bilinear(
        x=a,
        scale_factor_height=scale_h,
        scale_factor_width=scale_w,
        align_corners=align_corners,
        name=node.name,
    )
    context.add(x)
# Convert the traced model to a Core ML package with an image input.
#
# ImageType preprocessing is `pixel * scale + bias`, where `scale` is a single
# scalar and `bias` is per channel. The torchvision-style normalisation
# `(pixel / 255 - mean) / std` has a per-channel std, so it is approximated by
# folding an average std of 0.226 into the scalar scale while keeping the
# exact `-mean / std` per-channel bias. Using `scale=1/255` with this bias
# would skip the division by std and feed the network mis-scaled inputs.
model_from_trace = ct.convert(
    traced_model,
    inputs=[
        ct.ImageType(
            name="input",
            shape=example_input.shape,
            scale=1 / (0.226 * 255.0),
            bias=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        )
    ],
    outputs=[ct.TensorType(name="cls_token")],
    compute_precision=ct.precision.FLOAT16,
)

# Write the converted model to disk as an .mlpackage.
model_from_trace.save("dinov2_vits14_reg_518.mlpackage")
@alexanderhenne
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment