Skip to content

Instantly share code, notes, and snippets.

View pashu123's full-sized avatar
๐Ÿ˜‡
Working from home

Prashant Kumar pashu123

๐Ÿ˜‡
Working from home
View GitHub Profile
.text
.intel_syntax noprefix
.file "mmt3d_kernel_linked_llvm_cpu"
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,"ax",@progbits
.p2align 4, 0x90
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,@function
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32:
.Lfunc_begin0:
.file 1 "-"
.loc 1 1 0
.text
.file "mmt3d_kernel_linked_llvm_cpu"
.section .text.turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,"ax",@progbits
.p2align 4, 0x90
.type turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32,@function
turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0_pack_f32:
.Lfunc_begin0:
.file 1 "-"
.loc 1 1 0
.cfi_startproc
This file has been truncated, but you can view the full file.
// -----// IR Dump After AssignTargetDevicesPass (iree-hal-assign-target-devices) //----- //
#executable_target_embedded_elf_x86_64_ = #hal.executable.target<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-ccmp,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,+gfni,-avxvnniint16,-amx-fp16,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpr
hal.executable public @turbine_llm_mmtfp_3d_8640_3200_f32f16_dispatch_0 {
hal.executable.variant public @embedded_elf_x86_64 target(<"llvm-cpu", "embedded-elf-x86_64", {cpu = "znver4", cpu_features = "+prfchw,-cldemote,+avx,+aes,+sahf,+pclmul,-xop,+crc32,+xsaves,-avx512fp16,-usermsr,-sm4,-egpr,+sse4.1,+avx512ifma,+xsave,-avx512pf,+sse4.2,-tsxldtrk,-ptwrite,-widekl,-sm3,+invpcid,+64bit,+xsavec,-avx10.1-512,+avx512vpopcntdq,+cmov,-avx512vp2intersect,+avx512cd,+movbe,-avxvnniint8,-avx512er,-ccmp,-amx-int8,-kl,-avx10.1-256,-sha512,-avxvnni,-rtm,+adx,+avx2,-hreset,-movdiri,-serialize,+vpclmulqdq,+avx512vl,-uintr,-cf,+clflushopt,-raoint,-cmpccxadd,+bmi,-amx-tile,+sse,+gfni,-avxvnniint16,-amx-fp16,-ndd,+xsaveopt,+rdrnd,+avx512f,-amx-bf16,+avx512bf16,+avx512vnni,-push2pop2,+cx8,+avx512bw,+sse3,+pku,+fsgsbase,+clzero,+mwaitx,-lwp,+lzcnt,+sha,-movdir64b,-ppx,+wbnoinvd,-enqcmd,-prefetchwt1,-avxneconvert,-tbm,-pconfig,-amx-complex,+ssse3,+cx16,+bmi2,+fma,+popcnt,-avxifma,+f16c,+avx512bitalg,+rdpru,+clwb,+mmx,+sse2,+rds
-- Read the docs: https://www.lunarvim.org/docs/configuration
-- Video Tutorials: https://www.youtube.com/watch?v=sFA9kX-Ud_c&list=PLhoH5vyxr6QqGu0i7tt_XoVK9v-KvZ3m6
-- Forum: https://www.reddit.com/r/lunarvim/
-- Discord: https://discord.com/invite/Xb9B4Ny
--
lvim.colorscheme = "lunar"
lvim.format_on_save.enabled = false
import sys
import torch
import torch_mlir
from shark.shark_importer import import_with_fx
import torchvision.models as models
import copy
import io
import numpy as np
from transformers import (
AutoModelForCausalLM,
import sys
import torch
import torch_mlir
from shark.shark_importer import import_with_fx
import torchvision.models as models
import copy
import io
import numpy as np
import torch
import torch_mlir
class ANET(torch.nn.Module):
def __init__(self):
super().__init__()
self.alexnet = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
self.alexnet.eval()
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Also available under a BSD-style license. See LICENSE.
import sys
from PIL import Image
import requests
from iree import runtime as ireert
from iree.compiler import compile_str
import numpy as np
import os
with open(os.path.join("vicuna_fp32_cpu.vmfb"), "rb") as mlir_file:
flatbuffer_blob = mlir_file.read()