Skip to content

Instantly share code, notes, and snippets.

View mht-sharma's full-sized avatar
🏠
Working from home

Mohit Sharma mht-sharma

🏠
Working from home
View GitHub Profile
@mht-sharma
mht-sharma / llava_onnx_inference.py
Created April 4, 2024 14:40
LLaVA Optimum ONNX inference
import os
from typing import List, Optional, Tuple
import onnxruntime as onnxrt
import requests
import torch
from PIL import Image
from transformers import AutoConfig, AutoProcessor, GenerationConfig, PreTrainedModel
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import BaseModelOutput, CausalLMOutputWithPast
@mht-sharma
mht-sharma / onnx_trocr_inference.py
Created December 16, 2022 10:46
ONNX TrOCR Inference
import os
import time
from typing import Optional, Tuple
import torch
from PIL import Image
import onnxruntime as onnxrt
import requests
from transformers import AutoConfig, AutoModelForVision2Seq, TrOCRProcessor, VisionEncoderDecoderModel
@mht-sharma
mht-sharma / benchmark_qa.py
Created November 24, 2022 11:34
Benchmark QA pipeline RoBERTa HF
from pathlib import Path
from time import perf_counter
import numpy as np
from optimum.onnxruntime import ORTModelForQuestionAnswering
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
model_id = "deepset/roberta-base-squad2"
onnx_path = Path("onnx")
task = "question-answering"
@mht-sharma
mht-sharma / benchmark_whisper.py
Last active November 24, 2022 11:04
Profiling Whisper Model - Hugging Face
import time
import numpy as np
import onnxruntime
import torch
from datasets import load_dataset
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
sess_options = onnxruntime.SessionOptions()