fxmarty

@fxmarty
fxmarty / benchmark_quanto.py
Created July 17, 2024 14:53
benchmark quanto
import torch
import torch.nn as nn
import time
import numpy as np
from optimum.quanto import Calibration, freeze, qint4, qint8, quantize, qfloat8, qfloat8_e4m3fn
from torch.profiler import ProfilerActivity, profile
# GEMM M dimensions swept during the benchmark
M_SHAPES = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
# fixed hidden size (N) of the benchmarked linear layer
N_SHAPE = 4096
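
The preview cuts off here. A minimal sketch of how the rest of such a benchmark could continue with the optimum.quanto calls imported above, assuming a single weight-only quantized linear layer is timed per M shape (the layer shape, warmup/iteration counts, and the qint8 choice are illustrative assumptions, not taken from the gist):

def benchmark_linear(m, n, weights_dtype, device="cuda"):
    # Wrap the layer in a container so quantize() can swap it for its quantized counterpart
    model = nn.Sequential(nn.Linear(n, n, bias=False)).to(device, torch.float16)
    quantize(model, weights=weights_dtype)  # assumption: weight-only quantization
    freeze(model)                           # materialize the quantized weights
    inp = torch.rand(m, n, dtype=torch.float16, device=device)
    for _ in range(5):                      # warmup
        model(inp)
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(20):
        model(inp)
    torch.cuda.synchronize()
    return (time.perf_counter() - start) / 20

for m in M_SHAPES:
    print(f"M={m}: {benchmark_linear(m, N_SHAPE, qint8) * 1e3:.3f} ms")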
@fxmarty
fxmarty / transformers_compile.py
Created July 25, 2024 14:47
transformers_compile.py
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
from transformers.cache_utils import StaticCache
import logging
import time
#model_id = "fxmarty/tiny-llama-fast-tokenizer"
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
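
The preview ends here. A plausible continuation, given the filename and the StaticCache import, is the documented transformers pattern of compiling the model's forward with a static KV cache; the dtype, prompt, and generation settings below are illustrative assumptions, not taken from the gist:

model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

# Static-shaped KV cache so torch.compile does not re-specialize on every new token
model.generation_config.cache_implementation = "static"
model.forward = torch.compile(model.forward, mode="reduce-overhead", fullgraph=True)

inputs = tokenizer("What is quantization?", return_tensors="pt").to("cuda")

# First call triggers compilation (slow); later calls reuse the compiled graph
for i in range(3):
    start = time.perf_counter()
    output = model.generate(**inputs, do_sample=False, max_new_tokens=32)
    torch.cuda.synchronize()
    print(f"run {i}: {time.perf_counter() - start:.2f} s")

print(tokenizer.decode(output[0], skip_special_tokens=True))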