import intel_extension_for_pytorch # required for XPU
import torch
from bigdl.llm.transformers import AutoModelForCausalLM
from transformers import AutoTokenizer, pipeline
# model_id = "facebook/opt-1.3b"
# model_id = "meta-llama/Llama-2-7b"
model_id = "meta-llama/Llama-2-7b-chat-hf"
prompt = "I love the Avengers,"

$ python bench_linear.py --bs 1
BS: 1, Latency: 0.389 ms, IC: 4096, OC: 11008, Samples: 100, Warmup: 10
$ python bench_linear.py --bs 128
BS: 128, Latency: 3.640 ms, IC: 4096, OC: 11008, Samples: 100, Warmup: 10
$ python bench_linear.py --bs 1024
BS: 1024, Latency: 41.244 ms, IC: 4096, OC: 11008, Samples: 100, Warmup: 10

https://github.com/state-spaces/mamba
from transformers import MambaConfig, MambaForCausalLM, AutoTokenizer, AutoModelForCausalLM
import torch
from functools import partial
from collections import OrderedDict, defaultdict
import os

https://huggingface.co/openbmb/MiniCPM-V-2_6
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
import requests

| Set 1 | Set 2 | Set 3 | Least Character Word |
|---|---|---|---|
| Apex | Aurora | Atom | Apex |
| Blaze | Blitz | Bolt | Bolt |
| Cipher | Cascade | Cobalt | Cipher |
| Dynamo | Drift | Drift | Drift |
| Echo | Eclipse | Ember | Echo |
| Flux | Flare | Frost | Flux |
| Gravitas | Glide | Grit | Grit |
| Helix | Horizon | Haven | Helix |
OlderNewer