Leslie Fang (leslie-fang-intel), INTC, Shanghai
import torch
from itertools import product
import random
from typing import Tuple, List
@torch.library.custom_op(
    "mylib::reshape_and_cache",
    mutates_args=["key_cache", "value_cache"],
    schema="(Tensor key, Tensor value, Tensor(a!) key_cache, Tensor(a!) value_cache, Tensor slot_mapping) -> ()",
)
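# Hedged sketch: the gist preview ends at the decorator, so the body below is an
# assumed continuation modeled on vLLM-style reshape_and_cache semantics (caches are
# assumed to be laid out as [num_slots, num_heads, head_size]); it is not the original code.
def reshape_and_cache(key, value, key_cache, value_cache, slot_mapping) -> None:
    # Scatter each token's key/value vectors into the mutable caches at the slot
    # indices given by slot_mapping ([num_tokens]); mutation is legal because the
    # caches are declared in mutates_args above.
    for token_idx, slot in enumerate(slot_mapping.tolist()):
        key_cache[slot] = key[token_idx]
        value_cache[slot] = value[token_idx]

# Illustrative call (shapes assumed): after registration the op is reachable as
# torch.ops.mylib.reshape_and_cache(k, v, k_cache, v_cache, slot_mapping).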
This file has been truncated.
Namespace(model='meta-llama/Llama-2-7b-hf', speculative_model=None, num_speculative_tokens=None, speculative_draft_tensor_parallel_size=None, tokenizer=None, quantization=None, tensor_parallel_size=1, input_len=32, output_len=3, batch_size=1, n=1, use_beam_search=False, num_iters_warmup=10, num_iters=20, trust_remote_code=True, max_model_len=None, dtype='bfloat16', enforce_eager=False, kv_cache_dtype='auto', quantization_param_path=None, profile=True, profile_result_dir=None, device='cpu', block_size=16, enable_chunked_prefill=False, enable_prefix_caching=False, use_v2_block_manager=False, ray_workers_use_nsight=False, download_dir=None, output_json=None, gpu_memory_utilization=0.9, load_format='auto', distributed_executor_backend=None, otlp_traces_endpoint=None)
WARNING 08-07 17:49:09 config.py:1428] Casting torch.float16 to torch.bfloat16.
INFO 08-07 17:49:09 llm_engine.py:176] Initializing an LLM engine (v0.5.3.post1) with config: model='meta-llama/Llama-2-7b-hf', speculative_config=None, tokenizer='meta-l
This file has been truncated.
Namespace(model='meta-llama/Llama-2-7b-hf', speculative_model=None, num_speculative_tokens=None, speculative_draft_tensor_parallel_size=None, tokenizer=None, quantization=None, tensor_parallel_size=1, input_len=32, output_len=3, batch_size=1, n=1, use_beam_search=False, num_iters_warmup=3, num_iters=2, trust_remote_code=True, max_model_len=None, dtype='bfloat16', enforce_eager=False, kv_cache_dtype='auto', quantization_param_path=None, profile=False, profile_result_dir=None, device='cpu', block_size=16, enable_chunked_prefill=False, enable_prefix_caching=False, use_v2_block_manager=False, ray_workers_use_nsight=False, download_dir=None, output_json=None, gpu_memory_utilization=0.9, load_format='auto', distributed_executor_backend=None, otlp_traces_endpoint=None)
WARNING 08-06 22:54:20 config.py:1428] Casting torch.float16 to torch.bfloat16.
INFO 08-06 22:54:20 llm_engine.py:176] Initializing an LLM engine (v0.5.3.post1) with config: model='meta-llama/Llama-2-7b-hf', speculative_config=None, tokenizer='meta-ll
import torch
import time
import random
import numpy as np
from dataclasses import dataclass
# import intel_extension_for_pytorch as ipex
local_seed = 2024
torch.manual_seed(local_seed)  # Set PyTorch seed
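# Assumed continuation (the gist preview cuts off here): seed the other RNGs implied
# by the imports above, for reproducibility.
random.seed(local_seed)
np.random.seed(local_seed)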
import torch
import torch._inductor.config as config
config.freezing = True
config.max_autotune = True
config.max_autotune_gemm_backends = "CPP,ATEN"
class M(torch.nn.Module):
    def __init__(self, output_feature=384):
        super().__init__()
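        # Hedged continuation: the gist preview stops after super().__init__(), so the
        # input feature size and the driver code below are illustrative assumptions.
        self.linear = torch.nn.Linear(1024, output_feature)

    def forward(self, x):
        return self.linear(x)


if __name__ == "__main__":
    # With freezing and max_autotune enabled above, Inductor can pick the CPP GEMM
    # backend (vs. ATen) for the linear during autotuning.
    m = M().eval()
    x = torch.randn(8, 1024)
    with torch.no_grad():
        compiled = torch.compile(m)
        print(compiled(x).shape)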
/localdisk/leslie/miniconda/envs/pytorch_community/lib/python3.10/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.
warnings.warn(
loading model: 0it [00:00, ?it/s]config.num_buckets is not set. Setting config.num_buckets to 128...
config.num_buckets is not set. Setting config.num_buckets to 128...
config.num_buckets is not set. Setting config.num_buckets to 128...
loading model: 0it [00:00, ?it/s]
cpu eval Reformer
config.num_buckets is not set. Setting config.num_buckets to 128...
# AOT ID: ['0_inference']
from ctypes import c_void_p, c_long
import torch
import math
import random
import os
import tempfile
from math import inf, nan
from torch._inductor.hooks import run_intermediate_hooks
# TORCHINDUCTOR_FREEZING=1 TORCH_LOGS="+output_code" numactl -C 56-111 -m 1 python test_linear.py
import torch
import time
import random
import numpy as np
from torch._inductor import config as inductor_config
from torch._dynamo import config as dynamo_config
from torch._export import capture_pre_autograd_graph
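# Hedged sketch: the gist preview stops after the imports. capture_pre_autograd_graph is
# typically the entry point of the PT2E quantization flow, so the continuation below
# assumes that flow; the model, shapes, calibration data, and quantizer settings are
# illustrative assumptions, not the original test_linear.py.
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer

model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).eval()
example_inputs = (torch.randn(16, 1024),)

# 1. Capture an ATen-level graph ahead of autograd.
exported_model = capture_pre_autograd_graph(model, example_inputs)

# 2. Annotate with the x86 Inductor quantizer and run a calibration pass.
quantizer = X86InductorQuantizer()
quantizer.set_global(xiq.get_default_x86_inductor_quantization_config())
prepared_model = prepare_pt2e(exported_model, quantizer)
prepared_model(*example_inputs)

# 3. Convert to a quantized graph and lower it through Inductor (freezing comes from
#    TORCHINDUCTOR_FREEZING=1 in the launch command above).
converted_model = convert_pt2e(prepared_model)
with torch.no_grad():
    optimized = torch.compile(converted_model)
    optimized(*example_inputs)  # warm-up / compilation
    start = time.time()
    for _ in range(10):
        optimized(*example_inputs)
    print("avg latency (s):", (time.time() - start) / 10)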
This file has been truncated.
V0627 17:31:00.663000 139845268738432 torch/_logging/structured.py:19] {"str": ["/localdisk/leslie/torch_inductor_community/pytorch/benchmarks/dynamo/torchbench.py", 0]}
V0627 17:31:00.663000 139845268738432 torch/_logging/structured.py:19] {"str": ["/localdisk/leslie/torch_inductor_community/pytorch/benchmarks/dynamo/common.py", 1]}
V0627 17:31:00.663000 139845268738432 torch/_logging/structured.py:19] {"str": ["/localdisk/leslie/torch_inductor_community/pytorch/torch/_dynamo/eval_frame.py", 2]}
V0627 17:31:00.663000 139845268738432 torch/_logging/structured.py:19] {"str": ["/localdisk/leslie/torch_inductor_community/pytorch/torch/_dynamo/convert_frame.py", 3]}
V0627 17:31:00.663000 139845268738432 torch/_dynamo/convert_frame.py:802] {"dynamo_start": {"stack": [{"line": 456, "name": "<module>", "filename": 0}, {"line": 452, "name": "torchbench_main", "filename": 0}, {"line": 3661, "name": "main", "filename": 1}, {"line": 3593, "name": "process_entry", "filename": 1}, {"line": 4220, "name": "run", "filename":
import torch
import transformers
from transformers import (
AutoConfig,
AutoModelForSequenceClassification,
AutoTokenizer,
DataCollatorWithPadding,
EvalPrediction,
HfArgumentParser,