@CoffeeVampir3
Created June 27, 2023 01:23
notebook
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "9dd3ad99-539a-493d-bbf0-6626576d8f07",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-06-26 18:31:36.339564: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-06-26 18:31:36.635829: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"===================================BUG REPORT===================================\n",
"Welcome to bitsandbytes. For bug reports, please run\n",
"\n",
"python -m bitsandbytes\n",
"\n",
" and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues\n",
"================================================================================\n",
"bin /home/blackroot/mambaforge/lib/python3.10/site-packages/bitsandbytes-0.39.0-py3.10.egg/bitsandbytes/libbitsandbytes_cuda118.so\n",
"CUDA SETUP: CUDA runtime path found: /home/blackroot/mambaforge/lib/libcudart.so.11.0\n",
"CUDA SETUP: Highest compute capability among GPUs detected: 8.6\n",
"CUDA SETUP: Detected CUDA version 118\n",
"CUDA SETUP: Loading binary /home/blackroot/mambaforge/lib/python3.10/site-packages/bitsandbytes-0.39.0-py3.10.egg/bitsandbytes/libbitsandbytes_cuda118.so...\n"
]
}
],
"source": [
"import os\n",
"\n",
"from transformers import AutoTokenizer, TextGenerationPipeline, LlamaForCausalLM, LlamaTokenizer\n",
"from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"\n",
"# os.makedirs(quantized_model_dir, exist_ok=True)\n",
"def get_wikitext2(nsamples, seed, seqlen, model):\n",
" from datasets import load_dataset\n",
" traindata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train')\n",
" testdata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')\n",
"\n",
" from transformers import AutoTokenizer\n",
" try:\n",
" tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False)\n",
" except:\n",
" tokenizer = AutoTokenizer.from_pretrained(model, use_fast=True)\n",
" trainenc = tokenizer(\"\\n\\n\".join(traindata['text']), return_tensors='pt')\n",
" testenc = tokenizer(\"\\n\\n\".join(testdata['text']), return_tensors='pt')\n",
"\n",
" import random\n",
" random.seed(seed)\n",
" np.random.seed(0)\n",
" torch.random.manual_seed(0)\n",
" \n",
" traindataset = []\n",
" for _ in range(nsamples):\n",
" i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)\n",
" j = i + seqlen\n",
" inp = trainenc.input_ids[:, i:j]\n",
" attention_mask = torch.ones_like(inp)\n",
" traindataset.append({'input_ids':inp,'attention_mask': attention_mask})\n",
" return traindataset, testenc\n",
"\n",
"@torch.no_grad()\n",
"def opt_eval(model, testenc, dev, seqlen = 2048):\n",
" print('Evaluating ...')\n",
"\n",
" testenc = testenc.input_ids\n",
" nsamples = testenc.numel() // seqlen\n",
"\n",
" use_cache = model.config.use_cache\n",
" model.config.use_cache = False\n",
" layers = model.model.decoder.layers\n",
"\n",
" model.model.decoder.embed_tokens = model.model.decoder.embed_tokens.to(dev)\n",
" model.model.decoder.embed_positions = model.model.decoder.embed_positions.to(dev)\n",
" if hasattr(model.model.decoder, 'project_out') and model.model.decoder.project_out:\n",
" model.model.decoder.project_out = model.model.decoder.project_out.to(dev)\n",
" if hasattr(model.model.decoder, 'project_in') and model.model.decoder.project_in:\n",
" model.model.decoder.project_in = model.model.decoder.project_in.to(dev)\n",
" layers[0] = layers[0].to(dev)\n",
"\n",
" dtype = next(iter(model.parameters())).dtype\n",
" inps = torch.zeros((nsamples, seqlen, model.config.hidden_size), dtype=dtype, device=dev)\n",
" cache = {'i': 0, 'attention_mask': None}\n",
"\n",
" class Catcher(nn.Module):\n",
"\n",
" def __init__(self, module):\n",
" super().__init__()\n",
" self.module = module\n",
"\n",
" def forward(self, inp, **kwargs):\n",
" inps[cache['i']] = inp\n",
" cache['i'] += 1\n",
" cache['attention_mask'] = kwargs['attention_mask']\n",
" raise ValueError\n",
"\n",
" layers[0] = Catcher(layers[0])\n",
" for i in range(nsamples):\n",
" batch = testenc[:, (i * seqlen):((i + 1) * seqlen)].to(dev)\n",
" try:\n",
" model(batch)\n",
" except ValueError:\n",
" pass\n",
" layers[0] = layers[0].module\n",
"\n",
" layers[0] = layers[0].cpu()\n",
" model.model.decoder.embed_tokens = model.model.decoder.embed_tokens.cpu()\n",
" model.model.decoder.embed_positions = model.model.decoder.embed_positions.cpu()\n",
" if hasattr(model.model.decoder, 'project_out') and model.model.decoder.project_out:\n",
" model.model.decoder.project_out = model.model.decoder.project_out.cpu()\n",
" if hasattr(model.model.decoder, 'project_in') and model.model.decoder.project_in:\n",
" model.model.decoder.project_in = model.model.decoder.project_in.cpu()\n",
" torch.cuda.empty_cache()\n",
"\n",
" outs = torch.zeros_like(inps)\n",
" attention_mask = cache['attention_mask']\n",
"\n",
" for i in range(len(layers)):\n",
" print(i)\n",
" layer = layers[i].to(dev)\n",
"\n",
" for j in range(nsamples):\n",
" outs[j] = layer(inps[j].unsqueeze(0), attention_mask=attention_mask)[0]\n",
" layers[i] = layer.cpu()\n",
" del layer\n",
" torch.cuda.empty_cache()\n",
" inps, outs = outs, inps\n",
"\n",
" if model.model.decoder.final_layer_norm is not None:\n",
" model.model.decoder.final_layer_norm = model.model.decoder.final_layer_norm.to(dev)\n",
" if model.model.decoder.project_out is not None:\n",
" model.model.decoder.project_out = model.model.decoder.project_out.to(dev)\n",
" model.lm_head = model.lm_head.to(dev)\n",
"\n",
" testenc = testenc.to(dev)\n",
" nlls = []\n",
" for i in range(nsamples):\n",
" hidden_states = inps[i].unsqueeze(0)\n",
" if model.model.decoder.final_layer_norm is not None:\n",
" hidden_states = model.model.decoder.final_layer_norm(hidden_states)\n",
" if model.model.decoder.project_out is not None:\n",
" hidden_states = model.model.decoder.project_out(hidden_states)\n",
" lm_logits = model.lm_head(hidden_states)\n",
" shift_logits = lm_logits[:, :-1, :].contiguous()\n",
" shift_labels = testenc[:, (i * seqlen):((i + 1) * seqlen)][:, 1:]\n",
" loss_fct = nn.CrossEntropyLoss()\n",
" loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))\n",
" neg_log_likelihood = loss.float() * seqlen\n",
" nlls.append(neg_log_likelihood)\n",
" ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * seqlen))\n",
" print(ppl.item())\n",
"\n",
" model.config.use_cache = use_cache\n",
"\n",
"def quantize():\n",
" tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)\n",
" traindataset,testenc = get_wikitext2(128, 0, 2048, pretrained_model_dir)\n",
"\n",
" quantize_config = BaseQuantizeConfig(\n",
" bits=4, # quantize model to 4-bit\n",
" group_size=32, # it is recommended to set the value to 128\n",
" desc_act=True, # desc_act and group size only works on triton\n",
" )\n",
"\n",
" # load un-quantized model, the model will always be force loaded into cpu\n",
" model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, quantize_config)\n",
"\n",
" # quantize model, the examples should be list of dict whose keys can only be \"input_ids\" and \"attention_mask\" \n",
" # with value under torch.LongTensor type.\n",
" model.quantize(traindataset, use_triton=True)\n",
" return model, traindataset, testenc, tokenizer\n",
"\n",
"import logging\n",
"\n",
"logging.basicConfig(\n",
" format=\"%(asctime)s %(levelname)s [%(name)s] %(message)s\", level=logging.INFO, datefmt=\"%Y-%m-%d %H:%M:%S\"\n",
")"
]
},
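{
"cell_type": "markdown",
"id": "simple-eval-note",
"metadata": {},
"source": [
"A minimal sketch of a layout-agnostic perplexity check, since `opt_eval` above is hard-wired to OPT's `model.model.decoder` layout and will not run on the LLaMA checkpoint quantized below. It assumes `model` is any Hugging Face causal LM that fits on `dev` in one piece; the name `simple_eval` is our own, not part of AutoGPTQ."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "simple-eval-sketch",
"metadata": {},
"outputs": [],
"source": [
"@torch.no_grad()\n",
"def simple_eval(model, testenc, dev, seqlen=2048):\n",
"    # hedged sketch: relies only on the standard HF causal-LM forward signature\n",
"    testenc = testenc.input_ids\n",
"    nsamples = testenc.numel() // seqlen\n",
"    nlls = []\n",
"    for i in range(nsamples):\n",
"        batch = testenc[:, i * seqlen:(i + 1) * seqlen].to(dev)\n",
"        # passing labels makes the model compute the shifted cross-entropy itself\n",
"        loss = model(batch, labels=batch).loss\n",
"        nlls.append(loss.float() * seqlen)\n",
"    ppl = torch.exp(torch.stack(nlls).sum() / (nsamples * seqlen))\n",
"    print(ppl.item())"
]
},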
{
"cell_type": "code",
"execution_count": 3,
"id": "a4f6e019-b8bd-4376-92fa-a8fb21662cf6",
"metadata": {},
"outputs": [],
"source": [
"pretrained_model_dir = \"models/30B-Lazarus\"\n",
"quantized_model_dir = \"33b-Lazarus-4bit-gptq-32g-desc\"\n",
"os.makedirs(quantized_model_dir, exist_ok=True)"
]
},
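{
"cell_type": "markdown",
"id": "save-reload-sketch",
"metadata": {},
"source": [
"A hedged sketch of the save/reload step that would follow quantization. `save_quantized`, `from_quantized`, and `TextGenerationPipeline` are the standard AutoGPTQ/transformers calls; `use_safetensors` and the prompt are illustrative choices, not taken from this notebook:\n",
"\n",
"```python\n",
"# after quantize() has finished (see the cell below)\n",
"model.save_quantized(quantized_model_dir, use_safetensors=True)\n",
"quantized = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device=\"cuda:0\", use_triton=True)\n",
"pipeline = TextGenerationPipeline(model=quantized, tokenizer=tokenizer)\n",
"print(pipeline(\"The meaning of life is\", max_new_tokens=32)[0][\"generated_text\"])\n",
"```"
]
},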
{
"cell_type": "code",
"execution_count": null,
"id": "f360fc51-3666-4c5e-a33a-00279e02b028",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-06-26 18:33:14 WARNING [datasets.builder] Found cached dataset wikitext (/home/blackroot/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)\n",
"2023-06-26 18:33:15 WARNING [datasets.builder] Found cached dataset wikitext (/home/blackroot/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126)\n",
"Token indices sequence length is longer than the specified maximum sequence length for this model (2874559 > 2048). Running this sequence through the model will result in indexing errors\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "818dd001e4a64f05bc63aaf7b60ea2bb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-06-26 18:34:04 INFO [auto_gptq.modeling._base] Start quantizing layer 1/60\n",
"2023-06-26 18:34:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 1/60...\n",
"2023-06-26 18:34:14 INFO [auto_gptq.quantization.gptq] duration: 1.0164523124694824\n",
"2023-06-26 18:34:14 INFO [auto_gptq.quantization.gptq] avg loss: 0.7486292123794556\n",
"2023-06-26 18:34:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 1/60...\n",
"2023-06-26 18:34:14 INFO [auto_gptq.quantization.gptq] duration: 0.8721108436584473\n",
"2023-06-26 18:34:14 INFO [auto_gptq.quantization.gptq] avg loss: 0.05392022058367729\n",
"2023-06-26 18:34:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 1/60...\n",
"2023-06-26 18:34:15 INFO [auto_gptq.quantization.gptq] duration: 0.8752179145812988\n",
"2023-06-26 18:34:15 INFO [auto_gptq.quantization.gptq] avg loss: 0.6056452989578247\n",
"2023-06-26 18:34:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 1/60...\n",
"2023-06-26 18:34:23 INFO [auto_gptq.quantization.gptq] duration: 0.9049282073974609\n",
"2023-06-26 18:34:23 INFO [auto_gptq.quantization.gptq] avg loss: 0.006863628514111042\n",
"2023-06-26 18:34:30 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 1/60...\n",
"2023-06-26 18:34:31 INFO [auto_gptq.quantization.gptq] duration: 1.1148509979248047\n",
"2023-06-26 18:34:31 INFO [auto_gptq.quantization.gptq] avg loss: 0.39658525586128235\n",
"2023-06-26 18:34:31 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 1/60...\n",
"2023-06-26 18:34:32 INFO [auto_gptq.quantization.gptq] duration: 1.0600247383117676\n",
"2023-06-26 18:34:32 INFO [auto_gptq.quantization.gptq] avg loss: 0.432053804397583\n",
"2023-06-26 18:34:44 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 1/60...\n",
"2023-06-26 18:34:47 INFO [auto_gptq.quantization.gptq] duration: 3.1218526363372803\n",
"2023-06-26 18:34:47 INFO [auto_gptq.quantization.gptq] avg loss: 0.029051033779978752\n",
"2023-06-26 18:34:52 INFO [auto_gptq.modeling._base] Start quantizing layer 2/60\n",
"2023-06-26 18:35:01 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 2/60...\n",
"2023-06-26 18:35:01 INFO [auto_gptq.quantization.gptq] duration: 0.8791849613189697\n",
"2023-06-26 18:35:01 INFO [auto_gptq.quantization.gptq] avg loss: 2.2411227226257324\n",
"2023-06-26 18:35:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 2/60...\n",
"2023-06-26 18:35:02 INFO [auto_gptq.quantization.gptq] duration: 0.8647327423095703\n",
"2023-06-26 18:35:02 INFO [auto_gptq.quantization.gptq] avg loss: 0.3230254650115967\n",
"2023-06-26 18:35:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 2/60...\n",
"2023-06-26 18:35:03 INFO [auto_gptq.quantization.gptq] duration: 0.8582100868225098\n",
"2023-06-26 18:35:03 INFO [auto_gptq.quantization.gptq] avg loss: 2.264517307281494\n",
"2023-06-26 18:35:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 2/60...\n",
"2023-06-26 18:35:10 INFO [auto_gptq.quantization.gptq] duration: 0.8574714660644531\n",
"2023-06-26 18:35:10 INFO [auto_gptq.quantization.gptq] avg loss: 0.09072926640510559\n",
"2023-06-26 18:35:17 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 2/60...\n",
"2023-06-26 18:35:19 INFO [auto_gptq.quantization.gptq] duration: 1.0922691822052002\n",
"2023-06-26 18:35:19 INFO [auto_gptq.quantization.gptq] avg loss: 3.5277843475341797\n",
"2023-06-26 18:35:19 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 2/60...\n",
"2023-06-26 18:35:20 INFO [auto_gptq.quantization.gptq] duration: 1.0501480102539062\n",
"2023-06-26 18:35:20 INFO [auto_gptq.quantization.gptq] avg loss: 4.02543306350708\n",
"2023-06-26 18:35:32 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 2/60...\n",
"2023-06-26 18:35:35 INFO [auto_gptq.quantization.gptq] duration: 3.364971160888672\n",
"2023-06-26 18:35:35 INFO [auto_gptq.quantization.gptq] avg loss: 0.3110250234603882\n",
"2023-06-26 18:35:41 INFO [auto_gptq.modeling._base] Start quantizing layer 3/60\n",
"2023-06-26 18:35:49 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 3/60...\n",
"2023-06-26 18:35:50 INFO [auto_gptq.quantization.gptq] duration: 0.88547682762146\n",
"2023-06-26 18:35:50 INFO [auto_gptq.quantization.gptq] avg loss: 8.452648162841797\n",
"2023-06-26 18:35:50 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 3/60...\n",
"2023-06-26 18:35:51 INFO [auto_gptq.quantization.gptq] duration: 0.8506603240966797\n",
"2023-06-26 18:35:51 INFO [auto_gptq.quantization.gptq] avg loss: 1.795954942703247\n",
"2023-06-26 18:35:51 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 3/60...\n",
"2023-06-26 18:35:52 INFO [auto_gptq.quantization.gptq] duration: 0.8812410831451416\n",
"2023-06-26 18:35:52 INFO [auto_gptq.quantization.gptq] avg loss: 8.62606430053711\n",
"2023-06-26 18:35:58 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 3/60...\n",
"2023-06-26 18:35:59 INFO [auto_gptq.quantization.gptq] duration: 0.9138636589050293\n",
"2023-06-26 18:35:59 INFO [auto_gptq.quantization.gptq] avg loss: 0.28623563051223755\n",
"2023-06-26 18:36:06 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 3/60...\n",
"2023-06-26 18:36:08 INFO [auto_gptq.quantization.gptq] duration: 1.1141860485076904\n",
"2023-06-26 18:36:08 INFO [auto_gptq.quantization.gptq] avg loss: 11.346738815307617\n",
"2023-06-26 18:36:08 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 3/60...\n",
"2023-06-26 18:36:09 INFO [auto_gptq.quantization.gptq] duration: 1.0859854221343994\n",
"2023-06-26 18:36:09 INFO [auto_gptq.quantization.gptq] avg loss: 13.467681884765625\n",
"2023-06-26 18:36:21 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 3/60...\n",
"2023-06-26 18:36:24 INFO [auto_gptq.quantization.gptq] duration: 3.397627353668213\n",
"2023-06-26 18:36:24 INFO [auto_gptq.quantization.gptq] avg loss: 0.719761312007904\n",
"2023-06-26 18:36:30 INFO [auto_gptq.modeling._base] Start quantizing layer 4/60\n",
"2023-06-26 18:36:38 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 4/60...\n",
"2023-06-26 18:36:39 INFO [auto_gptq.quantization.gptq] duration: 0.8878118991851807\n",
"2023-06-26 18:36:39 INFO [auto_gptq.quantization.gptq] avg loss: 16.114456176757812\n",
"2023-06-26 18:36:39 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 4/60...\n",
"2023-06-26 18:36:40 INFO [auto_gptq.quantization.gptq] duration: 0.8713064193725586\n",
"2023-06-26 18:36:40 INFO [auto_gptq.quantization.gptq] avg loss: 3.6073555946350098\n",
"2023-06-26 18:36:40 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 4/60...\n",
"2023-06-26 18:36:41 INFO [auto_gptq.quantization.gptq] duration: 0.8678483963012695\n",
"2023-06-26 18:36:41 INFO [auto_gptq.quantization.gptq] avg loss: 15.45523452758789\n",
"2023-06-26 18:36:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 4/60...\n",
"2023-06-26 18:36:48 INFO [auto_gptq.quantization.gptq] duration: 0.7987043857574463\n",
"2023-06-26 18:36:48 INFO [auto_gptq.quantization.gptq] avg loss: 0.47601693868637085\n",
"2023-06-26 18:36:54 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 4/60...\n",
"2023-06-26 18:36:55 INFO [auto_gptq.quantization.gptq] duration: 0.9941482543945312\n",
"2023-06-26 18:36:55 INFO [auto_gptq.quantization.gptq] avg loss: 19.755653381347656\n",
"2023-06-26 18:36:55 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 4/60...\n",
"2023-06-26 18:36:56 INFO [auto_gptq.quantization.gptq] duration: 0.9954080581665039\n",
"2023-06-26 18:36:56 INFO [auto_gptq.quantization.gptq] avg loss: 23.054058074951172\n",
"2023-06-26 18:37:09 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 4/60...\n",
"2023-06-26 18:37:12 INFO [auto_gptq.quantization.gptq] duration: 3.39924955368042\n",
"2023-06-26 18:37:12 INFO [auto_gptq.quantization.gptq] avg loss: 4.203612327575684\n",
"2023-06-26 18:37:18 INFO [auto_gptq.modeling._base] Start quantizing layer 5/60\n",
"2023-06-26 18:37:26 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 5/60...\n",
"2023-06-26 18:37:27 INFO [auto_gptq.quantization.gptq] duration: 0.7880158424377441\n",
"2023-06-26 18:37:27 INFO [auto_gptq.quantization.gptq] avg loss: 48.49755859375\n",
"2023-06-26 18:37:27 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 5/60...\n",
"2023-06-26 18:37:28 INFO [auto_gptq.quantization.gptq] duration: 0.7654588222503662\n",
"2023-06-26 18:37:28 INFO [auto_gptq.quantization.gptq] avg loss: 19.109439849853516\n",
"2023-06-26 18:37:28 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 5/60...\n",
"2023-06-26 18:37:29 INFO [auto_gptq.quantization.gptq] duration: 0.7804515361785889\n",
"2023-06-26 18:37:29 INFO [auto_gptq.quantization.gptq] avg loss: 49.366539001464844\n",
"2023-06-26 18:37:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 5/60...\n",
"2023-06-26 18:37:36 INFO [auto_gptq.quantization.gptq] duration: 0.884080171585083\n",
"2023-06-26 18:37:36 INFO [auto_gptq.quantization.gptq] avg loss: 0.4922410547733307\n",
"2023-06-26 18:37:43 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 5/60...\n",
"2023-06-26 18:37:44 INFO [auto_gptq.quantization.gptq] duration: 1.0507736206054688\n",
"2023-06-26 18:37:44 INFO [auto_gptq.quantization.gptq] avg loss: 29.97650909423828\n",
"2023-06-26 18:37:44 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 5/60...\n",
"2023-06-26 18:37:45 INFO [auto_gptq.quantization.gptq] duration: 0.9667980670928955\n",
"2023-06-26 18:37:45 INFO [auto_gptq.quantization.gptq] avg loss: 34.878204345703125\n",
"2023-06-26 18:37:56 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 5/60...\n",
"2023-06-26 18:38:00 INFO [auto_gptq.quantization.gptq] duration: 3.1949493885040283\n",
"2023-06-26 18:38:00 INFO [auto_gptq.quantization.gptq] avg loss: 1.6271657943725586\n",
"2023-06-26 18:38:05 INFO [auto_gptq.modeling._base] Start quantizing layer 6/60\n",
"2023-06-26 18:38:13 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 6/60...\n",
"2023-06-26 18:38:14 INFO [auto_gptq.quantization.gptq] duration: 0.8870787620544434\n",
"2023-06-26 18:38:14 INFO [auto_gptq.quantization.gptq] avg loss: 53.9148063659668\n",
"2023-06-26 18:38:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 6/60...\n",
"2023-06-26 18:38:15 INFO [auto_gptq.quantization.gptq] duration: 0.8542630672454834\n",
"2023-06-26 18:38:15 INFO [auto_gptq.quantization.gptq] avg loss: 22.988101959228516\n",
"2023-06-26 18:38:15 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 6/60...\n",
"2023-06-26 18:38:16 INFO [auto_gptq.quantization.gptq] duration: 0.8532156944274902\n",
"2023-06-26 18:38:16 INFO [auto_gptq.quantization.gptq] avg loss: 55.68257141113281\n",
"2023-06-26 18:38:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 6/60...\n",
"2023-06-26 18:38:23 INFO [auto_gptq.quantization.gptq] duration: 0.8844602108001709\n",
"2023-06-26 18:38:23 INFO [auto_gptq.quantization.gptq] avg loss: 0.675839900970459\n",
"2023-06-26 18:38:30 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 6/60...\n",
"2023-06-26 18:38:31 INFO [auto_gptq.quantization.gptq] duration: 0.9985096454620361\n",
"2023-06-26 18:38:31 INFO [auto_gptq.quantization.gptq] avg loss: 35.58518981933594\n",
"2023-06-26 18:38:31 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 6/60...\n",
"2023-06-26 18:38:32 INFO [auto_gptq.quantization.gptq] duration: 0.9587287902832031\n",
"2023-06-26 18:38:32 INFO [auto_gptq.quantization.gptq] avg loss: 41.73162078857422\n",
"2023-06-26 18:38:44 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 6/60...\n",
"2023-06-26 18:38:47 INFO [auto_gptq.quantization.gptq] duration: 3.355031728744507\n",
"2023-06-26 18:38:47 INFO [auto_gptq.quantization.gptq] avg loss: 2.190372943878174\n",
"2023-06-26 18:38:52 INFO [auto_gptq.modeling._base] Start quantizing layer 7/60\n",
"2023-06-26 18:39:01 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 7/60...\n",
"2023-06-26 18:39:02 INFO [auto_gptq.quantization.gptq] duration: 0.8752186298370361\n",
"2023-06-26 18:39:02 INFO [auto_gptq.quantization.gptq] avg loss: 60.75889587402344\n",
"2023-06-26 18:39:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 7/60...\n",
"2023-06-26 18:39:02 INFO [auto_gptq.quantization.gptq] duration: 0.8710393905639648\n",
"2023-06-26 18:39:02 INFO [auto_gptq.quantization.gptq] avg loss: 28.260587692260742\n",
"2023-06-26 18:39:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 7/60...\n",
"2023-06-26 18:39:03 INFO [auto_gptq.quantization.gptq] duration: 0.8537850379943848\n",
"2023-06-26 18:39:03 INFO [auto_gptq.quantization.gptq] avg loss: 63.66701126098633\n",
"2023-06-26 18:39:09 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 7/60...\n",
"2023-06-26 18:39:10 INFO [auto_gptq.quantization.gptq] duration: 0.8030886650085449\n",
"2023-06-26 18:39:10 INFO [auto_gptq.quantization.gptq] avg loss: 0.9930809736251831\n",
"2023-06-26 18:39:17 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 7/60...\n",
"2023-06-26 18:39:19 INFO [auto_gptq.quantization.gptq] duration: 1.1270134449005127\n",
"2023-06-26 18:39:19 INFO [auto_gptq.quantization.gptq] avg loss: 43.68928909301758\n",
"2023-06-26 18:39:19 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 7/60...\n",
"2023-06-26 18:39:20 INFO [auto_gptq.quantization.gptq] duration: 0.9928648471832275\n",
"2023-06-26 18:39:20 INFO [auto_gptq.quantization.gptq] avg loss: 49.892127990722656\n",
"2023-06-26 18:39:32 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 7/60...\n",
"2023-06-26 18:39:35 INFO [auto_gptq.quantization.gptq] duration: 3.3165969848632812\n",
"2023-06-26 18:39:35 INFO [auto_gptq.quantization.gptq] avg loss: 2.8778295516967773\n",
"2023-06-26 18:39:40 INFO [auto_gptq.modeling._base] Start quantizing layer 8/60\n",
"2023-06-26 18:39:48 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 8/60...\n",
"2023-06-26 18:39:49 INFO [auto_gptq.quantization.gptq] duration: 0.7897717952728271\n",
"2023-06-26 18:39:49 INFO [auto_gptq.quantization.gptq] avg loss: 69.85736083984375\n",
"2023-06-26 18:39:49 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 8/60...\n",
"2023-06-26 18:39:50 INFO [auto_gptq.quantization.gptq] duration: 0.7602143287658691\n",
"2023-06-26 18:39:50 INFO [auto_gptq.quantization.gptq] avg loss: 33.126991271972656\n",
"2023-06-26 18:39:50 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 8/60...\n",
"2023-06-26 18:39:51 INFO [auto_gptq.quantization.gptq] duration: 0.7645907402038574\n",
"2023-06-26 18:39:51 INFO [auto_gptq.quantization.gptq] avg loss: 72.08340454101562\n",
"2023-06-26 18:39:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 8/60...\n",
"2023-06-26 18:39:57 INFO [auto_gptq.quantization.gptq] duration: 0.7975301742553711\n",
"2023-06-26 18:39:57 INFO [auto_gptq.quantization.gptq] avg loss: 1.4421252012252808\n",
"2023-06-26 18:40:04 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 8/60...\n",
"2023-06-26 18:40:05 INFO [auto_gptq.quantization.gptq] duration: 1.0917129516601562\n",
"2023-06-26 18:40:05 INFO [auto_gptq.quantization.gptq] avg loss: 53.537391662597656\n",
"2023-06-26 18:40:05 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 8/60...\n",
"2023-06-26 18:40:07 INFO [auto_gptq.quantization.gptq] duration: 1.0489530563354492\n",
"2023-06-26 18:40:07 INFO [auto_gptq.quantization.gptq] avg loss: 60.526329040527344\n",
"2023-06-26 18:40:19 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 8/60...\n",
"2023-06-26 18:40:22 INFO [auto_gptq.quantization.gptq] duration: 3.399822473526001\n",
"2023-06-26 18:40:22 INFO [auto_gptq.quantization.gptq] avg loss: 3.789963960647583\n",
"2023-06-26 18:40:28 INFO [auto_gptq.modeling._base] Start quantizing layer 9/60\n",
"2023-06-26 18:40:36 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 9/60...\n",
"2023-06-26 18:40:37 INFO [auto_gptq.quantization.gptq] duration: 0.8786149024963379\n",
"2023-06-26 18:40:37 INFO [auto_gptq.quantization.gptq] avg loss: 75.57379913330078\n",
"2023-06-26 18:40:37 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 9/60...\n",
"2023-06-26 18:40:38 INFO [auto_gptq.quantization.gptq] duration: 0.8603739738464355\n",
"2023-06-26 18:40:38 INFO [auto_gptq.quantization.gptq] avg loss: 37.354087829589844\n",
"2023-06-26 18:40:38 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 9/60...\n",
"2023-06-26 18:40:39 INFO [auto_gptq.quantization.gptq] duration: 0.86700439453125\n",
"2023-06-26 18:40:39 INFO [auto_gptq.quantization.gptq] avg loss: 77.53741455078125\n",
"2023-06-26 18:40:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 9/60...\n",
"2023-06-26 18:40:46 INFO [auto_gptq.quantization.gptq] duration: 0.8887379169464111\n",
"2023-06-26 18:40:46 INFO [auto_gptq.quantization.gptq] avg loss: 1.9536398649215698\n",
"2023-06-26 18:40:53 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 9/60...\n",
"2023-06-26 18:40:54 INFO [auto_gptq.quantization.gptq] duration: 1.0840914249420166\n",
"2023-06-26 18:40:54 INFO [auto_gptq.quantization.gptq] avg loss: 63.6827507019043\n",
"2023-06-26 18:40:54 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 9/60...\n",
"2023-06-26 18:40:55 INFO [auto_gptq.quantization.gptq] duration: 1.0438430309295654\n",
"2023-06-26 18:40:55 INFO [auto_gptq.quantization.gptq] avg loss: 71.64509582519531\n",
"2023-06-26 18:41:08 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 9/60...\n",
"2023-06-26 18:41:11 INFO [auto_gptq.quantization.gptq] duration: 3.378542184829712\n",
"2023-06-26 18:41:11 INFO [auto_gptq.quantization.gptq] avg loss: 4.953500270843506\n",
"2023-06-26 18:41:17 INFO [auto_gptq.modeling._base] Start quantizing layer 10/60\n",
"2023-06-26 18:41:25 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 10/60...\n",
"2023-06-26 18:41:26 INFO [auto_gptq.quantization.gptq] duration: 0.8969614505767822\n",
"2023-06-26 18:41:26 INFO [auto_gptq.quantization.gptq] avg loss: 75.61265563964844\n",
"2023-06-26 18:41:26 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 10/60...\n",
"2023-06-26 18:41:27 INFO [auto_gptq.quantization.gptq] duration: 0.8411314487457275\n",
"2023-06-26 18:41:27 INFO [auto_gptq.quantization.gptq] avg loss: 37.645469665527344\n",
"2023-06-26 18:41:27 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 10/60...\n",
"2023-06-26 18:41:28 INFO [auto_gptq.quantization.gptq] duration: 0.8332717418670654\n",
"2023-06-26 18:41:28 INFO [auto_gptq.quantization.gptq] avg loss: 75.6242446899414\n",
"2023-06-26 18:41:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 10/60...\n",
"2023-06-26 18:41:35 INFO [auto_gptq.quantization.gptq] duration: 0.8819420337677002\n",
"2023-06-26 18:41:35 INFO [auto_gptq.quantization.gptq] avg loss: 2.373723030090332\n",
"2023-06-26 18:41:42 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 10/60...\n",
"2023-06-26 18:41:43 INFO [auto_gptq.quantization.gptq] duration: 1.090320110321045\n",
"2023-06-26 18:41:43 INFO [auto_gptq.quantization.gptq] avg loss: 73.69998168945312\n",
"2023-06-26 18:41:43 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 10/60...\n",
"2023-06-26 18:41:44 INFO [auto_gptq.quantization.gptq] duration: 1.0385167598724365\n",
"2023-06-26 18:41:44 INFO [auto_gptq.quantization.gptq] avg loss: 82.44574737548828\n",
"2023-06-26 18:41:57 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 10/60...\n",
"2023-06-26 18:42:00 INFO [auto_gptq.quantization.gptq] duration: 3.323763370513916\n",
"2023-06-26 18:42:00 INFO [auto_gptq.quantization.gptq] avg loss: 6.058459758758545\n",
"2023-06-26 18:42:06 INFO [auto_gptq.modeling._base] Start quantizing layer 11/60\n",
"2023-06-26 18:42:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 11/60...\n",
"2023-06-26 18:42:15 INFO [auto_gptq.quantization.gptq] duration: 0.8609449863433838\n",
"2023-06-26 18:42:15 INFO [auto_gptq.quantization.gptq] avg loss: 87.7587890625\n",
"2023-06-26 18:42:15 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 11/60...\n",
"2023-06-26 18:42:16 INFO [auto_gptq.quantization.gptq] duration: 0.839996337890625\n",
"2023-06-26 18:42:16 INFO [auto_gptq.quantization.gptq] avg loss: 44.406089782714844\n",
"2023-06-26 18:42:16 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 11/60...\n",
"2023-06-26 18:42:17 INFO [auto_gptq.quantization.gptq] duration: 0.8448247909545898\n",
"2023-06-26 18:42:17 INFO [auto_gptq.quantization.gptq] avg loss: 87.57785034179688\n",
"2023-06-26 18:42:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 11/60...\n",
"2023-06-26 18:42:24 INFO [auto_gptq.quantization.gptq] duration: 0.8896353244781494\n",
"2023-06-26 18:42:24 INFO [auto_gptq.quantization.gptq] avg loss: 3.2383458614349365\n",
"2023-06-26 18:42:31 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 11/60...\n",
"2023-06-26 18:42:32 INFO [auto_gptq.quantization.gptq] duration: 1.1170198917388916\n",
"2023-06-26 18:42:32 INFO [auto_gptq.quantization.gptq] avg loss: 80.91424560546875\n",
"2023-06-26 18:42:32 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 11/60...\n",
"2023-06-26 18:42:33 INFO [auto_gptq.quantization.gptq] duration: 1.063755989074707\n",
"2023-06-26 18:42:33 INFO [auto_gptq.quantization.gptq] avg loss: 89.0909652709961\n",
"2023-06-26 18:42:46 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 11/60...\n",
"2023-06-26 18:42:49 INFO [auto_gptq.quantization.gptq] duration: 3.3413150310516357\n",
"2023-06-26 18:42:49 INFO [auto_gptq.quantization.gptq] avg loss: 8.131601333618164\n",
"2023-06-26 18:42:54 INFO [auto_gptq.modeling._base] Start quantizing layer 12/60\n",
"2023-06-26 18:43:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 12/60...\n",
"2023-06-26 18:43:03 INFO [auto_gptq.quantization.gptq] duration: 0.7817614078521729\n",
"2023-06-26 18:43:03 INFO [auto_gptq.quantization.gptq] avg loss: 110.98269653320312\n",
"2023-06-26 18:43:03 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 12/60...\n",
"2023-06-26 18:43:04 INFO [auto_gptq.quantization.gptq] duration: 0.7597556114196777\n",
"2023-06-26 18:43:04 INFO [auto_gptq.quantization.gptq] avg loss: 63.313026428222656\n",
"2023-06-26 18:43:04 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 12/60...\n",
"2023-06-26 18:43:05 INFO [auto_gptq.quantization.gptq] duration: 0.7637412548065186\n",
"2023-06-26 18:43:05 INFO [auto_gptq.quantization.gptq] avg loss: 113.91293334960938\n",
"2023-06-26 18:43:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 12/60...\n",
"2023-06-26 18:43:11 INFO [auto_gptq.quantization.gptq] duration: 0.7957217693328857\n",
"2023-06-26 18:43:11 INFO [auto_gptq.quantization.gptq] avg loss: 2.8956499099731445\n",
"2023-06-26 18:43:18 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 12/60...\n",
"2023-06-26 18:43:19 INFO [auto_gptq.quantization.gptq] duration: 1.0048880577087402\n",
"2023-06-26 18:43:19 INFO [auto_gptq.quantization.gptq] avg loss: 90.8055419921875\n",
"2023-06-26 18:43:19 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 12/60...\n",
"2023-06-26 18:43:20 INFO [auto_gptq.quantization.gptq] duration: 1.0553069114685059\n",
"2023-06-26 18:43:20 INFO [auto_gptq.quantization.gptq] avg loss: 98.46626281738281\n",
"2023-06-26 18:43:33 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 12/60...\n",
"2023-06-26 18:43:36 INFO [auto_gptq.quantization.gptq] duration: 3.3442001342773438\n",
"2023-06-26 18:43:36 INFO [auto_gptq.quantization.gptq] avg loss: 8.251964569091797\n",
"2023-06-26 18:43:41 INFO [auto_gptq.modeling._base] Start quantizing layer 13/60\n",
"2023-06-26 18:43:50 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 13/60...\n",
"2023-06-26 18:43:51 INFO [auto_gptq.quantization.gptq] duration: 0.8961377143859863\n",
"2023-06-26 18:43:51 INFO [auto_gptq.quantization.gptq] avg loss: 112.5550765991211\n",
"2023-06-26 18:43:51 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 13/60...\n",
"2023-06-26 18:43:52 INFO [auto_gptq.quantization.gptq] duration: 0.8616325855255127\n",
"2023-06-26 18:43:52 INFO [auto_gptq.quantization.gptq] avg loss: 62.06625747680664\n",
"2023-06-26 18:43:52 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 13/60...\n",
"2023-06-26 18:43:53 INFO [auto_gptq.quantization.gptq] duration: 0.8973448276519775\n",
"2023-06-26 18:43:53 INFO [auto_gptq.quantization.gptq] avg loss: 113.1330795288086\n",
"2023-06-26 18:43:59 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 13/60...\n",
"2023-06-26 18:44:00 INFO [auto_gptq.quantization.gptq] duration: 0.8999853134155273\n",
"2023-06-26 18:44:00 INFO [auto_gptq.quantization.gptq] avg loss: 3.7796993255615234\n",
"2023-06-26 18:44:07 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 13/60...\n",
"2023-06-26 18:44:08 INFO [auto_gptq.quantization.gptq] duration: 1.0903074741363525\n",
"2023-06-26 18:44:08 INFO [auto_gptq.quantization.gptq] avg loss: 95.89436340332031\n",
"2023-06-26 18:44:09 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 13/60...\n",
"2023-06-26 18:44:10 INFO [auto_gptq.quantization.gptq] duration: 1.0441439151763916\n",
"2023-06-26 18:44:10 INFO [auto_gptq.quantization.gptq] avg loss: 103.26261138916016\n",
"2023-06-26 18:44:22 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 13/60...\n",
"2023-06-26 18:44:25 INFO [auto_gptq.quantization.gptq] duration: 3.3541462421417236\n",
"2023-06-26 18:44:25 INFO [auto_gptq.quantization.gptq] avg loss: 9.13792896270752\n",
"2023-06-26 18:44:31 INFO [auto_gptq.modeling._base] Start quantizing layer 14/60\n",
"2023-06-26 18:44:39 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 14/60...\n",
"2023-06-26 18:44:40 INFO [auto_gptq.quantization.gptq] duration: 0.873603105545044\n",
"2023-06-26 18:44:40 INFO [auto_gptq.quantization.gptq] avg loss: 103.32135009765625\n",
"2023-06-26 18:44:40 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 14/60...\n",
"2023-06-26 18:44:41 INFO [auto_gptq.quantization.gptq] duration: 0.8401966094970703\n",
"2023-06-26 18:44:41 INFO [auto_gptq.quantization.gptq] avg loss: 56.74312210083008\n",
"2023-06-26 18:44:41 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 14/60...\n",
"2023-06-26 18:44:41 INFO [auto_gptq.quantization.gptq] duration: 0.8579323291778564\n",
"2023-06-26 18:44:41 INFO [auto_gptq.quantization.gptq] avg loss: 103.12858581542969\n",
"2023-06-26 18:44:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 14/60...\n",
"2023-06-26 18:44:48 INFO [auto_gptq.quantization.gptq] duration: 0.7920126914978027\n",
"2023-06-26 18:44:48 INFO [auto_gptq.quantization.gptq] avg loss: 4.403987407684326\n",
"2023-06-26 18:44:55 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 14/60...\n",
"2023-06-26 18:44:57 INFO [auto_gptq.quantization.gptq] duration: 1.102994680404663\n",
"2023-06-26 18:44:57 INFO [auto_gptq.quantization.gptq] avg loss: 98.72038269042969\n",
"2023-06-26 18:44:57 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 14/60...\n",
"2023-06-26 18:44:58 INFO [auto_gptq.quantization.gptq] duration: 1.0945208072662354\n",
"2023-06-26 18:44:58 INFO [auto_gptq.quantization.gptq] avg loss: 103.73863983154297\n",
"2023-06-26 18:45:10 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 14/60...\n",
"2023-06-26 18:45:13 INFO [auto_gptq.quantization.gptq] duration: 3.253654956817627\n",
"2023-06-26 18:45:13 INFO [auto_gptq.quantization.gptq] avg loss: 9.936589241027832\n",
"2023-06-26 18:45:18 INFO [auto_gptq.modeling._base] Start quantizing layer 15/60\n",
"2023-06-26 18:45:26 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 15/60...\n",
"2023-06-26 18:45:27 INFO [auto_gptq.quantization.gptq] duration: 0.8774707317352295\n",
"2023-06-26 18:45:27 INFO [auto_gptq.quantization.gptq] avg loss: 108.03919982910156\n",
"2023-06-26 18:45:27 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 15/60...\n",
"2023-06-26 18:45:28 INFO [auto_gptq.quantization.gptq] duration: 0.8367099761962891\n",
"2023-06-26 18:45:28 INFO [auto_gptq.quantization.gptq] avg loss: 61.751075744628906\n",
"2023-06-26 18:45:28 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 15/60...\n",
"2023-06-26 18:45:29 INFO [auto_gptq.quantization.gptq] duration: 0.8968749046325684\n",
"2023-06-26 18:45:29 INFO [auto_gptq.quantization.gptq] avg loss: 109.04547119140625\n",
"2023-06-26 18:45:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 15/60...\n",
"2023-06-26 18:45:36 INFO [auto_gptq.quantization.gptq] duration: 0.8103358745574951\n",
"2023-06-26 18:45:36 INFO [auto_gptq.quantization.gptq] avg loss: 4.077201843261719\n",
"2023-06-26 18:45:43 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 15/60...\n",
"2023-06-26 18:45:44 INFO [auto_gptq.quantization.gptq] duration: 1.0424370765686035\n",
"2023-06-26 18:45:44 INFO [auto_gptq.quantization.gptq] avg loss: 104.90103912353516\n",
"2023-06-26 18:45:44 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 15/60...\n",
"2023-06-26 18:45:45 INFO [auto_gptq.quantization.gptq] duration: 1.0294833183288574\n",
"2023-06-26 18:45:45 INFO [auto_gptq.quantization.gptq] avg loss: 108.58885192871094\n",
"2023-06-26 18:45:57 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 15/60...\n",
"2023-06-26 18:46:00 INFO [auto_gptq.quantization.gptq] duration: 3.104301929473877\n",
"2023-06-26 18:46:00 INFO [auto_gptq.quantization.gptq] avg loss: 10.550216674804688\n",
"2023-06-26 18:46:05 INFO [auto_gptq.modeling._base] Start quantizing layer 16/60\n",
"2023-06-26 18:46:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 16/60...\n",
"2023-06-26 18:46:14 INFO [auto_gptq.quantization.gptq] duration: 0.7858569622039795\n",
"2023-06-26 18:46:14 INFO [auto_gptq.quantization.gptq] avg loss: 128.04994201660156\n",
"2023-06-26 18:46:14 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 16/60...\n",
"2023-06-26 18:46:15 INFO [auto_gptq.quantization.gptq] duration: 0.7610125541687012\n",
"2023-06-26 18:46:15 INFO [auto_gptq.quantization.gptq] avg loss: 76.8011474609375\n",
"2023-06-26 18:46:15 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 16/60...\n",
"2023-06-26 18:46:16 INFO [auto_gptq.quantization.gptq] duration: 0.7789857387542725\n",
"2023-06-26 18:46:16 INFO [auto_gptq.quantization.gptq] avg loss: 130.0301513671875\n",
"2023-06-26 18:46:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 16/60...\n",
"2023-06-26 18:46:23 INFO [auto_gptq.quantization.gptq] duration: 0.8825023174285889\n",
"2023-06-26 18:46:23 INFO [auto_gptq.quantization.gptq] avg loss: 5.505874156951904\n",
"2023-06-26 18:46:30 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 16/60...\n",
"2023-06-26 18:46:31 INFO [auto_gptq.quantization.gptq] duration: 0.9842641353607178\n",
"2023-06-26 18:46:31 INFO [auto_gptq.quantization.gptq] avg loss: 109.96064758300781\n",
"2023-06-26 18:46:31 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 16/60...\n",
"2023-06-26 18:46:32 INFO [auto_gptq.quantization.gptq] duration: 0.9492306709289551\n",
"2023-06-26 18:46:32 INFO [auto_gptq.quantization.gptq] avg loss: 114.07837677001953\n",
"2023-06-26 18:46:44 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 16/60...\n",
"2023-06-26 18:46:47 INFO [auto_gptq.quantization.gptq] duration: 3.225769519805908\n",
"2023-06-26 18:46:47 INFO [auto_gptq.quantization.gptq] avg loss: 11.930509567260742\n",
"2023-06-26 18:46:52 INFO [auto_gptq.modeling._base] Start quantizing layer 17/60\n",
"2023-06-26 18:47:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 17/60...\n",
"2023-06-26 18:47:01 INFO [auto_gptq.quantization.gptq] duration: 0.79018235206604\n",
"2023-06-26 18:47:01 INFO [auto_gptq.quantization.gptq] avg loss: 132.3023681640625\n",
"2023-06-26 18:47:01 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 17/60...\n",
"2023-06-26 18:47:02 INFO [auto_gptq.quantization.gptq] duration: 0.7670350074768066\n",
"2023-06-26 18:47:02 INFO [auto_gptq.quantization.gptq] avg loss: 84.94049835205078\n",
"2023-06-26 18:47:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 17/60...\n",
"2023-06-26 18:47:03 INFO [auto_gptq.quantization.gptq] duration: 0.7590692043304443\n",
"2023-06-26 18:47:03 INFO [auto_gptq.quantization.gptq] avg loss: 135.49224853515625\n",
"2023-06-26 18:47:08 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 17/60...\n",
"2023-06-26 18:47:09 INFO [auto_gptq.quantization.gptq] duration: 0.8762412071228027\n",
"2023-06-26 18:47:09 INFO [auto_gptq.quantization.gptq] avg loss: 5.2532057762146\n",
"2023-06-26 18:47:17 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 17/60...\n",
"2023-06-26 18:47:18 INFO [auto_gptq.quantization.gptq] duration: 1.1032989025115967\n",
"2023-06-26 18:47:18 INFO [auto_gptq.quantization.gptq] avg loss: 118.88277435302734\n",
"2023-06-26 18:47:18 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 17/60...\n",
"2023-06-26 18:47:19 INFO [auto_gptq.quantization.gptq] duration: 1.0712347030639648\n",
"2023-06-26 18:47:19 INFO [auto_gptq.quantization.gptq] avg loss: 121.41624450683594\n",
"2023-06-26 18:47:31 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 17/60...\n",
"2023-06-26 18:47:34 INFO [auto_gptq.quantization.gptq] duration: 3.150277853012085\n",
"2023-06-26 18:47:34 INFO [auto_gptq.quantization.gptq] avg loss: 12.765552520751953\n",
"2023-06-26 18:47:40 INFO [auto_gptq.modeling._base] Start quantizing layer 18/60\n",
"2023-06-26 18:47:48 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 18/60...\n",
"2023-06-26 18:47:49 INFO [auto_gptq.quantization.gptq] duration: 0.8962030410766602\n",
"2023-06-26 18:47:49 INFO [auto_gptq.quantization.gptq] avg loss: 126.00875854492188\n",
"2023-06-26 18:47:49 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 18/60...\n",
"2023-06-26 18:47:50 INFO [auto_gptq.quantization.gptq] duration: 0.8615415096282959\n",
"2023-06-26 18:47:50 INFO [auto_gptq.quantization.gptq] avg loss: 81.93507385253906\n",
"2023-06-26 18:47:50 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 18/60...\n",
"2023-06-26 18:47:51 INFO [auto_gptq.quantization.gptq] duration: 0.8687634468078613\n",
"2023-06-26 18:47:51 INFO [auto_gptq.quantization.gptq] avg loss: 127.84761047363281\n",
"2023-06-26 18:47:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 18/60...\n",
"2023-06-26 18:47:57 INFO [auto_gptq.quantization.gptq] duration: 0.8269202709197998\n",
"2023-06-26 18:47:57 INFO [auto_gptq.quantization.gptq] avg loss: 5.458505630493164\n",
"2023-06-26 18:48:04 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 18/60...\n",
"2023-06-26 18:48:05 INFO [auto_gptq.quantization.gptq] duration: 0.9920969009399414\n",
"2023-06-26 18:48:05 INFO [auto_gptq.quantization.gptq] avg loss: 125.72673034667969\n",
"2023-06-26 18:48:05 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 18/60...\n",
"2023-06-26 18:48:06 INFO [auto_gptq.quantization.gptq] duration: 0.9499971866607666\n",
"2023-06-26 18:48:06 INFO [auto_gptq.quantization.gptq] avg loss: 126.58809661865234\n",
"2023-06-26 18:48:18 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 18/60...\n",
"2023-06-26 18:48:22 INFO [auto_gptq.quantization.gptq] duration: 3.426759719848633\n",
"2023-06-26 18:48:22 INFO [auto_gptq.quantization.gptq] avg loss: 13.693597793579102\n",
"2023-06-26 18:48:27 INFO [auto_gptq.modeling._base] Start quantizing layer 19/60\n",
"2023-06-26 18:48:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 19/60...\n",
"2023-06-26 18:48:36 INFO [auto_gptq.quantization.gptq] duration: 0.8628525733947754\n",
"2023-06-26 18:48:36 INFO [auto_gptq.quantization.gptq] avg loss: 133.16976928710938\n",
"2023-06-26 18:48:36 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 19/60...\n",
"2023-06-26 18:48:37 INFO [auto_gptq.quantization.gptq] duration: 0.8920607566833496\n",
"2023-06-26 18:48:37 INFO [auto_gptq.quantization.gptq] avg loss: 83.82490539550781\n",
"2023-06-26 18:48:37 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 19/60...\n",
"2023-06-26 18:48:38 INFO [auto_gptq.quantization.gptq] duration: 0.8800840377807617\n",
"2023-06-26 18:48:38 INFO [auto_gptq.quantization.gptq] avg loss: 134.3311004638672\n",
"2023-06-26 18:48:44 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 19/60...\n",
"2023-06-26 18:48:45 INFO [auto_gptq.quantization.gptq] duration: 0.8910653591156006\n",
"2023-06-26 18:48:45 INFO [auto_gptq.quantization.gptq] avg loss: 8.259206771850586\n",
"2023-06-26 18:48:53 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 19/60...\n",
"2023-06-26 18:48:54 INFO [auto_gptq.quantization.gptq] duration: 1.1455304622650146\n",
"2023-06-26 18:48:54 INFO [auto_gptq.quantization.gptq] avg loss: 129.4827117919922\n",
"2023-06-26 18:48:54 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 19/60...\n",
"2023-06-26 18:48:55 INFO [auto_gptq.quantization.gptq] duration: 1.0251309871673584\n",
"2023-06-26 18:48:55 INFO [auto_gptq.quantization.gptq] avg loss: 130.6291961669922\n",
"2023-06-26 18:49:07 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 19/60...\n",
"2023-06-26 18:49:10 INFO [auto_gptq.quantization.gptq] duration: 3.1485159397125244\n",
"2023-06-26 18:49:10 INFO [auto_gptq.quantization.gptq] avg loss: 16.032426834106445\n",
"2023-06-26 18:49:15 INFO [auto_gptq.modeling._base] Start quantizing layer 20/60\n",
"2023-06-26 18:49:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 20/60...\n",
"2023-06-26 18:49:24 INFO [auto_gptq.quantization.gptq] duration: 0.9035704135894775\n",
"2023-06-26 18:49:24 INFO [auto_gptq.quantization.gptq] avg loss: 127.06283569335938\n",
"2023-06-26 18:49:24 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 20/60...\n",
"2023-06-26 18:49:25 INFO [auto_gptq.quantization.gptq] duration: 0.856377363204956\n",
"2023-06-26 18:49:25 INFO [auto_gptq.quantization.gptq] avg loss: 88.74179077148438\n",
"2023-06-26 18:49:25 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 20/60...\n",
"2023-06-26 18:49:26 INFO [auto_gptq.quantization.gptq] duration: 0.8708395957946777\n",
"2023-06-26 18:49:26 INFO [auto_gptq.quantization.gptq] avg loss: 129.656982421875\n",
"2023-06-26 18:49:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 20/60...\n",
"2023-06-26 18:49:33 INFO [auto_gptq.quantization.gptq] duration: 0.8664703369140625\n",
"2023-06-26 18:49:33 INFO [auto_gptq.quantization.gptq] avg loss: 7.385319709777832\n",
"2023-06-26 18:49:40 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 20/60...\n",
"2023-06-26 18:49:41 INFO [auto_gptq.quantization.gptq] duration: 1.1212098598480225\n",
"2023-06-26 18:49:41 INFO [auto_gptq.quantization.gptq] avg loss: 136.6591339111328\n",
"2023-06-26 18:49:41 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 20/60...\n",
"2023-06-26 18:49:42 INFO [auto_gptq.quantization.gptq] duration: 1.069464921951294\n",
"2023-06-26 18:49:42 INFO [auto_gptq.quantization.gptq] avg loss: 138.58953857421875\n",
"2023-06-26 18:49:54 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 20/60...\n",
"2023-06-26 18:49:58 INFO [auto_gptq.quantization.gptq] duration: 3.2242279052734375\n",
"2023-06-26 18:49:58 INFO [auto_gptq.quantization.gptq] avg loss: 17.37506866455078\n",
"2023-06-26 18:50:03 INFO [auto_gptq.modeling._base] Start quantizing layer 21/60\n",
"2023-06-26 18:50:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 21/60...\n",
"2023-06-26 18:50:12 INFO [auto_gptq.quantization.gptq] duration: 0.8095815181732178\n",
"2023-06-26 18:50:12 INFO [auto_gptq.quantization.gptq] avg loss: 126.92865753173828\n",
"2023-06-26 18:50:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 21/60...\n",
"2023-06-26 18:50:13 INFO [auto_gptq.quantization.gptq] duration: 0.8184711933135986\n",
"2023-06-26 18:50:13 INFO [auto_gptq.quantization.gptq] avg loss: 95.93402862548828\n",
"2023-06-26 18:50:13 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 21/60...\n",
"2023-06-26 18:50:14 INFO [auto_gptq.quantization.gptq] duration: 0.8476521968841553\n",
"2023-06-26 18:50:14 INFO [auto_gptq.quantization.gptq] avg loss: 132.3957061767578\n",
"2023-06-26 18:50:20 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 21/60...\n",
"2023-06-26 18:50:21 INFO [auto_gptq.quantization.gptq] duration: 0.9167556762695312\n",
"2023-06-26 18:50:21 INFO [auto_gptq.quantization.gptq] avg loss: 7.170637130737305\n",
"2023-06-26 18:50:28 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 21/60...\n",
"2023-06-26 18:50:29 INFO [auto_gptq.quantization.gptq] duration: 1.0578546524047852\n",
"2023-06-26 18:50:29 INFO [auto_gptq.quantization.gptq] avg loss: 147.7042236328125\n",
"2023-06-26 18:50:29 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 21/60...\n",
"2023-06-26 18:50:31 INFO [auto_gptq.quantization.gptq] duration: 1.0613703727722168\n",
"2023-06-26 18:50:31 INFO [auto_gptq.quantization.gptq] avg loss: 150.50067138671875\n",
"2023-06-26 18:50:42 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 21/60...\n",
"2023-06-26 18:50:45 INFO [auto_gptq.quantization.gptq] duration: 3.031423807144165\n",
"2023-06-26 18:50:45 INFO [auto_gptq.quantization.gptq] avg loss: 18.32520294189453\n",
"2023-06-26 18:50:50 INFO [auto_gptq.modeling._base] Start quantizing layer 22/60\n",
"2023-06-26 18:50:58 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 22/60...\n",
"2023-06-26 18:50:59 INFO [auto_gptq.quantization.gptq] duration: 0.8010966777801514\n",
"2023-06-26 18:50:59 INFO [auto_gptq.quantization.gptq] avg loss: 125.7695083618164\n",
"2023-06-26 18:50:59 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 22/60...\n",
"2023-06-26 18:51:00 INFO [auto_gptq.quantization.gptq] duration: 0.7973763942718506\n",
"2023-06-26 18:51:00 INFO [auto_gptq.quantization.gptq] avg loss: 89.74046325683594\n",
"2023-06-26 18:51:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 22/60...\n",
"2023-06-26 18:51:01 INFO [auto_gptq.quantization.gptq] duration: 0.7884676456451416\n",
"2023-06-26 18:51:01 INFO [auto_gptq.quantization.gptq] avg loss: 130.23899841308594\n",
"2023-06-26 18:51:07 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 22/60...\n",
"2023-06-26 18:51:07 INFO [auto_gptq.quantization.gptq] duration: 0.8016724586486816\n",
"2023-06-26 18:51:07 INFO [auto_gptq.quantization.gptq] avg loss: 8.64238452911377\n",
"2023-06-26 18:51:14 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 22/60...\n",
"2023-06-26 18:51:15 INFO [auto_gptq.quantization.gptq] duration: 0.9903783798217773\n",
"2023-06-26 18:51:15 INFO [auto_gptq.quantization.gptq] avg loss: 151.12429809570312\n",
"2023-06-26 18:51:15 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 22/60...\n",
"2023-06-26 18:51:16 INFO [auto_gptq.quantization.gptq] duration: 0.9579942226409912\n",
"2023-06-26 18:51:16 INFO [auto_gptq.quantization.gptq] avg loss: 153.98001098632812\n",
"2023-06-26 18:51:28 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 22/60...\n",
"2023-06-26 18:51:31 INFO [auto_gptq.quantization.gptq] duration: 3.381758689880371\n",
"2023-06-26 18:51:31 INFO [auto_gptq.quantization.gptq] avg loss: 20.917919158935547\n",
"2023-06-26 18:51:37 INFO [auto_gptq.modeling._base] Start quantizing layer 23/60\n",
"2023-06-26 18:51:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 23/60...\n",
"2023-06-26 18:51:46 INFO [auto_gptq.quantization.gptq] duration: 0.8666648864746094\n",
"2023-06-26 18:51:46 INFO [auto_gptq.quantization.gptq] avg loss: 102.06298828125\n",
"2023-06-26 18:51:46 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 23/60...\n",
"2023-06-26 18:51:47 INFO [auto_gptq.quantization.gptq] duration: 0.864837646484375\n",
"2023-06-26 18:51:47 INFO [auto_gptq.quantization.gptq] avg loss: 88.53225708007812\n",
"2023-06-26 18:51:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 23/60...\n",
"2023-06-26 18:51:48 INFO [auto_gptq.quantization.gptq] duration: 0.8436150550842285\n",
"2023-06-26 18:51:48 INFO [auto_gptq.quantization.gptq] avg loss: 107.5790023803711\n",
"2023-06-26 18:51:54 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 23/60...\n",
"2023-06-26 18:51:55 INFO [auto_gptq.quantization.gptq] duration: 0.9097700119018555\n",
"2023-06-26 18:51:55 INFO [auto_gptq.quantization.gptq] avg loss: 7.461499214172363\n",
"2023-06-26 18:52:03 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 23/60...\n",
"2023-06-26 18:52:04 INFO [auto_gptq.quantization.gptq] duration: 1.0915772914886475\n",
"2023-06-26 18:52:04 INFO [auto_gptq.quantization.gptq] avg loss: 161.130615234375\n",
"2023-06-26 18:52:04 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 23/60...\n",
"2023-06-26 18:52:05 INFO [auto_gptq.quantization.gptq] duration: 1.057647466659546\n",
"2023-06-26 18:52:05 INFO [auto_gptq.quantization.gptq] avg loss: 165.51211547851562\n",
"2023-06-26 18:52:17 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 23/60...\n",
"2023-06-26 18:52:20 INFO [auto_gptq.quantization.gptq] duration: 3.038745403289795\n",
"2023-06-26 18:52:20 INFO [auto_gptq.quantization.gptq] avg loss: 23.00075340270996\n",
"2023-06-26 18:52:25 INFO [auto_gptq.modeling._base] Start quantizing layer 24/60\n",
"2023-06-26 18:52:33 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 24/60...\n",
"2023-06-26 18:52:34 INFO [auto_gptq.quantization.gptq] duration: 0.8485012054443359\n",
"2023-06-26 18:52:34 INFO [auto_gptq.quantization.gptq] avg loss: 129.95777893066406\n",
"2023-06-26 18:52:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 24/60...\n",
"2023-06-26 18:52:35 INFO [auto_gptq.quantization.gptq] duration: 0.8569326400756836\n",
"2023-06-26 18:52:35 INFO [auto_gptq.quantization.gptq] avg loss: 108.55410766601562\n",
"2023-06-26 18:52:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 24/60...\n",
"2023-06-26 18:52:36 INFO [auto_gptq.quantization.gptq] duration: 0.8365185260772705\n",
"2023-06-26 18:52:36 INFO [auto_gptq.quantization.gptq] avg loss: 136.326416015625\n",
"2023-06-26 18:52:42 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 24/60...\n",
"2023-06-26 18:52:43 INFO [auto_gptq.quantization.gptq] duration: 0.8799235820770264\n",
"2023-06-26 18:52:43 INFO [auto_gptq.quantization.gptq] avg loss: 7.996666431427002\n",
"2023-06-26 18:52:51 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 24/60...\n",
"2023-06-26 18:52:52 INFO [auto_gptq.quantization.gptq] duration: 1.098374605178833\n",
"2023-06-26 18:52:52 INFO [auto_gptq.quantization.gptq] avg loss: 169.85446166992188\n",
"2023-06-26 18:52:52 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 24/60...\n",
"2023-06-26 18:52:53 INFO [auto_gptq.quantization.gptq] duration: 1.0548110008239746\n",
"2023-06-26 18:52:53 INFO [auto_gptq.quantization.gptq] avg loss: 176.4759063720703\n",
"2023-06-26 18:53:05 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 24/60...\n",
"2023-06-26 18:53:08 INFO [auto_gptq.quantization.gptq] duration: 3.122931957244873\n",
"2023-06-26 18:53:08 INFO [auto_gptq.quantization.gptq] avg loss: 25.351951599121094\n",
"2023-06-26 18:53:13 INFO [auto_gptq.modeling._base] Start quantizing layer 25/60\n",
"2023-06-26 18:53:21 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 25/60...\n",
"2023-06-26 18:53:22 INFO [auto_gptq.quantization.gptq] duration: 0.8818497657775879\n",
"2023-06-26 18:53:22 INFO [auto_gptq.quantization.gptq] avg loss: 121.40121459960938\n",
"2023-06-26 18:53:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 25/60...\n",
"2023-06-26 18:53:23 INFO [auto_gptq.quantization.gptq] duration: 0.8518698215484619\n",
"2023-06-26 18:53:23 INFO [auto_gptq.quantization.gptq] avg loss: 105.07537841796875\n",
"2023-06-26 18:53:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 25/60...\n",
"2023-06-26 18:53:24 INFO [auto_gptq.quantization.gptq] duration: 0.8478443622589111\n",
"2023-06-26 18:53:24 INFO [auto_gptq.quantization.gptq] avg loss: 128.11102294921875\n",
"2023-06-26 18:53:30 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 25/60...\n",
"2023-06-26 18:53:31 INFO [auto_gptq.quantization.gptq] duration: 0.8888711929321289\n",
"2023-06-26 18:53:31 INFO [auto_gptq.quantization.gptq] avg loss: 9.340039253234863\n",
"2023-06-26 18:53:38 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 25/60...\n",
"2023-06-26 18:53:40 INFO [auto_gptq.quantization.gptq] duration: 1.1407098770141602\n",
"2023-06-26 18:53:40 INFO [auto_gptq.quantization.gptq] avg loss: 173.57928466796875\n",
"2023-06-26 18:53:40 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 25/60...\n",
"2023-06-26 18:53:41 INFO [auto_gptq.quantization.gptq] duration: 1.0959575176239014\n",
"2023-06-26 18:53:41 INFO [auto_gptq.quantization.gptq] avg loss: 181.16122436523438\n",
"2023-06-26 18:53:52 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 25/60...\n",
"2023-06-26 18:53:56 INFO [auto_gptq.quantization.gptq] duration: 3.258500576019287\n",
"2023-06-26 18:53:56 INFO [auto_gptq.quantization.gptq] avg loss: 28.011882781982422\n",
"2023-06-26 18:54:01 INFO [auto_gptq.modeling._base] Start quantizing layer 26/60\n",
"2023-06-26 18:54:09 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 26/60...\n",
"2023-06-26 18:54:10 INFO [auto_gptq.quantization.gptq] duration: 0.8260703086853027\n",
"2023-06-26 18:54:10 INFO [auto_gptq.quantization.gptq] avg loss: 125.8947525024414\n",
"2023-06-26 18:54:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 26/60...\n",
"2023-06-26 18:54:11 INFO [auto_gptq.quantization.gptq] duration: 0.7929630279541016\n",
"2023-06-26 18:54:11 INFO [auto_gptq.quantization.gptq] avg loss: 97.88679504394531\n",
"2023-06-26 18:54:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 26/60...\n",
"2023-06-26 18:54:12 INFO [auto_gptq.quantization.gptq] duration: 0.7765088081359863\n",
"2023-06-26 18:54:12 INFO [auto_gptq.quantization.gptq] avg loss: 130.12698364257812\n",
"2023-06-26 18:54:18 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 26/60...\n",
"2023-06-26 18:54:19 INFO [auto_gptq.quantization.gptq] duration: 0.8952882289886475\n",
"2023-06-26 18:54:19 INFO [auto_gptq.quantization.gptq] avg loss: 12.512389183044434\n",
"2023-06-26 18:54:25 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 26/60...\n",
"2023-06-26 18:54:26 INFO [auto_gptq.quantization.gptq] duration: 1.0225069522857666\n",
"2023-06-26 18:54:26 INFO [auto_gptq.quantization.gptq] avg loss: 178.24818420410156\n",
"2023-06-26 18:54:26 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 26/60...\n",
"2023-06-26 18:54:27 INFO [auto_gptq.quantization.gptq] duration: 1.1003315448760986\n",
"2023-06-26 18:54:27 INFO [auto_gptq.quantization.gptq] avg loss: 190.01304626464844\n",
"2023-06-26 18:54:40 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 26/60...\n",
"2023-06-26 18:54:43 INFO [auto_gptq.quantization.gptq] duration: 3.263535976409912\n",
"2023-06-26 18:54:43 INFO [auto_gptq.quantization.gptq] avg loss: 33.16166687011719\n",
"2023-06-26 18:54:48 INFO [auto_gptq.modeling._base] Start quantizing layer 27/60\n",
"2023-06-26 18:54:56 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 27/60...\n",
"2023-06-26 18:54:57 INFO [auto_gptq.quantization.gptq] duration: 0.7903859615325928\n",
"2023-06-26 18:54:57 INFO [auto_gptq.quantization.gptq] avg loss: 132.41000366210938\n",
"2023-06-26 18:54:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 27/60...\n",
"2023-06-26 18:54:58 INFO [auto_gptq.quantization.gptq] duration: 0.8033373355865479\n",
"2023-06-26 18:54:58 INFO [auto_gptq.quantization.gptq] avg loss: 126.73532104492188\n",
"2023-06-26 18:54:58 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 27/60...\n",
"2023-06-26 18:54:58 INFO [auto_gptq.quantization.gptq] duration: 0.7599074840545654\n",
"2023-06-26 18:54:58 INFO [auto_gptq.quantization.gptq] avg loss: 142.08071899414062\n",
"2023-06-26 18:55:04 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 27/60...\n",
"2023-06-26 18:55:05 INFO [auto_gptq.quantization.gptq] duration: 0.825977087020874\n",
"2023-06-26 18:55:05 INFO [auto_gptq.quantization.gptq] avg loss: 9.536409378051758\n",
"2023-06-26 18:55:12 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 27/60...\n",
"2023-06-26 18:55:13 INFO [auto_gptq.quantization.gptq] duration: 1.0226025581359863\n",
"2023-06-26 18:55:13 INFO [auto_gptq.quantization.gptq] avg loss: 191.2169647216797\n",
"2023-06-26 18:55:13 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 27/60...\n",
"2023-06-26 18:55:14 INFO [auto_gptq.quantization.gptq] duration: 0.9879231452941895\n",
"2023-06-26 18:55:14 INFO [auto_gptq.quantization.gptq] avg loss: 208.06155395507812\n",
"2023-06-26 18:55:26 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 27/60...\n",
"2023-06-26 18:55:30 INFO [auto_gptq.quantization.gptq] duration: 3.387193441390991\n",
"2023-06-26 18:55:30 INFO [auto_gptq.quantization.gptq] avg loss: 36.402374267578125\n",
"2023-06-26 18:55:35 INFO [auto_gptq.modeling._base] Start quantizing layer 28/60\n",
"2023-06-26 18:55:44 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 28/60...\n",
"2023-06-26 18:55:45 INFO [auto_gptq.quantization.gptq] duration: 0.8887379169464111\n",
"2023-06-26 18:55:45 INFO [auto_gptq.quantization.gptq] avg loss: 107.79998779296875\n",
"2023-06-26 18:55:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 28/60...\n",
"2023-06-26 18:55:46 INFO [auto_gptq.quantization.gptq] duration: 0.8286387920379639\n",
"2023-06-26 18:55:46 INFO [auto_gptq.quantization.gptq] avg loss: 108.58135223388672\n",
"2023-06-26 18:55:46 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 28/60...\n",
"2023-06-26 18:55:46 INFO [auto_gptq.quantization.gptq] duration: 0.7691891193389893\n",
"2023-06-26 18:55:46 INFO [auto_gptq.quantization.gptq] avg loss: 116.74995422363281\n",
"2023-06-26 18:55:52 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 28/60...\n",
"2023-06-26 18:55:53 INFO [auto_gptq.quantization.gptq] duration: 0.8373372554779053\n",
"2023-06-26 18:55:53 INFO [auto_gptq.quantization.gptq] avg loss: 6.757271766662598\n",
"2023-06-26 18:56:00 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 28/60...\n",
"2023-06-26 18:56:01 INFO [auto_gptq.quantization.gptq] duration: 0.9927432537078857\n",
"2023-06-26 18:56:01 INFO [auto_gptq.quantization.gptq] avg loss: 203.89488220214844\n",
"2023-06-26 18:56:01 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 28/60...\n",
"2023-06-26 18:56:02 INFO [auto_gptq.quantization.gptq] duration: 0.9814658164978027\n",
"2023-06-26 18:56:02 INFO [auto_gptq.quantization.gptq] avg loss: 225.85540771484375\n",
"2023-06-26 18:56:14 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 28/60...\n",
"2023-06-26 18:56:17 INFO [auto_gptq.quantization.gptq] duration: 3.293182611465454\n",
"2023-06-26 18:56:17 INFO [auto_gptq.quantization.gptq] avg loss: 38.82634353637695\n",
"2023-06-26 18:56:22 INFO [auto_gptq.modeling._base] Start quantizing layer 29/60\n",
"2023-06-26 18:56:30 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 29/60...\n",
"2023-06-26 18:56:31 INFO [auto_gptq.quantization.gptq] duration: 0.7914106845855713\n",
"2023-06-26 18:56:31 INFO [auto_gptq.quantization.gptq] avg loss: 125.37593078613281\n",
"2023-06-26 18:56:31 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 29/60...\n",
"2023-06-26 18:56:32 INFO [auto_gptq.quantization.gptq] duration: 0.7717330455780029\n",
"2023-06-26 18:56:32 INFO [auto_gptq.quantization.gptq] avg loss: 132.06216430664062\n",
"2023-06-26 18:56:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 29/60...\n",
"2023-06-26 18:56:33 INFO [auto_gptq.quantization.gptq] duration: 0.7582547664642334\n",
"2023-06-26 18:56:33 INFO [auto_gptq.quantization.gptq] avg loss: 136.78805541992188\n",
"2023-06-26 18:56:38 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 29/60...\n",
"2023-06-26 18:56:39 INFO [auto_gptq.quantization.gptq] duration: 0.79974365234375\n",
"2023-06-26 18:56:39 INFO [auto_gptq.quantization.gptq] avg loss: 7.262049198150635\n",
"2023-06-26 18:56:46 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 29/60...\n",
"2023-06-26 18:56:47 INFO [auto_gptq.quantization.gptq] duration: 0.9930026531219482\n",
"2023-06-26 18:56:47 INFO [auto_gptq.quantization.gptq] avg loss: 213.5492706298828\n",
"2023-06-26 18:56:47 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 29/60...\n",
"2023-06-26 18:56:48 INFO [auto_gptq.quantization.gptq] duration: 0.9543735980987549\n",
"2023-06-26 18:56:48 INFO [auto_gptq.quantization.gptq] avg loss: 239.37843322753906\n",
"2023-06-26 18:56:59 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 29/60...\n",
"2023-06-26 18:57:03 INFO [auto_gptq.quantization.gptq] duration: 3.1859171390533447\n",
"2023-06-26 18:57:03 INFO [auto_gptq.quantization.gptq] avg loss: 41.274070739746094\n",
"2023-06-26 18:57:08 INFO [auto_gptq.modeling._base] Start quantizing layer 30/60\n",
"2023-06-26 18:57:16 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 30/60...\n",
"2023-06-26 18:57:17 INFO [auto_gptq.quantization.gptq] duration: 0.8118512630462646\n",
"2023-06-26 18:57:17 INFO [auto_gptq.quantization.gptq] avg loss: 106.44863891601562\n",
"2023-06-26 18:57:17 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 30/60...\n",
"2023-06-26 18:57:17 INFO [auto_gptq.quantization.gptq] duration: 0.7601842880249023\n",
"2023-06-26 18:57:17 INFO [auto_gptq.quantization.gptq] avg loss: 119.94055938720703\n",
"2023-06-26 18:57:17 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 30/60...\n",
"2023-06-26 18:57:18 INFO [auto_gptq.quantization.gptq] duration: 0.8024940490722656\n",
"2023-06-26 18:57:18 INFO [auto_gptq.quantization.gptq] avg loss: 118.45585632324219\n",
"2023-06-26 18:57:24 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 30/60...\n",
"2023-06-26 18:57:25 INFO [auto_gptq.quantization.gptq] duration: 0.8384387493133545\n",
"2023-06-26 18:57:25 INFO [auto_gptq.quantization.gptq] avg loss: 8.346393585205078\n",
"2023-06-26 18:57:32 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 30/60...\n",
"2023-06-26 18:57:33 INFO [auto_gptq.quantization.gptq] duration: 1.0108578205108643\n",
"2023-06-26 18:57:33 INFO [auto_gptq.quantization.gptq] avg loss: 222.24371337890625\n",
"2023-06-26 18:57:33 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 30/60...\n",
"2023-06-26 18:57:34 INFO [auto_gptq.quantization.gptq] duration: 0.980762243270874\n",
"2023-06-26 18:57:34 INFO [auto_gptq.quantization.gptq] avg loss: 251.6155548095703\n",
"2023-06-26 18:57:45 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 30/60...\n",
"2023-06-26 18:57:48 INFO [auto_gptq.quantization.gptq] duration: 3.239154100418091\n",
"2023-06-26 18:57:48 INFO [auto_gptq.quantization.gptq] avg loss: 42.83588409423828\n",
"2023-06-26 18:57:54 INFO [auto_gptq.modeling._base] Start quantizing layer 31/60\n",
"2023-06-26 18:58:01 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 31/60...\n",
"2023-06-26 18:58:02 INFO [auto_gptq.quantization.gptq] duration: 0.8236608505249023\n",
"2023-06-26 18:58:02 INFO [auto_gptq.quantization.gptq] avg loss: 113.93006134033203\n",
"2023-06-26 18:58:02 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 31/60...\n",
"2023-06-26 18:58:03 INFO [auto_gptq.quantization.gptq] duration: 0.8111763000488281\n",
"2023-06-26 18:58:03 INFO [auto_gptq.quantization.gptq] avg loss: 124.1449203491211\n",
"2023-06-26 18:58:03 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 31/60...\n",
"2023-06-26 18:58:04 INFO [auto_gptq.quantization.gptq] duration: 0.8100242614746094\n",
"2023-06-26 18:58:04 INFO [auto_gptq.quantization.gptq] avg loss: 126.37265014648438\n",
"2023-06-26 18:58:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 31/60...\n",
"2023-06-26 18:58:11 INFO [auto_gptq.quantization.gptq] duration: 0.8795177936553955\n",
"2023-06-26 18:58:11 INFO [auto_gptq.quantization.gptq] avg loss: 8.915656089782715\n",
"2023-06-26 18:58:18 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 31/60...\n",
"2023-06-26 18:58:19 INFO [auto_gptq.quantization.gptq] duration: 1.0053420066833496\n",
"2023-06-26 18:58:19 INFO [auto_gptq.quantization.gptq] avg loss: 230.86685180664062\n",
"2023-06-26 18:58:19 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 31/60...\n",
"2023-06-26 18:58:20 INFO [auto_gptq.quantization.gptq] duration: 0.9989216327667236\n",
"2023-06-26 18:58:20 INFO [auto_gptq.quantization.gptq] avg loss: 264.6224670410156\n",
"2023-06-26 18:58:31 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 31/60...\n",
"2023-06-26 18:58:34 INFO [auto_gptq.quantization.gptq] duration: 3.239499568939209\n",
"2023-06-26 18:58:34 INFO [auto_gptq.quantization.gptq] avg loss: 45.18273162841797\n",
"2023-06-26 18:58:40 INFO [auto_gptq.modeling._base] Start quantizing layer 32/60\n",
"2023-06-26 18:58:48 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 32/60...\n",
"2023-06-26 18:58:49 INFO [auto_gptq.quantization.gptq] duration: 0.7941009998321533\n",
"2023-06-26 18:58:49 INFO [auto_gptq.quantization.gptq] avg loss: 102.84536743164062\n",
"2023-06-26 18:58:49 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 32/60...\n",
"2023-06-26 18:58:50 INFO [auto_gptq.quantization.gptq] duration: 0.7946913242340088\n",
"2023-06-26 18:58:50 INFO [auto_gptq.quantization.gptq] avg loss: 116.18577575683594\n",
"2023-06-26 18:58:50 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 32/60...\n",
"2023-06-26 18:58:50 INFO [auto_gptq.quantization.gptq] duration: 0.7658727169036865\n",
"2023-06-26 18:58:50 INFO [auto_gptq.quantization.gptq] avg loss: 115.6014633178711\n",
"2023-06-26 18:58:56 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 32/60...\n",
"2023-06-26 18:58:57 INFO [auto_gptq.quantization.gptq] duration: 0.7937936782836914\n",
"2023-06-26 18:58:57 INFO [auto_gptq.quantization.gptq] avg loss: 6.148465156555176\n",
"2023-06-26 18:59:04 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 32/60...\n",
"2023-06-26 18:59:05 INFO [auto_gptq.quantization.gptq] duration: 1.0435261726379395\n",
"2023-06-26 18:59:05 INFO [auto_gptq.quantization.gptq] avg loss: 240.48928833007812\n",
"2023-06-26 18:59:05 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 32/60...\n",
"2023-06-26 18:59:06 INFO [auto_gptq.quantization.gptq] duration: 1.0001847743988037\n",
"2023-06-26 18:59:06 INFO [auto_gptq.quantization.gptq] avg loss: 277.3734130859375\n",
"2023-06-26 18:59:18 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 32/60...\n",
"2023-06-26 18:59:21 INFO [auto_gptq.quantization.gptq] duration: 3.045942783355713\n",
"2023-06-26 18:59:21 INFO [auto_gptq.quantization.gptq] avg loss: 46.707881927490234\n",
"2023-06-26 18:59:26 INFO [auto_gptq.modeling._base] Start quantizing layer 33/60\n",
"2023-06-26 18:59:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 33/60...\n",
"2023-06-26 18:59:35 INFO [auto_gptq.quantization.gptq] duration: 0.9074945449829102\n",
"2023-06-26 18:59:35 INFO [auto_gptq.quantization.gptq] avg loss: 100.52265167236328\n",
"2023-06-26 18:59:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 33/60...\n",
"2023-06-26 18:59:36 INFO [auto_gptq.quantization.gptq] duration: 0.8408381938934326\n",
"2023-06-26 18:59:36 INFO [auto_gptq.quantization.gptq] avg loss: 117.53783416748047\n",
"2023-06-26 18:59:36 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 33/60...\n",
"2023-06-26 18:59:37 INFO [auto_gptq.quantization.gptq] duration: 0.8172168731689453\n",
"2023-06-26 18:59:37 INFO [auto_gptq.quantization.gptq] avg loss: 112.21620178222656\n",
"2023-06-26 18:59:42 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 33/60...\n",
"2023-06-26 18:59:43 INFO [auto_gptq.quantization.gptq] duration: 0.8127152919769287\n",
"2023-06-26 18:59:43 INFO [auto_gptq.quantization.gptq] avg loss: 9.685774803161621\n",
"2023-06-26 18:59:50 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 33/60...\n",
"2023-06-26 18:59:51 INFO [auto_gptq.quantization.gptq] duration: 1.0332043170928955\n",
"2023-06-26 18:59:51 INFO [auto_gptq.quantization.gptq] avg loss: 247.3164825439453\n",
"2023-06-26 18:59:51 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 33/60...\n",
"2023-06-26 18:59:52 INFO [auto_gptq.quantization.gptq] duration: 0.9602148532867432\n",
"2023-06-26 18:59:52 INFO [auto_gptq.quantization.gptq] avg loss: 286.4335632324219\n",
"2023-06-26 19:00:04 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 33/60...\n",
"2023-06-26 19:00:07 INFO [auto_gptq.quantization.gptq] duration: 3.046875\n",
"2023-06-26 19:00:07 INFO [auto_gptq.quantization.gptq] avg loss: 48.745758056640625\n",
"2023-06-26 19:00:12 INFO [auto_gptq.modeling._base] Start quantizing layer 34/60\n",
"2023-06-26 19:00:20 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 34/60...\n",
"2023-06-26 19:00:21 INFO [auto_gptq.quantization.gptq] duration: 0.902158260345459\n",
"2023-06-26 19:00:21 INFO [auto_gptq.quantization.gptq] avg loss: 128.93505859375\n",
"2023-06-26 19:00:21 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 34/60...\n",
"2023-06-26 19:00:22 INFO [auto_gptq.quantization.gptq] duration: 0.8162107467651367\n",
"2023-06-26 19:00:22 INFO [auto_gptq.quantization.gptq] avg loss: 135.19137573242188\n",
"2023-06-26 19:00:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 34/60...\n",
"2023-06-26 19:00:23 INFO [auto_gptq.quantization.gptq] duration: 0.7855491638183594\n",
"2023-06-26 19:00:23 INFO [auto_gptq.quantization.gptq] avg loss: 139.91702270507812\n",
"2023-06-26 19:00:29 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 34/60...\n",
"2023-06-26 19:00:30 INFO [auto_gptq.quantization.gptq] duration: 0.8762063980102539\n",
"2023-06-26 19:00:30 INFO [auto_gptq.quantization.gptq] avg loss: 10.718202590942383\n",
"2023-06-26 19:00:37 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 34/60...\n",
"2023-06-26 19:00:38 INFO [auto_gptq.quantization.gptq] duration: 1.1128206253051758\n",
"2023-06-26 19:00:38 INFO [auto_gptq.quantization.gptq] avg loss: 253.95745849609375\n",
"2023-06-26 19:00:38 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 34/60...\n",
"2023-06-26 19:00:39 INFO [auto_gptq.quantization.gptq] duration: 1.0621719360351562\n",
"2023-06-26 19:00:39 INFO [auto_gptq.quantization.gptq] avg loss: 296.1127624511719\n",
"2023-06-26 19:00:51 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 34/60...\n",
"2023-06-26 19:00:54 INFO [auto_gptq.quantization.gptq] duration: 3.0331196784973145\n",
"2023-06-26 19:00:54 INFO [auto_gptq.quantization.gptq] avg loss: 51.56779098510742\n",
"2023-06-26 19:01:00 INFO [auto_gptq.modeling._base] Start quantizing layer 35/60\n",
"2023-06-26 19:01:08 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 35/60...\n",
"2023-06-26 19:01:09 INFO [auto_gptq.quantization.gptq] duration: 0.8780801296234131\n",
"2023-06-26 19:01:09 INFO [auto_gptq.quantization.gptq] avg loss: 125.47807312011719\n",
"2023-06-26 19:01:09 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 35/60...\n",
"2023-06-26 19:01:10 INFO [auto_gptq.quantization.gptq] duration: 0.840322732925415\n",
"2023-06-26 19:01:10 INFO [auto_gptq.quantization.gptq] avg loss: 141.07540893554688\n",
"2023-06-26 19:01:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 35/60...\n",
"2023-06-26 19:01:11 INFO [auto_gptq.quantization.gptq] duration: 0.8557493686676025\n",
"2023-06-26 19:01:11 INFO [auto_gptq.quantization.gptq] avg loss: 138.64340209960938\n",
"2023-06-26 19:01:17 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 35/60...\n",
"2023-06-26 19:01:18 INFO [auto_gptq.quantization.gptq] duration: 0.8742625713348389\n",
"2023-06-26 19:01:18 INFO [auto_gptq.quantization.gptq] avg loss: 7.823941230773926\n",
"2023-06-26 19:01:25 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 35/60...\n",
"2023-06-26 19:01:26 INFO [auto_gptq.quantization.gptq] duration: 1.0989618301391602\n",
"2023-06-26 19:01:26 INFO [auto_gptq.quantization.gptq] avg loss: 263.9283447265625\n",
"2023-06-26 19:01:26 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 35/60...\n",
"2023-06-26 19:01:27 INFO [auto_gptq.quantization.gptq] duration: 0.9925136566162109\n",
"2023-06-26 19:01:27 INFO [auto_gptq.quantization.gptq] avg loss: 308.7467956542969\n",
"2023-06-26 19:01:39 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 35/60...\n",
"2023-06-26 19:01:42 INFO [auto_gptq.quantization.gptq] duration: 3.3430066108703613\n",
"2023-06-26 19:01:42 INFO [auto_gptq.quantization.gptq] avg loss: 52.91896057128906\n",
"2023-06-26 19:01:48 INFO [auto_gptq.modeling._base] Start quantizing layer 36/60\n",
"2023-06-26 19:01:56 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 36/60...\n",
"2023-06-26 19:01:57 INFO [auto_gptq.quantization.gptq] duration: 0.8155972957611084\n",
"2023-06-26 19:01:57 INFO [auto_gptq.quantization.gptq] avg loss: 116.38986206054688\n",
"2023-06-26 19:01:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 36/60...\n",
"2023-06-26 19:01:58 INFO [auto_gptq.quantization.gptq] duration: 0.8470666408538818\n",
"2023-06-26 19:01:58 INFO [auto_gptq.quantization.gptq] avg loss: 133.25381469726562\n",
"2023-06-26 19:01:58 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 36/60...\n",
"2023-06-26 19:01:59 INFO [auto_gptq.quantization.gptq] duration: 0.8164200782775879\n",
"2023-06-26 19:01:59 INFO [auto_gptq.quantization.gptq] avg loss: 129.2470245361328\n",
"2023-06-26 19:02:05 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 36/60...\n",
"2023-06-26 19:02:05 INFO [auto_gptq.quantization.gptq] duration: 0.8781113624572754\n",
"2023-06-26 19:02:05 INFO [auto_gptq.quantization.gptq] avg loss: 8.471790313720703\n",
"2023-06-26 19:02:13 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 36/60...\n",
"2023-06-26 19:02:14 INFO [auto_gptq.quantization.gptq] duration: 1.0064783096313477\n",
"2023-06-26 19:02:14 INFO [auto_gptq.quantization.gptq] avg loss: 269.8959045410156\n",
"2023-06-26 19:02:14 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 36/60...\n",
"2023-06-26 19:02:15 INFO [auto_gptq.quantization.gptq] duration: 0.9609713554382324\n",
"2023-06-26 19:02:15 INFO [auto_gptq.quantization.gptq] avg loss: 318.770751953125\n",
"2023-06-26 19:02:27 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 36/60...\n",
"2023-06-26 19:02:30 INFO [auto_gptq.quantization.gptq] duration: 3.3487536907196045\n",
"2023-06-26 19:02:30 INFO [auto_gptq.quantization.gptq] avg loss: 56.503746032714844\n",
"2023-06-26 19:02:36 INFO [auto_gptq.modeling._base] Start quantizing layer 37/60\n",
"2023-06-26 19:02:43 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 37/60...\n",
"2023-06-26 19:02:44 INFO [auto_gptq.quantization.gptq] duration: 0.8249001502990723\n",
"2023-06-26 19:02:44 INFO [auto_gptq.quantization.gptq] avg loss: 129.81163024902344\n",
"2023-06-26 19:02:44 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 37/60...\n",
"2023-06-26 19:02:45 INFO [auto_gptq.quantization.gptq] duration: 0.7988829612731934\n",
"2023-06-26 19:02:45 INFO [auto_gptq.quantization.gptq] avg loss: 151.71546936035156\n",
"2023-06-26 19:02:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 37/60...\n",
"2023-06-26 19:02:46 INFO [auto_gptq.quantization.gptq] duration: 0.7914750576019287\n",
"2023-06-26 19:02:46 INFO [auto_gptq.quantization.gptq] avg loss: 143.4711456298828\n",
"2023-06-26 19:02:52 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 37/60...\n",
"2023-06-26 19:02:53 INFO [auto_gptq.quantization.gptq] duration: 0.7918145656585693\n",
"2023-06-26 19:02:53 INFO [auto_gptq.quantization.gptq] avg loss: 9.912076950073242\n",
"2023-06-26 19:02:59 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 37/60...\n",
"2023-06-26 19:03:00 INFO [auto_gptq.quantization.gptq] duration: 0.9902970790863037\n",
"2023-06-26 19:03:00 INFO [auto_gptq.quantization.gptq] avg loss: 277.80120849609375\n",
"2023-06-26 19:03:00 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 37/60...\n",
"2023-06-26 19:03:01 INFO [auto_gptq.quantization.gptq] duration: 0.954108715057373\n",
"2023-06-26 19:03:01 INFO [auto_gptq.quantization.gptq] avg loss: 329.1649169921875\n",
"2023-06-26 19:03:13 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 37/60...\n",
"2023-06-26 19:03:17 INFO [auto_gptq.quantization.gptq] duration: 3.3314731121063232\n",
"2023-06-26 19:03:17 INFO [auto_gptq.quantization.gptq] avg loss: 57.83793258666992\n",
"2023-06-26 19:03:22 INFO [auto_gptq.modeling._base] Start quantizing layer 38/60\n",
"2023-06-26 19:03:31 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 38/60...\n",
"2023-06-26 19:03:32 INFO [auto_gptq.quantization.gptq] duration: 0.8326842784881592\n",
"2023-06-26 19:03:32 INFO [auto_gptq.quantization.gptq] avg loss: 116.96524047851562\n",
"2023-06-26 19:03:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 38/60...\n",
"2023-06-26 19:03:32 INFO [auto_gptq.quantization.gptq] duration: 0.804539680480957\n",
"2023-06-26 19:03:32 INFO [auto_gptq.quantization.gptq] avg loss: 143.6186981201172\n",
"2023-06-26 19:03:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 38/60...\n",
"2023-06-26 19:03:33 INFO [auto_gptq.quantization.gptq] duration: 0.8216087818145752\n",
"2023-06-26 19:03:33 INFO [auto_gptq.quantization.gptq] avg loss: 131.62216186523438\n",
"2023-06-26 19:03:40 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 38/60...\n",
"2023-06-26 19:03:40 INFO [auto_gptq.quantization.gptq] duration: 0.8850359916687012\n",
"2023-06-26 19:03:40 INFO [auto_gptq.quantization.gptq] avg loss: 8.164924621582031\n",
"2023-06-26 19:03:47 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 38/60...\n",
"2023-06-26 19:03:49 INFO [auto_gptq.quantization.gptq] duration: 1.0445630550384521\n",
"2023-06-26 19:03:49 INFO [auto_gptq.quantization.gptq] avg loss: 286.7233581542969\n",
"2023-06-26 19:03:49 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 38/60...\n",
"2023-06-26 19:03:50 INFO [auto_gptq.quantization.gptq] duration: 1.0699622631072998\n",
"2023-06-26 19:03:50 INFO [auto_gptq.quantization.gptq] avg loss: 338.72979736328125\n",
"2023-06-26 19:04:02 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 38/60...\n",
"2023-06-26 19:04:05 INFO [auto_gptq.quantization.gptq] duration: 3.233502149581909\n",
"2023-06-26 19:04:05 INFO [auto_gptq.quantization.gptq] avg loss: 59.07769012451172\n",
"2023-06-26 19:04:10 INFO [auto_gptq.modeling._base] Start quantizing layer 39/60\n",
"2023-06-26 19:04:19 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 39/60...\n",
"2023-06-26 19:04:20 INFO [auto_gptq.quantization.gptq] duration: 0.8899762630462646\n",
"2023-06-26 19:04:20 INFO [auto_gptq.quantization.gptq] avg loss: 110.57682800292969\n",
"2023-06-26 19:04:20 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 39/60...\n",
"2023-06-26 19:04:21 INFO [auto_gptq.quantization.gptq] duration: 0.8628089427947998\n",
"2023-06-26 19:04:21 INFO [auto_gptq.quantization.gptq] avg loss: 150.46737670898438\n",
"2023-06-26 19:04:21 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 39/60...\n",
"2023-06-26 19:04:22 INFO [auto_gptq.quantization.gptq] duration: 0.8501040935516357\n",
"2023-06-26 19:04:22 INFO [auto_gptq.quantization.gptq] avg loss: 125.95295715332031\n",
"2023-06-26 19:04:28 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 39/60...\n",
"2023-06-26 19:04:28 INFO [auto_gptq.quantization.gptq] duration: 0.869788646697998\n",
"2023-06-26 19:04:28 INFO [auto_gptq.quantization.gptq] avg loss: 7.9990458488464355\n",
"2023-06-26 19:04:36 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 39/60...\n",
"2023-06-26 19:04:37 INFO [auto_gptq.quantization.gptq] duration: 1.0505897998809814\n",
"2023-06-26 19:04:37 INFO [auto_gptq.quantization.gptq] avg loss: 296.5372314453125\n",
"2023-06-26 19:04:37 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 39/60...\n",
"2023-06-26 19:04:38 INFO [auto_gptq.quantization.gptq] duration: 0.9888980388641357\n",
"2023-06-26 19:04:38 INFO [auto_gptq.quantization.gptq] avg loss: 350.091552734375\n",
"2023-06-26 19:04:50 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 39/60...\n",
"2023-06-26 19:04:53 INFO [auto_gptq.quantization.gptq] duration: 3.2650582790374756\n",
"2023-06-26 19:04:53 INFO [auto_gptq.quantization.gptq] avg loss: 60.93070983886719\n",
"2023-06-26 19:04:59 INFO [auto_gptq.modeling._base] Start quantizing layer 40/60\n",
"2023-06-26 19:05:07 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 40/60...\n",
"2023-06-26 19:05:08 INFO [auto_gptq.quantization.gptq] duration: 0.880157470703125\n",
"2023-06-26 19:05:08 INFO [auto_gptq.quantization.gptq] avg loss: 109.099853515625\n",
"2023-06-26 19:05:08 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 40/60...\n",
"2023-06-26 19:05:09 INFO [auto_gptq.quantization.gptq] duration: 0.8684589862823486\n",
"2023-06-26 19:05:09 INFO [auto_gptq.quantization.gptq] avg loss: 143.61087036132812\n",
"2023-06-26 19:05:09 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 40/60...\n",
"2023-06-26 19:05:09 INFO [auto_gptq.quantization.gptq] duration: 0.8829076290130615\n",
"2023-06-26 19:05:09 INFO [auto_gptq.quantization.gptq] avg loss: 124.00684356689453\n",
"2023-06-26 19:05:16 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 40/60...\n",
"2023-06-26 19:05:17 INFO [auto_gptq.quantization.gptq] duration: 0.8577485084533691\n",
"2023-06-26 19:05:17 INFO [auto_gptq.quantization.gptq] avg loss: 5.940661907196045\n",
"2023-06-26 19:05:24 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 40/60...\n",
"2023-06-26 19:05:25 INFO [auto_gptq.quantization.gptq] duration: 1.138828992843628\n",
"2023-06-26 19:05:25 INFO [auto_gptq.quantization.gptq] avg loss: 306.11199951171875\n",
"2023-06-26 19:05:25 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 40/60...\n",
"2023-06-26 19:05:26 INFO [auto_gptq.quantization.gptq] duration: 1.0934555530548096\n",
"2023-06-26 19:05:26 INFO [auto_gptq.quantization.gptq] avg loss: 360.33551025390625\n",
"2023-06-26 19:05:39 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 40/60...\n",
"2023-06-26 19:05:42 INFO [auto_gptq.quantization.gptq] duration: 3.345963954925537\n",
"2023-06-26 19:05:42 INFO [auto_gptq.quantization.gptq] avg loss: 62.54813766479492\n",
"2023-06-26 19:05:48 INFO [auto_gptq.modeling._base] Start quantizing layer 41/60\n",
"2023-06-26 19:05:56 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 41/60...\n",
"2023-06-26 19:05:57 INFO [auto_gptq.quantization.gptq] duration: 0.86240553855896\n",
"2023-06-26 19:05:57 INFO [auto_gptq.quantization.gptq] avg loss: 115.49275970458984\n",
"2023-06-26 19:05:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 41/60...\n",
"2023-06-26 19:05:57 INFO [auto_gptq.quantization.gptq] duration: 0.828516960144043\n",
"2023-06-26 19:05:57 INFO [auto_gptq.quantization.gptq] avg loss: 156.80584716796875\n",
"2023-06-26 19:05:57 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 41/60...\n",
"2023-06-26 19:05:58 INFO [auto_gptq.quantization.gptq] duration: 0.8238203525543213\n",
"2023-06-26 19:05:58 INFO [auto_gptq.quantization.gptq] avg loss: 133.60838317871094\n",
"2023-06-26 19:06:04 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 41/60...\n",
"2023-06-26 19:06:05 INFO [auto_gptq.quantization.gptq] duration: 0.7886037826538086\n",
"2023-06-26 19:06:05 INFO [auto_gptq.quantization.gptq] avg loss: 8.38182258605957\n",
"2023-06-26 19:06:12 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 41/60...\n",
"2023-06-26 19:06:13 INFO [auto_gptq.quantization.gptq] duration: 1.0482730865478516\n",
"2023-06-26 19:06:13 INFO [auto_gptq.quantization.gptq] avg loss: 315.52325439453125\n",
"2023-06-26 19:06:13 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 41/60...\n",
"2023-06-26 19:06:14 INFO [auto_gptq.quantization.gptq] duration: 1.0288128852844238\n",
"2023-06-26 19:06:14 INFO [auto_gptq.quantization.gptq] avg loss: 368.9722900390625\n",
"2023-06-26 19:06:26 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 41/60...\n",
"2023-06-26 19:06:30 INFO [auto_gptq.quantization.gptq] duration: 3.3715531826019287\n",
"2023-06-26 19:06:30 INFO [auto_gptq.quantization.gptq] avg loss: 63.09906005859375\n",
"2023-06-26 19:06:35 INFO [auto_gptq.modeling._base] Start quantizing layer 42/60\n",
"2023-06-26 19:06:44 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 42/60...\n",
"2023-06-26 19:06:45 INFO [auto_gptq.quantization.gptq] duration: 0.8984777927398682\n",
"2023-06-26 19:06:45 INFO [auto_gptq.quantization.gptq] avg loss: 101.10447692871094\n",
"2023-06-26 19:06:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 42/60...\n",
"2023-06-26 19:06:45 INFO [auto_gptq.quantization.gptq] duration: 0.8638644218444824\n",
"2023-06-26 19:06:45 INFO [auto_gptq.quantization.gptq] avg loss: 141.1641082763672\n",
"2023-06-26 19:06:45 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 42/60...\n",
"2023-06-26 19:06:46 INFO [auto_gptq.quantization.gptq] duration: 0.8690340518951416\n",
"2023-06-26 19:06:46 INFO [auto_gptq.quantization.gptq] avg loss: 116.75743103027344\n",
"2023-06-26 19:06:53 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 42/60...\n",
"2023-06-26 19:06:54 INFO [auto_gptq.quantization.gptq] duration: 0.8782382011413574\n",
"2023-06-26 19:06:54 INFO [auto_gptq.quantization.gptq] avg loss: 6.863119602203369\n",
"2023-06-26 19:07:01 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 42/60...\n",
"2023-06-26 19:07:02 INFO [auto_gptq.quantization.gptq] duration: 1.0940203666687012\n",
"2023-06-26 19:07:02 INFO [auto_gptq.quantization.gptq] avg loss: 323.49627685546875\n",
"2023-06-26 19:07:02 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 42/60...\n",
"2023-06-26 19:07:03 INFO [auto_gptq.quantization.gptq] duration: 1.0579540729522705\n",
"2023-06-26 19:07:03 INFO [auto_gptq.quantization.gptq] avg loss: 377.34283447265625\n",
"2023-06-26 19:07:15 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 42/60...\n",
"2023-06-26 19:07:18 INFO [auto_gptq.quantization.gptq] duration: 3.340921401977539\n",
"2023-06-26 19:07:18 INFO [auto_gptq.quantization.gptq] avg loss: 64.95208740234375\n",
"2023-06-26 19:07:24 INFO [auto_gptq.modeling._base] Start quantizing layer 43/60\n",
"2023-06-26 19:07:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 43/60...\n",
"2023-06-26 19:07:33 INFO [auto_gptq.quantization.gptq] duration: 0.8694005012512207\n",
"2023-06-26 19:07:33 INFO [auto_gptq.quantization.gptq] avg loss: 90.90045928955078\n",
"2023-06-26 19:07:33 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 43/60...\n",
"2023-06-26 19:07:34 INFO [auto_gptq.quantization.gptq] duration: 0.8390889167785645\n",
"2023-06-26 19:07:34 INFO [auto_gptq.quantization.gptq] avg loss: 128.86856079101562\n",
"2023-06-26 19:07:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 43/60...\n",
"2023-06-26 19:07:35 INFO [auto_gptq.quantization.gptq] duration: 0.8536701202392578\n",
"2023-06-26 19:07:35 INFO [auto_gptq.quantization.gptq] avg loss: 102.96995544433594\n",
"2023-06-26 19:07:41 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 43/60...\n",
"2023-06-26 19:07:42 INFO [auto_gptq.quantization.gptq] duration: 0.8843672275543213\n",
"2023-06-26 19:07:42 INFO [auto_gptq.quantization.gptq] avg loss: 7.065851211547852\n",
"2023-06-26 19:07:49 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 43/60...\n",
"2023-06-26 19:07:50 INFO [auto_gptq.quantization.gptq] duration: 1.1005828380584717\n",
"2023-06-26 19:07:50 INFO [auto_gptq.quantization.gptq] avg loss: 334.3738098144531\n",
"2023-06-26 19:07:50 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 43/60...\n",
"2023-06-26 19:07:51 INFO [auto_gptq.quantization.gptq] duration: 1.062932014465332\n",
"2023-06-26 19:07:51 INFO [auto_gptq.quantization.gptq] avg loss: 388.8782958984375\n",
"2023-06-26 19:08:04 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 43/60...\n",
"2023-06-26 19:08:07 INFO [auto_gptq.quantization.gptq] duration: 3.34733247756958\n",
"2023-06-26 19:08:07 INFO [auto_gptq.quantization.gptq] avg loss: 66.80487060546875\n",
"2023-06-26 19:08:13 INFO [auto_gptq.modeling._base] Start quantizing layer 44/60\n",
"2023-06-26 19:08:21 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 44/60...\n",
"2023-06-26 19:08:22 INFO [auto_gptq.quantization.gptq] duration: 0.874225378036499\n",
"2023-06-26 19:08:22 INFO [auto_gptq.quantization.gptq] avg loss: 121.41039276123047\n",
"2023-06-26 19:08:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 44/60...\n",
"2023-06-26 19:08:23 INFO [auto_gptq.quantization.gptq] duration: 0.8524196147918701\n",
"2023-06-26 19:08:23 INFO [auto_gptq.quantization.gptq] avg loss: 160.3300018310547\n",
"2023-06-26 19:08:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 44/60...\n",
"2023-06-26 19:08:24 INFO [auto_gptq.quantization.gptq] duration: 0.8621621131896973\n",
"2023-06-26 19:08:24 INFO [auto_gptq.quantization.gptq] avg loss: 138.993408203125\n",
"2023-06-26 19:08:30 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 44/60...\n",
"2023-06-26 19:08:31 INFO [auto_gptq.quantization.gptq] duration: 0.8975052833557129\n",
"2023-06-26 19:08:31 INFO [auto_gptq.quantization.gptq] avg loss: 10.9855318069458\n",
"2023-06-26 19:08:38 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 44/60...\n",
"2023-06-26 19:08:40 INFO [auto_gptq.quantization.gptq] duration: 1.1072988510131836\n",
"2023-06-26 19:08:40 INFO [auto_gptq.quantization.gptq] avg loss: 345.7747802734375\n",
"2023-06-26 19:08:40 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 44/60...\n",
"2023-06-26 19:08:41 INFO [auto_gptq.quantization.gptq] duration: 1.0454034805297852\n",
"2023-06-26 19:08:41 INFO [auto_gptq.quantization.gptq] avg loss: 401.86346435546875\n",
"2023-06-26 19:08:53 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 44/60...\n",
"2023-06-26 19:08:56 INFO [auto_gptq.quantization.gptq] duration: 3.3339309692382812\n",
"2023-06-26 19:08:56 INFO [auto_gptq.quantization.gptq] avg loss: 69.18773651123047\n",
"2023-06-26 19:09:02 INFO [auto_gptq.modeling._base] Start quantizing layer 45/60\n",
"2023-06-26 19:09:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 45/60...\n",
"2023-06-26 19:09:12 INFO [auto_gptq.quantization.gptq] duration: 0.882659912109375\n",
"2023-06-26 19:09:12 INFO [auto_gptq.quantization.gptq] avg loss: 104.83455657958984\n",
"2023-06-26 19:09:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 45/60...\n",
"2023-06-26 19:09:12 INFO [auto_gptq.quantization.gptq] duration: 0.8536560535430908\n",
"2023-06-26 19:09:12 INFO [auto_gptq.quantization.gptq] avg loss: 143.9286651611328\n",
"2023-06-26 19:09:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 45/60...\n",
"2023-06-26 19:09:13 INFO [auto_gptq.quantization.gptq] duration: 0.8694431781768799\n",
"2023-06-26 19:09:13 INFO [auto_gptq.quantization.gptq] avg loss: 120.81422424316406\n",
"2023-06-26 19:09:20 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 45/60...\n",
"2023-06-26 19:09:21 INFO [auto_gptq.quantization.gptq] duration: 0.8992917537689209\n",
"2023-06-26 19:09:21 INFO [auto_gptq.quantization.gptq] avg loss: 9.62309455871582\n",
"2023-06-26 19:09:28 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 45/60...\n",
"2023-06-26 19:09:29 INFO [auto_gptq.quantization.gptq] duration: 1.1023387908935547\n",
"2023-06-26 19:09:29 INFO [auto_gptq.quantization.gptq] avg loss: 354.5903015136719\n",
"2023-06-26 19:09:29 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 45/60...\n",
"2023-06-26 19:09:30 INFO [auto_gptq.quantization.gptq] duration: 1.051722764968872\n",
"2023-06-26 19:09:30 INFO [auto_gptq.quantization.gptq] avg loss: 409.78070068359375\n",
"2023-06-26 19:09:42 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 45/60...\n",
"2023-06-26 19:09:45 INFO [auto_gptq.quantization.gptq] duration: 3.3999433517456055\n",
"2023-06-26 19:09:45 INFO [auto_gptq.quantization.gptq] avg loss: 71.1861343383789\n",
"2023-06-26 19:09:51 INFO [auto_gptq.modeling._base] Start quantizing layer 46/60\n",
"2023-06-26 19:09:59 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 46/60...\n",
"2023-06-26 19:09:59 INFO [auto_gptq.quantization.gptq] duration: 0.7852199077606201\n",
"2023-06-26 19:09:59 INFO [auto_gptq.quantization.gptq] avg loss: 109.76959228515625\n",
"2023-06-26 19:10:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 46/60...\n",
"2023-06-26 19:10:00 INFO [auto_gptq.quantization.gptq] duration: 0.7684922218322754\n",
"2023-06-26 19:10:00 INFO [auto_gptq.quantization.gptq] avg loss: 150.2676239013672\n",
"2023-06-26 19:10:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 46/60...\n",
"2023-06-26 19:10:01 INFO [auto_gptq.quantization.gptq] duration: 0.7671937942504883\n",
"2023-06-26 19:10:01 INFO [auto_gptq.quantization.gptq] avg loss: 124.19405364990234\n",
"2023-06-26 19:10:07 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 46/60...\n",
"2023-06-26 19:10:08 INFO [auto_gptq.quantization.gptq] duration: 0.8404891490936279\n",
"2023-06-26 19:10:08 INFO [auto_gptq.quantization.gptq] avg loss: 9.083423614501953\n",
"2023-06-26 19:10:15 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 46/60...\n",
"2023-06-26 19:10:16 INFO [auto_gptq.quantization.gptq] duration: 1.0024285316467285\n",
"2023-06-26 19:10:16 INFO [auto_gptq.quantization.gptq] avg loss: 361.96429443359375\n",
"2023-06-26 19:10:16 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 46/60...\n",
"2023-06-26 19:10:17 INFO [auto_gptq.quantization.gptq] duration: 0.9612009525299072\n",
"2023-06-26 19:10:17 INFO [auto_gptq.quantization.gptq] avg loss: 416.73681640625\n",
"2023-06-26 19:10:29 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 46/60...\n",
"2023-06-26 19:10:32 INFO [auto_gptq.quantization.gptq] duration: 3.087716579437256\n",
"2023-06-26 19:10:32 INFO [auto_gptq.quantization.gptq] avg loss: 71.58656311035156\n",
"2023-06-26 19:10:37 INFO [auto_gptq.modeling._base] Start quantizing layer 47/60\n",
"2023-06-26 19:10:46 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 47/60...\n",
"2023-06-26 19:10:46 INFO [auto_gptq.quantization.gptq] duration: 0.8905768394470215\n",
"2023-06-26 19:10:46 INFO [auto_gptq.quantization.gptq] avg loss: 88.91883850097656\n",
"2023-06-26 19:10:46 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 47/60...\n",
"2023-06-26 19:10:47 INFO [auto_gptq.quantization.gptq] duration: 0.8527712821960449\n",
"2023-06-26 19:10:47 INFO [auto_gptq.quantization.gptq] avg loss: 125.84911346435547\n",
"2023-06-26 19:10:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 47/60...\n",
"2023-06-26 19:10:48 INFO [auto_gptq.quantization.gptq] duration: 0.8551654815673828\n",
"2023-06-26 19:10:48 INFO [auto_gptq.quantization.gptq] avg loss: 108.26112365722656\n",
"2023-06-26 19:10:55 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 47/60...\n",
"2023-06-26 19:10:55 INFO [auto_gptq.quantization.gptq] duration: 0.8127129077911377\n",
"2023-06-26 19:10:55 INFO [auto_gptq.quantization.gptq] avg loss: 6.7243194580078125\n",
"2023-06-26 19:11:02 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 47/60...\n",
"2023-06-26 19:11:03 INFO [auto_gptq.quantization.gptq] duration: 1.1134724617004395\n",
"2023-06-26 19:11:03 INFO [auto_gptq.quantization.gptq] avg loss: 369.46875\n",
"2023-06-26 19:11:04 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 47/60...\n",
"2023-06-26 19:11:05 INFO [auto_gptq.quantization.gptq] duration: 1.0565602779388428\n",
"2023-06-26 19:11:05 INFO [auto_gptq.quantization.gptq] avg loss: 424.93450927734375\n",
"2023-06-26 19:11:16 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 47/60...\n",
"2023-06-26 19:11:20 INFO [auto_gptq.quantization.gptq] duration: 3.198499917984009\n",
"2023-06-26 19:11:20 INFO [auto_gptq.quantization.gptq] avg loss: 72.35057067871094\n",
"2023-06-26 19:11:25 INFO [auto_gptq.modeling._base] Start quantizing layer 48/60\n",
"2023-06-26 19:11:33 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 48/60...\n",
"2023-06-26 19:11:34 INFO [auto_gptq.quantization.gptq] duration: 0.7907023429870605\n",
"2023-06-26 19:11:34 INFO [auto_gptq.quantization.gptq] avg loss: 96.09598541259766\n",
"2023-06-26 19:11:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 48/60...\n",
"2023-06-26 19:11:35 INFO [auto_gptq.quantization.gptq] duration: 0.7911534309387207\n",
"2023-06-26 19:11:35 INFO [auto_gptq.quantization.gptq] avg loss: 141.85467529296875\n",
"2023-06-26 19:11:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 48/60...\n",
"2023-06-26 19:11:35 INFO [auto_gptq.quantization.gptq] duration: 0.7958567142486572\n",
"2023-06-26 19:11:35 INFO [auto_gptq.quantization.gptq] avg loss: 111.97084045410156\n",
"2023-06-26 19:11:41 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 48/60...\n",
"2023-06-26 19:11:42 INFO [auto_gptq.quantization.gptq] duration: 0.8895368576049805\n",
"2023-06-26 19:11:42 INFO [auto_gptq.quantization.gptq] avg loss: 9.014059066772461\n",
"2023-06-26 19:11:50 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 48/60...\n",
"2023-06-26 19:11:51 INFO [auto_gptq.quantization.gptq] duration: 1.1031289100646973\n",
"2023-06-26 19:11:51 INFO [auto_gptq.quantization.gptq] avg loss: 379.0202331542969\n",
"2023-06-26 19:11:51 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 48/60...\n",
"2023-06-26 19:11:52 INFO [auto_gptq.quantization.gptq] duration: 1.0228877067565918\n",
"2023-06-26 19:11:52 INFO [auto_gptq.quantization.gptq] avg loss: 434.7698974609375\n",
"2023-06-26 19:12:04 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 48/60...\n",
"2023-06-26 19:12:07 INFO [auto_gptq.quantization.gptq] duration: 3.253666639328003\n",
"2023-06-26 19:12:07 INFO [auto_gptq.quantization.gptq] avg loss: 74.15811157226562\n",
"2023-06-26 19:12:13 INFO [auto_gptq.modeling._base] Start quantizing layer 49/60\n",
"2023-06-26 19:12:21 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 49/60...\n",
"2023-06-26 19:12:22 INFO [auto_gptq.quantization.gptq] duration: 0.8571200370788574\n",
"2023-06-26 19:12:22 INFO [auto_gptq.quantization.gptq] avg loss: 101.3019790649414\n",
"2023-06-26 19:12:22 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 49/60...\n",
"2023-06-26 19:12:23 INFO [auto_gptq.quantization.gptq] duration: 0.8744349479675293\n",
"2023-06-26 19:12:23 INFO [auto_gptq.quantization.gptq] avg loss: 139.41696166992188\n",
"2023-06-26 19:12:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 49/60...\n",
"2023-06-26 19:12:23 INFO [auto_gptq.quantization.gptq] duration: 0.8737223148345947\n",
"2023-06-26 19:12:23 INFO [auto_gptq.quantization.gptq] avg loss: 114.99104309082031\n",
"2023-06-26 19:12:30 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 49/60...\n",
"2023-06-26 19:12:31 INFO [auto_gptq.quantization.gptq] duration: 0.8957891464233398\n",
"2023-06-26 19:12:31 INFO [auto_gptq.quantization.gptq] avg loss: 6.871525287628174\n",
"2023-06-26 19:12:38 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 49/60...\n",
"2023-06-26 19:12:39 INFO [auto_gptq.quantization.gptq] duration: 1.102329969406128\n",
"2023-06-26 19:12:39 INFO [auto_gptq.quantization.gptq] avg loss: 389.14715576171875\n",
"2023-06-26 19:12:39 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 49/60...\n",
"2023-06-26 19:12:40 INFO [auto_gptq.quantization.gptq] duration: 1.0604302883148193\n",
"2023-06-26 19:12:40 INFO [auto_gptq.quantization.gptq] avg loss: 442.780029296875\n",
"2023-06-26 19:12:53 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 49/60...\n",
"2023-06-26 19:12:56 INFO [auto_gptq.quantization.gptq] duration: 3.3560800552368164\n",
"2023-06-26 19:12:56 INFO [auto_gptq.quantization.gptq] avg loss: 74.9392318725586\n",
"2023-06-26 19:13:02 INFO [auto_gptq.modeling._base] Start quantizing layer 50/60\n",
"2023-06-26 19:13:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 50/60...\n",
"2023-06-26 19:13:11 INFO [auto_gptq.quantization.gptq] duration: 0.872051477432251\n",
"2023-06-26 19:13:11 INFO [auto_gptq.quantization.gptq] avg loss: 127.75462341308594\n",
"2023-06-26 19:13:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 50/60...\n",
"2023-06-26 19:13:12 INFO [auto_gptq.quantization.gptq] duration: 0.8440308570861816\n",
"2023-06-26 19:13:12 INFO [auto_gptq.quantization.gptq] avg loss: 185.817626953125\n",
"2023-06-26 19:13:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 50/60...\n",
"2023-06-26 19:13:13 INFO [auto_gptq.quantization.gptq] duration: 0.8486764430999756\n",
"2023-06-26 19:13:13 INFO [auto_gptq.quantization.gptq] avg loss: 143.77371215820312\n",
"2023-06-26 19:13:19 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 50/60...\n",
"2023-06-26 19:13:20 INFO [auto_gptq.quantization.gptq] duration: 0.8144946098327637\n",
"2023-06-26 19:13:20 INFO [auto_gptq.quantization.gptq] avg loss: 16.18939781188965\n",
"2023-06-26 19:13:27 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 50/60...\n",
"2023-06-26 19:13:28 INFO [auto_gptq.quantization.gptq] duration: 1.1117775440216064\n",
"2023-06-26 19:13:28 INFO [auto_gptq.quantization.gptq] avg loss: 395.81494140625\n",
"2023-06-26 19:13:28 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 50/60...\n",
"2023-06-26 19:13:29 INFO [auto_gptq.quantization.gptq] duration: 1.0665395259857178\n",
"2023-06-26 19:13:29 INFO [auto_gptq.quantization.gptq] avg loss: 446.4227294921875\n",
"2023-06-26 19:13:42 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 50/60...\n",
"2023-06-26 19:13:45 INFO [auto_gptq.quantization.gptq] duration: 3.5033068656921387\n",
"2023-06-26 19:13:45 INFO [auto_gptq.quantization.gptq] avg loss: 76.99166870117188\n",
"2023-06-26 19:13:51 INFO [auto_gptq.modeling._base] Start quantizing layer 51/60\n",
"2023-06-26 19:13:59 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 51/60...\n",
"2023-06-26 19:14:00 INFO [auto_gptq.quantization.gptq] duration: 0.7894580364227295\n",
"2023-06-26 19:14:00 INFO [auto_gptq.quantization.gptq] avg loss: 120.90390014648438\n",
"2023-06-26 19:14:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 51/60...\n",
"2023-06-26 19:14:01 INFO [auto_gptq.quantization.gptq] duration: 0.7620759010314941\n",
"2023-06-26 19:14:01 INFO [auto_gptq.quantization.gptq] avg loss: 169.8406219482422\n",
"2023-06-26 19:14:01 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 51/60...\n",
"2023-06-26 19:14:01 INFO [auto_gptq.quantization.gptq] duration: 0.7594552040100098\n",
"2023-06-26 19:14:01 INFO [auto_gptq.quantization.gptq] avg loss: 138.35787963867188\n",
"2023-06-26 19:14:07 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 51/60...\n",
"2023-06-26 19:14:08 INFO [auto_gptq.quantization.gptq] duration: 0.8016848564147949\n",
"2023-06-26 19:14:08 INFO [auto_gptq.quantization.gptq] avg loss: 9.87087631225586\n",
"2023-06-26 19:14:15 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 51/60...\n",
"2023-06-26 19:14:16 INFO [auto_gptq.quantization.gptq] duration: 1.0900943279266357\n",
"2023-06-26 19:14:16 INFO [auto_gptq.quantization.gptq] avg loss: 403.95709228515625\n",
"2023-06-26 19:14:16 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 51/60...\n",
"2023-06-26 19:14:17 INFO [auto_gptq.quantization.gptq] duration: 1.0303058624267578\n",
"2023-06-26 19:14:17 INFO [auto_gptq.quantization.gptq] avg loss: 450.1544494628906\n",
"2023-06-26 19:14:29 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 51/60...\n",
"2023-06-26 19:14:33 INFO [auto_gptq.quantization.gptq] duration: 3.309267520904541\n",
"2023-06-26 19:14:33 INFO [auto_gptq.quantization.gptq] avg loss: 78.50980377197266\n",
"2023-06-26 19:14:38 INFO [auto_gptq.modeling._base] Start quantizing layer 52/60\n",
"2023-06-26 19:14:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 52/60...\n",
"2023-06-26 19:14:48 INFO [auto_gptq.quantization.gptq] duration: 0.8992969989776611\n",
"2023-06-26 19:14:48 INFO [auto_gptq.quantization.gptq] avg loss: 119.07684326171875\n",
"2023-06-26 19:14:48 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 52/60...\n",
"2023-06-26 19:14:49 INFO [auto_gptq.quantization.gptq] duration: 0.8915762901306152\n",
"2023-06-26 19:14:49 INFO [auto_gptq.quantization.gptq] avg loss: 164.55499267578125\n",
"2023-06-26 19:14:49 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 52/60...\n",
"2023-06-26 19:14:50 INFO [auto_gptq.quantization.gptq] duration: 0.8657851219177246\n",
"2023-06-26 19:14:50 INFO [auto_gptq.quantization.gptq] avg loss: 136.43682861328125\n",
"2023-06-26 19:14:56 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 52/60...\n",
"2023-06-26 19:14:57 INFO [auto_gptq.quantization.gptq] duration: 0.9029607772827148\n",
"2023-06-26 19:14:57 INFO [auto_gptq.quantization.gptq] avg loss: 10.887556076049805\n",
"2023-06-26 19:15:04 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 52/60...\n",
"2023-06-26 19:15:05 INFO [auto_gptq.quantization.gptq] duration: 1.0318324565887451\n",
"2023-06-26 19:15:05 INFO [auto_gptq.quantization.gptq] avg loss: 410.3730163574219\n",
"2023-06-26 19:15:05 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 52/60...\n",
"2023-06-26 19:15:06 INFO [auto_gptq.quantization.gptq] duration: 1.1183817386627197\n",
"2023-06-26 19:15:06 INFO [auto_gptq.quantization.gptq] avg loss: 452.9522705078125\n",
"2023-06-26 19:15:18 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 52/60...\n",
"2023-06-26 19:15:21 INFO [auto_gptq.quantization.gptq] duration: 3.1358907222747803\n",
"2023-06-26 19:15:21 INFO [auto_gptq.quantization.gptq] avg loss: 80.51775360107422\n",
"2023-06-26 19:15:26 INFO [auto_gptq.modeling._base] Start quantizing layer 53/60\n",
"2023-06-26 19:15:34 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 53/60...\n",
"2023-06-26 19:15:35 INFO [auto_gptq.quantization.gptq] duration: 0.8960332870483398\n",
"2023-06-26 19:15:35 INFO [auto_gptq.quantization.gptq] avg loss: 97.01756286621094\n",
"2023-06-26 19:15:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 53/60...\n",
"2023-06-26 19:15:36 INFO [auto_gptq.quantization.gptq] duration: 0.850938081741333\n",
"2023-06-26 19:15:36 INFO [auto_gptq.quantization.gptq] avg loss: 134.7041015625\n",
"2023-06-26 19:15:36 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 53/60...\n",
"2023-06-26 19:15:37 INFO [auto_gptq.quantization.gptq] duration: 0.8614938259124756\n",
"2023-06-26 19:15:37 INFO [auto_gptq.quantization.gptq] avg loss: 110.16641235351562\n",
"2023-06-26 19:15:43 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 53/60...\n",
"2023-06-26 19:15:44 INFO [auto_gptq.quantization.gptq] duration: 0.8772797584533691\n",
"2023-06-26 19:15:44 INFO [auto_gptq.quantization.gptq] avg loss: 10.21967887878418\n",
"2023-06-26 19:15:52 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 53/60...\n",
"2023-06-26 19:15:53 INFO [auto_gptq.quantization.gptq] duration: 1.1169819831848145\n",
"2023-06-26 19:15:53 INFO [auto_gptq.quantization.gptq] avg loss: 413.29205322265625\n",
"2023-06-26 19:15:53 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 53/60...\n",
"2023-06-26 19:15:54 INFO [auto_gptq.quantization.gptq] duration: 1.063856840133667\n",
"2023-06-26 19:15:54 INFO [auto_gptq.quantization.gptq] avg loss: 451.09552001953125\n",
"2023-06-26 19:16:06 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 53/60...\n",
"2023-06-26 19:16:10 INFO [auto_gptq.quantization.gptq] duration: 3.291640043258667\n",
"2023-06-26 19:16:10 INFO [auto_gptq.quantization.gptq] avg loss: 82.87527465820312\n",
"2023-06-26 19:16:15 INFO [auto_gptq.modeling._base] Start quantizing layer 54/60\n",
"2023-06-26 19:16:24 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 54/60...\n",
"2023-06-26 19:16:24 INFO [auto_gptq.quantization.gptq] duration: 0.8664493560791016\n",
"2023-06-26 19:16:24 INFO [auto_gptq.quantization.gptq] avg loss: 96.48133850097656\n",
"2023-06-26 19:16:24 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 54/60...\n",
"2023-06-26 19:16:25 INFO [auto_gptq.quantization.gptq] duration: 0.8199574947357178\n",
"2023-06-26 19:16:25 INFO [auto_gptq.quantization.gptq] avg loss: 126.42342376708984\n",
"2023-06-26 19:16:25 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 54/60...\n",
"2023-06-26 19:16:26 INFO [auto_gptq.quantization.gptq] duration: 0.7867157459259033\n",
"2023-06-26 19:16:26 INFO [auto_gptq.quantization.gptq] avg loss: 109.40805053710938\n",
"2023-06-26 19:16:32 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 54/60...\n",
"2023-06-26 19:16:33 INFO [auto_gptq.quantization.gptq] duration: 0.7897233963012695\n",
"2023-06-26 19:16:33 INFO [auto_gptq.quantization.gptq] avg loss: 8.785682678222656\n",
"2023-06-26 19:16:39 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 54/60...\n",
"2023-06-26 19:16:40 INFO [auto_gptq.quantization.gptq] duration: 0.9950759410858154\n",
"2023-06-26 19:16:40 INFO [auto_gptq.quantization.gptq] avg loss: 406.2794494628906\n",
"2023-06-26 19:16:40 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 54/60...\n",
"2023-06-26 19:16:41 INFO [auto_gptq.quantization.gptq] duration: 0.9626250267028809\n",
"2023-06-26 19:16:41 INFO [auto_gptq.quantization.gptq] avg loss: 440.399169921875\n",
"2023-06-26 19:16:53 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 54/60...\n",
"2023-06-26 19:16:56 INFO [auto_gptq.quantization.gptq] duration: 3.066605806350708\n",
"2023-06-26 19:16:56 INFO [auto_gptq.quantization.gptq] avg loss: 79.85897064208984\n",
"2023-06-26 19:17:02 INFO [auto_gptq.modeling._base] Start quantizing layer 55/60\n",
"2023-06-26 19:17:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 55/60...\n",
"2023-06-26 19:17:11 INFO [auto_gptq.quantization.gptq] duration: 0.8989500999450684\n",
"2023-06-26 19:17:11 INFO [auto_gptq.quantization.gptq] avg loss: 104.44737243652344\n",
"2023-06-26 19:17:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 55/60...\n",
"2023-06-26 19:17:12 INFO [auto_gptq.quantization.gptq] duration: 0.8267917633056641\n",
"2023-06-26 19:17:12 INFO [auto_gptq.quantization.gptq] avg loss: 146.22024536132812\n",
"2023-06-26 19:17:12 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 55/60...\n",
"2023-06-26 19:17:13 INFO [auto_gptq.quantization.gptq] duration: 0.7923059463500977\n",
"2023-06-26 19:17:13 INFO [auto_gptq.quantization.gptq] avg loss: 118.45008850097656\n",
"2023-06-26 19:17:19 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 55/60...\n",
"2023-06-26 19:17:20 INFO [auto_gptq.quantization.gptq] duration: 0.8816087245941162\n",
"2023-06-26 19:17:20 INFO [auto_gptq.quantization.gptq] avg loss: 9.32460880279541\n",
"2023-06-26 19:17:27 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 55/60...\n",
"2023-06-26 19:17:28 INFO [auto_gptq.quantization.gptq] duration: 1.119274616241455\n",
"2023-06-26 19:17:28 INFO [auto_gptq.quantization.gptq] avg loss: 408.80352783203125\n",
"2023-06-26 19:17:28 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 55/60...\n",
"2023-06-26 19:17:30 INFO [auto_gptq.quantization.gptq] duration: 1.0608580112457275\n",
"2023-06-26 19:17:30 INFO [auto_gptq.quantization.gptq] avg loss: 438.30731201171875\n",
"2023-06-26 19:17:42 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 55/60...\n",
"2023-06-26 19:17:45 INFO [auto_gptq.quantization.gptq] duration: 3.32900071144104\n",
"2023-06-26 19:17:45 INFO [auto_gptq.quantization.gptq] avg loss: 82.18743896484375\n",
"2023-06-26 19:17:50 INFO [auto_gptq.modeling._base] Start quantizing layer 56/60\n",
"2023-06-26 19:17:58 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 56/60...\n",
"2023-06-26 19:17:59 INFO [auto_gptq.quantization.gptq] duration: 0.8732919692993164\n",
"2023-06-26 19:17:59 INFO [auto_gptq.quantization.gptq] avg loss: 107.80227661132812\n",
"2023-06-26 19:17:59 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 56/60...\n",
"2023-06-26 19:18:00 INFO [auto_gptq.quantization.gptq] duration: 0.841500997543335\n",
"2023-06-26 19:18:00 INFO [auto_gptq.quantization.gptq] avg loss: 148.77845764160156\n",
"2023-06-26 19:18:00 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 56/60...\n",
"2023-06-26 19:18:01 INFO [auto_gptq.quantization.gptq] duration: 0.8596153259277344\n",
"2023-06-26 19:18:01 INFO [auto_gptq.quantization.gptq] avg loss: 123.578857421875\n",
"2023-06-26 19:18:07 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 56/60...\n",
"2023-06-26 19:18:08 INFO [auto_gptq.quantization.gptq] duration: 0.8792097568511963\n",
"2023-06-26 19:18:08 INFO [auto_gptq.quantization.gptq] avg loss: 11.161661148071289\n",
"2023-06-26 19:18:15 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 56/60...\n",
"2023-06-26 19:18:16 INFO [auto_gptq.quantization.gptq] duration: 0.9883682727813721\n",
"2023-06-26 19:18:16 INFO [auto_gptq.quantization.gptq] avg loss: 407.434814453125\n",
"2023-06-26 19:18:16 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 56/60...\n",
"2023-06-26 19:18:17 INFO [auto_gptq.quantization.gptq] duration: 0.9493212699890137\n",
"2023-06-26 19:18:17 INFO [auto_gptq.quantization.gptq] avg loss: 432.9546813964844\n",
"2023-06-26 19:18:29 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 56/60...\n",
"2023-06-26 19:18:32 INFO [auto_gptq.quantization.gptq] duration: 3.075756072998047\n",
"2023-06-26 19:18:32 INFO [auto_gptq.quantization.gptq] avg loss: 86.06930541992188\n",
"2023-06-26 19:18:37 INFO [auto_gptq.modeling._base] Start quantizing layer 57/60\n",
"2023-06-26 19:18:46 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 57/60...\n",
"2023-06-26 19:18:47 INFO [auto_gptq.quantization.gptq] duration: 0.8866662979125977\n",
"2023-06-26 19:18:47 INFO [auto_gptq.quantization.gptq] avg loss: 93.93045043945312\n",
"2023-06-26 19:18:47 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 57/60...\n",
"2023-06-26 19:18:48 INFO [auto_gptq.quantization.gptq] duration: 0.8517231941223145\n",
"2023-06-26 19:18:48 INFO [auto_gptq.quantization.gptq] avg loss: 142.82904052734375\n",
"2023-06-26 19:18:48 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 57/60...\n",
"2023-06-26 19:18:49 INFO [auto_gptq.quantization.gptq] duration: 0.8707468509674072\n",
"2023-06-26 19:18:49 INFO [auto_gptq.quantization.gptq] avg loss: 107.96147155761719\n",
"2023-06-26 19:18:55 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 57/60...\n",
"2023-06-26 19:18:56 INFO [auto_gptq.quantization.gptq] duration: 0.8973045349121094\n",
"2023-06-26 19:18:56 INFO [auto_gptq.quantization.gptq] avg loss: 18.4396915435791\n",
"2023-06-26 19:19:03 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 57/60...\n",
"2023-06-26 19:19:04 INFO [auto_gptq.quantization.gptq] duration: 1.1113805770874023\n",
"2023-06-26 19:19:04 INFO [auto_gptq.quantization.gptq] avg loss: 405.3811340332031\n",
"2023-06-26 19:19:04 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 57/60...\n",
"2023-06-26 19:19:05 INFO [auto_gptq.quantization.gptq] duration: 1.0461633205413818\n",
"2023-06-26 19:19:05 INFO [auto_gptq.quantization.gptq] avg loss: 427.056396484375\n",
"2023-06-26 19:19:18 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 57/60...\n",
"2023-06-26 19:19:21 INFO [auto_gptq.quantization.gptq] duration: 3.293987989425659\n",
"2023-06-26 19:19:21 INFO [auto_gptq.quantization.gptq] avg loss: 96.71849060058594\n",
"2023-06-26 19:19:27 INFO [auto_gptq.modeling._base] Start quantizing layer 58/60\n",
"2023-06-26 19:19:35 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 58/60...\n",
"2023-06-26 19:19:36 INFO [auto_gptq.quantization.gptq] duration: 0.8998959064483643\n",
"2023-06-26 19:19:36 INFO [auto_gptq.quantization.gptq] avg loss: 91.81371307373047\n",
"2023-06-26 19:19:36 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 58/60...\n",
"2023-06-26 19:19:37 INFO [auto_gptq.quantization.gptq] duration: 0.8910307884216309\n",
"2023-06-26 19:19:37 INFO [auto_gptq.quantization.gptq] avg loss: 122.11761474609375\n",
"2023-06-26 19:19:37 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 58/60...\n",
"2023-06-26 19:19:38 INFO [auto_gptq.quantization.gptq] duration: 0.8021938800811768\n",
"2023-06-26 19:19:38 INFO [auto_gptq.quantization.gptq] avg loss: 106.75048065185547\n",
"2023-06-26 19:19:44 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 58/60...\n",
"2023-06-26 19:19:44 INFO [auto_gptq.quantization.gptq] duration: 0.7928588390350342\n",
"2023-06-26 19:19:44 INFO [auto_gptq.quantization.gptq] avg loss: 12.140018463134766\n",
"2023-06-26 19:19:52 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 58/60...\n",
"2023-06-26 19:19:53 INFO [auto_gptq.quantization.gptq] duration: 1.0948951244354248\n",
"2023-06-26 19:19:53 INFO [auto_gptq.quantization.gptq] avg loss: 393.1167297363281\n",
"2023-06-26 19:19:53 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 58/60...\n",
"2023-06-26 19:19:54 INFO [auto_gptq.quantization.gptq] duration: 1.0659363269805908\n",
"2023-06-26 19:19:54 INFO [auto_gptq.quantization.gptq] avg loss: 412.990234375\n",
"2023-06-26 19:20:06 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 58/60...\n",
"2023-06-26 19:20:09 INFO [auto_gptq.quantization.gptq] duration: 3.1261372566223145\n",
"2023-06-26 19:20:09 INFO [auto_gptq.quantization.gptq] avg loss: 107.19601440429688\n",
"2023-06-26 19:20:15 INFO [auto_gptq.modeling._base] Start quantizing layer 59/60\n",
"2023-06-26 19:20:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 59/60...\n",
"2023-06-26 19:20:23 INFO [auto_gptq.quantization.gptq] duration: 0.7836513519287109\n",
"2023-06-26 19:20:23 INFO [auto_gptq.quantization.gptq] avg loss: 77.7601318359375\n",
"2023-06-26 19:20:23 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 59/60...\n",
"2023-06-26 19:20:24 INFO [auto_gptq.quantization.gptq] duration: 0.7688579559326172\n",
"2023-06-26 19:20:24 INFO [auto_gptq.quantization.gptq] avg loss: 95.62125396728516\n",
"2023-06-26 19:20:24 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 59/60...\n",
"2023-06-26 19:20:25 INFO [auto_gptq.quantization.gptq] duration: 0.7812063694000244\n",
"2023-06-26 19:20:25 INFO [auto_gptq.quantization.gptq] avg loss: 92.36518859863281\n",
"2023-06-26 19:20:31 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 59/60...\n",
"2023-06-26 19:20:32 INFO [auto_gptq.quantization.gptq] duration: 0.8254716396331787\n",
"2023-06-26 19:20:32 INFO [auto_gptq.quantization.gptq] avg loss: 9.853437423706055\n",
"2023-06-26 19:20:39 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 59/60...\n",
"2023-06-26 19:20:40 INFO [auto_gptq.quantization.gptq] duration: 1.1016294956207275\n",
"2023-06-26 19:20:40 INFO [auto_gptq.quantization.gptq] avg loss: 357.4652099609375\n",
"2023-06-26 19:20:40 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 59/60...\n",
"2023-06-26 19:20:41 INFO [auto_gptq.quantization.gptq] duration: 1.0455937385559082\n",
"2023-06-26 19:20:41 INFO [auto_gptq.quantization.gptq] avg loss: 383.8266906738281\n",
"2023-06-26 19:20:52 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 59/60...\n",
"2023-06-26 19:20:56 INFO [auto_gptq.quantization.gptq] duration: 3.355104923248291\n",
"2023-06-26 19:20:56 INFO [auto_gptq.quantization.gptq] avg loss: 118.69153594970703\n",
"2023-06-26 19:21:01 INFO [auto_gptq.modeling._base] Start quantizing layer 60/60\n",
"2023-06-26 19:21:09 INFO [auto_gptq.modeling._base] Quantizing self_attn.k_proj in layer 60/60...\n",
"2023-06-26 19:21:10 INFO [auto_gptq.quantization.gptq] duration: 0.840193510055542\n",
"2023-06-26 19:21:10 INFO [auto_gptq.quantization.gptq] avg loss: 71.8792495727539\n",
"2023-06-26 19:21:10 INFO [auto_gptq.modeling._base] Quantizing self_attn.v_proj in layer 60/60...\n",
"2023-06-26 19:21:11 INFO [auto_gptq.quantization.gptq] duration: 0.7966632843017578\n",
"2023-06-26 19:21:11 INFO [auto_gptq.quantization.gptq] avg loss: 74.31680297851562\n",
"2023-06-26 19:21:11 INFO [auto_gptq.modeling._base] Quantizing self_attn.q_proj in layer 60/60...\n",
"2023-06-26 19:21:12 INFO [auto_gptq.quantization.gptq] duration: 0.8555908203125\n",
"2023-06-26 19:21:12 INFO [auto_gptq.quantization.gptq] avg loss: 90.05723571777344\n",
"2023-06-26 19:21:18 INFO [auto_gptq.modeling._base] Quantizing self_attn.o_proj in layer 60/60...\n",
"2023-06-26 19:21:19 INFO [auto_gptq.quantization.gptq] duration: 0.8945095539093018\n",
"2023-06-26 19:21:19 INFO [auto_gptq.quantization.gptq] avg loss: 14.275856971740723\n",
"2023-06-26 19:21:26 INFO [auto_gptq.modeling._base] Quantizing mlp.up_proj in layer 60/60...\n",
"2023-06-26 19:21:27 INFO [auto_gptq.quantization.gptq] duration: 1.1007232666015625\n",
"2023-06-26 19:21:27 INFO [auto_gptq.quantization.gptq] avg loss: 275.5982666015625\n",
"2023-06-26 19:21:27 INFO [auto_gptq.modeling._base] Quantizing mlp.gate_proj in layer 60/60...\n",
"2023-06-26 19:21:28 INFO [auto_gptq.quantization.gptq] duration: 1.0531039237976074\n",
"2023-06-26 19:21:28 INFO [auto_gptq.quantization.gptq] avg loss: 296.5889892578125\n",
"2023-06-26 19:21:41 INFO [auto_gptq.modeling._base] Quantizing mlp.down_proj in layer 60/60...\n",
"2023-06-26 19:21:44 INFO [auto_gptq.quantization.gptq] duration: 3.134366035461426\n",
"2023-06-26 19:21:44 INFO [auto_gptq.quantization.gptq] avg loss: 155.39511108398438\n",
"2023-06-26 19:21:49 INFO [auto_gptq.modeling._utils] Packing model...\n",
"2023-06-26 19:21:51 INFO [auto_gptq.modeling._utils] model.layers.0.self_attn.k_proj\n",
"2023-06-26 19:21:52 INFO [auto_gptq.modeling._utils] model.layers.0.self_attn.o_proj\n",
"2023-06-26 19:21:53 INFO [auto_gptq.modeling._utils] model.layers.0.self_attn.q_proj\n",
"2023-06-26 19:21:54 INFO [auto_gptq.modeling._utils] model.layers.0.self_attn.v_proj\n",
"2023-06-26 19:21:54 INFO [auto_gptq.modeling._utils] model.layers.0.mlp.down_proj\n",
"2023-06-26 19:21:57 INFO [auto_gptq.modeling._utils] model.layers.0.mlp.gate_proj\n",
"2023-06-26 19:21:59 INFO [auto_gptq.modeling._utils] model.layers.0.mlp.up_proj\n",
"2023-06-26 19:22:02 INFO [auto_gptq.modeling._utils] model.layers.1.self_attn.k_proj\n",
"2023-06-26 19:22:02 INFO [auto_gptq.modeling._utils] model.layers.1.self_attn.o_proj\n",
"2023-06-26 19:22:03 INFO [auto_gptq.modeling._utils] model.layers.1.self_attn.q_proj\n",
"2023-06-26 19:22:04 INFO [auto_gptq.modeling._utils] model.layers.1.self_attn.v_proj\n",
"2023-06-26 19:22:05 INFO [auto_gptq.modeling._utils] model.layers.1.mlp.down_proj\n",
"2023-06-26 19:22:07 INFO [auto_gptq.modeling._utils] model.layers.1.mlp.gate_proj\n",
"2023-06-26 19:22:09 INFO [auto_gptq.modeling._utils] model.layers.1.mlp.up_proj\n",
"2023-06-26 19:22:11 INFO [auto_gptq.modeling._utils] model.layers.2.self_attn.k_proj\n",
"2023-06-26 19:22:12 INFO [auto_gptq.modeling._utils] model.layers.2.self_attn.o_proj\n",
"2023-06-26 19:22:13 INFO [auto_gptq.modeling._utils] model.layers.2.self_attn.q_proj\n",
"2023-06-26 19:22:14 INFO [auto_gptq.modeling._utils] model.layers.2.self_attn.v_proj\n",
"2023-06-26 19:22:14 INFO [auto_gptq.modeling._utils] model.layers.2.mlp.down_proj\n",
"2023-06-26 19:22:16 INFO [auto_gptq.modeling._utils] model.layers.2.mlp.gate_proj\n",
"2023-06-26 19:22:19 INFO [auto_gptq.modeling._utils] model.layers.2.mlp.up_proj\n",
"2023-06-26 19:22:21 INFO [auto_gptq.modeling._utils] model.layers.3.self_attn.k_proj\n",
"2023-06-26 19:22:22 INFO [auto_gptq.modeling._utils] model.layers.3.self_attn.o_proj\n",
"2023-06-26 19:22:23 INFO [auto_gptq.modeling._utils] model.layers.3.self_attn.q_proj\n",
"2023-06-26 19:22:23 INFO [auto_gptq.modeling._utils] model.layers.3.self_attn.v_proj\n",
"2023-06-26 19:22:24 INFO [auto_gptq.modeling._utils] model.layers.3.mlp.down_proj\n",
"2023-06-26 19:22:26 INFO [auto_gptq.modeling._utils] model.layers.3.mlp.gate_proj\n",
"2023-06-26 19:22:29 INFO [auto_gptq.modeling._utils] model.layers.3.mlp.up_proj\n",
"2023-06-26 19:22:31 INFO [auto_gptq.modeling._utils] model.layers.4.self_attn.k_proj\n",
"2023-06-26 19:22:32 INFO [auto_gptq.modeling._utils] model.layers.4.self_attn.o_proj\n",
"2023-06-26 19:22:32 INFO [auto_gptq.modeling._utils] model.layers.4.self_attn.q_proj\n",
"2023-06-26 19:22:33 INFO [auto_gptq.modeling._utils] model.layers.4.self_attn.v_proj\n",
"2023-06-26 19:22:34 INFO [auto_gptq.modeling._utils] model.layers.4.mlp.down_proj\n",
"2023-06-26 19:22:36 INFO [auto_gptq.modeling._utils] model.layers.4.mlp.gate_proj\n",
"2023-06-26 19:22:38 INFO [auto_gptq.modeling._utils] model.layers.4.mlp.up_proj\n",
"2023-06-26 19:22:41 INFO [auto_gptq.modeling._utils] model.layers.5.self_attn.k_proj\n",
"2023-06-26 19:22:42 INFO [auto_gptq.modeling._utils] model.layers.5.self_attn.o_proj\n",
"2023-06-26 19:22:42 INFO [auto_gptq.modeling._utils] model.layers.5.self_attn.q_proj\n",
"2023-06-26 19:22:43 INFO [auto_gptq.modeling._utils] model.layers.5.self_attn.v_proj\n",
"2023-06-26 19:22:44 INFO [auto_gptq.modeling._utils] model.layers.5.mlp.down_proj\n",
"2023-06-26 19:22:46 INFO [auto_gptq.modeling._utils] model.layers.5.mlp.gate_proj\n",
"2023-06-26 19:22:48 INFO [auto_gptq.modeling._utils] model.layers.5.mlp.up_proj\n",
"2023-06-26 19:22:51 INFO [auto_gptq.modeling._utils] model.layers.6.self_attn.k_proj\n",
"2023-06-26 19:22:51 INFO [auto_gptq.modeling._utils] model.layers.6.self_attn.o_proj\n",
"2023-06-26 19:22:52 INFO [auto_gptq.modeling._utils] model.layers.6.self_attn.q_proj\n",
"2023-06-26 19:22:53 INFO [auto_gptq.modeling._utils] model.layers.6.self_attn.v_proj\n",
"2023-06-26 19:22:54 INFO [auto_gptq.modeling._utils] model.layers.6.mlp.down_proj\n",
"2023-06-26 19:22:56 INFO [auto_gptq.modeling._utils] model.layers.6.mlp.gate_proj\n",
"2023-06-26 19:22:58 INFO [auto_gptq.modeling._utils] model.layers.6.mlp.up_proj\n",
"2023-06-26 19:23:00 INFO [auto_gptq.modeling._utils] model.layers.7.self_attn.k_proj\n",
"2023-06-26 19:23:01 INFO [auto_gptq.modeling._utils] model.layers.7.self_attn.o_proj\n",
"2023-06-26 19:23:02 INFO [auto_gptq.modeling._utils] model.layers.7.self_attn.q_proj\n",
"2023-06-26 19:23:03 INFO [auto_gptq.modeling._utils] model.layers.7.self_attn.v_proj\n",
"2023-06-26 19:23:04 INFO [auto_gptq.modeling._utils] model.layers.7.mlp.down_proj\n",
"2023-06-26 19:23:06 INFO [auto_gptq.modeling._utils] model.layers.7.mlp.gate_proj\n",
"2023-06-26 19:23:08 INFO [auto_gptq.modeling._utils] model.layers.7.mlp.up_proj\n",
"2023-06-26 19:23:11 INFO [auto_gptq.modeling._utils] model.layers.8.self_attn.k_proj\n",
"2023-06-26 19:23:11 INFO [auto_gptq.modeling._utils] model.layers.8.self_attn.o_proj\n",
"2023-06-26 19:23:12 INFO [auto_gptq.modeling._utils] model.layers.8.self_attn.q_proj\n",
"2023-06-26 19:23:13 INFO [auto_gptq.modeling._utils] model.layers.8.self_attn.v_proj\n",
"2023-06-26 19:23:14 INFO [auto_gptq.modeling._utils] model.layers.8.mlp.down_proj\n",
"2023-06-26 19:23:16 INFO [auto_gptq.modeling._utils] model.layers.8.mlp.gate_proj\n",
"2023-06-26 19:23:18 INFO [auto_gptq.modeling._utils] model.layers.8.mlp.up_proj\n",
"2023-06-26 19:23:20 INFO [auto_gptq.modeling._utils] model.layers.9.self_attn.k_proj\n",
"2023-06-26 19:23:21 INFO [auto_gptq.modeling._utils] model.layers.9.self_attn.o_proj\n",
"2023-06-26 19:23:22 INFO [auto_gptq.modeling._utils] model.layers.9.self_attn.q_proj\n",
"2023-06-26 19:23:23 INFO [auto_gptq.modeling._utils] model.layers.9.self_attn.v_proj\n",
"2023-06-26 19:23:23 INFO [auto_gptq.modeling._utils] model.layers.9.mlp.down_proj\n",
"2023-06-26 19:23:26 INFO [auto_gptq.modeling._utils] model.layers.9.mlp.gate_proj\n",
"2023-06-26 19:23:28 INFO [auto_gptq.modeling._utils] model.layers.9.mlp.up_proj\n"
]
}
],
"source": [
"model, train, test, tokenizer = quantize()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1fc7e479-f248-4029-943c-c70eff6f4428",
"metadata": {},
"outputs": [],
"source": [
"# save quantized model\n",
"#model.save_quantized(quantized_model_dir)\n",
"\n",
"# save quantized model using safetensors\n",
"model.save_quantized(quantized_model_dir, use_safetensors=True)\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)\n",
"tokenizer.save_pretrained(quantized_model_dir)\n",
"\n",
"# load quantized model, currently only support cpu or single gpu\n",
"#model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device=\"cuda:0\", use_triton=True)\n",
"\n",
"#opt_eval(model.model, testenc, \"cuda:0\")"
]
},
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}