template <typename scalar_t, typename idx_t>
void softmaxLUT_kernel(scalar_t* input, scalar_t* output, idx_t n, idx_t c, idx_t sizeC,
                       idx_t input_sN, idx_t input_sC, idx_t output_sN, idx_t output_sC,
                       idx_t multiplier, idx_t zero_point, idx_t* exp_table) {
  /*
   * Perform quantized softmax for one element (batch n, channel c):
   *
   *   output_i = multiplier / sum(exp_table[A_j - A_i + 255] for all j)
   */
  // Body sketch implied by the formula above (assumption: the original
  // snippet is truncated at this point).
  idx_t a_i = static_cast<idx_t>(input[n * input_sN + c * input_sC]);
  idx_t sum = 0;
  for (idx_t j = 0; j < sizeC; ++j)
    sum += exp_table[static_cast<idx_t>(input[n * input_sN + j * input_sC]) - a_i + 255];
  output[n * output_sN + c * output_sC] =
      static_cast<scalar_t>(zero_point + multiplier / sum);  // zero_point use assumed
}
import os
import sys
import argparse
from collections import OrderedDict
from collections.abc import Iterable  # _collections_abc is a private CPython module
from pathlib import Path

import numpy as np
import torch
import torch.quantization as tq
from torch import nn
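The import list suggests an eager-mode post-training quantization script, but its body is not shown. The following is only a plausible skeleton of the standard torch.quantization prepare/calibrate/convert flow; the model and calibration data are placeholders, not the original code.

import torch
import torch.quantization as tq
from torch import nn

# Placeholder model; QuantStub/DeQuantStub mark the float<->quantized boundary.
model = nn.Sequential(tq.QuantStub(), nn.Conv2d(3, 8, 3), nn.ReLU(), tq.DeQuantStub())
model.eval()
model.qconfig = tq.get_default_qconfig("fbgemm")
tq.prepare(model, inplace=True)             # insert observers
with torch.no_grad():                       # calibrate on representative data
    model(torch.randn(1, 3, 32, 32))
tq.convert(model, inplace=True)             # swap in quantized modules
out = model(torch.randn(1, 3, 32, 32))      # inference with the quantized model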
import numpy as np
import torch
import torch.quantization as tq
from torch import nn
from copy import deepcopy

## Manual quantization
# weight, bias, input
kernel_size = 3
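The script is cut off after kernel_size = 3, so here is a hedged sketch of what manually quantizing a conv layer's weight, bias, and input typically looks like under the standard affine scheme q = clamp(round(x / scale) + zero_point, qmin, qmax). All names and scale conventions below are illustrative assumptions, not the original code.

import torch
from torch import nn

kernel_size = 3
conv = nn.Conv2d(1, 1, kernel_size, bias=True)
x = torch.randn(1, 1, 8, 8)

def quantize(t, scale, zero_point, qmin, qmax):
    # Affine quantization: q = clamp(round(t / scale) + zero_point, qmin, qmax)
    return torch.clamp(torch.round(t / scale) + zero_point, qmin, qmax)

# Per-tensor scales from observed ranges: symmetric int8 for the weight,
# affine uint8 for the input -- a common convention, assumed here.
w_scale = conv.weight.abs().max() / 127
x_scale = (x.max() - x.min()) / 255
x_zp = torch.round(-x.min() / x_scale)

w_q = quantize(conv.weight, w_scale, 0, -128, 127)
x_q = quantize(x, x_scale, x_zp, 0, 255)
# Bias is conventionally kept in int32 with scale = w_scale * x_scale.
b_q = torch.round(conv.bias / (w_scale * x_scale))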