ochafik · August 12, 2023 01:01
diff --git a/obj.py b/obj.py
 from ggml import GgmlContext, GgmlTensor, lib, ffi, type_to_dtype
 import numpy as np

 from typing import Optional, Union

 # dtype_to_type = {v: k for k, v in type_to_dtype.items()}

 class GgmlTensor:
  """Thin Python wrapper around a ggml tensor pointer.

  Stores the cffi tensor pointer, the ggml type enum it was created with and
  the context wrapper that produced it. Arithmetic operators are forwarded to
  the context's op methods; `write` / `numpy` move data between the tensor's
  buffer and numpy arrays.
  """

  def __init__(self, ctx, type, ptr: ffi.CData):
    """Wrap `ptr`.

    Args:
      ctx: context wrapper; must expose `.ptr` and the op methods used by the
        operators below (`add`, `sub`, `mul`, `div`, `unary`, `mul_mat`).
      type: ggml type enum value of the tensor.
      ptr: cffi pointer to the underlying tensor struct.
    """
    self.type = type
    self.ptr = ptr
    self.ctx = ctx
    # Integer tensors use the i32 element accessors, everything else the f32 ones.
    self.is_int = type in [lib.GGML_TYPE_I8, lib.GGML_TYPE_I16, lib.GGML_TYPE_I32]

  @property
  def shape(self):
    """List of `ne[i]` for the tensor's `n_dims` dimensions, in ggml order."""
    return [self.ptr.ne[i] for i in range(self.ptr.n_dims)]

  def build_forward(self):
    """Return a GgmlComputationGraph built by `ggml_build_forward` from this tensor."""
    return GgmlComputationGraph(self.ctx, lib.ggml_build_forward(self.ptr))

  def __add__(self, other): return self.ctx.add(self, other)
  def __sub__(self, other): return self.ctx.sub(self, other)
  def __mul__(self, other): return self.ctx.mul(self, other)
  def __div__(self, other): return self.ctx.div(self, other)
  def __truediv__(self, other): return self.ctx.div(self, other)
  def __neg__(self): return self.ctx.unary(self, lib.GGML_UNARY_OP_NEG)
  def __matmul__(self, other): return self.ctx.mul_mat(self, other)

  def reshape(self, shape_or_tensor):
    """Return a new GgmlTensor wrapping the result of a ggml reshape.

    Args:
      shape_or_tensor: either a GgmlTensor (forwarded to `ggml_reshape`) or a
        sequence of 1 to 4 dimension sizes (forwarded to `ggml_reshape_<n>d`).

    Raises:
      NotImplementedError: if the sequence does not have 1 to 4 entries.
    """
    # This class has no `__op` helper, so the ggml functions are called directly
    # with the context pointer and the result is wrapped here.
    if isinstance(shape_or_tensor, GgmlTensor):
      res = lib.ggml_reshape(self.ctx.ptr, self.ptr, shape_or_tensor.ptr)
    else:
      shape = list(shape_or_tensor)
      if not 1 <= len(shape) <= 4:
        raise NotImplementedError()
      reshape_nd = getattr(lib, f"ggml_reshape_{len(shape)}d")
      res = reshape_nd(self.ctx.ptr, self.ptr, *shape)
    return GgmlTensor(self.ctx, self.type, res)

  @property
  def strides(self):
    """List of `nb[i]` for the tensor's `n_dims` dimensions, in ggml order."""
    return [self.ptr.nb[i] for i in range(self.ptr.n_dims)]

  def write(self, values, quantize=False):
    """Copy `values` into the tensor's data buffer.

    Args:
      values: a numpy array whose shape equals `self.shape`, or any iterable
        of scalars that is written element by element through `self[i] = v`.
      quantize: must be True to write float32 data into a quantized tensor.

    Raises:
      ValueError: on shape mismatch, dtype mismatch, or when the tensor is
        quantized and `quantize` is not set / `values` is not float32.
    """
    if not isinstance(values, np.ndarray):
      for i, f in enumerate(values):
        self[i] = f
      return

    # NOTE(review): shapes are compared in ggml `ne` order; confirm callers
    # pass arrays laid out accordingly.
    if values.shape != tuple(self.shape):
      raise ValueError(f"Shape mismatch: tensor has {self.shape} but values has {values.shape}")

    data = lib.ggml_get_data(self.ptr)
    # ffi.from_buffer needs a contiguous buffer; this is a no-op for arrays that already are.
    values = np.ascontiguousarray(values)

    dtype = type_to_dtype.get(self.type)
    # `is None` rather than truthiness: np.dtype objects can be falsy.
    if dtype is None and lib.ggml_is_quantized(self.type):
      if not quantize:
        raise ValueError("Writing to tensor requires quantization. Force with quantize=True")
      dtype = np.float32
      if dtype != values.dtype:
        raise ValueError(f"Can only quantize from float32 but values are {values.dtype}")

      ttraits = lib.ggml_internal_get_type_traits(self.type)
      ttraits.from_float(ffi.cast("float*", ffi.from_buffer(values)), data, values.size)
      return

    if dtype != values.dtype:
      raise ValueError(f"Value type mismatch: tensor has {dtype} but values has {values.dtype}")

    ffi.memmove(data, ffi.from_buffer(values), values.nbytes)

  def numpy(self, dequantize: Union[bool, np.ndarray] = False) -> np.ndarray:
    """Return the tensor's contents as a numpy array.

    For non-quantized tensors the array is created with `np.frombuffer` over
    the tensor's data buffer (no copy). For quantized tensors the data is
    converted to float32 into a separate array.

    Args:
      dequantize: for quantized tensors, True to allocate a float32 result,
        or a writable C-contiguous float32 array to dequantize into.

    Raises:
      ValueError: if the tensor is quantized and `dequantize` is falsy, or
        the supplied destination array is unsuitable.
      NotImplementedError: if the tensor type has no entry in `type_to_dtype`.
    """
    shape = self.shape
    strides = self.strides
    nbytes = lib.ggml_nbytes(self.ptr)
    assert(nbytes == strides[-1] * shape[-1])

    data = lib.ggml_get_data(self.ptr)

    if lib.ggml_is_quantized(self.type):
      wants_destination = isinstance(dequantize, np.ndarray)
      # Check for an array first: comparing an ndarray with `== False` is
      # elementwise and cannot be used as a condition.
      if not wants_destination and not dequantize:
        raise ValueError("Quantized tensor requires extra memory to be converted to numpy array, and changes to the numpy array aren't reflected back to the tensor. Force with dequantize=True")

      nelements = lib.ggml_nelements(self.ptr)
      if wants_destination:
        destination = dequantize
        usable = (destination.dtype == np.float32
                  and destination.flags.c_contiguous
                  and destination.flags.writeable
                  and destination.size == nelements)
        if not usable:
          raise ValueError(f"dequantize destination must be a writable C-contiguous float32 array of {nelements} elements")
      else:
        destination = np.empty(tuple(shape), dtype=np.float32)

      ttraits = lib.ggml_internal_get_type_traits(self.type)
      # ffi.buffer only accepts cdata; ffi.from_buffer is what exposes a numpy array to C.
      ttraits.to_float(data, ffi.cast("float*", ffi.from_buffer(destination)), nelements)
      return destination

    dtype = type_to_dtype.get(self.type)
    if dtype is None:
      raise NotImplementedError(f'Unknown type {self.type}')

    a = np.frombuffer(ffi.buffer(data, nbytes), dtype=dtype)
    a.shape = tuple(shape)
    return a

  def get_floats_1d(self):
    """Return every element as a Python list, read through `self[i]`."""
    n = lib.ggml_nelements(self.ptr)
    return [self[i] for i in range(n)]

  def set_f32_1d(self, i, v): lib.ggml_set_f32_1d(self.ptr, i, v)
  def get_f32_1d(self, i): return lib.ggml_get_f32_1d(self.ptr, i)

  def set_i32_1d(self, i, v): lib.ggml_set_i32_1d(self.ptr, i, v)
  def get_i32_1d(self, i): return lib.ggml_get_i32_1d(self.ptr, i)

  def __getitem__(self, index):
    """Read element `index` (flat index) via the i32 or f32 accessor."""
    return (self.get_i32_1d if self.is_int else self.get_f32_1d)(index)

  def __setitem__(self, index, value):
    """Write element `index` (flat index) via the i32 or f32 accessor."""
    (self.set_i32_1d if self.is_int else self.set_f32_1d)(index, value)

 class GgmlContext:
  """Python wrapper around a ggml context created with `ggml_init`.

  Every op method forwards to the ggml function of the same name through
  `__op`, which unwraps GgmlTensor arguments and wraps the returned tensor
  pointer in a GgmlTensor.
  """

  def __init__(self, mem_size=None, mem_buffer=ffi.NULL, no_alloc=False):
    """Create the context.

    Args:
      mem_size: value for `ggml_init_params.mem_size`; required.
      mem_buffer: value for `ggml_init_params.mem_buffer`.
      no_alloc: value for `ggml_init_params.no_alloc`.

    Raises:
      TypeError: if `mem_size` is None.
      RuntimeError: if `ggml_init` returns NULL.
    """
    if mem_size is None:
      # cffi would reject None with an opaque TypeError; fail with a clear one instead.
      raise TypeError("mem_size is required: pass the context memory size as an integer")
    params = ffi.new('struct ggml_init_params*')
    params.mem_size = mem_size
    params.mem_buffer = mem_buffer
    params.no_alloc = no_alloc
    # Hold a reference so a caller-supplied buffer is not collected while the context lives.
    self._mem_buffer = mem_buffer
    raw = lib.ggml_init(params[0])
    if raw == ffi.NULL:
      raise RuntimeError("ggml_init failed (returned NULL)")
    # ggml_free runs when the wrapper pointer is garbage-collected.
    self.ptr = ffi.gc(raw, lib.ggml_free)

  def __op(self, fn, *args, inplace=False) -> GgmlTensor:
    """Call `fn(self.ptr, *args)` and wrap the resulting tensor pointer.

    GgmlTensor arguments are replaced by their `.ptr`; all other arguments
    are passed through unchanged.

    Args:
      fn: ggml function taking the context pointer as its first argument.
      *args: at least one argument. The first is either a GgmlTensor (the
        result inherits its type) or an int ggml type enum (constructors).
      inplace: if True, the first argument's wrapper is re-pointed at the
        result and returned instead of a new wrapper.
    """
    assert len(args) > 0
    first = args[0]
    res = fn(self.ptr, *[o.ptr if isinstance(o, GgmlTensor) else o for o in args])
    if inplace:
      assert isinstance(first, GgmlTensor)
      first.ptr = res
      return first

    if isinstance(first, GgmlTensor):
      tp = first.type
      if lib.ggml_is_quantized(tp):
        # If the result occupies exactly 4 bytes per element it is treated as F32
        # rather than as the quantized input type.
        nbytes = lib.ggml_nbytes(res)
        nelements = lib.ggml_nelements(res)
        if nbytes == nelements * 4:
          print(f"Warning: nbytes {nbytes} == nelements ({nelements}) * 4 = {nelements * 4}. Dropping to F32.")
          tp = lib.GGML_TYPE_F32
    else:
      tp = first
      assert isinstance(tp, int)

    return GgmlTensor(self, tp, ptr=res)

  def set_param(self, tensor):
    lib.ggml_set_param(self.ptr, tensor.ptr)

  def build_forward(self, tensor):
    return GgmlComputationGraph(self, lib.ggml_build_forward_ctx(self.ptr, tensor.ptr))

  def new_graph(self):
    return GgmlComputationGraph(self, lib.ggml_new_graph(self.ptr))

  def new_tensor(self, type, *shape) -> GgmlTensor:
    """Create a tensor of ggml type `type` with the given dimension sizes."""
    dims = ffi.new('int64_t[]', list(shape))
    return self.__op(lib.ggml_new_tensor, type, len(shape), dims)

  def new_tensor_1d(self, type, ne0): return self.__op(lib.ggml_new_tensor_1d, type, ne0)
  def new_tensor_2d(self, type, ne0, ne1): return self.__op(lib.ggml_new_tensor_2d, type, ne0, ne1)
  def new_tensor_3d(self, type, ne0, ne1, ne2): return self.__op(lib.ggml_new_tensor_3d, type, ne0, ne1, ne2)
  def new_tensor_4d(self, type, ne0, ne1, ne2, ne3): return self.__op(lib.ggml_new_tensor_4d, type, ne0, ne1, ne2, ne3)

  def tensor(self, shape, dtype=np.float32) -> GgmlTensor:
    """Create a tensor from a shape and a numpy dtype (or a ggml type enum).

    Args:
      shape: sequence of dimension sizes, forwarded to `new_tensor`.
      dtype: numpy dtype looked up in `type_to_dtype`, or an int ggml type
        enum used as-is.

    Raises:
      ValueError: if no ggml type in `type_to_dtype` maps to `dtype`.
    """
    if isinstance(dtype, int):
      tp = dtype
    else:
      wanted = np.dtype(dtype)
      # Reverse lookup; np.dtype() normalises both scalar types and dtype objects.
      tp = next((t for t, d in type_to_dtype.items() if np.dtype(d) == wanted), None)
      if tp is None:
        raise ValueError(f"No ggml type matches dtype {dtype}")
    return self.new_tensor(tp, *shape)

  # Unary ops.
  def dup(self, a): return self.__op(lib.ggml_dup, a)
  def dup_inplace(self, a): return self.__op(lib.ggml_dup_inplace, a, inplace=True)
  def sqr(self, a): return self.__op(lib.ggml_sqr, a)
  def sqr_inplace(self, a): return self.__op(lib.ggml_sqr_inplace, a, inplace=True)
  def sqrt(self, a): return self.__op(lib.ggml_sqrt, a)
  def sqrt_inplace(self, a): return self.__op(lib.ggml_sqrt_inplace, a, inplace=True)
  def log(self, a): return self.__op(lib.ggml_log, a)
  def log_inplace(self, a): return self.__op(lib.ggml_log_inplace, a, inplace=True)
  def sum(self, a): return self.__op(lib.ggml_sum, a)
  def sum_rows(self, a): return self.__op(lib.ggml_sum_rows, a)
  def mean(self, a): return self.__op(lib.ggml_mean, a)
  def argmax(self, a): return self.__op(lib.ggml_argmax, a)
  def abs(self, a): return self.__op(lib.ggml_abs, a)
  def abs_inplace(self, a): return self.__op(lib.ggml_abs_inplace, a, inplace=True)
  def sgn(self, a): return self.__op(lib.ggml_sgn, a)
  def sgn_inplace(self, a): return self.__op(lib.ggml_sgn_inplace, a, inplace=True)
  def neg(self, a): return self.__op(lib.ggml_neg, a)
  def neg_inplace(self, a): return self.__op(lib.ggml_neg_inplace, a, inplace=True)
  def step(self, a): return self.__op(lib.ggml_step, a)
  def step_inplace(self, a): return self.__op(lib.ggml_step_inplace, a, inplace=True)
  def tanh(self, a): return self.__op(lib.ggml_tanh, a)
  def tanh_inplace(self, a): return self.__op(lib.ggml_tanh_inplace, a, inplace=True)
  def elu(self, a): return self.__op(lib.ggml_elu, a)
  def elu_inplace(self, a): return self.__op(lib.ggml_elu_inplace, a, inplace=True)
  def relu(self, a): return self.__op(lib.ggml_relu, a)
  def relu_inplace(self, a): return self.__op(lib.ggml_relu_inplace, a, inplace=True)
  def gelu(self, a): return self.__op(lib.ggml_gelu, a)
  def gelu_inplace(self, a): return self.__op(lib.ggml_gelu_inplace, a, inplace=True)
  def gelu_quick(self, a): return self.__op(lib.ggml_gelu_quick, a)
  def gelu_quick_inplace(self, a): return self.__op(lib.ggml_gelu_quick_inplace, a, inplace=True)
  def silu(self, a): return self.__op(lib.ggml_silu, a)
  def silu_inplace(self, a): return self.__op(lib.ggml_silu_inplace, a, inplace=True)
  def norm(self, a): return self.__op(lib.ggml_norm, a)
  def norm_inplace(self, a): return self.__op(lib.ggml_norm_inplace, a, inplace=True)
  def cont(self, a): return self.__op(lib.ggml_cont, a)
  def cont_inplace(self, a): return self.__op(lib.ggml_cont_inplace, a, inplace=True)
  def transpose(self, a): return self.__op(lib.ggml_transpose, a)
  def soft_max(self, a): return self.__op(lib.ggml_soft_max, a)
  def soft_max_inplace(self, a): return self.__op(lib.ggml_soft_max_inplace, a, inplace=True)

  # Binary ops.
  def add(self, a, b): return self.__op(lib.ggml_add, a, b)
  def add_inplace(self, a, b): return self.__op(lib.ggml_add_inplace, a, b, inplace=True)
  def add1(self, a, b): return self.__op(lib.ggml_add1, a, b)
  def add1_inplace(self, a, b): return self.__op(lib.ggml_add1_inplace, a, b, inplace=True)
  def sub(self, a, b): return self.__op(lib.ggml_sub, a, b)
  def sub_inplace(self, a, b): return self.__op(lib.ggml_sub_inplace, a, b, inplace=True)
  def mul(self, a, b): return self.__op(lib.ggml_mul, a, b)
  def mul_inplace(self, a, b): return self.__op(lib.ggml_mul_inplace, a, b, inplace=True)
  def div(self, a, b): return self.__op(lib.ggml_div, a, b)
  def div_inplace(self, a, b): return self.__op(lib.ggml_div_inplace, a, b, inplace=True)
  def repeat(self, a, b): return self.__op(lib.ggml_repeat, a, b)
  def repeat_back(self, a, b): return self.__op(lib.ggml_repeat_back, a, b)
  def silu_back(self, a, b): return self.__op(lib.ggml_silu_back, a, b)
  def rms_norm_back(self, a, b): return self.__op(lib.ggml_rms_norm_back, a, b)
  def mul_mat(self, a, b): return self.__op(lib.ggml_mul_mat, a, b)
  def out_prod(self, a, b): return self.__op(lib.ggml_out_prod, a, b)
  def scale(self, a, b): return self.__op(lib.ggml_scale, a, b)
  def scale_inplace(self, a, b): return self.__op(lib.ggml_scale_inplace, a, b, inplace=True)
  def cpy(self, a, b): return self.__op(lib.ggml_cpy, a, b)
  def cpy_inplace(self, a, b): return self.__op(lib.ggml_cpy_inplace, a, b, inplace=True)
  def reshape(self, a, b): return self.__op(lib.ggml_reshape, a, b)
  def get_rows(self, a, b): return self.__op(lib.ggml_get_rows, a, b)
  def soft_max_back(self, a, b): return self.__op(lib.ggml_soft_max_back, a, b)
  def soft_max_back_inplace(self, a, b): return self.__op(lib.ggml_soft_max_back_inplace, a, b, inplace=True)

  # These forward to the rms_norm functions (previously wired to ggml_soft_max_back by mistake).
  def rms_norm(self, a, eps): return self.__op(lib.ggml_rms_norm, a, eps)
  def rms_norm_inplace(self, a, eps): return self.__op(lib.ggml_rms_norm_inplace, a, eps, inplace=True)

  def get_rows_back(self, a, b, c): return self.__op(lib.ggml_get_rows_back, a, b, c)
  def diag(self, a): return self.__op(lib.ggml_diag, a)
  def diag_mask_inf(self, a, n_past): return self.__op(lib.ggml_diag_mask_inf, a, n_past)
  def diag_mask_inf_inplace(self, a, n_past): return self.__op(lib.ggml_diag_mask_inf_inplace, a, n_past, inplace=True)
  def diag_mask_zero(self, a, n_past): return self.__op(lib.ggml_diag_mask_zero, a, n_past)
  def diag_mask_zero_inplace(self, a, n_past): return self.__op(lib.ggml_diag_mask_zero_inplace, a, n_past, inplace=True)

  def rope(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope, a, n_past, n_dims, mode, n_ctx)
  def rope_inplace(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope_inplace, a, n_past, n_dims, mode, n_ctx, inplace=True)
  def rope_back(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope_back, a, n_past, n_dims, mode, n_ctx)

  def rope_custom(self, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale): return self.__op(lib.ggml_rope_custom, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale)
  def rope_custom_inplace(self, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale): return self.__op(lib.ggml_rope_custom_inplace, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, inplace=True)

  def alibi(self, a, n_past, n_head, bias_max): return self.__op(lib.ggml_alibi, a, n_past, n_head, bias_max)
  def clamp(self, a, min, max): return self.__op(lib.ggml_clamp, a, min, max)

  def conv_1d(self, a, b, s0, p0, d0): return self.__op(lib.ggml_conv_1d, a, b, s0, p0, d0)
  def conv_1d_ph(self, a, b, s, d): return self.__op(lib.ggml_conv_1d_ph, a, b, s, d)
  # Forwards to ggml_conv_2d (previously wired to ggml_conv_1d by mistake).
  def conv_2d(self, a, b, s0, s1, p0, p1, d0, d1): return self.__op(lib.ggml_conv_2d, a, b, s0, s1, p0, p1, d0, d1)

  def pool_1d(self, a, op, k0, s0, p0): return self.__op(lib.ggml_pool_1d, a, op, k0, s0, p0)
  def pool_2d(self, a, op, k0, k1, s0, s1, p0, p1): return self.__op(lib.ggml_pool_2d, a, op, k0, k1, s0, s1, p0, p1)

  def set(self, a, b, nb1, nb2, nb3, offset): return self.__op(lib.ggml_set, a, b, nb1, nb2, nb3, offset)
  def set_inplace(self, a, b, nb1, nb2, nb3, offset): return self.__op(lib.ggml_set_inplace, a, b, nb1, nb2, nb3, offset, inplace=True)
  def set_1d(self, a, b, offset): return self.__op(lib.ggml_set_1d, a, b, offset)
  def set_1d_inplace(self, a, b, offset): return self.__op(lib.ggml_set_1d_inplace, a, b, offset, inplace=True)

  def set_2d(self, a, b, nb1, offset): return self.__op(lib.ggml_set_2d, a, b, nb1, offset)

  def set_2d_inplace(self, a, b, nb1, offset): return self.__op(lib.ggml_set_2d_inplace, a, b, nb1, offset, inplace=True)

  def view_1d(self, a, ne0, offset): return self.__op(lib.ggml_view_1d, a, ne0, offset)
  def view_2d(self, a, ne0, ne1, nb1, offset): return self.__op(lib.ggml_view_2d, a, ne0, ne1, nb1, offset)
  def view_3d(self, a, ne0, ne1, ne2, nb1, nb2, offset): return self.__op(lib.ggml_view_3d, a, ne0, ne1, ne2, nb1, nb2, offset)
  def view_4d(self, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset): return self.__op(lib.ggml_view_4d, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)

  def win_part(self, a, w): return self.__op(lib.ggml_win_part, a, w)
  def win_unpart(self, a, w0, h0, w): return self.__op(lib.ggml_win_unpart, a, w0, h0, w)

  # The *_inplace map variants pass inplace=True like every other inplace op in this class.
  def unary(self, a, op): return self.__op(lib.ggml_unary, a, op)
  def unary_inplace(self, a, op): return self.__op(lib.ggml_unary_inplace, a, op, inplace=True)
  def map_unary_f32(self, a, fun): return self.__op(lib.ggml_map_unary_f32, a, fun)
  def map_unary_inplace_f32(self, a, fun): return self.__op(lib.ggml_map_unary_inplace_f32, a, fun, inplace=True)
  def map_binary_f32(self, a, b, fun): return self.__op(lib.ggml_map_binary_f32, a, b, fun)
  def map_binary_inplace_f32(self, a, b, fun): return self.__op(lib.ggml_map_binary_inplace_f32, a, b, fun, inplace=True)
  def map_custom1_f32(self, a, fun): return self.__op(lib.ggml_map_custom1_f32, a, fun)
  def map_custom1_inplace_f32(self, a, fun): return self.__op(lib.ggml_map_custom1_inplace_f32, a, fun, inplace=True)
  def map_custom2_f32(self, a, b, fun): return self.__op(lib.ggml_map_custom2_f32, a, b, fun)
  def map_custom2_inplace_f32(self, a, b, fun): return self.__op(lib.ggml_map_custom2_inplace_f32, a, b, fun, inplace=True)
  def map_custom3_f32(self, a, b, c, fun): return self.__op(lib.ggml_map_custom3_f32, a, b, c, fun)
  def map_custom3_inplace_f32(self, a, b, c, fun): return self.__op(lib.ggml_map_custom3_inplace_f32, a, b, c, fun, inplace=True)

  def cross_entropy_loss(self, a, b): return self.__op(lib.ggml_cross_entropy_loss, a, b)
  def cross_entropy_loss_back(self, a, b, c): return self.__op(lib.ggml_cross_entropy_loss_back, a, b, c)
    
 class GgmlComputationGraph:
  """Pairs a ggml computation-graph handle with the context wrapper it was built from."""

  def __init__(self, ctx, ptr):
    """Remember the graph handle `ptr` and the context wrapper `ctx`."""
    self.ptr = ptr
    self.ctx = ctx

  def build_forward_expand(self, t) -> None:
    """Pass this graph and tensor `t` to `ggml_build_forward_expand`."""
    expand = lib.ggml_build_forward_expand
    expand(self.ptr, t.ptr)

  def build_backward(self, keep: bool):
    """Wrap the result of `ggml_build_backward` for this graph in a new GgmlComputationGraph."""
    owner = self.ctx
    backward = lib.ggml_build_backward(owner.ptr, self.ptr, keep)
    return GgmlComputationGraph(owner, backward)

  def compute(self, n_threads) -> None:
    """Run `ggml_graph_compute_with_ctx` on this graph using `n_threads`."""
    run = lib.ggml_graph_compute_with_ctx
    run(self.ctx.ptr, self.ptr, n_threads)
	from ggml import GgmlContext, GgmlTensor, lib, ffi, type_to_dtype
	import numpy as np

	from typing import Optional, Union

	# dtype_to_type = {v: k for k, v in type_to_dtype.items()}

	class GgmlTensor:
		"""Thin Python wrapper around a ggml tensor pointer.

		Stores the cffi tensor pointer, the ggml type enum it was created with and
		the context wrapper that produced it. Arithmetic operators are forwarded to
		the context's op methods; `write` / `numpy` move data between the tensor's
		buffer and numpy arrays.
		"""

		def __init__(self, ctx, type, ptr: ffi.CData):
			"""Wrap `ptr`.

			Args:
				ctx: context wrapper; must expose `.ptr` and the op methods used by the
					operators below (`add`, `sub`, `mul`, `div`, `unary`, `mul_mat`).
				type: ggml type enum value of the tensor.
				ptr: cffi pointer to the underlying tensor struct.
			"""
			self.type = type
			self.ptr = ptr
			self.ctx = ctx
			# Integer tensors use the i32 element accessors, everything else the f32 ones.
			self.is_int = type in [lib.GGML_TYPE_I8, lib.GGML_TYPE_I16, lib.GGML_TYPE_I32]

		@property
		def shape(self):
			"""List of `ne[i]` for the tensor's `n_dims` dimensions, in ggml order."""
			return [self.ptr.ne[i] for i in range(self.ptr.n_dims)]

		def build_forward(self):
			"""Return a GgmlComputationGraph built by `ggml_build_forward` from this tensor."""
			return GgmlComputationGraph(self.ctx, lib.ggml_build_forward(self.ptr))

		def __add__(self, other): return self.ctx.add(self, other)
		def __sub__(self, other): return self.ctx.sub(self, other)
		def __mul__(self, other): return self.ctx.mul(self, other)
		def __div__(self, other): return self.ctx.div(self, other)
		def __truediv__(self, other): return self.ctx.div(self, other)
		def __neg__(self): return self.ctx.unary(self, lib.GGML_UNARY_OP_NEG)
		def __matmul__(self, other): return self.ctx.mul_mat(self, other)

		def reshape(self, shape_or_tensor):
			"""Return a new GgmlTensor wrapping the result of a ggml reshape.

			Args:
				shape_or_tensor: either a GgmlTensor (forwarded to `ggml_reshape`) or a
					sequence of 1 to 4 dimension sizes (forwarded to `ggml_reshape_<n>d`).

			Raises:
				NotImplementedError: if the sequence does not have 1 to 4 entries.
			"""
			# This class has no `__op` helper, so the ggml functions are called directly
			# with the context pointer and the result is wrapped here.
			if isinstance(shape_or_tensor, GgmlTensor):
				res = lib.ggml_reshape(self.ctx.ptr, self.ptr, shape_or_tensor.ptr)
			else:
				shape = list(shape_or_tensor)
				if not 1 <= len(shape) <= 4:
					raise NotImplementedError()
				reshape_nd = getattr(lib, f"ggml_reshape_{len(shape)}d")
				res = reshape_nd(self.ctx.ptr, self.ptr, *shape)
			return GgmlTensor(self.ctx, self.type, res)

		@property
		def strides(self):
			"""List of `nb[i]` for the tensor's `n_dims` dimensions, in ggml order."""
			return [self.ptr.nb[i] for i in range(self.ptr.n_dims)]

		def write(self, values, quantize=False):
			"""Copy `values` into the tensor's data buffer.

			Args:
				values: a numpy array whose shape equals `self.shape`, or any iterable
					of scalars that is written element by element through `self[i] = v`.
				quantize: must be True to write float32 data into a quantized tensor.

			Raises:
				ValueError: on shape mismatch, dtype mismatch, or when the tensor is
					quantized and `quantize` is not set / `values` is not float32.
			"""
			if not isinstance(values, np.ndarray):
				for i, f in enumerate(values):
					self[i] = f
				return

			# NOTE(review): shapes are compared in ggml `ne` order; confirm callers
			# pass arrays laid out accordingly.
			if values.shape != tuple(self.shape):
				raise ValueError(f"Shape mismatch: tensor has {self.shape} but values has {values.shape}")

			data = lib.ggml_get_data(self.ptr)
			# ffi.from_buffer needs a contiguous buffer; this is a no-op for arrays that already are.
			values = np.ascontiguousarray(values)

			dtype = type_to_dtype.get(self.type)
			# `is None` rather than truthiness: np.dtype objects can be falsy.
			if dtype is None and lib.ggml_is_quantized(self.type):
				if not quantize:
					raise ValueError("Writing to tensor requires quantization. Force with quantize=True")
				dtype = np.float32
				if dtype != values.dtype:
					raise ValueError(f"Can only quantize from float32 but values are {values.dtype}")

				ttraits = lib.ggml_internal_get_type_traits(self.type)
				ttraits.from_float(ffi.cast("float*", ffi.from_buffer(values)), data, values.size)
				return

			if dtype != values.dtype:
				raise ValueError(f"Value type mismatch: tensor has {dtype} but values has {values.dtype}")

			ffi.memmove(data, ffi.from_buffer(values), values.nbytes)

		def numpy(self, dequantize: Union[bool, np.ndarray] = False) -> np.ndarray:
			"""Return the tensor's contents as a numpy array.

			For non-quantized tensors the array is created with `np.frombuffer` over
			the tensor's data buffer (no copy). For quantized tensors the data is
			converted to float32 into a separate array.

			Args:
				dequantize: for quantized tensors, True to allocate a float32 result,
					or a writable C-contiguous float32 array to dequantize into.

			Raises:
				ValueError: if the tensor is quantized and `dequantize` is falsy, or
					the supplied destination array is unsuitable.
				NotImplementedError: if the tensor type has no entry in `type_to_dtype`.
			"""
			shape = self.shape
			strides = self.strides
			nbytes = lib.ggml_nbytes(self.ptr)
			assert(nbytes == strides[-1] * shape[-1])

			data = lib.ggml_get_data(self.ptr)

			if lib.ggml_is_quantized(self.type):
				wants_destination = isinstance(dequantize, np.ndarray)
				# Check for an array first: comparing an ndarray with `== False` is
				# elementwise and cannot be used as a condition.
				if not wants_destination and not dequantize:
					raise ValueError("Quantized tensor requires extra memory to be converted to numpy array, and changes to the numpy array aren't reflected back to the tensor. Force with dequantize=True")

				nelements = lib.ggml_nelements(self.ptr)
				if wants_destination:
					destination = dequantize
					usable = (destination.dtype == np.float32
										and destination.flags.c_contiguous
										and destination.flags.writeable
										and destination.size == nelements)
					if not usable:
						raise ValueError(f"dequantize destination must be a writable C-contiguous float32 array of {nelements} elements")
				else:
					destination = np.empty(tuple(shape), dtype=np.float32)

				ttraits = lib.ggml_internal_get_type_traits(self.type)
				# ffi.buffer only accepts cdata; ffi.from_buffer is what exposes a numpy array to C.
				ttraits.to_float(data, ffi.cast("float*", ffi.from_buffer(destination)), nelements)
				return destination

			dtype = type_to_dtype.get(self.type)
			if dtype is None:
				raise NotImplementedError(f'Unknown type {self.type}')

			a = np.frombuffer(ffi.buffer(data, nbytes), dtype=dtype)
			a.shape = tuple(shape)
			return a

		def get_floats_1d(self):
			"""Return every element as a Python list, read through `self[i]`."""
			n = lib.ggml_nelements(self.ptr)
			return [self[i] for i in range(n)]

		def set_f32_1d(self, i, v): lib.ggml_set_f32_1d(self.ptr, i, v)
		def get_f32_1d(self, i): return lib.ggml_get_f32_1d(self.ptr, i)

		def set_i32_1d(self, i, v): lib.ggml_set_i32_1d(self.ptr, i, v)
		def get_i32_1d(self, i): return lib.ggml_get_i32_1d(self.ptr, i)

		def __getitem__(self, index):
			"""Read element `index` (flat index) via the i32 or f32 accessor."""
			return (self.get_i32_1d if self.is_int else self.get_f32_1d)(index)

		def __setitem__(self, index, value):
			"""Write element `index` (flat index) via the i32 or f32 accessor."""
			(self.set_i32_1d if self.is_int else self.set_f32_1d)(index, value)

	class GgmlContext:
		"""Python wrapper around a ggml context created with `ggml_init`.

		Every op method forwards to the ggml function of the same name through
		`__op`, which unwraps GgmlTensor arguments and wraps the returned tensor
		pointer in a GgmlTensor.
		"""

		def __init__(self, mem_size=None, mem_buffer=ffi.NULL, no_alloc=False):
			"""Create the context.

			Args:
				mem_size: value for `ggml_init_params.mem_size`; required.
				mem_buffer: value for `ggml_init_params.mem_buffer`.
				no_alloc: value for `ggml_init_params.no_alloc`.

			Raises:
				TypeError: if `mem_size` is None.
				RuntimeError: if `ggml_init` returns NULL.
			"""
			if mem_size is None:
				# cffi would reject None with an opaque TypeError; fail with a clear one instead.
				raise TypeError("mem_size is required: pass the context memory size as an integer")
			params = ffi.new('struct ggml_init_params*')
			params.mem_size = mem_size
			params.mem_buffer = mem_buffer
			params.no_alloc = no_alloc
			# Hold a reference so a caller-supplied buffer is not collected while the context lives.
			self._mem_buffer = mem_buffer
			raw = lib.ggml_init(params[0])
			if raw == ffi.NULL:
				raise RuntimeError("ggml_init failed (returned NULL)")
			# ggml_free runs when the wrapper pointer is garbage-collected.
			self.ptr = ffi.gc(raw, lib.ggml_free)

		def __op(self, fn, *args, inplace=False) -> GgmlTensor:
			"""Call `fn(self.ptr, *args)` and wrap the resulting tensor pointer.

			GgmlTensor arguments are replaced by their `.ptr`; all other arguments
			are passed through unchanged.

			Args:
				fn: ggml function taking the context pointer as its first argument.
				*args: at least one argument. The first is either a GgmlTensor (the
					result inherits its type) or an int ggml type enum (constructors).
				inplace: if True, the first argument's wrapper is re-pointed at the
					result and returned instead of a new wrapper.
			"""
			assert len(args) > 0
			first = args[0]
			res = fn(self.ptr, *[o.ptr if isinstance(o, GgmlTensor) else o for o in args])
			if inplace:
				assert isinstance(first, GgmlTensor)
				first.ptr = res
				return first

			if isinstance(first, GgmlTensor):
				tp = first.type
				if lib.ggml_is_quantized(tp):
					# If the result occupies exactly 4 bytes per element it is treated as F32
					# rather than as the quantized input type.
					nbytes = lib.ggml_nbytes(res)
					nelements = lib.ggml_nelements(res)
					if nbytes == nelements * 4:
						print(f"Warning: nbytes {nbytes} == nelements ({nelements}) * 4 = {nelements * 4}. Dropping to F32.")
						tp = lib.GGML_TYPE_F32
			else:
				tp = first
				assert isinstance(tp, int)

			return GgmlTensor(self, tp, ptr=res)

		def set_param(self, tensor):
			lib.ggml_set_param(self.ptr, tensor.ptr)

		def build_forward(self, tensor):
			return GgmlComputationGraph(self, lib.ggml_build_forward_ctx(self.ptr, tensor.ptr))

		def new_graph(self):
			return GgmlComputationGraph(self, lib.ggml_new_graph(self.ptr))

		def new_tensor(self, type, *shape) -> GgmlTensor:
			"""Create a tensor of ggml type `type` with the given dimension sizes."""
			dims = ffi.new('int64_t[]', list(shape))
			return self.__op(lib.ggml_new_tensor, type, len(shape), dims)

		def new_tensor_1d(self, type, ne0): return self.__op(lib.ggml_new_tensor_1d, type, ne0)
		def new_tensor_2d(self, type, ne0, ne1): return self.__op(lib.ggml_new_tensor_2d, type, ne0, ne1)
		def new_tensor_3d(self, type, ne0, ne1, ne2): return self.__op(lib.ggml_new_tensor_3d, type, ne0, ne1, ne2)
		def new_tensor_4d(self, type, ne0, ne1, ne2, ne3): return self.__op(lib.ggml_new_tensor_4d, type, ne0, ne1, ne2, ne3)

		def tensor(self, shape, dtype=np.float32) -> GgmlTensor:
			"""Create a tensor from a shape and a numpy dtype (or a ggml type enum).

			Args:
				shape: sequence of dimension sizes, forwarded to `new_tensor`.
				dtype: numpy dtype looked up in `type_to_dtype`, or an int ggml type
					enum used as-is.

			Raises:
				ValueError: if no ggml type in `type_to_dtype` maps to `dtype`.
			"""
			if isinstance(dtype, int):
				tp = dtype
			else:
				wanted = np.dtype(dtype)
				# Reverse lookup; np.dtype() normalises both scalar types and dtype objects.
				tp = next((t for t, d in type_to_dtype.items() if np.dtype(d) == wanted), None)
				if tp is None:
					raise ValueError(f"No ggml type matches dtype {dtype}")
			return self.new_tensor(tp, *shape)

		# Unary ops.
		def dup(self, a): return self.__op(lib.ggml_dup, a)
		def dup_inplace(self, a): return self.__op(lib.ggml_dup_inplace, a, inplace=True)
		def sqr(self, a): return self.__op(lib.ggml_sqr, a)
		def sqr_inplace(self, a): return self.__op(lib.ggml_sqr_inplace, a, inplace=True)
		def sqrt(self, a): return self.__op(lib.ggml_sqrt, a)
		def sqrt_inplace(self, a): return self.__op(lib.ggml_sqrt_inplace, a, inplace=True)
		def log(self, a): return self.__op(lib.ggml_log, a)
		def log_inplace(self, a): return self.__op(lib.ggml_log_inplace, a, inplace=True)
		def sum(self, a): return self.__op(lib.ggml_sum, a)
		def sum_rows(self, a): return self.__op(lib.ggml_sum_rows, a)
		def mean(self, a): return self.__op(lib.ggml_mean, a)
		def argmax(self, a): return self.__op(lib.ggml_argmax, a)
		def abs(self, a): return self.__op(lib.ggml_abs, a)
		def abs_inplace(self, a): return self.__op(lib.ggml_abs_inplace, a, inplace=True)
		def sgn(self, a): return self.__op(lib.ggml_sgn, a)
		def sgn_inplace(self, a): return self.__op(lib.ggml_sgn_inplace, a, inplace=True)
		def neg(self, a): return self.__op(lib.ggml_neg, a)
		def neg_inplace(self, a): return self.__op(lib.ggml_neg_inplace, a, inplace=True)
		def step(self, a): return self.__op(lib.ggml_step, a)
		def step_inplace(self, a): return self.__op(lib.ggml_step_inplace, a, inplace=True)
		def tanh(self, a): return self.__op(lib.ggml_tanh, a)
		def tanh_inplace(self, a): return self.__op(lib.ggml_tanh_inplace, a, inplace=True)
		def elu(self, a): return self.__op(lib.ggml_elu, a)
		def elu_inplace(self, a): return self.__op(lib.ggml_elu_inplace, a, inplace=True)
		def relu(self, a): return self.__op(lib.ggml_relu, a)
		def relu_inplace(self, a): return self.__op(lib.ggml_relu_inplace, a, inplace=True)
		def gelu(self, a): return self.__op(lib.ggml_gelu, a)
		def gelu_inplace(self, a): return self.__op(lib.ggml_gelu_inplace, a, inplace=True)
		def gelu_quick(self, a): return self.__op(lib.ggml_gelu_quick, a)
		def gelu_quick_inplace(self, a): return self.__op(lib.ggml_gelu_quick_inplace, a, inplace=True)
		def silu(self, a): return self.__op(lib.ggml_silu, a)
		def silu_inplace(self, a): return self.__op(lib.ggml_silu_inplace, a, inplace=True)
		def norm(self, a): return self.__op(lib.ggml_norm, a)
		def norm_inplace(self, a): return self.__op(lib.ggml_norm_inplace, a, inplace=True)
		def cont(self, a): return self.__op(lib.ggml_cont, a)
		def cont_inplace(self, a): return self.__op(lib.ggml_cont_inplace, a, inplace=True)
		def transpose(self, a): return self.__op(lib.ggml_transpose, a)
		def soft_max(self, a): return self.__op(lib.ggml_soft_max, a)
		def soft_max_inplace(self, a): return self.__op(lib.ggml_soft_max_inplace, a, inplace=True)

		# Binary ops.
		def add(self, a, b): return self.__op(lib.ggml_add, a, b)
		def add_inplace(self, a, b): return self.__op(lib.ggml_add_inplace, a, b, inplace=True)
		def add1(self, a, b): return self.__op(lib.ggml_add1, a, b)
		def add1_inplace(self, a, b): return self.__op(lib.ggml_add1_inplace, a, b, inplace=True)
		def sub(self, a, b): return self.__op(lib.ggml_sub, a, b)
		def sub_inplace(self, a, b): return self.__op(lib.ggml_sub_inplace, a, b, inplace=True)
		def mul(self, a, b): return self.__op(lib.ggml_mul, a, b)
		def mul_inplace(self, a, b): return self.__op(lib.ggml_mul_inplace, a, b, inplace=True)
		def div(self, a, b): return self.__op(lib.ggml_div, a, b)
		def div_inplace(self, a, b): return self.__op(lib.ggml_div_inplace, a, b, inplace=True)
		def repeat(self, a, b): return self.__op(lib.ggml_repeat, a, b)
		def repeat_back(self, a, b): return self.__op(lib.ggml_repeat_back, a, b)
		def silu_back(self, a, b): return self.__op(lib.ggml_silu_back, a, b)
		def rms_norm_back(self, a, b): return self.__op(lib.ggml_rms_norm_back, a, b)
		def mul_mat(self, a, b): return self.__op(lib.ggml_mul_mat, a, b)
		def out_prod(self, a, b): return self.__op(lib.ggml_out_prod, a, b)
		def scale(self, a, b): return self.__op(lib.ggml_scale, a, b)
		def scale_inplace(self, a, b): return self.__op(lib.ggml_scale_inplace, a, b, inplace=True)
		def cpy(self, a, b): return self.__op(lib.ggml_cpy, a, b)
		def cpy_inplace(self, a, b): return self.__op(lib.ggml_cpy_inplace, a, b, inplace=True)
		def reshape(self, a, b): return self.__op(lib.ggml_reshape, a, b)
		def get_rows(self, a, b): return self.__op(lib.ggml_get_rows, a, b)
		def soft_max_back(self, a, b): return self.__op(lib.ggml_soft_max_back, a, b)
		def soft_max_back_inplace(self, a, b): return self.__op(lib.ggml_soft_max_back_inplace, a, b, inplace=True)

		# These forward to the rms_norm functions (previously wired to ggml_soft_max_back by mistake).
		def rms_norm(self, a, eps): return self.__op(lib.ggml_rms_norm, a, eps)
		def rms_norm_inplace(self, a, eps): return self.__op(lib.ggml_rms_norm_inplace, a, eps, inplace=True)

		def get_rows_back(self, a, b, c): return self.__op(lib.ggml_get_rows_back, a, b, c)
		def diag(self, a): return self.__op(lib.ggml_diag, a)
		def diag_mask_inf(self, a, n_past): return self.__op(lib.ggml_diag_mask_inf, a, n_past)
		def diag_mask_inf_inplace(self, a, n_past): return self.__op(lib.ggml_diag_mask_inf_inplace, a, n_past, inplace=True)
		def diag_mask_zero(self, a, n_past): return self.__op(lib.ggml_diag_mask_zero, a, n_past)
		def diag_mask_zero_inplace(self, a, n_past): return self.__op(lib.ggml_diag_mask_zero_inplace, a, n_past, inplace=True)

		def rope(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope, a, n_past, n_dims, mode, n_ctx)
		def rope_inplace(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope_inplace, a, n_past, n_dims, mode, n_ctx, inplace=True)
		def rope_back(self, a, n_past, n_dims, mode, n_ctx): return self.__op(lib.ggml_rope_back, a, n_past, n_dims, mode, n_ctx)

		def rope_custom(self, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale): return self.__op(lib.ggml_rope_custom, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale)
		def rope_custom_inplace(self, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale): return self.__op(lib.ggml_rope_custom_inplace, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, inplace=True)

		def alibi(self, a, n_past, n_head, bias_max): return self.__op(lib.ggml_alibi, a, n_past, n_head, bias_max)
		def clamp(self, a, min, max): return self.__op(lib.ggml_clamp, a, min, max)

		def conv_1d(self, a, b, s0, p0, d0): return self.__op(lib.ggml_conv_1d, a, b, s0, p0, d0)
		def conv_1d_ph(self, a, b, s, d): return self.__op(lib.ggml_conv_1d_ph, a, b, s, d)
		# Forwards to ggml_conv_2d (previously wired to ggml_conv_1d by mistake).
		def conv_2d(self, a, b, s0, s1, p0, p1, d0, d1): return self.__op(lib.ggml_conv_2d, a, b, s0, s1, p0, p1, d0, d1)

		def pool_1d(self, a, op, k0, s0, p0): return self.__op(lib.ggml_pool_1d, a, op, k0, s0, p0)
		def pool_2d(self, a, op, k0, k1, s0, s1, p0, p1): return self.__op(lib.ggml_pool_2d, a, op, k0, k1, s0, s1, p0, p1)

		def set(self, a, b, nb1, nb2, nb3, offset): return self.__op(lib.ggml_set, a, b, nb1, nb2, nb3, offset)
		def set_inplace(self, a, b, nb1, nb2, nb3, offset): return self.__op(lib.ggml_set_inplace, a, b, nb1, nb2, nb3, offset, inplace=True)
		def set_1d(self, a, b, offset): return self.__op(lib.ggml_set_1d, a, b, offset)
		def set_1d_inplace(self, a, b, offset): return self.__op(lib.ggml_set_1d_inplace, a, b, offset, inplace=True)

		def set_2d(self, a, b, nb1, offset): return self.__op(lib.ggml_set_2d, a, b, nb1, offset)

		def set_2d_inplace(self, a, b, nb1, offset): return self.__op(lib.ggml_set_2d_inplace, a, b, nb1, offset, inplace=True)

		def view_1d(self, a, ne0, offset): return self.__op(lib.ggml_view_1d, a, ne0, offset)
		def view_2d(self, a, ne0, ne1, nb1, offset): return self.__op(lib.ggml_view_2d, a, ne0, ne1, nb1, offset)
		def view_3d(self, a, ne0, ne1, ne2, nb1, nb2, offset): return self.__op(lib.ggml_view_3d, a, ne0, ne1, ne2, nb1, nb2, offset)
		def view_4d(self, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset): return self.__op(lib.ggml_view_4d, a, ne0, ne1, ne2, ne3, nb1, nb2, nb3, offset)

		def win_part(self, a, w): return self.__op(lib.ggml_win_part, a, w)
		def win_unpart(self, a, w0, h0, w): return self.__op(lib.ggml_win_unpart, a, w0, h0, w)

		# The *_inplace map variants pass inplace=True like every other inplace op in this class.
		def unary(self, a, op): return self.__op(lib.ggml_unary, a, op)
		def unary_inplace(self, a, op): return self.__op(lib.ggml_unary_inplace, a, op, inplace=True)
		def map_unary_f32(self, a, fun): return self.__op(lib.ggml_map_unary_f32, a, fun)
		def map_unary_inplace_f32(self, a, fun): return self.__op(lib.ggml_map_unary_inplace_f32, a, fun, inplace=True)
		def map_binary_f32(self, a, b, fun): return self.__op(lib.ggml_map_binary_f32, a, b, fun)
		def map_binary_inplace_f32(self, a, b, fun): return self.__op(lib.ggml_map_binary_inplace_f32, a, b, fun, inplace=True)
		def map_custom1_f32(self, a, fun): return self.__op(lib.ggml_map_custom1_f32, a, fun)
		def map_custom1_inplace_f32(self, a, fun): return self.__op(lib.ggml_map_custom1_inplace_f32, a, fun, inplace=True)
		def map_custom2_f32(self, a, b, fun): return self.__op(lib.ggml_map_custom2_f32, a, b, fun)
		def map_custom2_inplace_f32(self, a, b, fun): return self.__op(lib.ggml_map_custom2_inplace_f32, a, b, fun, inplace=True)
		def map_custom3_f32(self, a, b, c, fun): return self.__op(lib.ggml_map_custom3_f32, a, b, c, fun)
		def map_custom3_inplace_f32(self, a, b, c, fun): return self.__op(lib.ggml_map_custom3_inplace_f32, a, b, c, fun, inplace=True)

		def cross_entropy_loss(self, a, b): return self.__op(lib.ggml_cross_entropy_loss, a, b)
		def cross_entropy_loss_back(self, a, b, c): return self.__op(lib.ggml_cross_entropy_loss_back, a, b, c)

	class GgmlComputationGraph:
		"""Pairs a ggml computation-graph handle with the context wrapper it was built from."""

		def __init__(self, ctx, ptr):
			"""Remember the context wrapper `ctx` and the graph handle `ptr`."""
			self.ctx = ctx
			self.ptr = ptr

		def build_forward_expand(self, t) -> None:
			"""Pass this graph and tensor `t` to `ggml_build_forward_expand`."""
			lib.ggml_build_forward_expand(self.ptr, t.ptr)

		def build_backward(self, keep: bool):
			"""Wrap the result of `ggml_build_backward` for this graph in a new GgmlComputationGraph."""
			return GgmlComputationGraph(self.ctx, lib.ggml_build_backward(self.ctx.ptr, self.ptr, keep))

		def compute(self, n_threads) -> None:
			"""Run `ggml_graph_compute_with_ctx` on this graph using `n_threads`."""
			lib.ggml_graph_compute_with_ctx(self.ctx.ptr, self.ptr, n_threads)
No results found