ityonemo · November 18, 2023 20:07
diff --git a/nx_ext.ex b/nx_ext.ex
 defmodule NxExt do
  @moduledoc """
  Numerical helpers built on top of `Nx`, currently limited to packing tensors
  into a 4-bit (two values per byte) representation.
  """

  import Nx.Defn

  # Column weights used to pack a pair of nybbles into one byte:
  # the first value of each pair is multiplied by 1 (low nybble) and the
  # second by 16 = 2^4 (high nybble).
  @bitshift Nx.tensor([[1], [16]], type: {:u, 8})

  @doc """
  Packs a vector of numbers into 4-bit signed values, two per byte.

  Takes an N-vector of floats (arbitrarily typed) and converts it into 4-bit gptq,
  which has a range of `-8..7`. Values outside that range are clipped. The result
  is compacted into two values per byte, with the lower indexed value in the less
  significant nybble.

  The number of elements must be even, because the values are grouped into pairs
  with `Nx.reshape(tensor, {:auto, 2})`; an odd element count makes that reshape fail.

  TODO: check that the sub-endianness is correct.

  ## Examples

      iex> [-6.0, 1.0, 7.0, -3.0]
      ...> |> Nx.tensor(type: {:f, 16})
      ...> |> NxExt.to_gptq()
      ...> |> Nx.to_binary()
      <<1::signed-size(4), -6::signed-size(4), -3::signed-size(4), 7::signed-size(4)>>

  """
  defn to_gptq(tensor) do
    tensor
    |> Nx.clip(-8, 7)
    |> Nx.as_type({:s, 8})
    # Reinterpret the two's-complement byte as unsigned so that masking with 15
    # keeps exactly the low nybble (e.g. -6 -> 0xFA -> 0x0A).
    |> Nx.bitcast({:u, 8})
    |> Nx.bitwise_and(15)
    # One row per output byte: [low_nybble, high_nybble].
    |> Nx.reshape({:auto, 2})
    # low * 1 + high * 16 assembles the packed byte.
    |> Nx.dot(@bitshift)
    |> Nx.reshape({:auto})
  end
 end
	defmodule NxExt do
	  @moduledoc """
	  Numerical helpers built on top of `Nx`, currently limited to packing tensors
	  into a 4-bit (two values per byte) representation.
	  """

	  import Nx.Defn

	  # Column weights used to pack a pair of nybbles into one byte:
	  # the first value of each pair is multiplied by 1 (low nybble) and the
	  # second by 16 = 2^4 (high nybble).
	  @bitshift Nx.tensor([[1], [16]], type: {:u, 8})

	  @doc """
	  Packs a vector of numbers into 4-bit signed values, two per byte.

	  Takes an N-vector of floats (arbitrarily typed) and converts it into 4-bit gptq,
	  which has a range of `-8..7`. Values outside that range are clipped. The result
	  is compacted into two values per byte, with the lower indexed value in the less
	  significant nybble.

	  The number of elements must be even, because the values are grouped into pairs
	  with `Nx.reshape(tensor, {:auto, 2})`; an odd element count makes that reshape fail.

	  TODO: check that the sub-endianness is correct.

	  ## Examples

	      iex> [-6.0, 1.0, 7.0, -3.0]
	      ...> |> Nx.tensor(type: {:f, 16})
	      ...> |> NxExt.to_gptq()
	      ...> |> Nx.to_binary()
	      <<1::signed-size(4), -6::signed-size(4), -3::signed-size(4), 7::signed-size(4)>>

	  """
	  defn to_gptq(tensor) do
	    tensor
	    |> Nx.clip(-8, 7)
	    |> Nx.as_type({:s, 8})
	    # Reinterpret the two's-complement byte as unsigned so that masking with 15
	    # keeps exactly the low nybble (e.g. -6 -> 0xFA -> 0x0A).
	    |> Nx.bitcast({:u, 8})
	    |> Nx.bitwise_and(15)
	    # One row per output byte: [low_nybble, high_nybble].
	    |> Nx.reshape({:auto, 2})
	    # low * 1 + high * 16 assembles the packed byte.
	    |> Nx.dot(@bitshift)
	    |> Nx.reshape({:auto})
	  end
	end
No results found