Skip to content

Instantly share code, notes, and snippets.

@Vindaar
Created June 19, 2021 23:40
Show Gist options
  • Select an option

  • Save Vindaar/ddbf22ea95f3a3e0a70fd7728a22bf19 to your computer and use it in GitHub Desktop.

Select an option

Save Vindaar/ddbf22ea95f3a3e0a70fd7728a22bf19 to your computer and use it in GitHub Desktop.
Combine `seq` and `Tensor` in a concept
import arraymancer
import arraymancer/laser/strided_iteration/foreach
import sequtils
import macros
import math
func len[T](t: Tensor[T]): int = t.size
iterator items[T](t: Tensor[T]): T =
doAssert t.rank == 1
when T is KnownSupportsCopyMem:
forEach x in t:
yield x
else:
for i in 0 ..< t.len:
yield t[i]
iterator pairs[T](t: Tensor[T]): (int, T) =
doAssert t.rank == 1
when T is KnownSupportsCopyMem:
var idx = 0
forEach x in t:
yield (idx, x)
inc idx
else:
for i in 0 ..< t.len:
yield (i, t[i])
type
VectorLike[T] {.explain.} = concept x
x.len is int
x[0] is T
for el in items(x):
el is T
for i, el in pairs(x):
i is int
el is T
proc traverseTree(input: NimNode): NimNode =
# iterate children
for i in 0 ..< input.len:
case input[i].kind
of nnkSym:
# if we found a symbol, take it
result = input[i]
of nnkBracketExpr:
# has more children, traverse
result = traverseTree(input[i])
else:
error("Unsupported type: " & $input.kind)
macro getSubType*(TT: typed): untyped =
## macro to get the subtype of a nested type by iterating
## the AST
# traverse the AST
let res = traverseTree(TT.getTypeInst)
# assign symbol to result
result = quote do:
`res`
echo result.repr
proc max[T](s: VectorLike[T]): T =
if s.len == 0: return T(0)
result = s[0]
for x in s:
result = max(x, result)
proc min[T](s: VectorLike[T]): T =
if s.len == 0: return T(0)
result = s[0]
for x in s:
result = min(x, result)
## newLike needs this weird signature, because otherwise we get a `newLike` leads to
## an ambiguous overload error
proc newLike[T: Tensor](dtype: typedesc[T], size: int): T =
result = newTensor[getSubType(T)](size)
proc newLike[T: seq](dtype: typedesc[T], size: int): T =
result = newSeq[getSubType(T)](size)
proc cumProd*[T](x: VectorLike[T]): VectorLike[T] =
## cumulative product for each element of ``x``
##
## ``cumProd(@[1,2,3,4])`` produces ``@[1,2,6,24]``
result = newLike(type(x), x.len)
var cp = T(1)
for i in 0..<x.len:
cp = cp * x[i]
result[i] = cp
proc cumSum*[T](x: VectorLike[T]): VectorLike[T] =
## cumulative sum for each element of ``x``
##
## ``cumSum(@[1,2,3,4])`` produces ``@[1,3,6,10]``
result = newLike(type(x), x.len)
var cp = T(0)
for i in 0..<x.len:
cp = cp + x[i]
result[i] = cp
proc cumCount*[T: SomeInteger](x: VectorLike[T], v: T): VectorLike[T] =
## cumulative count of a number in ``x``
##
## the cumulative count of ``3`` for ``@[1,3,3,2,3]`` produces ``@[0,1,2,2,3]``
result = newLike(type(x), x.len)
var cp = T(0)
for i in 0..<x.len:
if x[i] == v: inc(cp)
result[i] = cp
proc cumPowSum*[T](x: VectorLike[T], p: T): VectorLike[float] =
## cumulative sum of ``pow(x[], p)`` for each element
## The resultant sequence is of type ``float``
##
## ``cumPowSum([1,2,3,4],2)`` produces ``@[1, 5, 14, 30]``
result = newLike[VectorLike[float]](x.len)
var cps = 0.0
for i in 0..<x.len:
cps += pow(x[i].toFloat, p.toFloat)
result[i] = cps
# ----------- single-result seq math -----------------------
proc product*[T](x: VectorLike[T]): T =
## sum each element of ``x``
## returning a single value
##
## ``product(@[1,2,3,4])`` produces ``24`` (= 1 * 2 * 3 * 4)
var cp = T(1)
for i in 0..<x.len: cp *= x[i]
result = cp
proc sumSquares*[T](x: VectorLike[T]): T =
## sum of ``x[i] * x[i]`` for each element
## returning a single value
##
## ``sumSquares(@[1,2,3,4])``
## produces ``30`` (= 1*1 + 2*2 + 3*3 + 4*4)
var ps = T(0)
for i in items(x):
ps += i*i
result = ps
proc powSum*[T](x: VectorLike[T], p: T): float =
## sum of ``pow(x[], p)`` of each element
## returning a single value
##
## ``powSum(@[1,2], 3)``
## produces ``9`` (= pow(1,3) + pow(2,3))
var ps = 0.0
for i in 0..<x.len: ps += pow(x[i].toFloat, p.toFloat)
result = ps
proc max*[T](x: VectorLike[T], m: T): VectorLike[T] =
## Maximum of each element of ``x`` compared to the value ``m``
## as a sequence
##
## ``max(@[-1,-2,3,4], 0)`` produces ``@[0,0,3,4]``
if x.len == 0: result = @[m]
else:
result = newLike(type(x), x.len)
for i in 0..<x.len:
result[i] = max(m, x[i])
proc argmax*[T](x: VectorLike[T]): int =
let m = max(x)
for i, el in x:
if el == m:
return i
proc argmin*[T](x: VectorLike[T]): int =
let m = min(x)
for i, el in x:
if el == m:
return i
proc max*[T](x, y: VectorLike[T]): VectorLike[T] =
## Note: previous definition using an VectorLike as the type
## does not work anymore, since it clashes with with
## system.max[T](x, y: T) now
## Maximum value of each element of ``x`` and
## ``y`` respectively, as a sequence.
##
## ``max(@[-1,-2,3,4], @[4,3,2,1])`` produces ``@[4,3,3,4]``
if x.len == 0: result = @[]
else:
result = newLike(type(x), x.len)
let xLen = max(x.len, y.len)
let nlen = min(x.len, y.len)
for i in 0..<xLen:
if i < nlen: result[i] = max(x[i], y[i])
elif i < x.len: result[i] = x[i]
else: result[i] = y[i]
proc min*[T](x: VectorLike[T], m: T): VectorLike[T] =
## Minimum of each element of ``x`` compared to the value ``m``
## as a sequence
##
## ``min(@[1,2,30,40], 10)`` produces ``@[1,2,10,10]``
if x.len == 0: result = @[m]
else:
result = newLike(type(x), x.len)
for i in 0..<x.len:
result[i] = min(m, x[i])
proc min*[T](x, y: VectorLike[T]): VectorLike[T] =
## Note: previous definition using an VectorLike as the type
## does not work anymore, since it clashes with with
## system.min[T](x, y: T) now
## Minimum value of each element of ``x`` and
## ``y`` respectively, as a sequence.
##
## ``min(@[-1,-2,3,4], @[4,3,2,1])`` produces ``@[-1,-2,2,1]``
if x.len == 0: result = newLike(type(x), x.len)
else:
result = newLike(type(x), x.len)
let xLen = max(x.len, y.len)
let nlen = min(x.len, y.len)
for i in 0..<xLen:
if i < nlen: result[i] = min(x[i], y[i])
elif i < x.len: result[i] = x[i]
else: result[i] = y[i]
proc bincount*(x: VectorLike[int], minLength: int): VectorLike[int] =
## Count of the number of occurrences of each value in
## sequence ``x`` of non-negative ints.
##
## The result is a sequence of length ``max(x)+1``
## or ``minLength`` if it is larger than ``max(x)``.
## Covering every integer from ``0`` to
## ``max(max(x), minLength)``
doAssert min(x) >= 0, "Negative values are not allowed in bincount!"
let size = max(max(x) + 1, minLength)
result = newLike(type(x), size)
for idx in x:
inc(result[idx])
proc bincount*[T](x: VectorLike[int], minLength: int,
weights: VectorLike[T]): VectorLike[T] =
## version of `bincount` taking into account weights. The resulting dtype is
## the type of the given weights.
doAssert min(x) >= 0, "Negative values are not allowed in bincount!"
let size = max(max(x) + 1, minLength)
result = newLike(type(x), size)
doAssert weights.len == x.len or weights.len == 0
if weights.len > 0:
for wIdx, rIdx in x:
result[rIdx] += weights[wIdx]
else:
for wIdx, rIdx in x:
result[rIdx] += 1
proc digitize*[T](x: VectorLike[T], bins: VectorLike[T], right = false): VectorLike[int] =
## Return the indices of the ``bins`` to which each value of ``x`` belongs.
##
## Each returned index for *increasing ``bins``* is ``bins[i-1]<=x< bins[i]``
## and if ``right`` is true, then returns ``bins[i-1]<x<=bins[i]``
##
## Each returned index for *decreasing ``bins``* is ``bins[i-1] > x >= bins[i]``
## and if ``right`` is true, then returns ``bins[i-1] >= x > bins[i]``
##
## Note: if ``x`` has values outside of ``bins``, then ``digitize`` returns an index
## outside the range of ``bins`` (``0`` or ``bins.len``)
doAssert(bins.len > 1,"digitize() must have two or more bin values")
result = newLike[VectorLike[int]](x.len)
# default of increasing bin values
for i in 0..<x.len:
result[i] = bins.high + 1
if bins[1] > bins[0]:
for k in 0..<bins.len:
if x[i] < bins[k] and not right:
result[i] = k
break
elif x[i] <= bins[k] and right:
result[i] = k
break
#decreasing bin values
else:
for k in 0..<bins.len:
if x[i] >= bins[k] and not right:
result[i] = k
break
elif x[i] > bins[k] and right:
result[i] = k
break
func areBinsUniform(bin_edges: VectorLike[float]): bool =
## simple check whether bins are uniform
if bin_edges.len in {0, 1, 2}: return true
else:
let binWidth = bin_edges[1] - bin_edges[0]
for i in 0 ..< bin_edges.high:
if abs((bin_edges[i+1] - bin_edges[i]) - binWidth) > 1e-8:
return false
proc rebin*[T](bins: VectorLike[T], by: int): VectorLike[T] =
## Given a set of `bins`, `rebin` combines each consecutive `by` bins
## into a single bin, summing the bin contents. The resulting seq thus
## has a length of `bins.lev div by`.
## TODO: add tests for this!
result = newLike(type(bins), bins.len div by)
var tmp = T(0)
var j = 0
for i in 0 .. bins.high:
if i > 0 and i mod by == 0:
result[j] = tmp
tmp = T(0)
inc j
tmp += bins[i]
proc fillHist*[T](bins: VectorLike[T], data: VectorLike[T],
upperRangeBinRight = true): VectorLike[int] =
## Given a set of `bins` (which are interpreted to correspond to the left
## bin edge!) and a sequence of `data`, it will fill a histogram according
## to the `bins`. That is for each element `d` of `data` the correct bin
## `b` is checked and this bin is increased by `1`.
## If `upperRangeBinRight` is true, the last bin entry is considered the right
## edge of the last bin. All values larger than it will be dropped. Otherwise
## the last bin includes everything larger than bins[^1].
## TODO: write tests for this!
var mbins = bins
if not upperRangeBinRight:
# add `inf` bin to `mbins` as upper range
mbins.add Inf
result = newLike[VectorLike[int]](mbins.len - 1)
let dataSorted = data.sorted
var
curIdx = 0
curBin = mbins[curIdx]
nextBin = mbins[curIdx + 1]
idx = 0
d: T
while idx < dataSorted.len:
d = dataSorted[idx]
if d >= curBin and d < nextBin:
inc result[curIdx]
inc idx
elif d < curBin:
# outside of range
inc idx
elif d >= nextBin:
inc curIdx
if curIdx + 1 == mbins.len: break
curBin = mbins[curIdx]
nextBin = mbins[curIdx + 1]
else:
doAssert false, "should never happen!"
proc histogramImpl*[T; U: float | int](
x: VectorLike[T],
dtype: typedesc[U],
bins: (int | string | VectorLike[T]) = 10,
range: tuple[mn, mx: float] = (0.0, 0.0),
weights: VectorLike[dtype] = @[],
density: static bool = false,
upperRangeBinRight = true): (VectorLike[dtype], VectorLike[float]) =
## Compute the histogram of a set of data. Adapted from Numpy's code.
## If `bins` is an integer, the required bin edges will be calculated in the
## range `range`. If no `range` is given, the `(min, max)` of `x` will be taken.
## If `bins` is a `seq[T]`, the bin edges are taken as is. Note however, that
## the bin edges must include both the left most, as well as the right most
## bin edge. Thus the length must be `numBins + 1` relative to the desired number
## of bins of the resulting histogram!
## The behavior of range can be set via `upperRangeBinRight`. It controls the
## interpretation of the upper range. If it is `true` the upper range is considered
## the right edge of the last bin. If `false` we understand it as the left bin edge
## of the last bin. This of course only has an effect if `bins` is given as an
## integer!
## Returns a tuple of
## - histogram: seq[dtype] = the resulting histogram binned via `bin_edges`. `dtype`
## is `int` for unweighted histograms and `float` for float weighted histograms
## - bin_edges: seq[T] = the bin edges used to create the histogram
if x.len == 0:
raise newException(ValueError, "Cannot compute histogram of empty array!")
if weights.len > 0 and weights.len != x.len:
raise newException(ValueError, "The number of weights needs to be equal to the number of elements in the input seq!")
var uniformBins = true # are bins uniform?
# parse the range parameter
var (mn, mx) = range
if anyIt(@[mn, mx], classify(it) == fcNaN):
raise newException(ValueError, "One of the input ranges is NaN!")
elif anyIt(@[mn, mx], classify(it) in {fcInf, fcNegInf}):
raise newException(ValueError, "One of the input ranges is Inf!")
if mn == 0.0 and mx == 0.0:
mn = x.min.float
mx = x.max.float
if mn > mx:
raise newException(ValueError, "Max range must be larger than min range!")
elif mn == mx:
mn -= 0.5
mx += 0.5
# from here on mn, mx unchanged
when type(bins) is string:
# to be implemented to guess the number of bins from different algorithms. Looking at the Numpy code
# for the implementations it's only a few LoC
raise newException(NotImplementedError, "Automatic choice of number of bins based on different " &
"algorithms not implemented yet.")
elif type(bins) is VectorLike[T]:
let bin_edges = bins.mapIt(it.float)
let numBins = bin_edges.len - 1
# possibly truncate the input range (e.g. bin edges smaller range than data)
mn = min(bin_edges[0], mn)
mx = min(bin_edges[^1], mx)
# check if bins really uniform
uniformBins = areBinsUniform(bin_edges)
elif type(bins) is int:
if bins == 0:
raise newException(ValueError, "0 bins is not a valid number of bins!")
let numBins = bins
var bin_edges: VectorLike[float]
if upperRangeBinRight:
bin_edges = linspace(mn, mx, numBins + 1, endpoint = true)
else:
let binWidth = (mx - mn) / (numBins.float - 1)
bin_edges = linspace(mn, mx + binWidth, numBins + 1, endpoint = true)
when T isnot float:
var x_data = mapIt(@x, it.float)
# redefine locally as floats
var weights = weights.mapIt(it.float)
else:
var x_data = @x
# weights already float too, redefine mutable
var weights = @weights
if uniformBins:
# normalization
let norm = numBins.float / (mx - mn)
# make sure input array is float and filter to all elements inside valid range
# x_keep is used to calculate the indices whereas x_data is used for hist calc
let idxData = toSeq(0 ..< x_data.len)
let idxKeep = idxData.filterIt(x_data[it] >= mn and x_data[it] <= mx)
var x_keep = idxKeep.mapIt(x_data[it])
x_data = x_keep
# apply to weights if any
if weights.len > 0:
weights = idxKeep.mapIt(weights[it])
# remove potential offset
for x in mitems(x_keep):
x = (x - mn) * norm
# compute bin indices
var indices = mapIt(x_keep, it.int)
# for indices which are equal to the max value, subtract 1
indices.apply do (it: int) -> int:
if it == numBins:
it - 1
else:
it
# since index computation not guaranteed to give exactly consistent results within
# ~1 ULP of the bin edges, decrement some indices
let decrement = x_data < bin_edges[indices]
for i in 0 .. indices.high:
if decrement[i] == true:
indices[i] -= 1
if x_data[i] >= bin_edges[indices[i] + 1] and indices[i] != (numBins - 1):
indices[i] += 1
# currently weights and min length not implemented for bincount
when dtype is int:
result = (bincount(indices, minLength = numBins), bin_edges)
else:
result = (bincount(indices, minLength = numBins,
weights = weights),
bin_edges)
else:
# bins are not uniform
doAssert weights.len == 0, "Weigths are currently unsupported for histograms with " &
"unequal bin widths!"
let hist = fillHist(bin_edges, x_data,
upperRangeBinRight = upperRangeBinRight)
when dtype is float:
result = (hist.mapIt(it.float),
bin_edges)
else:
result = (hist,
bin_edges)
when dtype is float:
if density:
# normalize the result
let tot = result[0].sum
for i in 0 ..< bin_edges.high:
result[0][i] = result[0][i] / (bin_edges[i+1] - bin_edges[i]) / tot
else:
if density:
raise newException(ValueError, "Cannot satisfy `density == true` with a " &
"dtype of `int`!")
proc histogram*[T](
x: VectorLike[T],
bins: (int | string | VectorLike[T]) = 10,
range: tuple[mn, mx: float] = (0.0, 0.0),
density: static bool = false,
upperRangeBinRight = true,
dtype: typedesc = int): (VectorLike[dtype], VectorLike[float]) =
## Computes the histogram of `x` given `bins` in the desired
## `range`.
## Right most bin edge by default is assumed to be right most
## bin edge, can be changed via `upperRangeBinRight`. If weights
## are desired, see the `histogram` overload below. For a more
## detailed docstring see `histogramImpl`.
##
## `density` is a static argument, because we a density normalized
## histogram returns float values, whereas a normal histogram is
## a sequence of ints.
when density:
# when density is to be returned, result must be float
type dtype = float
else:
type dtype = int
result = histogramImpl(x = x,
dtype = dtype,
bins = bins,
range = range,
density = density,
upperRangeBinRight = upperRangeBinRight)
proc histogram*[T; U: float | int](
x: VectorLike[T],
weights: VectorLike[U],
bins: (int | string | VectorLike[T]) = 10,
range: tuple[mn, mx: float] = (0.0, 0.0),
density: static bool = false,
upperRangeBinRight = true): (VectorLike[U], VectorLike[float]) =
## Computes the histogram of `x` given `bins` in the desired
## `range`.
## Right most bin edge by default is assumed to be right most
## bin edge, can be changed via `upperRangeBinRight`. If weights
## are not desired, see the `histogram` overload above. For a more
## detailed docstring see `histogramImpl`.
##
## `density` is a static argument, because we a density normalized
## histogram returns float values, whereas a normal histogram is
## a sequence of ints.
when density:
type dtype = float
else:
type dtype = U
result = histogramImpl(x = x,
dtype = dtype,
bins = bins,
range = range,
weights = weights,
density = density,
upperRangeBinRight = upperRangeBinRight)
let x = @[1, 2, 3, 4, 5]
let y = linspace(0.0, 10.0, 20)
let yInt = arange(1, 6, 1)
echo max(x)
echo max(y)
echo cumSum(x)
echo cumSum(y)
echo cumProd(x)
echo cumProd(y)
echo cumCount(x, 1)
echo cumCount(yInt, 1)
echo product(x)
echo product(yInt)
echo sumSquares(x)
echo sumSquares(yInt)
echo argmax(x)
echo argmax(yInt)
echo argmin(x)
echo argmin(yInt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment