Skip to content

Instantly share code, notes, and snippets.

@mratsim
Created May 3, 2018 08:18
Show Gist options
  • Select an option

  • Save mratsim/6c49a521f7814267827974effee36288 to your computer and use it in GitHub Desktop.

Select an option

Save mratsim/6c49a521f7814267827974effee36288 to your computer and use it in GitHub Desktop.
Copy-on-Write Tensors
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ./backend/metadataArray,
./backend/storage,
nimblas
export nimblas.OrderType
type
  Backend* {.deprecated.} = enum
    ## ``Backend`` for tensor computation and memory allocation.
    ##
    ## Only deprecated procs from v0.1.3 still use this.
    Cpu,
    Cuda

  Tensor*[T] = object
    ## Tensor data structure stored on Cpu.
    ## - ``shape``: dimensions of the tensor.
    ## - ``strides``: number of items to skip to reach the next item
    ##   along each dimension.
    ## - ``offset``: offset of the first item of the tensor. It can be
    ##   negative, in particular for slices.
    ## - ``storage``: an "opaque" CpuStorage that holds the actual data
    ##   plus a reference counter. Data is reachable via the ``.data``
    ##   accessor.
    ## Fields are public so external libraries can construct a Tensor
    ## directly.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    storage*: CpuStorage[T]

  CudaTensor*[T: SomeReal] = object
    ## Tensor data structure stored on an Nvidia GPU (Cuda).
    ## - ``shape``: dimensions of the tensor.
    ## - ``strides``: number of items to skip to reach the next item
    ##   along each dimension.
    ## - ``offset``: offset of the first item of the tensor. It can be
    ##   negative, in particular for slices.
    ## - ``data``: a cuda seq-like object pointing to the data location.
    ## Note: currently ``=`` assignment for CudaTensor does NOT copy:
    ## both CudaTensors share a view of the same data location, so
    ## mutating one mutates the other.
    ##
    ## In the future CudaTensor will leverage the Nim compiler to copy
    ## automatically when a memory location would be mutated while
    ## referenced more than once.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    data*: CudaSeq[T] # Cuda device memory is garbage-collected automatically

  AnyTensor*[T] = Tensor[T] or CudaTensor[T]
# #############
# Copy-on-write
# #############
proc dataFrom*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
  ## Safely swap the tensor's old storage for a fresh reference
  ## holding `s`. Cleanup of the old storage is left to the Nim
  ## garbage collector once it becomes unused.
  ##
  ## Note: this is only race-free if the storage is touched by the
  ## main thread alone. Refcount increments happen on assignment
  ## only; slices do not increment.
  var fresh: CpuStorage[T]
  new fresh
  initRef fresh
  fresh.Fdata = s
  swap(t.storage, fresh)
  decRef fresh # after the swap, `fresh` holds the *old* storage
proc detach*[T](t: var Tensor[T]) {.inline, noSideEffect.}=
  ## Give `t` its own private copy of the storage if more than one
  ## tensor already refers to it; a no-op when `t` is the sole owner.
  if not t.storage.isUniqueRef:
    dataFrom(t, t.storage.Fdata)
proc `=`*[T](dst: var Tensor[T]; src: Tensor[T]) {.inline, noSideEffect.}=
  ## Assignment overload that tracks the storage reference count.
  ## Only `let`, `var` and assignment to a var trigger refcounting;
  ## `result = expr` and parameter passing do not.
  ## NOTE(review): the storage previously held by `dst` is not
  ## decRef'd here — presumably `=destroy` (commented out in this
  ## file) is meant to balance the count; confirm before relying on
  ## exact refcount values.
  incRef src.storage
  system.`=`(dst, src)
## Use --newruntime with Arraymancer
# {.experimental.}
# proc `=destroy`*[T](c: Tensor[T]) {.inline, noSideEffect.}=
# # Automatically called on tensor destruction. It will decrease
# # the reference count on the shared storage
# decRef c.storage
# ###############
# Field accessors
# ###############
proc data*[T](t: Tensor[T]): seq[T] {.inline,noInit.} =
  ## Get the tensor's raw data (intended for library writers).
  ## `shallowCopy` shares the underlying seq buffer instead of
  ## duplicating it.
  shallowCopy(result, t.storage.Fdata)
proc data*[T](t: var Tensor[T]): var seq[T] {.inline,noInit.} =
  ## Get mutable access to the tensor's raw data (intended for
  ## library writers). `shallowCopy` shares the underlying seq
  ## buffer instead of duplicating it.
  shallowCopy(result, t.storage.Fdata)
proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
  ## Set the tensor's raw data (intended for library writers).
  ## Delegates to `dataFrom`, which swaps in a fresh storage.
  dataFrom(t, s)
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
type
  CpuStorage*[T] = ref object
    ## An opaque refcounted storage implementing copy-on-write.
    ## Data is shared between multiple tensors as long as none
    ## mutates it. When a mutation is needed, it happens in-place if
    ## the tensor is the only one referring to this storage;
    ## otherwise the mutator copies the data and refers to its own
    ## copy.
    ##
    ## Note: do not use the exposed ``Fdata``; it is public only for
    ## the transition period.
    Frefcount: int
    Fdata*: seq[T] # should only be visible internally

  CudaSeq*[T: SomeReal] = object
    ## Seq-like structure on the Cuda backend.
    ##
    ## The Nim garbage collector will automatically ask cuda to clear
    ## GPU memory once ``data`` becomes unused.
    ##
    ## Warning ⚠: this will be revamped, renamed and made private
    ## before 0.3.0 for copy-on-write semantics on Cuda.
    len*: int
    data*: ref[ptr UncheckedArray[T]]
# Note, non-CPU storage are also forward-declared here so that AnyTensor
# is always Tensor + CudaTensor + ...
# The implementation requires cudaMalloc / cudaFree and cannot be done in this file
# as it is also imported on for non-Cuda targets.
proc incRef*(store: CpuStorage) {.inline.} =
  ## Increment the storage's reference count.
  ## A nil store is tolerated: a tensor held in a wrapper such as the
  ## autograd Variable may not have its storage initialized yet.
  if not store.isNil:
    inc store.Frefcount
proc decRef*(store: CpuStorage) {.inline.} =
  ## Decrement the storage's reference count.
  ## A nil store is tolerated: uninitialized storage may be swapped
  ## with initialized storage and destroyed afterwards.
  if not store.isNil:
    dec store.Frefcount
proc initRef*(store: CpuStorage) {.inline.} =
  ## Reset the reference count to 0; the first assignment will
  ## bring it to 1.
  store.Frefcount = 0
proc isUniqueRef*(store: CpuStorage): bool {.inline.} =
  ## True when at most one tensor refers to `store`.
  ## Results from procs may carry a refcount of 0 for a fresh
  ## storage, hence the `<= 1` comparison.
  ## A nil store (uninitialized storage, as tolerated by `incRef`
  ## and `decRef`) is trivially unique; without this guard, calling
  ## `detach` on such a tensor would dereference nil.
  store.isNil or store.Frefcount <= 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment