Copy-on-Write Tensors
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import ./backend/metadataArray,
       ./backend/storage,
       nimblas

export nimblas.OrderType

type
  Backend* {.deprecated.} = enum
    ## ``Backend`` for tensor computation and memory allocation.
    ##
    ## Only deprecated procs from v0.1.3 use this for the moment.
    Cpu,
    Cuda

  Tensor*[T] = object
    ## Tensor data structure stored on Cpu.
    ## - ``shape``: Dimensions of the tensor.
    ## - ``strides``: Number of items to skip to get the next item along a dimension.
    ## - ``offset``: Offset to get the first item of the Tensor. Note: the offset can be negative, in particular for slices.
    ## - ``storage``: An "opaque" CpuStorage datatype that holds the actual data plus a reference counter.
    ##   Data is accessible via the ``.data`` accessor.
    ##
    ## Fields are public so that external libraries can easily construct a Tensor.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    storage*: CpuStorage[T]

  CudaTensor*[T: SomeReal] = object
    ## Tensor data structure stored on an Nvidia GPU (Cuda).
    ## - ``shape``: Dimensions of the tensor.
    ## - ``strides``: Number of items to skip to get the next item along a dimension.
    ## - ``offset``: Offset to get the first item of the Tensor. Note: the offset can be negative, in particular for slices.
    ## - ``data``: A Cuda seq-like object that points to the data location.
    ##
    ## Note: currently ``=`` assignment for CudaTensor does not copy. Both CudaTensors will share a view of the same data location.
    ## Modifying the data in one will modify the data in the other.
    ##
    ## In the future CudaTensor will leverage the Nim compiler to automatically
    ## copy if a memory location would be used more than once in a mutable manner.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    data*: CudaSeq[T] # Memory on the Cuda device will be automatically garbage-collected

  AnyTensor*[T] = Tensor[T] or CudaTensor[T]
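# Illustration (not part of the original gist): for a contiguous, row-major
# 2x3 Tensor[float32]
#   shape   = [2, 3]
#   strides = [3, 1]
#   offset  = 0
# and the element at logical index [i, j] lives at
# storage.Fdata[offset + i*strides[0] + j*strides[1]].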
# #############
# Copy-on-write
# #############

proc dataFrom*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.} =
  # Safely replace the old storage with a new reference.
  # It relies on the Nim garbage collector for cleanup when needed.
  #
  # Note: this only works without races if only the main thread can access this.
  # Also, the refcount is only incremented on assignment; slices do not increment it.
  var tmp_store: CpuStorage[T]
  new tmp_store
  initRef tmp_store
  tmp_store.Fdata = s
  swap(t.storage, tmp_store)
  decRef tmp_store # tmp_store now holds the old storage, which loses one reference

proc detach*[T](t: var Tensor[T]) {.inline, noSideEffect.} =
  # Create a new storage copy if more than
  # one tensor already refers to the storage.
  if t.storage.isUniqueRef:
    return
  dataFrom(t, t.storage.Fdata)

proc `=`*[T](dst: var Tensor[T]; src: Tensor[T]) {.inline, noSideEffect.} =
  # Assignment overload to track the reference count.
  # Note: only `let`, `var` and assignment to a var trigger refcounting;
  # `result = expression` or function parameter passing will not.
  incRef src.storage
  system.`=`(dst, src)

## Use --newruntime with Arraymancer
# {.experimental.}
# proc `=destroy`*[T](c: Tensor[T]) {.inline, noSideEffect.} =
#   # Automatically called on tensor destruction. It will decrease
#   # the reference count on the shared storage.
#   decRef c.storage

# ###############
# Field accessors
# ###############

proc data*[T](t: Tensor[T]): seq[T] {.inline, noInit.} =
  # Get the tensor raw data.
  # This is intended for library writers.
  shallowCopy(result, t.storage.Fdata)

proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noInit.} =
  # Get the mutable tensor raw data.
  # This is intended for library writers.
  shallowCopy(result, t.storage.Fdata)

proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.} =
  # Set the tensor raw data.
  # This is intended for library writers.
  dataFrom[T](t, s)
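To make the copy-on-write flow concrete, here is a minimal, self-contained sketch of the same mechanism. MiniStorage, MiniTensor, assign and detachCopy are hypothetical stand-in names, not part of the gist: the real code tracks the count through the `=` overload and the CpuStorage procs above, while this sketch uses explicit procs so it stays runnable on any Nim version.

type
  MiniStorage[T] = ref object
    refcount: int
    data: seq[T]

  MiniTensor[T] = object
    storage: MiniStorage[T]

proc newMiniTensor[T](data: seq[T]): MiniTensor[T] =
  # A fresh tensor owns its storage alone.
  result.storage = MiniStorage[T](refcount: 1, data: data)

proc assign[T](dst: var MiniTensor[T], src: MiniTensor[T]) =
  # Counterpart of the `=` overload above: share the storage, bump the count.
  inc src.storage.refcount
  if not dst.storage.isNil:
    dec dst.storage.refcount
  dst.storage = src.storage

proc detachCopy[T](t: var MiniTensor[T]) =
  # Counterpart of `detach` above: copy the data only if the storage is shared.
  if t.storage.refcount <= 1:
    return
  dec t.storage.refcount
  t.storage = MiniStorage[T](refcount: 1, data: t.storage.data)

when isMainModule:
  var a = newMiniTensor(@[1, 2, 3])
  var b: MiniTensor[int]
  b.assign(a)                         # b now shares a's storage
  doAssert a.storage.refcount == 2
  b.detachCopy()                      # copy before mutating through b
  b.storage.data[0] = 42
  doAssert a.storage.data[0] == 1     # a's data is untouched
  doAssert a.storage.refcount == 1

The refcounted storage module itself, imported above as ./backend/storage, follows.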
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

type
  CpuStorage*[T] = ref object
    ## This implements an opaque refcounted storage for copy-on-write.
    ## Data is shared between multiple tensors as long as none of them modifies it.
    ## If a mutation is needed, it is done in-place if the tensor is the only one referring to this storage.
    ## Otherwise the mutator copies the data and refers to its own copy.
    ##
    ## Note: do not use the exposed Fdata; it is only exposed for the transition period.
    Frefcount: int
    Fdata*: seq[T] # This should only be visible internally

  CudaSeq*[T: SomeReal] = object
    ## Seq-like structure on the Cuda backend.
    ##
    ## The Nim garbage collector will automatically ask Cuda to clear GPU memory if ``data`` becomes unused.
    ##
    ## Warning ⚠: This will be revamped, renamed and made private before 0.3.0 for copy-on-write semantics on Cuda.
    len*: int
    data*: ref[ptr UncheckedArray[T]]

# Note: non-CPU storages are also forward-declared here so that AnyTensor
# is always Tensor + CudaTensor + ...
# The implementation requires cudaMalloc / cudaFree and cannot be done in this file
# as it is also imported for non-Cuda targets.

proc incRef*(store: CpuStorage) {.inline.} =
  if not store.isNil: # If a tensor is in a wrapper like the autograd Variable, it may not be initialized
    inc store.Frefcount

proc decRef*(store: CpuStorage) {.inline.} =
  if not store.isNil: # We may swap uninitialized storage with initialized storage and destroy it afterwards
    dec store.Frefcount

proc initRef*(store: CpuStorage) {.inline.} =
  store.Frefcount = 0 # Start at 0; the first assignment will bring it to 1

proc isUniqueRef*(store: CpuStorage): bool {.inline.} =
  store.Frefcount <= 1 # Results from a proc may have a refcount of 0 for a fresh storage.
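A short usage sketch of the refcount API above, assuming this storage file is saved as backend/storage.nim (the path the tensor file imports). The lifecycle follows the comments: initRef starts the count at 0, the first assignment brings it to 1, and isUniqueRef treats both 0 and 1 as unique so fresh proc results can still be mutated in place.

import ./backend/storage

var s: CpuStorage[float]
new s
initRef s                   # fresh storage, refcount 0
s.Fdata = @[1.0, 2.0, 3.0]
incRef s                    # first assignment to a tensor -> refcount 1
doAssert s.isUniqueRef      # safe to mutate in place
incRef s                    # a second tensor now shares the storage
doAssert not s.isUniqueRef  # a mutation must detach (copy) first
decRef s                    # one of the tensors goes away
doAssert s.isUniqueRef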