Skip to content

Instantly share code, notes, and snippets.

@mratsim
Created May 3, 2018 08:18
Show Gist options
  • Select an option

  • Save mratsim/6c49a521f7814267827974effee36288 to your computer and use it in GitHub Desktop.

Select an option

Save mratsim/6c49a521f7814267827974effee36288 to your computer and use it in GitHub Desktop.
Copy-on-Write Tensors
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ./backend/metadataArray,
./backend/storage,
nimblas
export nimblas.OrderType
type
  Backend* {.deprecated.} = enum
    ## ``Backend`` for tensor computation and memory allocation.
    ##
    ## Only deprecated procs from v0.1.3 still use this.
    Cpu,
    Cuda

  Tensor*[T] = object
    ## Tensor data structure stored on Cpu.
    ## - ``shape``: dimensions of the tensor.
    ## - ``strides``: number of items to skip to reach the next item
    ##   along each dimension.
    ## - ``offset``: offset of the first item of the tensor. It can be
    ##   negative, in particular for slices.
    ## - ``storage``: an "opaque" CpuStorage that holds the actual data
    ##   plus a reference counter. Data is reachable via the ``.data``
    ##   accessor.
    ## Fields are public so external libraries can construct a Tensor
    ## directly.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    storage*: CpuStorage[T]

  CudaTensor*[T: SomeReal] = object
    ## Tensor data structure stored on an Nvidia GPU (Cuda).
    ## - ``shape``: dimensions of the tensor.
    ## - ``strides``: number of items to skip to reach the next item
    ##   along each dimension.
    ## - ``offset``: offset of the first item of the tensor. It can be
    ##   negative, in particular for slices.
    ## - ``data``: a cuda seq-like object pointing to the data location.
    ## Note: currently ``=`` assignment for CudaTensor does NOT copy:
    ## both CudaTensors share a view of the same data location, so
    ## mutating one mutates the other.
    ##
    ## In the future CudaTensor will leverage the Nim compiler to copy
    ## automatically when a memory location would be mutated while
    ## referenced more than once.
    shape*: MetadataArray
    strides*: MetadataArray
    offset*: int
    data*: CudaSeq[T] # Cuda device memory is garbage-collected automatically

  AnyTensor*[T] = Tensor[T] or CudaTensor[T]
# #############
# Copy-on-write
# #############
proc dataFrom*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
  ## Safely swap the tensor's old storage for a fresh reference
  ## holding `s`. Cleanup of the old storage is left to the Nim
  ## garbage collector once it becomes unused.
  ##
  ## Note: this is only race-free if the storage is touched by the
  ## main thread alone. Refcount increments happen on assignment
  ## only; slices do not increment.
  var fresh: CpuStorage[T]
  new fresh
  initRef fresh
  fresh.Fdata = s
  swap(t.storage, fresh)
  decRef fresh # after the swap, `fresh` holds the *old* storage
proc detach*[T](t: var Tensor[T]) {.inline, noSideEffect.}=
  ## Give `t` its own private copy of the storage if more than one
  ## tensor already refers to it; a no-op when `t` is the sole owner.
  if not t.storage.isUniqueRef:
    dataFrom(t, t.storage.Fdata)
proc `=`*[T](dst: var Tensor[T]; src: Tensor[T]) {.inline, noSideEffect.}=
  ## Assignment overload that tracks the storage reference count.
  ## Only `let`, `var` and assignment to a var trigger refcounting;
  ## `result = expr` and parameter passing do not.
  ## NOTE(review): the storage previously held by `dst` is not
  ## decRef'd here — presumably `=destroy` (commented out in this
  ## file) is meant to balance the count; confirm before relying on
  ## exact refcount values.
  incRef src.storage
  system.`=`(dst, src)
## Use --newruntime with Arraymancer
# {.experimental.}
# proc `=destroy`*[T](c: Tensor[T]) {.inline, noSideEffect.}=
# # Automatically called on tensor destruction. It will decrease
# # the reference count on the shared storage
# decRef c.storage
# ###############
# Field accessors
# ###############
proc data*[T](t: Tensor[T]): seq[T] {.inline,noInit.} =
  ## Get the tensor's raw data (intended for library writers).
  ## `shallowCopy` shares the underlying seq buffer instead of
  ## duplicating it.
  shallowCopy(result, t.storage.Fdata)
proc data*[T](t: var Tensor[T]): var seq[T] {.inline,noInit.} =
  ## Get mutable access to the tensor's raw data (intended for
  ## library writers). `shallowCopy` shares the underlying seq
  ## buffer instead of duplicating it.
  shallowCopy(result, t.storage.Fdata)
proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
  ## Set the tensor's raw data (intended for library writers).
  ## Delegates to `dataFrom`, which swaps in a fresh storage.
  dataFrom(t, s)
# Copyright 2017 Mamy André-Ratsimbazafy
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
type
  CpuStorage*[T] = ref object
    ## An opaque refcounted storage implementing copy-on-write.
    ## Data is shared between multiple tensors as long as none
    ## mutates it. When a mutation is needed, it happens in-place if
    ## the tensor is the only one referring to this storage;
    ## otherwise the mutator copies the data and refers to its own
    ## copy.
    ##
    ## Note: do not use the exposed ``Fdata``; it is public only for
    ## the transition period.
    Frefcount: int
    Fdata*: seq[T] # should only be visible internally

  CudaSeq*[T: SomeReal] = object
    ## Seq-like structure on the Cuda backend.
    ##
    ## The Nim garbage collector will automatically ask cuda to clear
    ## GPU memory once ``data`` becomes unused.
    ##
    ## Warning ⚠: this will be revamped, renamed and made private
    ## before 0.3.0 for copy-on-write semantics on Cuda.
    len*: int
    data*: ref[ptr UncheckedArray[T]]
# Note, non-CPU storage are also forward-declared here so that AnyTensor
# is always Tensor + CudaTensor + ...
# The implementation requires cudaMalloc / cudaFree and cannot be done in this file
# as it is also imported on for non-Cuda targets.
proc incRef*(store: CpuStorage) {.inline.} =
  ## Increment the storage's reference count.
  ## A nil store is tolerated: a tensor held in a wrapper such as the
  ## autograd Variable may not have its storage initialized yet.
  if not store.isNil:
    inc store.Frefcount
proc decRef*(store: CpuStorage) {.inline.} =
  ## Decrement the storage's reference count.
  ## A nil store is tolerated: uninitialized storage may be swapped
  ## with initialized storage and destroyed afterwards.
  if not store.isNil:
    dec store.Frefcount
proc initRef*(store: CpuStorage) {.inline.} =
  ## Reset the reference count to 0; the first assignment will
  ## bring it to 1.
  store.Frefcount = 0
proc isUniqueRef*(store: CpuStorage): bool {.inline.} =
  ## True when at most one tensor refers to `store`.
  ## Results from procs may carry a refcount of 0 for a fresh
  ## storage, hence the `<= 1` comparison.
  ## A nil store (uninitialized storage, as tolerated by `incRef`
  ## and `decRef`) is trivially unique; without this guard, calling
  ## `detach` on such a tensor would dereference nil.
  store.isNil or store.Frefcount <= 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment