template <typename T>
struct LaunchConvOp<GPUDevice, T> {
  static void launch(OpKernelContext* ctx, bool use_cudnn,
                     const Tensor& input_param, const Tensor& filter,
                     int stride, const Eigen::PaddingType& padding,
                     Tensor* output) {
    auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream();
    OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available."));
    // There's branching here for three separate paths.
    // First, we check if the CUDA platform is registered, and fall back to
    // using Eigen on the GPU if not.
    if (use_cudnn) {
      Tensor input = input_param;
      if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1) {
        // ... 1x1 filter, so call cublas directly ...
        bool blas_launch_status =
            stream->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f,
                                 b_ptr, n, a_ptr, k, 0.0f, &c_ptr, n)
                .ok();
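
To make the 1x1-filter shortcut concrete, here is a small NumPy sketch (plain Python, not TensorFlow code; the shapes are made up for illustration) of why a 1x1 convolution can be handed straight to a single GEMM like the ThenBlasGemm call above:

import numpy as np

n, h, w, c_in, c_out = 2, 4, 4, 3, 5
x = np.random.randn(n, h, w, c_in)        # NHWC input
f = np.random.randn(1, 1, c_in, c_out)    # 1x1 filter, HWIO layout

# Viewed as a GEMM: m = n*h*w rows, k = c_in, and the output has c_out columns.
gemm = np.dot(x.reshape(n * h * w, c_in), f.reshape(c_in, c_out))
conv = gemm.reshape(n, h, w, c_out)       # same values a 1x1 convolution would produce

A 1x1 filter touches each spatial location independently, so the reshape costs nothing, which is exactly why this path skips cuDNN and calls cuBLAS.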
struct LaunchMatMul<GPUDevice, T, true /* USE_CUBLAS */> {
  static void launch(
      OpKernelContext* ctx, OpKernel* kernel, const Tensor& a, const Tensor& b,
      const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair,
      Tensor* out) {
    const uint64 m = a.dim_size(1 - dim_pair[0].first);
    const uint64 k = a.dim_size(dim_pair[0].first);
    const uint64 n = b.dim_size(1 - dim_pair[0].second);
    // ... options for transposing the input matrices to the format cuBLAS expects ...

    // Get a Stream for this GPUDevice.
    auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream();
    OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available."));
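
As a quick sanity check on those dimension computations, here is the same arithmetic in plain Python (the concrete shapes and the no-transpose dim_pair are hypothetical):

a_shape, b_shape = (128, 64), (64, 32)   # a is m x k, b is k x n
dim_pair = [(1, 0)]                      # contract a's axis 1 with b's axis 0

m = a_shape[1 - dim_pair[0][0]]          # 128
k = a_shape[dim_pair[0][0]]              # 64
n = b_shape[1 - dim_pair[0][1]]          # 32

When transpose_a or transpose_b is set, dim_pair simply names the other axis, and m, k, n fall out of the same three lines.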
// Load GPU kernel
gpu::StreamExecutor stream_exec{PlatformKind::kCuda};
gcudacc::kernel::MyKernel my_kernel{&stream_exec};
bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(), &my_kernel);
if (!ok) { ... }
// Allocate device memory on the GPU to hold the kernel's output
gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>();
if (result == nullptr) { ... }
class MatMulOp : public OpKernel {
 public:
  explicit MatMulOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
    OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_a", &transpose_a_));
    OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_b", &transpose_b_));
  }

  void Compute(OpKernelContext* ctx) override {
    const Tensor& a = ctx->input(0);
    const Tensor& b = ctx->input(1);
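
Those two GetAttr calls are where the transpose keyword arguments from the Python API end up. A short usage sketch (standard tf.matmul usage, nothing specific to this kernel beyond what the constructor above reads):

import tensorflow as tf

a = tf.constant([[1., 2.], [3., 4.]])
b = tf.constant([[5., 6.], [7., 8.]])
c = tf.matmul(a, b, transpose_a=True)    # sets the "transpose_a" attr on the MatMul node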
// TODO(jeff,sanjay): Session tests
// . Create and delete
// . Extend graph
// . Run
>>> import tensorflow as tf
>>> a = tf.Variable(tf.zeros([784, 10], name='a'))
>>> print str(tf.get_default_graph().as_graph_def())
node {
  name: "a"
  op: "Const"
  attr {
    key: "dtype"
    value {
      type: DT_FLOAT
# load training and test data from disk
import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
# describe model
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 784])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
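
For completeness, this is roughly how the rest of the classic tutorial trains that model (standard 0.x-era API; the learning rate, batch size, and step count are illustrative, not taken from the text above):

y_ = tf.placeholder(tf.float32, [None, 10])                       # true labels
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))                    # cross-entropy loss
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

sess = tf.Session()
sess.run(tf.initialize_all_variables())
for _ in range(1000):
    batch_xs, batch_ys = mnist.train.next_batch(100)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})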
src_device: /job:host_a/replica:0/task:0/cpu:0
src_incarnation: 82
dst_device: /job:host_b/replica:0/task:0/cpu:3
tensor_name: a
frame_id: 0
iter_id: 0
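
A hedged sketch of how those fields are used: the Send and Recv ops on either side of the edge agree on a single key string built from them. The ';'-joined layout below just strings together the fields listed above; the exact encoding the runtime uses for src_incarnation is an assumption here.

def make_rendezvous_key(src_device, src_incarnation, dst_device,
                        tensor_name, frame_id, iter_id):
    return ";".join([src_device, str(src_incarnation), dst_device,
                     tensor_name, "%d:%d" % (frame_id, iter_id)])

make_rendezvous_key("/job:host_a/replica:0/task:0/cpu:0", 82,
                    "/job:host_b/replica:0/task:0/cpu:3", "a", 0, 0)
# "/job:host_a/replica:0/task:0/cpu:0;82;/job:host_b/replica:0/task:0/cpu:3;a;0:0"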
TEST_F(GraphPartitionTest, CrossDeviceData) {
  using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
  Node* a1 = Input(in_.opts().WithName("A1"));
  Node* b1 = Input(in_.opts().WithName("B1"));
  Cross(a1, b1, in_.opts().WithName("B2"));

  Partition(ToGraphDef(), &partitions_);
  EXPECT_EQ(2, partitions_.size());
  // ... there are some additional assertions, but they require a bit of unwinding
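
To see what that size-2 expectation is really asserting, here is a toy partitioner in plain Python (a conceptual model, not the real Partition() implementation, and it assumes the A* and B* nodes land on different devices as the test name suggests): nodes are grouped by device, and each cross-device edge is replaced by a _Send on the producer's partition plus a matching _Recv on the consumer's.

def partition(devices, edges):
    # devices: {node_name: device}, edges: [(src, dst)]
    parts = {}
    for name, dev in devices.items():
        parts.setdefault(dev, []).append(name)
    for src, dst in edges:
        if devices[src] != devices[dst]:
            parts[devices[src]].append("_Send(" + src + ")")
            parts[devices[dst]].append("_Recv(" + src + ")")
    return parts

# A1 on one device feeding B1/B2 on another yields exactly two partitions,
# with a _Send/_Recv pair carrying A1's output across the device boundary.
partition({"A1": "cpu:0", "B1": "cpu:1", "B2": "cpu:1"},
          [("A1", "B2"), ("B1", "B2")])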