This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct LaunchConvOp<GPUDevice, T> { | |
static void launch(OpKernelContext* ctx, bool use_cudnn, const Tensor& input_param, const Tensor& filter, int stride, const Eigen::PaddingType& padding, Tensor* output) { | |
auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream(); | |
// First, we check if the CUDA platform is registered, and fall back to using Eigen on the GPU if not. | |
if (use_cudnn) { | |
Tensor input = input_param; | |
if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1) { | |
// ... 1x1 filter, so call cublas directly ... | |
bool blas_launch_status = stream->ThenBlasGemm(no_transpose, no_transpose, n, m, k, 1.0f, b_ptr, n, a_ptr, k, 0.0f, &c_ptr, n).ok(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
template <typename T> | |
struct LaunchConvOp<GPUDevice, T> { | |
static void launch(OpKernelContext* ctx, bool use_cudnn, const Tensor& input_param, const Tensor& filter, int stride, const Eigen::PaddingType& padding, Tensor* output) { | |
auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream(); | |
OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available.")); | |
// There's branching here for three separate paths. | |
// First, we check if the CUDA platform is registered, and fall back to using Eigen on the GPU if not. | |
if (use_cudnn) { | |
Tensor input = input_param; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
struct LaunchMatMul<GPUDevice, T, true /* USE_CUBLAS */> { | |
static void launch(OpKernelContext* ctx, OpKernel* kernel, const Tensor& a, const Tensor& b, const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair, Tensor* out) { | |
const uint64 m = a.dim_size(1 - dim_pair[0].first); | |
const uint64 k = a.dim_size(dim_pair[0].first); | |
const uint64 n = b.dim_size(1 - dim_pair[0].second); | |
// .. options for transposing the input matrices to the format cuBLAS expects ... | |
// Get a Stream for this GPUDevice | |
auto* stream = ctx->op_device_context<GPUDeviceContext>()->stream(); | |
OP_REQUIRES(ctx, stream, errors::Internal("No GPU stream available.")); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load GPU kernel | |
gpu::StreamExecutor stream_exec{PlatformKind::kCuda}; | |
gcudacc::kernel::MyKernel my_kernel{&stream_exec}; | |
bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(), &my_kernel); | |
if (!ok) { ... } | |
// Allocate some GPU device memory for the output from the kernel | |
gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>(); | |
if (result == nullptr) { ... } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class MatMulOp : public OpKernel { | |
public: | |
explicit MatMulOp(OpKernelConstruction* ctx) : OpKernel(ctx) { | |
OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_a", &transpose_a_)); | |
OP_REQUIRES_OK(ctx, ctx->GetAttr("transpose_b", &transpose_b_)); | |
} | |
void Compute(OpKernelContext* ctx) override { | |
const Tensor& a = ctx->input(0); | |
const Tensor& b = ctx->input(1); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// TODO(jeff,sanjay): Session tests | |
// . Create and delete | |
// . Extend graph | |
// . Run |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
>>> import tensorflow as tf | |
>>> a = tf.Variable(tf.zeros([784, 10], name='a')) | |
>>> print str(tf.get_default_graph().as_graph_def()) | |
node { | |
name: "a" | |
op: "Const" | |
attr { | |
key: "dtype" | |
value { | |
type: DT_FLOAT |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load training and test data from disk (downloads MNIST if absent).
import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Describe the model: single-layer softmax regression over flattened
# 28x28 (= 784-pixel) MNIST images, producing 10 class probabilities.
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 784])  # batch of flattened images
W = tf.Variable(tf.zeros([784, 10]))         # weight matrix, zero-initialized
b = tf.Variable(tf.zeros([10]))              # per-class bias
y = tf.nn.softmax(tf.matmul(x, W) + b)       # predicted class distribution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
src_device: /job:host_a/replica:0/task:0/cpu:0; | |
src_incarnation: 82; | |
dst_device: /job:host_b/replica:0/task:0/cpu:3; | |
tensor_name: a; | |
frame_id: 0 | |
iter_id: 0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
TEST_F(GraphPartitionTest, CrossDeviceData) { | |
using namespace ::tensorflow::ops; // NOLINT(build/namespaces) | |
Node* a1 = Input(in_.opts().WithName("A1")); | |
Node* b1 = Input(in_.opts().WithName("B1")); | |
Cross(a1, b1, in_.opts().WithName("B2")); | |
Partition(ToGraphDef(), &partitions_); | |
EXPECT_EQ(2, partitions_.size()); | |
// ... there are some additional assertions, but they require a bit of unwinding |