taylanbil · July 24, 2020 15:21
diff --git a/dlrm-criteo-common-stack-trace.txt b/dlrm-criteo-common-stack-trace.txt
 mensions={0,1}, to_apply=%AddComputation.1114
 2020-07-24 05:41:09.472955: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace ***
 2020-07-24 05:41:09.472964: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.472969: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %compare.1123 = pred[] compare(s32[] %constant.1120, s32[] %constant.1121), direction=NE
 2020-07-24 05:41:09.472992: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] }
 2020-07-24 05:41:09.473015: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1122 = f32[] constant(1)
 2020-07-24 05:41:09.473003: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1119 = s32[] constant(16)
 2020-07-24 05:41:09.473007: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        tensorflow::CurrentStackTrace[abi:cxx11]()
 2020-07-24 05:41:09.473010: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473021: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473026: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %convert.1124 = f32[] convert(s32[] %constant.1120)
 2020-07-24 05:41:09.473033: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1120 = s32[] constant(0)
 2020-07-24 05:41:09.473040: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>)
 2020-07-24 05:41:09.473046: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473050: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473076: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ShapeHash(xla::Shape const&)
 2020-07-24 05:41:09.473057: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %divide.1125 = f32[] divide(f32[] %constant.1122, f32[] %convert.1124)
 2020-07-24 05:41:09.473087: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&)
 2020-07-24 05:41:09.473065: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %compare.1122 = pred[] compare(s32[] %constant.1119, s32[] %constant.1120), direction=NE
 2020-07-24 05:41:09.473076: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473083: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[24,16]{0,1}, f32[27,16]{0,1}, f32[18,16]{0,1}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[24,16]{0,1}, f32[27,16]{0,1}, f32[18,16]{0,1}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0})
 2020-07-24 05:41:09.473093: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1126 = f32[] constant(nan)
 2020-07-24 05:41:09.473104: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473112: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1121 = f32[] constant(1)
 2020-07-24 05:41:09.473119: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473129: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473135: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %select.1127 = f32[] select(pred[] %compare.1123, f32[] %divide.1125, f32[] %constant.1126)
 2020-07-24 05:41:09.473139: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473165: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1128 = f32[] multiply(f32[] %reduce.1119, f32[] %select.1127)
 2020-07-24 05:41:09.473170: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473148: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %convert.1123 = f32[] convert(s32[] %constant.1119)
 2020-07-24 05:41:09.473154: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        clone
 2020-07-24 05:41:09.473160: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace:
 2020-07-24 05:41:09.473176: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %p30.1099 = f32[] parameter(30)
 2020-07-24 05:41:09.473181: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473189: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %divide.1124 = f32[] divide(f32[] %constant.1121, f32[] %convert.1123)
 2020-07-24 05:41:09.473191: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace ***
 2020-07-24 05:41:09.473196: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace ***
 2020-07-24 05:41:09.473201: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1129 = f32[] multiply(f32[] %multiply.1128, f32[] %p30.1099)
 2020-07-24 05:41:09.473203: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473212: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1125 = f32[] constant(nan)
 2020-07-24 05:41:09.473217: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473223: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        tensorflow::CurrentStackTrace[abi:cxx11]()
 2020-07-24 05:41:09.473249: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0:
 2020-07-24 05:41:09.473228: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1098 = f32[] constant(1)
 2020-07-24 05:41:09.473234: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        clone
 2020-07-24 05:41:09.473244: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %select.1126 = f32[] select(pred[] %compare.1122, f32[] %divide.1124, f32[] %constant.1125)
 2020-07-24 05:41:09.473256: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>)
 2020-07-24 05:41:09.473260: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown.
 2020-07-24 05:41:09.473265: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1130 = f32[] multiply(f32[] %multiply.1129, f32[] %constant.1098)
 2020-07-24 05:41:09.473275: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace ***
 2020-07-24 05:41:09.473282: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1127 = f32[] multiply(f32[] %reduce.1118, f32[] %select.1126)
 2020-07-24 05:41:09.473292: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ShapeHash(xla::Shape const&)
 2020-07-24 05:41:09.473297: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]         [[{{node XRTExecute}}]]
 2020-07-24 05:41:09.473302: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %add.1132 = f32[] add(f32[] %p31.1131, f32[] %multiply.1130)
 2020-07-24 05:41:09.473308: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473315: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %p29.1098 = f32[] parameter(29)
 2020-07-24 05:41:09.473322: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&)
 2020-07-24 05:41:09.473334: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0:
 2020-07-24 05:41:09.473335: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   ROOT %tuple.1133 = (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{1,0}, f32[12517,16]{1,0}, f32[14992,16]{1,0}, f32[15,16]{1,0}, f32[], f32[128,16]{1,0}, f32[128,16]{1,0}, f32[128,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{1,0}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) tuple(f32[512,13]{0,1} %add.750, f32[512]{0} %add.756, f32[256,512]{1,0} %add.762, f32[256]{0} %add.768, f32[64,256]{1,0} %add.774, f32[64]{0} %add.780, f32[16,64]{1,0} %add.786, f32[16]{0} %add.792, f32[512,367]{1,0} %add.798, f32[512]{0} %add.804, f32[256,512]{1,0} %add.810, f32[256]{0} %add.816, f32[1,256]{1,0} %add.822, f32[1]{0} %add.826, f32[12517,16]{0,1} %add.977, f32[14992,16]{0,1} %add.1037, f32[15,16]{1,0} %add.1097, f32[1]{0} %multiply.747, f32[1,256]{1,0} %multiply.743, f32[256]{0} %multiply.739, f32[256,512]{1,0} %multiply.735, f32[512]{0} %multiply.731, f32[512,367]{1,0} %multiply.727, f32[16]{0} %multiply.723, f32[16,64]{1,0} %multiply.719, f32[64]{0} %multiply.715, f32[64,256]{1,0} %multiply.711, f32[256]{0} %multiply.707, f32[256,512]{1,0} %multiply.703, f32[512]{0} %multiply.699, f32[512,13]{1,0} %multiply.695, f32[12517,16]{1,0} %add.975, f32[14992,16]{1,0} %add.1035, f32[15,16]{1,0} %add.1095, f32[] %add.1132, f32[128,16]{1,0} %gather.27, f32[128,16]{1,0} %gather.22, f32[128,16]{1,0} %gather.17, f32[16,16]{1,0} %reshape.187, f32[16,16]{1,0} %reshape.184, f32[16,16]{1,0} %reshape.181, f32[16,16]{1,0} %reshape.178, f32[16,16]{1,0} %reshape.175, f32[16,16]{1,0} %reshape.172, f32[16,16]{1,0} %reshape.169, f32[16,16]{1,0} %reshape.166, f32[16,16]{1,0} %reshape.163, f32[16,16]{1,0} %reshape.160, f32[16,16]{1,0} %reshape.157, f32[16,16]{1,0} %reshape.154, f32[16,16]{1,0} %reshape.151, f32[16,16]{1,0} %reshape.148, f32[16,16]{1,0} %reshape.145, f32[16,16]{1,0} %reshape.142, f32[16,16]{1,0} %reshape.139, f32[16,16]{1,0} %reshape.136, f32[16,16]{1,0} %reshape.133, f32[16,16]{1,0} %reshape.130, f32[16,16]{1,0} %reshape.127, f32[16,16]{1,0} %reshape.124, f32[16,16]{1,0} %reshape.121, f32[16,16]{1,0} %reshape.118, f32[16,16]{1,0} %reshape.115, f32[16,16]{1,0} %reshape.112, f32[16,1]{1,0} %add.277, f32[] %multiply.1128, f32[16,16]{1,0} %slice.892, f32[16,16]{1,0} %slice.890, f32[16,16]{1,0} %slice.888, f32[16,16]{1,0} %slice.886, f32[16,16]{1,0} %slice.884, f32[16,16]{1,0} %slice.882, f32[16,16]{1,0} %slice.880, f32[16,16]{1,0} %slice.878, f32[16,16]{1,0} %slice.876, f32[16,16]{1,0} %slice.874, f32[16,16]{1,0} %slice.872, f32[16,16]{1,0} %slice.870, f32[16,16]{1,0} %slice.868, f32[16,16]{1,0} %slice.866, f32[16,16]{1,0} %slice.864, f32[16,16]{1,0} %slice.862, f32[16,16]{1,0} %slice.860, f32[16,16]{1,0} %slice.858, f32[16,16]{1,0} %slice.856, f32[16,16]{1,0} %slice.854, f32[16,16]{1,0} %slice.852, f32[16,16]{1,0} %slice.850, f32[16,16]{1,0} %slice.848, f32[16,16]{1,0} %slice.846, f32[16,16]{1,0} %slice.844, f32[16,16]{1,0} %slice.842)
 2020-07-24 05:41:09.473343: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1128 = f32[] multiply(f32[] %multiply.1127, f32[] %p29.1098)
 2020-07-24 05:41:09.473350: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473358: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown.
 2020-07-24 05:41:09.473382: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] }
 2020-07-24 05:41:09.473391: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %constant.1097 = f32[] constant(1)
 2020-07-24 05:41:09.473401: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473406: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]         [[{{node XRTExecute}}]]
 2020-07-24 05:41:09.473411: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473418: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %multiply.1129 = f32[] multiply(f32[] %multiply.1128, f32[] %constant.1097)
 2020-07-24 05:41:09.473428: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473453: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   %add.1131 = f32[] add(f32[] %p30.1130, f32[] %multiply.1129)
 2020-07-24 05:41:09.473440: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473459: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473473: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0})
 2020-07-24 05:41:09.473474: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]   ROOT %tuple.1132 = (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{1,0}, f32[2202608,16]{1,0}, f32[8351593,16]{1,0}, f32[4,16]{1,0}, f32[], f32[128,16]{1,0}, f32[128,16]{1,0}, f32[128,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{1,0}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) tuple(f32[512,13]{0,1} %add.750, f32[512]{0} %add.756, f32[256,512]{1,0} %add.762, f32[256]{0} %add.768, f32[64,256]{1,0} %add.774, f32[64]{0} %add.780, f32[16,64]{1,0} %add.786, f32[16]{0} %add.792, f32[512,367]{1,0} %add.798, f32[512]{0} %add.804, f32[256,512]{1,0} %add.810, f32[256]{0} %add.816, f32[1,256]{1,0} %add.822, f32[1]{0} %add.826, f32[2202608,16]{0,1} %add.977, f32[8351593,16]{0,1} %add.1037, f32[4,16]{1,0} %add.1096, f32[1]{0} %multiply.747, f32[1,256]{1,0} %multiply.743, f32[256]{0} %multiply.739, f32[256,512]{1,0} %multiply.735, f32[512]{0} %multiply.731, f32[512,367]{1,0} %multiply.727, f32[16]{0} %multiply.723, f32[16,64]{1,0} %multiply.719, f32[64]{0} %multiply.715, f32[64,256]{1,0} %multiply.711, f32[256]{0} %multiply.707, f32[256,512]{1,0} %multiply.703, f32[512]{0} %multiply.699, f32[512,13]{1,0} %multiply.695, f32[2202608,16]{1,0} %add.975, f32[8351593,16]{1,0} %add.1035, f32[4,16]{1,0} %add.1094, f32[] %add.1131, f32[128,16]{1,0} %gather.27, f32[128,16]{1,0} %gather.22, f32[128,16]{1,0} %gather.17, f32[16,16]{1,0} %reshape.187, f32[16,16]{1,0} %reshape.184, f32[16,16]{1,0} %reshape.181, f32[16,16]{1,0} %reshape.178, f32[16,16]{1,0} %reshape.175, f32[16,16]{1,0} %reshape.172, f32[16,16]{1,0} %reshape.169, f32[16,16]{1,0} %reshape.166, f32[16,16]{1,0} %reshape.163, f32[16,16]{1,0} %reshape.160, f32[16,16]{1,0} %reshape.157, f32[16,16]{1,0} %reshape.154, f32[16,16]{1,0} %reshape.151, f32[16,16]{1,0} %reshape.148, f32[16,16]{1,0} %reshape.145, f32[16,16]{1,0} %reshape.142, f32[16,16]{1,0} %reshape.139, f32[16,16]{1,0} %reshape.136, f32[16,16]{1,0} %reshape.133, f32[16,16]{1,0} %reshape.130, f32[16,16]{1,0} %reshape.127, f32[16,16]{1,0} %reshape.124, f32[16,16]{1,0} %reshape.121, f32[16,16]{1,0} %reshape.118, f32[16,16]{1,0} %reshape.115, f32[16,16]{1,0} %reshape.112, f32[16,1]{1,0} %add.277, f32[] %multiply.1127, f32[16,16]{1,0} %slice.892, f32[16,16]{1,0} %slice.890, f32[16,16]{1,0} %slice.888, f32[16,16]{1,0} %slice.886, f32[16,16]{1,0} %slice.884, f32[16,16]{1,0} %slice.882, f32[16,16]{1,0} %slice.880, f32[16,16]{1,0} %slice.878, f32[16,16]{1,0} %slice.876, f32[16,16]{1,0} %slice.874, f32[16,16]{1,0} %slice.872, f32[16,16]{1,0} %slice.870, f32[16,16]{1,0} %slice.868, f32[16,16]{1,0} %slice.866, f32[16,16]{1,0} %slice.864, f32[16,16]{1,0} %slice.862, f32[16,16]{1,0} %slice.860, f32[16,16]{1,0} %slice.858, f32[16,16]{1,0} %slice.856, f32[16,16]{1,0} %slice.854, f32[16,16]{1,0} %slice.852, f32[16,16]{1,0} %slice.850, f32[16,16]{1,0} %slice.848, f32[16,16]{1,0} %slice.846, f32[16,16]{1,0} %slice.844, f32[16,16]{1,0} %slice.842)
 2020-07-24 05:41:09.473476: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473489: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473515: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] }
 2020-07-24 05:41:09.473521: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        clone
 2020-07-24 05:41:09.473525: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace:
 2020-07-24 05:41:09.473534: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473539: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace ***
 2020-07-24 05:41:09.473548: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace ***
 2020-07-24 05:41:09.473554: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473563: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        tensorflow::CurrentStackTrace[abi:cxx11]()
 2020-07-24 05:41:09.473556: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473570: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0})
 2020-07-24 05:41:09.473575: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>)
 2020-07-24 05:41:09.473578: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0:
 2020-07-24 05:41:09.473594: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473602: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ShapeHash(xla::Shape const&)
 2020-07-24 05:41:09.473607: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown.
 2020-07-24 05:41:09.473619: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace:
 2020-07-24 05:41:09.473626: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&)
 2020-07-24 05:41:09.473631: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]         [[{{node XRTExecute}}]]
 2020-07-24 05:41:09.473640: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace ***
 2020-07-24 05:41:09.473647: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473659: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        tensorflow::CurrentStackTrace[abi:cxx11]()
 2020-07-24 05:41:09.473666: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473674: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>)
 2020-07-24 05:41:09.473678: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473720: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::util::ShapeHash(xla::Shape const&)
 2020-07-24 05:41:09.473725: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473733: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&)
 2020-07-24 05:41:09.473738: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473748: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473752: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        clone
 2020-07-24 05:41:09.473763: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473768: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace ***
 2020-07-24 05:41:09.473778: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473782: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473791: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473797: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0:
 2020-07-24 05:41:09.473806: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473809: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown.
 2020-07-24 05:41:09.473819: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]        clone
 2020-07-24 05:41:09.473821: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]         [[{{node XRTExecute}}]]
 2020-07-24 05:41:09.473831: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace ***
 2020-07-24 05:41:09.473845: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]
 2020-07-24 05:41:09.473856: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0:
 2020-07-24 05:41:09.473866: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown.
 2020-07-24 05:41:09.473877: E tensorflow/compiler/xla/xla_client/xla_util.cc:76]         [[{{node XRTExecute}}]]
 Exception in device=TPU:6: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***
 Exception in device=TPU:2: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***
 Exception in device=TPU:3: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***
 Exception in device=TPU:5: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***




 Exception in device=TPU:4: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***
 Traceback (most recent call last):

 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main
    dlrm_wrap(X, lS_o, lS_i, use_gpu, device)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap
    return dlrm(X, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
 Traceback (most recent call last):
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward
    return self.tpu_parallel_forward(dense_x, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward
    ly = self._collect_distribute_embeddings(ly_local)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main
    dlrm_wrap(X, lS_o, lS_i, use_gpu, device)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings
    full_data = full_data[self._non_pad_indices]
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap
    return dlrm(X, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
    result = self.forward(*input, **kwargs)
 Traceback (most recent call last):
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward
    return self.tpu_parallel_forward(dense_x, lS_o, lS_i)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward
    ly = self._collect_distribute_embeddings(ly_local)
 RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***

  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings
    full_data = full_data[self._non_pad_indices]
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main
    dlrm_wrap(X, lS_o, lS_i, use_gpu, device)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap
    return dlrm(X, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward
    return self.tpu_parallel_forward(dense_x, lS_o, lS_i)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward
    ly = self._collect_distribute_embeddings(ly_local)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings
    full_data = full_data[self._non_pad_indices]
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
 RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***

  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main
    dlrm_wrap(X, lS_o, lS_i, use_gpu, device)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap
    return dlrm(X, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward
    return self.tpu_parallel_forward(dense_x, lS_o, lS_i)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward
    ly = self._collect_distribute_embeddings(ly_local)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings
    full_data = full_data[self._non_pad_indices]
 RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***

 RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***

 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn
    _start_fn(index, pf_cfg, fn, args)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn
    fn(gindex, *args)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main
    dlrm_wrap(X, lS_o, lS_i, use_gpu, device)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap
    return dlrm(X, lS_o, lS_i)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward
    return self.tpu_parallel_forward(dense_x, lS_o, lS_i)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward
    ly = self._collect_distribute_embeddings(ly_local)
  File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings
    full_data = full_data[self._non_pad_indices]
 RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown. vs. OK)
 *** Begin stack trace ***
        tensorflow::CurrentStackTrace[abi:cxx11]()





        clone
 *** End stack trace ***

 Error in atexit._run_exitfuncs:
 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit
 Error in atexit._run_exitfuncs:
 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit
 Error in atexit._run_exitfuncs:
 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit
    _XLAC._prepare_to_exit()
 RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown.
         [[{{node XRTExecute}}]]
    _XLAC._prepare_to_exit()
 RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown.
         [[{{node XRTExecute}}]]
 Error in atexit._run_exitfuncs:
 Traceback (most recent call last):
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit
    _XLAC._prepare_to_exit()
 RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown.
         [[{{node XRTExecute}}]]
    _XLAC._prepare_to_exit()
 RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0:
 Cancelled by TearDown.
         [[{{node XRTExecute}}]]
 Traceback (most recent call last):
  File "/home/taylanbil/dlrm/dlrm_tpu_runner.py", line 15, in <module>
    xmp.spawn(main, args=(), nprocs=pre_spawn_flags.tpu_cores)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 395, in spawn
    start_method=start_method)
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 158, in start_processes
    while not context.join():
  File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 108, in join
    (error_index, name)
 Exception: process 7 terminated with signal SIGSEGV