Created
July 24, 2020 15:21
-
-
Save taylanbil/7b6bd83ceb937b665a9f67e47e19f514 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
mensions={0,1}, to_apply=%AddComputation.1114 | |
2020-07-24 05:41:09.472955: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace *** | |
2020-07-24 05:41:09.472964: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.472969: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %compare.1123 = pred[] compare(s32[] %constant.1120, s32[] %constant.1121), direction=NE | |
2020-07-24 05:41:09.472992: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] } | |
2020-07-24 05:41:09.473015: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1122 = f32[] constant(1) | |
2020-07-24 05:41:09.473003: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1119 = s32[] constant(16) | |
2020-07-24 05:41:09.473007: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] tensorflow::CurrentStackTrace[abi:cxx11]() | |
2020-07-24 05:41:09.473010: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473021: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473026: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %convert.1124 = f32[] convert(s32[] %constant.1120) | |
2020-07-24 05:41:09.473033: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1120 = s32[] constant(0) | |
2020-07-24 05:41:09.473040: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>) | |
2020-07-24 05:41:09.473046: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473050: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473076: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ShapeHash(xla::Shape const&) | |
2020-07-24 05:41:09.473057: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %divide.1125 = f32[] divide(f32[] %constant.1122, f32[] %convert.1124) | |
2020-07-24 05:41:09.473087: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&) | |
2020-07-24 05:41:09.473065: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %compare.1122 = pred[] compare(s32[] %constant.1119, s32[] %constant.1120), direction=NE | |
2020-07-24 05:41:09.473076: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473083: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[24,16]{0,1}, f32[27,16]{0,1}, f32[18,16]{0,1}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[24,16]{0,1}, f32[27,16]{0,1}, f32[18,16]{0,1}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) | |
2020-07-24 05:41:09.473093: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1126 = f32[] constant(nan) | |
2020-07-24 05:41:09.473104: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473112: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1121 = f32[] constant(1) | |
2020-07-24 05:41:09.473119: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473129: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473135: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %select.1127 = f32[] select(pred[] %compare.1123, f32[] %divide.1125, f32[] %constant.1126) | |
2020-07-24 05:41:09.473139: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473165: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1128 = f32[] multiply(f32[] %reduce.1119, f32[] %select.1127) | |
2020-07-24 05:41:09.473170: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473148: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %convert.1123 = f32[] convert(s32[] %constant.1119) | |
2020-07-24 05:41:09.473154: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] clone | |
2020-07-24 05:41:09.473160: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace: | |
2020-07-24 05:41:09.473176: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %p30.1099 = f32[] parameter(30) | |
2020-07-24 05:41:09.473181: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473189: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %divide.1124 = f32[] divide(f32[] %constant.1121, f32[] %convert.1123) | |
2020-07-24 05:41:09.473191: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace *** | |
2020-07-24 05:41:09.473196: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace *** | |
2020-07-24 05:41:09.473201: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1129 = f32[] multiply(f32[] %multiply.1128, f32[] %p30.1099) | |
2020-07-24 05:41:09.473203: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473212: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1125 = f32[] constant(nan) | |
2020-07-24 05:41:09.473217: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473223: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] tensorflow::CurrentStackTrace[abi:cxx11]() | |
2020-07-24 05:41:09.473249: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
2020-07-24 05:41:09.473228: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1098 = f32[] constant(1) | |
2020-07-24 05:41:09.473234: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] clone | |
2020-07-24 05:41:09.473244: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %select.1126 = f32[] select(pred[] %compare.1122, f32[] %divide.1124, f32[] %constant.1125) | |
2020-07-24 05:41:09.473256: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>) | |
2020-07-24 05:41:09.473260: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown. | |
2020-07-24 05:41:09.473265: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1130 = f32[] multiply(f32[] %multiply.1129, f32[] %constant.1098) | |
2020-07-24 05:41:09.473275: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace *** | |
2020-07-24 05:41:09.473282: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1127 = f32[] multiply(f32[] %reduce.1118, f32[] %select.1126) | |
2020-07-24 05:41:09.473292: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ShapeHash(xla::Shape const&) | |
2020-07-24 05:41:09.473297: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] [[{{node XRTExecute}}]] | |
2020-07-24 05:41:09.473302: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %add.1132 = f32[] add(f32[] %p31.1131, f32[] %multiply.1130) | |
2020-07-24 05:41:09.473308: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473315: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %p29.1098 = f32[] parameter(29) | |
2020-07-24 05:41:09.473322: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&) | |
2020-07-24 05:41:09.473334: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
2020-07-24 05:41:09.473335: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] ROOT %tuple.1133 = (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{1,0}, f32[12517,16]{1,0}, f32[14992,16]{1,0}, f32[15,16]{1,0}, f32[], f32[128,16]{1,0}, f32[128,16]{1,0}, f32[128,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{1,0}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) tuple(f32[512,13]{0,1} %add.750, f32[512]{0} %add.756, f32[256,512]{1,0} %add.762, f32[256]{0} %add.768, f32[64,256]{1,0} %add.774, f32[64]{0} %add.780, f32[16,64]{1,0} %add.786, f32[16]{0} %add.792, f32[512,367]{1,0} %add.798, f32[512]{0} %add.804, f32[256,512]{1,0} %add.810, f32[256]{0} %add.816, f32[1,256]{1,0} %add.822, f32[1]{0} %add.826, f32[12517,16]{0,1} %add.977, f32[14992,16]{0,1} %add.1037, f32[15,16]{1,0} %add.1097, f32[1]{0} %multiply.747, f32[1,256]{1,0} %multiply.743, f32[256]{0} %multiply.739, f32[256,512]{1,0} %multiply.735, f32[512]{0} %multiply.731, f32[512,367]{1,0} %multiply.727, f32[16]{0} %multiply.723, f32[16,64]{1,0} %multiply.719, f32[64]{0} %multiply.715, f32[64,256]{1,0} %multiply.711, f32[256]{0} %multiply.707, f32[256,512]{1,0} %multiply.703, f32[512]{0} %multiply.699, f32[512,13]{1,0} %multiply.695, f32[12517,16]{1,0} %add.975, f32[14992,16]{1,0} %add.1035, f32[15,16]{1,0} %add.1095, f32[] %add.1132, f32[128,16]{1,0} %gather.27, f32[128,16]{1,0} %gather.22, f32[128,16]{1,0} %gather.17, f32[16,16]{1,0} %reshape.187, f32[16,16]{1,0} %reshape.184, f32[16,16]{1,0} %reshape.181, f32[16,16]{1,0} %reshape.178, f32[16,16]{1,0} %reshape.175, f32[16,16]{1,0} %reshape.172, f32[16,16]{1,0} %reshape.169, f32[16,16]{1,0} %reshape.166, f32[16,16]{1,0} %reshape.163, f32[16,16]{1,0} %reshape.160, f32[16,16]{1,0} %reshape.157, f32[16,16]{1,0} %reshape.154, f32[16,16]{1,0} %reshape.151, f32[16,16]{1,0} %reshape.148, f32[16,16]{1,0} %reshape.145, f32[16,16]{1,0} %reshape.142, f32[16,16]{1,0} %reshape.139, f32[16,16]{1,0} %reshape.136, f32[16,16]{1,0} %reshape.133, f32[16,16]{1,0} %reshape.130, f32[16,16]{1,0} %reshape.127, f32[16,16]{1,0} %reshape.124, f32[16,16]{1,0} %reshape.121, f32[16,16]{1,0} %reshape.118, f32[16,16]{1,0} %reshape.115, f32[16,16]{1,0} %reshape.112, f32[16,1]{1,0} %add.277, f32[] %multiply.1128, f32[16,16]{1,0} %slice.892, f32[16,16]{1,0} %slice.890, f32[16,16]{1,0} %slice.888, f32[16,16]{1,0} %slice.886, f32[16,16]{1,0} %slice.884, f32[16,16]{1,0} %slice.882, f32[16,16]{1,0} %slice.880, f32[16,16]{1,0} %slice.878, f32[16,16]{1,0} %slice.876, f32[16,16]{1,0} %slice.874, f32[16,16]{1,0} %slice.872, f32[16,16]{1,0} %slice.870, f32[16,16]{1,0} %slice.868, f32[16,16]{1,0} %slice.866, f32[16,16]{1,0} %slice.864, f32[16,16]{1,0} %slice.862, f32[16,16]{1,0} %slice.860, f32[16,16]{1,0} %slice.858, f32[16,16]{1,0} %slice.856, f32[16,16]{1,0} %slice.854, f32[16,16]{1,0} %slice.852, f32[16,16]{1,0} %slice.850, f32[16,16]{1,0} %slice.848, f32[16,16]{1,0} %slice.846, f32[16,16]{1,0} %slice.844, f32[16,16]{1,0} %slice.842) | |
2020-07-24 05:41:09.473343: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1128 = f32[] multiply(f32[] %multiply.1127, f32[] %p29.1098) | |
2020-07-24 05:41:09.473350: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473358: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown. | |
2020-07-24 05:41:09.473382: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] } | |
2020-07-24 05:41:09.473391: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %constant.1097 = f32[] constant(1) | |
2020-07-24 05:41:09.473401: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473406: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] [[{{node XRTExecute}}]] | |
2020-07-24 05:41:09.473411: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473418: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %multiply.1129 = f32[] multiply(f32[] %multiply.1128, f32[] %constant.1097) | |
2020-07-24 05:41:09.473428: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473453: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] %add.1131 = f32[] add(f32[] %p30.1130, f32[] %multiply.1129) | |
2020-07-24 05:41:09.473440: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473459: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473473: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[12517,16]{0,1}, f32[14992,16]{0,1}, f32[15,16]{1,0}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) | |
2020-07-24 05:41:09.473474: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] ROOT %tuple.1132 = (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{1,0}, f32[2202608,16]{1,0}, f32[8351593,16]{1,0}, f32[4,16]{1,0}, f32[], f32[128,16]{1,0}, f32[128,16]{1,0}, f32[128,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{1,0}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) tuple(f32[512,13]{0,1} %add.750, f32[512]{0} %add.756, f32[256,512]{1,0} %add.762, f32[256]{0} %add.768, f32[64,256]{1,0} %add.774, f32[64]{0} %add.780, f32[16,64]{1,0} %add.786, f32[16]{0} %add.792, f32[512,367]{1,0} %add.798, f32[512]{0} %add.804, f32[256,512]{1,0} %add.810, f32[256]{0} %add.816, f32[1,256]{1,0} %add.822, f32[1]{0} %add.826, f32[2202608,16]{0,1} %add.977, f32[8351593,16]{0,1} %add.1037, f32[4,16]{1,0} %add.1096, f32[1]{0} %multiply.747, f32[1,256]{1,0} %multiply.743, f32[256]{0} %multiply.739, f32[256,512]{1,0} %multiply.735, f32[512]{0} %multiply.731, f32[512,367]{1,0} %multiply.727, f32[16]{0} %multiply.723, f32[16,64]{1,0} %multiply.719, f32[64]{0} %multiply.715, f32[64,256]{1,0} %multiply.711, f32[256]{0} %multiply.707, f32[256,512]{1,0} %multiply.703, f32[512]{0} %multiply.699, f32[512,13]{1,0} %multiply.695, f32[2202608,16]{1,0} %add.975, f32[8351593,16]{1,0} %add.1035, f32[4,16]{1,0} %add.1094, f32[] %add.1131, f32[128,16]{1,0} %gather.27, f32[128,16]{1,0} %gather.22, f32[128,16]{1,0} %gather.17, f32[16,16]{1,0} %reshape.187, f32[16,16]{1,0} %reshape.184, f32[16,16]{1,0} %reshape.181, f32[16,16]{1,0} %reshape.178, f32[16,16]{1,0} %reshape.175, f32[16,16]{1,0} %reshape.172, f32[16,16]{1,0} %reshape.169, f32[16,16]{1,0} %reshape.166, f32[16,16]{1,0} %reshape.163, f32[16,16]{1,0} %reshape.160, f32[16,16]{1,0} %reshape.157, f32[16,16]{1,0} %reshape.154, f32[16,16]{1,0} %reshape.151, f32[16,16]{1,0} %reshape.148, f32[16,16]{1,0} %reshape.145, f32[16,16]{1,0} %reshape.142, f32[16,16]{1,0} %reshape.139, f32[16,16]{1,0} %reshape.136, f32[16,16]{1,0} %reshape.133, f32[16,16]{1,0} %reshape.130, f32[16,16]{1,0} %reshape.127, f32[16,16]{1,0} %reshape.124, f32[16,16]{1,0} %reshape.121, f32[16,16]{1,0} %reshape.118, f32[16,16]{1,0} %reshape.115, f32[16,16]{1,0} %reshape.112, f32[16,1]{1,0} %add.277, f32[] %multiply.1127, f32[16,16]{1,0} %slice.892, f32[16,16]{1,0} %slice.890, f32[16,16]{1,0} %slice.888, f32[16,16]{1,0} %slice.886, f32[16,16]{1,0} %slice.884, f32[16,16]{1,0} %slice.882, f32[16,16]{1,0} %slice.880, f32[16,16]{1,0} %slice.878, f32[16,16]{1,0} %slice.876, f32[16,16]{1,0} %slice.874, f32[16,16]{1,0} %slice.872, f32[16,16]{1,0} %slice.870, f32[16,16]{1,0} %slice.868, f32[16,16]{1,0} %slice.866, f32[16,16]{1,0} %slice.864, f32[16,16]{1,0} %slice.862, f32[16,16]{1,0} %slice.860, f32[16,16]{1,0} %slice.858, f32[16,16]{1,0} %slice.856, f32[16,16]{1,0} %slice.854, f32[16,16]{1,0} %slice.852, f32[16,16]{1,0} %slice.850, f32[16,16]{1,0} %slice.848, f32[16,16]{1,0} %slice.846, f32[16,16]{1,0} %slice.844, f32[16,16]{1,0} %slice.842) | |
2020-07-24 05:41:09.473476: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473489: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473515: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] } | |
2020-07-24 05:41:09.473521: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] clone | |
2020-07-24 05:41:09.473525: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace: | |
2020-07-24 05:41:09.473534: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473539: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace *** | |
2020-07-24 05:41:09.473548: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace *** | |
2020-07-24 05:41:09.473554: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473563: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] tensorflow::CurrentStackTrace[abi:cxx11]() | |
2020-07-24 05:41:09.473556: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473570: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] OutputShape: (f32[512,13]{0,1}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[64,256]{1,0}, f32[64]{0}, f32[16,64]{1,0}, f32[16]{0}, f32[512,367]{1,0}, f32[512]{0}, f32[256,512]{1,0}, f32[256]{0}, f32[1,256]{1,0}, f32[1]{0}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[1]{0}, f32[1,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,367]{1,0}, f32[16]{0}, f32[16,64]{1,0}, f32[64]{0}, f32[64,256]{1,0}, f32[256]{0}, f32[256,512]{1,0}, f32[512]{0}, f32[512,13]{0,1}, f32[2202608,16]{0,1}, f32[8351593,16]{0,1}, f32[4,16]{1,0}, f32[], f32[128,16]{0,1}, f32[128,16]{0,1}, f32[128,16]{0,1}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,1]{0,1}, f32[], f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}, f32[16,16]{1,0}) | |
2020-07-24 05:41:09.473575: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>) | |
2020-07-24 05:41:09.473578: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
2020-07-24 05:41:09.473594: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473602: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ShapeHash(xla::Shape const&) | |
2020-07-24 05:41:09.473607: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown. | |
2020-07-24 05:41:09.473619: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] StackTrace: | |
2020-07-24 05:41:09.473626: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&) | |
2020-07-24 05:41:09.473631: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] [[{{node XRTExecute}}]] | |
2020-07-24 05:41:09.473640: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** Begin stack trace *** | |
2020-07-24 05:41:09.473647: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473659: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] tensorflow::CurrentStackTrace[abi:cxx11]() | |
2020-07-24 05:41:09.473666: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473674: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ReportComputationError(tensorflow::Status const&, absl::lts_2020_02_25::Span<xla::XlaComputation const* const>, absl::lts_2020_02_25::Span<xla::Shape const* const>) | |
2020-07-24 05:41:09.473678: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473720: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::util::ShapeHash(xla::Shape const&) | |
2020-07-24 05:41:09.473725: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473733: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] xla::XrtComputationClient::ExecuteComputation(xla::ComputationClient::Computation const&, absl::lts_2020_02_25::Span<std::shared_ptr<xla::ComputationClient::Data> const>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, xla::ComputationClient::ExecuteComputationOptions const&) | |
2020-07-24 05:41:09.473738: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473748: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473752: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] clone | |
2020-07-24 05:41:09.473763: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473768: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace *** | |
2020-07-24 05:41:09.473778: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473782: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473791: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473797: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
2020-07-24 05:41:09.473806: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473809: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown. | |
2020-07-24 05:41:09.473819: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] clone | |
2020-07-24 05:41:09.473821: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] [[{{node XRTExecute}}]] | |
2020-07-24 05:41:09.473831: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] *** End stack trace *** | |
2020-07-24 05:41:09.473845: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] | |
2020-07-24 05:41:09.473856: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Status: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
2020-07-24 05:41:09.473866: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] Cancelled by TearDown. | |
2020-07-24 05:41:09.473877: E tensorflow/compiler/xla/xla_client/xla_util.cc:76] [[{{node XRTExecute}}]] | |
Exception in device=TPU:6: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Exception in device=TPU:2: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Exception in device=TPU:3: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Exception in device=TPU:5: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Exception in device=TPU:4: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Traceback (most recent call last): | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn | |
_start_fn(index, pf_cfg, fn, args) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn | |
fn(gindex, *args) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main | |
dlrm_wrap(X, lS_o, lS_i, use_gpu, device) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap | |
return dlrm(X, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl | |
result = self.forward(*input, **kwargs) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn | |
_start_fn(index, pf_cfg, fn, args) | |
Traceback (most recent call last): | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward | |
return self.tpu_parallel_forward(dense_x, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn | |
fn(gindex, *args) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward | |
ly = self._collect_distribute_embeddings(ly_local) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main | |
dlrm_wrap(X, lS_o, lS_i, use_gpu, device) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings | |
full_data = full_data[self._non_pad_indices] | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap | |
return dlrm(X, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl | |
result = self.forward(*input, **kwargs) | |
Traceback (most recent call last): | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward | |
return self.tpu_parallel_forward(dense_x, lS_o, lS_i) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward | |
ly = self._collect_distribute_embeddings(ly_local) | |
RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings | |
full_data = full_data[self._non_pad_indices] | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn | |
_start_fn(index, pf_cfg, fn, args) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn | |
fn(gindex, *args) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main | |
dlrm_wrap(X, lS_o, lS_i, use_gpu, device) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap | |
return dlrm(X, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl | |
result = self.forward(*input, **kwargs) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward | |
return self.tpu_parallel_forward(dense_x, lS_o, lS_i) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward | |
ly = self._collect_distribute_embeddings(ly_local) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings | |
full_data = full_data[self._non_pad_indices] | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn | |
_start_fn(index, pf_cfg, fn, args) | |
RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn | |
fn(gindex, *args) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main | |
dlrm_wrap(X, lS_o, lS_i, use_gpu, device) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap | |
return dlrm(X, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl | |
result = self.forward(*input, **kwargs) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward | |
return self.tpu_parallel_forward(dense_x, lS_o, lS_i) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward | |
ly = self._collect_distribute_embeddings(ly_local) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings | |
full_data = full_data[self._non_pad_indices] | |
RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 330, in _mp_start_fn | |
_start_fn(index, pf_cfg, fn, args) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 324, in _start_fn | |
fn(gindex, *args) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1350, in main | |
dlrm_wrap(X, lS_o, lS_i, use_gpu, device) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 1204, in dlrm_wrap | |
return dlrm(X, lS_o, lS_i) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/nn/modules/module.py", line 726, in _call_impl | |
result = self.forward(*input, **kwargs) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 478, in forward | |
return self.tpu_parallel_forward(dense_x, lS_o, lS_i) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 583, in tpu_parallel_forward | |
ly = self._collect_distribute_embeddings(ly_local) | |
File "/home/taylanbil/dlrm/dlrm_s_pytorch.py", line 521, in _collect_distribute_embeddings | |
full_data = full_data[self._non_pad_indices] | |
RuntimeError: tensorflow/compiler/xla/xla_client/xrt_computation_client.cc:381 : Check failed: session->session()->Run( session_work->feed_inputs, session_work->outputs_handles, &outputs) == ::tensorflow::Status::OK() (Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. vs. OK) | |
*** Begin stack trace *** | |
tensorflow::CurrentStackTrace[abi:cxx11]() | |
clone | |
*** End stack trace *** | |
Error in atexit._run_exitfuncs: | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit | |
Error in atexit._run_exitfuncs: | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit | |
Error in atexit._run_exitfuncs: | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit | |
_XLAC._prepare_to_exit() | |
RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. | |
[[{{node XRTExecute}}]] | |
_XLAC._prepare_to_exit() | |
RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. | |
[[{{node XRTExecute}}]] | |
Error in atexit._run_exitfuncs: | |
Traceback (most recent call last): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/__init__.py", line 57, in _prepare_to_exit | |
_XLAC._prepare_to_exit() | |
RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. | |
[[{{node XRTExecute}}]] | |
_XLAC._prepare_to_exit() | |
RuntimeError: Cancelled: From /job:tpu_worker/replica:0/task:0: | |
Cancelled by TearDown. | |
[[{{node XRTExecute}}]] | |
Traceback (most recent call last): | |
File "/home/taylanbil/dlrm/dlrm_tpu_runner.py", line 15, in <module> | |
xmp.spawn(main, args=(), nprocs=pre_spawn_flags.tpu_cores) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch_xla/distributed/xla_multiprocessing.py", line 395, in spawn | |
start_method=start_method) | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 158, in start_processes | |
while not context.join(): | |
File "/anaconda3/envs/torch-xla-nightly/lib/python3.6/site-packages/torch/multiprocessing/spawn.py", line 108, in join | |
(error_index, name) | |
Exception: process 7 terminated with signal SIGSEGV |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment