(env3) (tensorflow-cl|…10△2) ~/git/tensorflow-cl$ git diff
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
index f18ee5e..ba664ea 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
@@ -18,6 +18,8 @@ limitations under the License.
 #include "tensorflow/core/platform/stream_executor.h"
 #include "tensorflow/core/protobuf/config.pb.h"
 
+#include <sstream>
+
 namespace gpu = ::perftools::gputools;
 
 namespace tensorflow {
@@ -142,23 +144,45 @@ void EventMgr::PollLoop() {
   polling_stopped_->Notify();
 }
 
+std::string EventMgr::debugIU(const InUse &iu) {
+  std::ostringstream ss;
+  std::cout << " debugui iu=" << &iu << std::endl;
+  std::cout << " debugui origfn=" << iu.funcOrig << std::endl;
+  std::cout << " debugui pre=" << iu.pre << std::endl;
+  std::cout << " debugui post=" << iu.post << std::endl;
+  std::cout << " debugui &iu.func=" << &iu.func << std::endl;
+  std::cout << " debugui (char *)&iu.func=" << (char *)&iu.func << std::endl;
+  std::cout << " debugui (long *)(char *)&iu.func=" << (long *)(char *)&iu.func << std::endl;
+  std::cout << " debugui *(long *)(char *)&iu.func=" << *(long *)(char *)&iu.func << std::endl;
+  ss << "iu=" << &iu << " origfn=" << iu.funcOrig << " pre=" << iu.pre << " func=" << *(long*)(char*)(&iu.func) << " post=" << iu.post;
+  return ss.str();
+}
+
 void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) {
   VLOG(2) << "QueueInUse free_events_ " << free_events_.size()
           << " used_events_ " << used_events_.size();
+  std::cout << "QueueInUse() " << debugIU(iu) << std::endl;
+
   // Events are created on demand, and repeatedly reused. There is no
   // limit placed here on the number of allocated Events.
   if (free_events_.empty()) {
+    std::cout << " queueInUse no free events: creating new one" << std::endl;
     free_events_.push_back(new gpu::Event(exec_));
     free_events_.back()->Init();
   }
   gpu::Event* e = free_events_.back();
+  // std::cout << " queueInUse event " << e << std::endl;
   free_events_.pop_back();
   stream->ThenRecordEvent(e);
   iu.event = e;
+  std::cout << " queueInUse event=" << e << " " << debugIU(iu) << std::endl;
   bool was_empty = used_events_.empty();
   used_events_.push_back(iu);
+  std::cout << " queueInUse queued iu used_events[used_events.size() - 1] " << debugIU(used_events_[used_events_.size() - 1]) << " used_events_.size() " << used_events_.size() << std::endl;
+  //InUse *iuqueued = &used_events_[used_events_.size() - 1];
   // Maybe wake up the polling thread
   if (was_empty) events_pending_.notify_all();
+  std::cout << " queueInUse after notify_all(): used_events_.size() " << used_events_.size() << std::endl;
 }
 
 // This function must be called periodically to check whether pending
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
index b97b5e4..9975f06 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
@@ -102,12 +102,16 @@ class EventMgr {
     perftools::gputools::Event* event;
     TensorReferenceVector* mem;
     BufRec bufrec;
+    long funcOrig;
+    long long pre;
     const std::function<void()> &func;
+    long long post;
   };
   typedef gtl::InlinedVector<InUse, 4> ToFreeVector;
 
   void FreeMemory(const ToFreeVector& to_free) {
+    std::cout << "core/common_runtime/gpu/gpu_event_mgr.h FreeMemory()" << std::endl;
     for (const auto& iu : to_free) {
       if (iu.mem != nullptr) {
         for (auto& t : *(iu.mem)) {
@@ -124,10 +128,13 @@ class EventMgr {
         iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
       }
       // The function must be called in another thread.
+      std::cout << debugIU(iu);
       if (iu.func != nullptr) threadpool_.Schedule(iu.func);
     }
   }
 
+  std::string debugIU(const InUse &iu);
+
   // Stream-enqueue an unused Event and save with it a collection of
   // Tensors and/or a BufRec to be deleted only after the Event
   // records.
@@ -137,17 +144,17 @@ class EventMgr {
   void QueueTensors(perftools::gputools::Stream* stream,
                     TensorReferenceVector* tensors)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    QueueInUse(stream, {nullptr, tensors, BufRec(), nullptr});
+    QueueInUse(stream, {nullptr, tensors, BufRec(), 0, 123, nullptr, 123});
   }
 
   void QueueBuffer(perftools::gputools::Stream* stream, BufRec bufrec)
       EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    QueueInUse(stream, {nullptr, nullptr, bufrec, nullptr});
+    QueueInUse(stream, {nullptr, nullptr, bufrec, 0, 123, nullptr, 123});
   }
 
   void QueueFunc(perftools::gputools::Stream* stream,
                  const std::function<void()> &func) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    QueueInUse(stream, {nullptr, nullptr, BufRec(), func});
+    QueueInUse(stream, {nullptr, nullptr, BufRec(), *(long *)(char *)&func, 123, func, 123});
   }
 
   // This function should be called at roughly the same tempo as
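For context, here is a minimal standalone sketch of the instrumentation pattern the diff applies: bracket the suspect InUse::func member with sentinel fields (pre and post, both written as 123), capture the first word of the std::function's storage at enqueue time (funcOrig), and dump the same word again later. If the sentinels still read 123 but the func word has changed, the damage is confined to the referent of func, which points at an object-lifetime problem rather than an overrun of the neighbouring struct fields. This program is hypothetical, not tensorflow-cl code; InUse, debugIU, and QueueFunc merely mirror the names in the diff, and the final debugIU call deliberately reads through a dangling reference (undefined behaviour) purely to show the symptom in the log.

#include <cstring>
#include <functional>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for EventMgr::InUse: the suspect member is
// bracketed by sentinels, mirroring funcOrig/pre/post in the diff.
struct InUse {
  long funcOrig;                      // first word of func's storage at enqueue time
  long long pre;                      // sentinel, written as 123
  const std::function<void()>& func;  // reference member: dangles if the
                                      // referent dies before it is called
  long long post;                     // sentinel, written as 123
};

// Mirrors debugIU(): report the sentinels and the current first word of
// the function object's storage so corruption shows up in the log.
std::string debugIU(const InUse& iu) {
  long word;
  std::memcpy(&word, &iu.func, sizeof(word));  // well-defined spelling of *(long *)(char *)&iu.func
  std::ostringstream ss;
  ss << "iu=" << &iu << " origfn=" << iu.funcOrig << " pre=" << iu.pre
     << " func=" << word << " post=" << iu.post;
  return ss.str();
}

std::vector<InUse> used_events;  // stands in for used_events_

// Mirrors QueueFunc()/QueueInUse(): the stored InUse keeps only a
// *reference* to the caller's std::function, so a temporary argument
// is already dead by the time the queue is drained.
void QueueFunc(const std::function<void()>& func) {
  long orig;
  std::memcpy(&orig, &func, sizeof(orig));
  used_events.push_back(InUse{orig, 123, func, 123});
  std::cout << "queued: " << debugIU(used_events.back()) << std::endl;
}

int main() {
  // The lambda is converted to a temporary std::function that dies at the
  // end of this statement; InUse::func dangles from then on.
  QueueFunc([] { std::cout << "callback ran" << std::endl; });
  // Undefined behaviour, shown only to illustrate the symptom: pre/post
  // typically still read 123 while func no longer matches origfn.
  std::cout << "later:  " << debugIU(used_events.back()) << std::endl;
}

On a typical run the "later:" line shows intact sentinels with a func word that differs from origfn, which is exactly the signature the diff's log statements are set up to catch. Whether a dangling func reference was the actual failure in tensorflow-cl is not established by the diff alone; the sketch only reproduces the instrumentation.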