Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save hughperkins/39ad7e00fdd3a91f1af3fb9cc7a5a744 to your computer and use it in GitHub Desktop.
(env3) (tensorflow-cl|…10△2) ~/git/tensorflow-cl$ git diff
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
index f18ee5e..ba664ea 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
@@ -18,6 +18,8 @@ limitations under the License.
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/protobuf/config.pb.h"
+#include <sstream>
+
namespace gpu = ::perftools::gputools;
namespace tensorflow {
@@ -142,23 +144,45 @@ void EventMgr::PollLoop() {
polling_stopped_->Notify();
}
+std::string EventMgr::debugIU(const InUse &iu) {
+ std::ostringstream ss;
+ std::cout << " debugui iu=" << &iu << std::endl;
+ std::cout << " debugui origfn=" << iu.funcOrig << std::endl;
+ std::cout << " debugui pre=" << iu.pre << std::endl;
+ std::cout << " debugui post=" << iu.post << std::endl;
+ std::cout << " debugui &iu.func=" << &iu.func << std::endl;
+ std::cout << " debugui (char *)&iu.func=" << (char *)&iu.func << std::endl;
+ std::cout << " debugui (long *)(char *)&iu.func=" << (long *)(char *)&iu.func << std::endl;
+ std::cout << " debugui *(long *)(char *)&iu.func=" << *(long *)(char *)&iu.func << std::endl;
+ ss << "iu=" << &iu << " origfn=" << iu.funcOrig << " pre=" << iu.pre << " func=" << *(long*)(char*)(&iu.func) << " post=" << iu.post;
+ return ss.str();
+}
+
void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) {
VLOG(2) << "QueueInUse free_events_ " << free_events_.size()
<< " used_events_ " << used_events_.size();
+ std::cout << "QueueInUse() " << debugIU(iu) << std::endl;
+
// Events are created on demand, and repeatedly reused. There is no
// limit placed here on the number of allocated Events.
if (free_events_.empty()) {
+ std::cout << " queueInUse no free events: creating new one" << std::endl;
free_events_.push_back(new gpu::Event(exec_));
free_events_.back()->Init();
}
gpu::Event* e = free_events_.back();
+ // std::cout << " queueInUse event " << e << std::endl;
free_events_.pop_back();
stream->ThenRecordEvent(e);
iu.event = e;
+ std::cout << " queueInUse event=" << e << " " << debugIU(iu) << std::endl;
bool was_empty = used_events_.empty();
used_events_.push_back(iu);
+ std::cout << " queueInUse queued iu used_events[used_events.size() - 1] " << debugIU(used_events_[used_events_.size() - 1]) << " used_events_.size() " << used_events_.size() << std::endl;
+ //InUse *iuqueued = &used_events_[used_events_.size() - 1];
// Maybe wake up the polling thread
if (was_empty) events_pending_.notify_all();
+ std::cout << " queueInUse after notify_all(): used_events_.size() " << used_events_.size() << std::endl;
}
// This function must be called periodically to check whether pending
diff --git a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
index b97b5e4..9975f06 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
+++ b/tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
@@ -102,12 +102,16 @@ class EventMgr {
perftools::gputools::Event* event;
TensorReferenceVector* mem;
BufRec bufrec;
+ long funcOrig;
+ long long pre;
const std::function<void()> &func;
+ long long post;
};
typedef gtl::InlinedVector<InUse, 4> ToFreeVector;
void FreeMemory(const ToFreeVector& to_free) {
+ std::cout << "core/common_runtime/gpu/gpu_event_mgr.h FreeMemory()" << std::endl;
for (const auto& iu : to_free) {
if (iu.mem != nullptr) {
for (auto& t : *(iu.mem)) {
@@ -124,10 +128,13 @@ class EventMgr {
iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
}
// The function must be called in another thread.
+ std::cout << debugIU(iu);
if (iu.func != nullptr) threadpool_.Schedule(iu.func);
}
}
+ std::string debugIU(const InUse &iu);
+
// Stream-enqueue an unused Event and save with it a collection of
// Tensors and/or a BufRec to be deleted only after the Event
// records.
@@ -137,17 +144,17 @@ class EventMgr {
void QueueTensors(perftools::gputools::Stream* stream,
TensorReferenceVector* tensors)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
- QueueInUse(stream, {nullptr, tensors, BufRec(), nullptr});
+ QueueInUse(stream, {nullptr, tensors, BufRec(), 0, 123, nullptr, 123});
}
void QueueBuffer(perftools::gputools::Stream* stream, BufRec bufrec)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
- QueueInUse(stream, {nullptr, nullptr, bufrec, nullptr});
+ QueueInUse(stream, {nullptr, nullptr, bufrec, 0, 123, nullptr, 123});
}
void QueueFunc(perftools::gputools::Stream* stream,
const std::function<void()> &func) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
- QueueInUse(stream, {nullptr, nullptr, BufRec(), func});
+ QueueInUse(stream, {nullptr, nullptr, BufRec(), *(long *)(char *)&func, 123, func, 123});
}
// This function should be called at roughly the same tempo as
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment