Use some c10::ThreadLocal to avoid crashes on old Android toolchains (pytorch#59017)

Summary:
Pull Request resolved: pytorch#59017

See the comment in ThreadLocal.h for context.
I used a slightly dirty preprocessor hack to minimize the number of changes.
The hope is that we'll be able to revert all of these soon.
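
A minimal sketch of the pattern, using hypothetical stand-in names
(TLSWrapper, tls_message, and message below are illustrations; the real
wrapper is c10::ThreadLocal from ThreadLocal.h): the hack works because
the wrapper's get() returns a mutable reference to the per-thread value,
so a macro expanding the old variable name to (tls_name.get()) lets
every existing read, write, and std::move of that name compile
unchanged.

#include <memory>
#include <string>
#include <utility>

// Hypothetical stand-in for c10::ThreadLocal<T>; the only property the
// hack relies on is that get() returns a mutable reference to a
// per-thread instance of T.
template <typename T>
class TLSWrapper {
 public:
  T& get() {
    thread_local T value{};  // the real wrapper avoids this construct
    return value;
  }
};

// The preprocessor hack: define the wrapper under a new name, then
// #define the old variable name to expand to a reference to its value.
static TLSWrapper<std::shared_ptr<std::string>> tls_message;
#define message (tls_message.get())

void example() {
  message = std::make_shared<std::string>("hi");  // assignment still works
  auto taken = std::move(message);                // so does std::move
}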

Test Plan:
CI.
Built FB4A with gnustl and saw no references to __cxa_thread_atexit
in the PyTorch libraries.

Reviewed By: ilia-cher

Differential Revision: D28720762

fbshipit-source-id: 0f13c7ac5a108b95f8fde6dbc63c6b8bdb8599de
dreiss authored and facebook-github-bot committed May 28, 2021
1 parent 4b3d17c commit 9c83e41
Showing 3 changed files with 11 additions and 6 deletions.
6 changes: 3 additions & 3 deletions c10/util/ThreadLocalDebugInfo.cpp
@@ -1,11 +1,11 @@
+#include <c10/util/ThreadLocal.h>
 #include <c10/util/ThreadLocalDebugInfo.h>
 
 namespace c10 {
 
-namespace {
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-thread_local std::shared_ptr<ThreadLocalDebugInfo> debug_info = nullptr;
-} // namespace
+C10_DEFINE_TLS_static(std::shared_ptr<ThreadLocalDebugInfo>, tls_debug_info);
+#define debug_info (tls_debug_info.get())
 
 /* static */
 DebugInfoBase* ThreadLocalDebugInfo::get(DebugInfoKind kind) {
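Why the wrapper helps: a C++11 thread_local whose type has a non-trivial
destructor (like the shared_ptr above) makes the compiler register that
destructor through __cxa_thread_atexit, a runtime hook that old
gnustl-based Android toolchains do not provide. A pthread key destructor
delivers the same per-thread cleanup without referencing that symbol.
Below is a minimal sketch of that general technique (PthreadLocal is a
hypothetical illustration, not the actual c10::ThreadLocal code; error
handling is omitted).

#include <pthread.h>

// Sketch: per-thread storage whose cleanup runs through the pthread key
// destructor callback instead of __cxa_thread_atexit.
template <typename T>
class PthreadLocal {
 public:
  PthreadLocal() {
    pthread_key_create(&key_, [](void* p) { delete static_cast<T*>(p); });
  }
  ~PthreadLocal() {
    pthread_key_delete(key_);
  }

  T& get() {
    if (void* p = pthread_getspecific(key_)) {
      return *static_cast<T*>(p);  // already created on this thread
    }
    T* fresh = new T();            // first access on this thread
    pthread_setspecific(key_, fresh);
    return *fresh;
  }

 private:
  pthread_key_t key_;
};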
7 changes: 5 additions & 2 deletions torch/csrc/autograd/engine.cpp
@@ -16,6 +16,7 @@
 #include <c10/core/Event.h>
 #include <c10/core/DeviceGuard.h>
 #include <c10/util/Optional.h>
+#include <c10/util/ThreadLocal.h>
 #include <c10/core/StreamGuard.h>
 
 #include <atomic>
@@ -86,7 +87,8 @@ static thread_local int total_depth = 0;
 // The current GraphTask being executed by this thread. This helps
 // queue_callback() to find the target GraphTask to append final callbacks.
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-static thread_local std::shared_ptr<GraphTask> current_graph_task = nullptr;
+C10_DEFINE_TLS_static(std::shared_ptr<GraphTask>, tls_current_graph_task);
+#define current_graph_task (tls_current_graph_task.get())
 
 // Every autograd worker thread is associated with a ready queue, which specifies
 // the stream of work of this thread to do. This shared_ptr is a thread_local
@@ -103,7 +105,8 @@ static thread_local std::shared_ptr<GraphTask> current_graph_task = nullptr;
 // ReadyQueue with the parent thread for performance improvement.
 // see Note [Reentrant backwards] for more details.
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-static thread_local std::shared_ptr<ReadyQueue> local_ready_queue = nullptr;
+C10_DEFINE_TLS_static(std::shared_ptr<ReadyQueue>, tls_local_ready_queue);
+#define local_ready_queue (tls_local_ready_queue.get())
 
 // Note [Reentrant backwards]
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~
4 changes: 3 additions & 1 deletion torch/csrc/autograd/function.cpp
@@ -1,5 +1,6 @@
 #include <torch/csrc/autograd/function.h>
 
+#include <c10/util/ThreadLocal.h>
 #include <torch/csrc/autograd/engine.h>
 #include <torch/csrc/autograd/variable.h>

@@ -19,7 +20,8 @@ namespace torch { namespace autograd {
 // parent of new nodes created during the evaluation of this node in anomaly
 // mode.
 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
-static thread_local std::shared_ptr<Node> current_evaluating_node = nullptr;
+C10_DEFINE_TLS_static(std::shared_ptr<Node>, tls_current_evaluating_node);
+#define current_evaluating_node (tls_current_evaluating_node.get())
 
 NodeGuard::NodeGuard(std::shared_ptr<Node> node) {
   last_evaluating_node_ = std::move(current_evaluating_node);
