Functionalization

Link to prototype branch: https://github.com/ailzhang/pytorch/commit/83f647e9f14a89e61378c8e834aec4dfbcb74a00

Quick summary:

This prototype focuses only on getting rid of aliasing in view ops: after the Func2 kernel runs, you can safely assume you won't see any view ops, only view_op_copy ops that return completely new storage.

You can build this branch and check some examples by running python test/base_to_view.py and python test/view_to_base.py. Note that this branch is only a proof of concept: it comes with a lot of hacks and still requires the careful design work described in the section below.
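As a quick illustration of the aliasing that Func2 removes, here is a small standalone ATen snippet (written against the public ATen API, not against the prototype branch; it needs to be compiled and linked against libtorch). Today a view shares storage with its base, so an in-place op on the view is visible through the base; after Func2 the view would instead come from a view_op_copy-style kernel with its own storage, and a later pass would have to propagate the mutation back to the base explicitly.

// Sketch of the aliasing behavior that functionalization eliminates.
// Uses only the public ATen API; the view_op_copy kernels themselves
// live on the prototype branch.
#include <ATen/ATen.h>
#include <iostream>

int main() {
  at::Tensor base = at::zeros({4});
  at::Tensor view = base.view({2, 2});   // shares storage with `base`
  view.add_(1);                          // the mutation is visible through `base`
  std::cout << base.sum().item<float>() << std::endl;  // prints 4
  return 0;
}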

diff --git a/fbcode/caffe2/c10/core/DispatchKeySet.h b/fbcode/caffe2/c10/core/DispatchKeySet.h
--- a/fbcode/caffe2/c10/core/DispatchKeySet.h
+++ b/fbcode/caffe2/c10/core/DispatchKeySet.h
@@ -82,6 +82,10 @@
   DispatchKeySet operator-(DispatchKeySet other) const {
     return DispatchKeySet(repr_ & ~other.repr_);
   }
+  // Compute self ^ other
+  DispatchKeySet operator^(DispatchKeySet other) const {
+    return DispatchKeySet(repr_ ^ other.repr_);
+  }
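The new operator is a symmetric difference over the keyset's underlying 64-bit representation. A toy standalone sketch of that semantics (plain uint64_t masks here, not the real c10::DispatchKeySet type):

// Symmetric difference over a bitset: keys that are in exactly one of the
// two sets survive the XOR.
#include <cstdint>
#include <cassert>

int main() {
  uint64_t a = 0b1010;          // keys {1, 3}
  uint64_t b = 0b0110;          // keys {1, 2}
  assert((a ^ b) == 0b1100);    // keys {2, 3}: present in exactly one set
  return 0;
}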
diff --git a/fbcode/caffe2/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h b/fbcode/caffe2/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h
--- a/fbcode/caffe2/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h
+++ b/fbcode/caffe2/aten/src/ATen/core/dispatch/DispatchKeyExtractor.h
@@ -49,7 +49,8 @@
   // it's a bit troublesome, because fastpath TLS access requires the type of
   // the TLS in question to be zero-initialized, so you don't actually win
   // anything in that case.
-  return (((ks | local.included_ | always_included) - local.excluded_) & key_mask);
+  // For the additional XOR op, see note [TLS Initialization]
+  return (((ks | (local.included_ ^ c10::InplaceOrView_keyset) | always_included) - local.excluded_) & key_mask);
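The reason for an XOR rather than a plain OR is that the thread-local included set must stay zero-initialized for the fast TLS path, while InplaceOrView should be included by default; its bit is therefore stored inverted in TLS and flipped back at extraction time. A toy standalone sketch of that convention (plain uint64_t masks and an illustrative bit position, not the real c10 types):

// Toy model of note [TLS Initialization]: the TLS word starts at 0, yet
// InplaceOrView must be on by default, so its TLS bit is stored inverted
// and XORed back in when the dispatch key set is computed.
#include <cstdint>
#include <cassert>

constexpr uint64_t kInplaceOrViewBit = 1ull << 3;  // bit position is illustrative

uint64_t effective_included(uint64_t tls_included) {
  return tls_included ^ kInplaceOrViewBit;
}

int main() {
  // Fresh thread: TLS is all zeros, but InplaceOrView comes out included.
  assert(effective_included(0) & kInplaceOrViewBit);
  // A guard that wants to skip InplaceOrView sets the TLS bit, which XOR clears.
  assert((effective_included(kInplaceOrViewBit) & kInplaceOrViewBit) == 0);
  return 0;
}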
InplaceOrViewTypeEverything.cpp
#include "torch/csrc/autograd/VariableTypeUtils.h"
#include <torch/library.h>
#include "torch/csrc/autograd/function.h"
#include <ATen/RedispatchFunctions.h>
#include "ATen/quantized/Quantizer.h"
// @generated from tools/autograd/templates/InplaceOrViewType.cpp
# No difference in VariableType
λ ~/pytorch033 cat diff_TypeDefault
--- TypeDefault_before 2020-08-03 16:23:14.155651681 -0700
+++ TypeDefault_after 2020-08-03 16:17:08.590947589 -0700
@@ -3904,6 +3904,8 @@
m.impl("conv_transpose3d.input",
torch::CppFunction::makeUnboxedOnly(&TypeDefault::conv_transpose3d_input));
m.def("copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!)");
+ m.impl("copy_",
--- before42031/VariableTypeEverything.cpp 2020-07-31 13:16:02.450902000 -0700
+++ after42031/VariableTypeEverything.cpp 2020-07-31 13:06:18.799446000 -0700
@@ -46,22 +46,6 @@
// Later when we merge the mobile op registration the anonymous namespace
// will be restored.
// namespace {
-Tensor __and___Scalar(const Tensor & self, Scalar other) {
- auto result = TypeDefault::__and___Scalar(self, other);
- return result;
-}
Configuration                    jit_premul_bias + master    jit_premul_bias + "Reduce time per guard"
fuser=none, executor=simple      13.53                       12.79
fuser=none, executor=profiling   19.52                       13.88
fuser=te,   executor=profiling   13.44                       12.14
0. Program arguments: /usr/lib/llvm-9/bin/clang -cc1 -triple x86_64-pc-linux-gnu -emit-obj -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SobolEngineOps.cpp -mrelocation-model pic -pic-level 2 -mthread-model posix -fno-trapping-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -coverage-notes-file /var/lib/jenkins/pytorch/build/caffe2/CMakeFiles/torch_cpu.dir/__/aten/src/ATen/native/SobolEngineOps.cpp.gcno -resource-dir /usr/lib/llvm-9/lib/clang/9.0.0 -isystem /var/lib/jenkins/pytorch/build/third_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../third_party/gloo -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googlemock/include -isystem /var/lib/jenkins/pytorch/cmake/../third_party/googletest/googletest/include -isystem /var/lib/jenkins/pytorch/third_party/protobuf/src -isystem /opt/conda/include -isystem /var/lib/jenkins/pytorch/third_party/gemmlowp