Skip to content

Instantly share code, notes, and snippets.

@albanD
Last active October 16, 2019 19:27
Show Gist options
  • Save albanD/3990b979f2e470d79ac9f1b96293e0bc to your computer and use it in GitHub Desktop.
Save albanD/3990b979f2e470d79ac9f1b96293e0bc to your computer and use it in GitHub Desktop.
Autodiff linear debugging

Debugging code

// Debug dump: print whatever grad.f and grad.df point to under the
// "Forward code:" / "Backward code:" labels, bracketed by marker lines.
// Every line ends with std::endl so the output is flushed immediately.
std::cout << "Forwarding into jit module" << std::endl
          << "Forward code:" << std::endl
          << *grad.f.get() << std::endl;
std::cout << "Backward code:" << std::endl
          << *grad.df.get() << std::endl
          << "End print !" << std::endl;

Printing during the evaluation:

Forwarding into jit module
Forward code:
graph(%0 : Double(*, *),
      %1 : Double(*),
      %2 : Double(*, *),
      %3 : Double(*, *),
      %4 : Double(*)):
  %5 : int = prim::Constant[value=1]() # <string>:193:92
  %6 : int = prim::Constant[value=2]() # <string>:384:35
  %7 : Double(*, *) = aten::t(%3)
  %8 : Double(*, *) = aten::mm(%2, %7) # <string>:8:25
  %9 : Double(*, *) = aten::add(%4, %8, %5) # <string>:8:18
  %x.2 : Tensor = aten::linear(%9, %0, %1), scope: VAE/Linear[fc4] # /Users/albandes/workspace/pytorch_dev/torch/nn/functional.py:1415:0
  %x : Tensor = aten::pow(%x.2, %6), scope: VAE # foo.py:16:0
  return (%x, %9, %6, %x.2, %5)

Backward code:
graph(%0 : Tensor,
      %1 : Tensor,
      %2 : Tensor,
      %3 : Double(*, *),
      %4 : Double(*),
      %5 : Double(*, *),
      %6 : Double(*, *),
      %7 : Double(*),
      %input : Tensor,
      %9 : int,
      %x.1 : Tensor,
      %11 : int):
  %12 : None = prim::Constant() # <string>:380:32
  %13 : int = prim::Constant[value=0]() # <string>:349:24
  %14 : bool = prim::Constant[value=1]() # <string>:349:16
  %15 : int = prim::Constant[value=-2]() # <string>:340:23
  %16 : int = prim::Constant[value=-1]() # <string>:339:23
  %17 : int = prim::Constant[value=1]() # <string>:333:23
  %18 : float = prim::Constant[value=0]() # <string>:190:39
  %19 : int = prim::Constant[value=2]() # <string>:384:35
  %grad_self : Tensor = prim::GradOf[name="aten::pow"](%0)
    block0():
      %21 : float = prim::Float(%19) # <string>:190:20
      %22 : bool = aten::eq(%21, %18) # <string>:190:20
      %grad_self.3 : Tensor = prim::If(%22) # <string>:190:17
        block0():
          %grad_self.4 : Tensor = aten::zeros_like(%x.1) # <string>:191:33
          -> (%grad_self.4)
        block1():
          %25 : Tensor = aten::mul(%0, %9) # <string>:193:33
          %26 : float = prim::Float(%9) # <string>:193:74
          %27 : float = aten::sub(%26, %11) # <string>:193:74
          %28 : Tensor = aten::pow(%x.1, %27) # <string>:193:58
          %grad_self.2 : Tensor = aten::mul(%25, %28) # <string>:193:33
          -> (%grad_self.2)
      -> (%grad_self.3)
  %30 : Tensor = prim::AutogradAdd(%2, %grad_self)
  %grad_input.3 : Tensor, %grad_weight.3 : Tensor, %grad_bias.3 : Tensor? = prim::GradOf[name="aten::linear"](%30)
    block0():
      %34 : bool = aten::__isnot__(%4, %12) # <string>:380:20
      %grad_bias.2 : Tensor? = prim::If(%34) # <string>:380:17
        block0():
          %bias.6 : Tensor = prim::unchecked_unwrap_optional(%4)
          %37 : int[] = aten::size(%bias.6) # <string>:381:63
          %grad_bias.4 : Tensor = aten::_grad_sum_to_size(%30, %37) # <string>:381:33
          -> (%grad_bias.4)
        block1():
          -> (%12)
      %39 : int = aten::dim(%input) # <string>:384:20
      %40 : bool = aten::eq(%39, %19) # <string>:384:20
      %grad_input.2 : Tensor, %grad_weight.2 : Tensor = prim::If(%40) # <string>:384:17
        block0():
          %grad_input.4 : Tensor = aten::mm(%30, %3) # <string>:385:34
          %44 : Tensor = aten::t(%30) # <string>:386:35
          %grad_weight.5 : Tensor = aten::mm(%44, %input) # <string>:386:35
          -> (%grad_input.4, %grad_weight.5)
        block1():
          %46 : Tensor = aten::t(%3) # <string>:388:86
          %self_size.2 : int[] = aten::size(%input) # <string>:366:25
          %other_size.2 : int[] = aten::size(%46) # <string>:367:26
          %dim.3 : int = aten::dim(%46) # <string>:332:19
          %50 : bool = aten::eq(%dim.3, %17) # <string>:333:16
          %out.20 : Tensor = prim::If(%50) # <string>:333:13
            block0():
              -> (%46)
            block1():
              %52 : bool = aten::eq(%dim.3, %19) # <string>:335:18
              %out.21 : Tensor = prim::If(%52) # <string>:335:18
                block0():
                  %out.22 : Tensor = aten::t(%46) # <string>:336:23
                  -> (%out.22)
                block1():
                  %dims.3 : int[] = prim::rangelist(%dim.3) # <string>:338:24
                  %56 : int = aten::sub(%dim.3, %19) # <string>:339:28
                  %57 : int[] = aten::_set_item(%dims.3, %16, %56) # <string>:339:17
                  %58 : int = aten::sub(%dim.3, %17) # <string>:340:28
                  %59 : int[] = aten::_set_item(%dims.3, %15, %58) # <string>:340:17
                  %out.29 : Tensor = aten::permute(%46, %dims.3) # <string>:341:23
                  -> (%out.29)
              -> (%out.21)
          %dim1.3 : int = aten::dim(%30) # <string>:346:20
          %dim2.3 : int = aten::dim(%out.20) # <string>:347:20
          %dim_out.3 : int = aten::len(%self_size.2) # <string>:348:23
          %64 : bool = aten::eq(%dim1.3, %13) # <string>:349:16
          %65 : bool = prim::If(%64) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %66 : bool = aten::eq(%dim2.3, %13) # <string>:349:29
              -> (%66)
          %out.30 : Tensor = prim::If(%65) # <string>:349:13
            block0():
              %out.31 : Tensor = aten::mul(%30, %out.20) # <string>:350:23
              -> (%out.31)
            block1():
              %69 : int = aten::add(%dim1.3, %dim2.3) # <string>:351:18
              %70 : bool = aten::eq(%69, %dim_out.3) # <string>:351:18
              %out.32 : Tensor = prim::If(%70) # <string>:351:18
                block0():
                  %72 : bool = aten::eq(%dim2.3, %17) # <string>:352:20
                  %target_dim2.2 : int = prim::If(%72) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %74 : Tensor = aten::unsqueeze(%30, %dim1.3) # <string>:356:36
                  %75 : Tensor = aten::unsqueeze(%out.20, %target_dim2.2) # <string>:356:58
                  %out.33 : Tensor = aten::matmul(%74, %75) # <string>:356:23
                  -> (%out.33)
                block1():
                  %77 : int = aten::sub(%dim1.3, %dim2.3) # <string>:357:29
                  %78 : bool = aten::eq(%dim_out.3, %77) # <string>:357:18
                  %out.34 : Tensor = prim::If(%78) # <string>:357:18
                    block0():
                      %80 : Tensor = aten::unsqueeze(%out.20, %dim2.3) # <string>:358:42
                      %81 : Tensor = aten::matmul(%30, %80) # <string>:358:23
                      %out.35 : Tensor = aten::squeeze(%81, %16) # <string>:358:23
                      -> (%out.35)
                    block1():
                      %83 : int = aten::sub(%dim2.3, %dim1.3) # <string>:359:29
                      %84 : bool = aten::eq(%dim_out.3, %83) # <string>:359:18
                      %out.36 : Tensor = prim::If(%84) # <string>:359:18
                        block0():
                          %86 : Tensor = aten::unsqueeze(%30, %15) # <string>:360:36
                          %87 : Tensor = aten::matmul(%86, %out.20) # <string>:360:23
                          %out.37 : Tensor = aten::squeeze(%87, %15) # <string>:360:23
                          -> (%out.37)
                        block1():
                          %out.38 : Tensor = aten::matmul(%30, %out.20) # <string>:362:23
                          -> (%out.38)
                      -> (%out.36)
                  -> (%out.34)
              -> (%out.32)
          %grad_self.5 : Tensor = aten::_grad_sum_to_size(%out.30, %self_size.2) # <string>:368:25
          %dim.4 : int = aten::dim(%input) # <string>:332:19
          %92 : bool = aten::eq(%dim.4, %17) # <string>:333:16
          %out.39 : Tensor = prim::If(%92) # <string>:333:13
            block0():
              -> (%input)
            block1():
              %94 : bool = aten::eq(%dim.4, %19) # <string>:335:18
              %out.40 : Tensor = prim::If(%94) # <string>:335:18
                block0():
                  %out.41 : Tensor = aten::t(%input) # <string>:336:23
                  -> (%out.41)
                block1():
                  %dims.4 : int[] = prim::rangelist(%dim.4) # <string>:338:24
                  %98 : int = aten::sub(%dim.4, %19) # <string>:339:28
                  %99 : int[] = aten::_set_item(%dims.4, %16, %98) # <string>:339:17
                  %100 : int = aten::sub(%dim.4, %17) # <string>:340:28
                  %101 : int[] = aten::_set_item(%dims.4, %15, %100) # <string>:340:17
                  %out.42 : Tensor = aten::permute(%input, %dims.4) # <string>:341:23
                  -> (%out.42)
              -> (%out.40)
          %dim1.4 : int = aten::dim(%out.39) # <string>:346:20
          %dim2.4 : int = aten::dim(%30) # <string>:347:20
          %dim_out.4 : int = aten::len(%other_size.2) # <string>:348:23
          %106 : bool = aten::eq(%dim1.4, %13) # <string>:349:16
          %107 : bool = prim::If(%106) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %108 : bool = aten::eq(%dim2.4, %13) # <string>:349:29
              -> (%108)
          %out.43 : Tensor = prim::If(%107) # <string>:349:13
            block0():
              %out.44 : Tensor = aten::mul(%out.39, %30) # <string>:350:23
              -> (%out.44)
            block1():
              %111 : int = aten::add(%dim1.4, %dim2.4) # <string>:351:18
              %112 : bool = aten::eq(%111, %dim_out.4) # <string>:351:18
              %out.45 : Tensor = prim::If(%112) # <string>:351:18
                block0():
                  %114 : bool = aten::eq(%dim2.4, %17) # <string>:352:20
                  %target_dim2.3 : int = prim::If(%114) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %116 : Tensor = aten::unsqueeze(%out.39, %dim1.4) # <string>:356:36
                  %117 : Tensor = aten::unsqueeze(%30, %target_dim2.3) # <string>:356:58
                  %out.46 : Tensor = aten::matmul(%116, %117) # <string>:356:23
                  -> (%out.46)
                block1():
                  %119 : int = aten::sub(%dim1.4, %dim2.4) # <string>:357:29
                  %120 : bool = aten::eq(%dim_out.4, %119) # <string>:357:18
                  %out.47 : Tensor = prim::If(%120) # <string>:357:18
                    block0():
                      %122 : Tensor = aten::unsqueeze(%30, %dim2.4) # <string>:358:42
                      %123 : Tensor = aten::matmul(%out.39, %122) # <string>:358:23
                      %out.48 : Tensor = aten::squeeze(%123, %16) # <string>:358:23
                      -> (%out.48)
                    block1():
                      %125 : int = aten::sub(%dim2.4, %dim1.4) # <string>:359:29
                      %126 : bool = aten::eq(%dim_out.4, %125) # <string>:359:18
                      %out.49 : Tensor = prim::If(%126) # <string>:359:18
                        block0():
                          %128 : Tensor = aten::unsqueeze(%out.39, %15) # <string>:360:36
                          %129 : Tensor = aten::matmul(%128, %30) # <string>:360:23
                          %out.50 : Tensor = aten::squeeze(%129, %15) # <string>:360:23
                          -> (%out.50)
                        block1():
                          %out.51 : Tensor = aten::matmul(%out.39, %30) # <string>:362:23
                          -> (%out.51)
                      -> (%out.49)
                  -> (%out.47)
              -> (%out.45)
          %grad_other.2 : Tensor = aten::_grad_sum_to_size(%out.43, %other_size.2) # <string>:369:26
          %grad_weight.6 : Tensor = aten::t(%grad_other.2) # <string>:389:35
          -> (%grad_self.5, %grad_weight.6)
      -> (%grad_input.2, %grad_weight.2, %grad_bias.2)
  %134 : Tensor = prim::AutogradAdd(%1, %grad_input.3)
  %grad_weight : Tensor, %grad_bias : Tensor? = prim::GradOf[name="aten::linear"](%134)
    block0():
      %137 : bool = aten::__isnot__(%7, %12) # <string>:380:20
      %grad_bias.5 : Tensor? = prim::If(%137) # <string>:380:17
        block0():
          %bias.5 : Tensor = prim::unchecked_unwrap_optional(%7)
          %140 : int[] = aten::size(%bias.5) # <string>:381:63
          %grad_bias.1 : Tensor = aten::_grad_sum_to_size(%134, %140) # <string>:381:33
          -> (%grad_bias.1)
        block1():
          -> (%12)
      %142 : int = aten::dim(%5) # <string>:384:20
      %143 : bool = aten::eq(%142, %19) # <string>:384:20
      %grad_weight.7 : Tensor = prim::If(%143) # <string>:384:17
        block0():
          %145 : Tensor = aten::t(%134) # <string>:386:35
          %grad_weight.1 : Tensor = aten::mm(%145, %5) # <string>:386:35
          -> (%grad_weight.1)
        block1():
          %147 : Tensor = aten::t(%6) # <string>:388:86
          %other_size.1 : int[] = aten::size(%147) # <string>:367:26
          %dim.2 : int = aten::dim(%147) # <string>:332:19
          %150 : bool = aten::eq(%dim.2, %17) # <string>:333:16
           = prim::If(%150) # <string>:333:13
            block0():
              -> ()
            block1():
              %151 : bool = aten::eq(%dim.2, %19) # <string>:335:18
               = prim::If(%151) # <string>:335:18
                block0():
                  -> ()
                block1():
                  %dims.2 : int[] = prim::rangelist(%dim.2) # <string>:338:24
                  %153 : int = aten::sub(%dim.2, %19) # <string>:339:28
                  %154 : int[] = aten::_set_item(%dims.2, %16, %153) # <string>:339:17
                  %155 : int = aten::sub(%dim.2, %17) # <string>:340:28
                  %156 : int[] = aten::_set_item(%dims.2, %15, %155) # <string>:340:17
                  -> ()
              -> ()
          %dim.1 : int = aten::dim(%5) # <string>:332:19
          %158 : bool = aten::eq(%dim.1, %17) # <string>:333:16
          %out.14 : Tensor = prim::If(%158) # <string>:333:13
            block0():
              -> (%5)
            block1():
              %160 : bool = aten::eq(%dim.1, %19) # <string>:335:18
              %out.12 : Tensor = prim::If(%160) # <string>:335:18
                block0():
                  %out.15 : Tensor = aten::t(%5) # <string>:336:23
                  -> (%out.15)
                block1():
                  %dims.1 : int[] = prim::rangelist(%dim.1) # <string>:338:24
                  %164 : int = aten::sub(%dim.1, %19) # <string>:339:28
                  %165 : int[] = aten::_set_item(%dims.1, %16, %164) # <string>:339:17
                  %166 : int = aten::sub(%dim.1, %17) # <string>:340:28
                  %167 : int[] = aten::_set_item(%dims.1, %15, %166) # <string>:340:17
                  %out.16 : Tensor = aten::permute(%5, %dims.1) # <string>:341:23
                  -> (%out.16)
              -> (%out.12)
          %dim1.1 : int = aten::dim(%out.14) # <string>:346:20
          %dim2.1 : int = aten::dim(%134) # <string>:347:20
          %dim_out.1 : int = aten::len(%other_size.1) # <string>:348:23
          %172 : bool = aten::eq(%dim1.1, %13) # <string>:349:16
          %173 : bool = prim::If(%172) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %174 : bool = aten::eq(%dim2.1, %13) # <string>:349:29
              -> (%174)
          %out : Tensor = prim::If(%173) # <string>:349:13
            block0():
              %out.1 : Tensor = aten::mul(%out.14, %134) # <string>:350:23
              -> (%out.1)
            block1():
              %177 : int = aten::add(%dim1.1, %dim2.1) # <string>:351:18
              %178 : bool = aten::eq(%177, %dim_out.1) # <string>:351:18
              %out.25 : Tensor = prim::If(%178) # <string>:351:18
                block0():
                  %180 : bool = aten::eq(%dim2.1, %17) # <string>:352:20
                  %target_dim2 : int = prim::If(%180) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %182 : Tensor = aten::unsqueeze(%out.14, %dim1.1) # <string>:356:36
                  %183 : Tensor = aten::unsqueeze(%134, %target_dim2) # <string>:356:58
                  %out.2 : Tensor = aten::matmul(%182, %183) # <string>:356:23
                  -> (%out.2)
                block1():
                  %185 : int = aten::sub(%dim1.1, %dim2.1) # <string>:357:29
                  %186 : bool = aten::eq(%dim_out.1, %185) # <string>:357:18
                  %out.24 : Tensor = prim::If(%186) # <string>:357:18
                    block0():
                      %188 : Tensor = aten::unsqueeze(%134, %dim2.1) # <string>:358:42
                      %189 : Tensor = aten::matmul(%out.14, %188) # <string>:358:23
                      %out.3 : Tensor = aten::squeeze(%189, %16) # <string>:358:23
                      -> (%out.3)
                    block1():
                      %191 : int = aten::sub(%dim2.1, %dim1.1) # <string>:359:29
                      %192 : bool = aten::eq(%dim_out.1, %191) # <string>:359:18
                      %out.23 : Tensor = prim::If(%192) # <string>:359:18
                        block0():
                          %194 : Tensor = aten::unsqueeze(%out.14, %15) # <string>:360:36
                          %195 : Tensor = aten::matmul(%194, %134) # <string>:360:23
                          %out.4 : Tensor = aten::squeeze(%195, %15) # <string>:360:23
                          -> (%out.4)
                        block1():
                          %out.5 : Tensor = aten::matmul(%out.14, %134) # <string>:362:23
                          -> (%out.5)
                      -> (%out.23)
                  -> (%out.24)
              -> (%out.25)
          %grad_other.1 : Tensor = aten::_grad_sum_to_size(%out, %other_size.1) # <string>:369:26
          %grad_weight.4 : Tensor = aten::t(%grad_other.1) # <string>:389:35
          -> (%grad_weight.4)
      -> (%grad_weight.7, %grad_bias.5)
  return (%grad_weight.3, %grad_bias.3, %grad_weight, %grad_bias)

End print !

Printing during the backward pass:

Forwarding into jit module
Forward code:
graph(%0 : int[],
      %1 : Double(*, *),
      %2 : int[],
      %3 : Double(*, *),
      %4 : Double(*, *),
      %5 : Double(*, *),
      %6 : float,
      %7 : Double(*, *),
      %8 : int):
  %9 : None = prim::Constant() # <string>:274:28
  %10 : int = prim::Constant[value=1]() # <string>:193:92
  %11 : Double(*, *) = aten::mul(%7, %8) # <string>:193:33
  %12 : Double(*, *) = aten::pow(%5, %6) # <string>:193:58
  %grad_self.13 : Double(*, *) = aten::mul(%11, %12) # <string>:193:33
  %14 : int[] = aten::size(%12) # <string>:2:94
  %15 : int[] = aten::size(%grad_self.13) # <string>:2:105
  %16 : int[]? = aten::_size_if_not_equal(%14, %15) # <string>:2:69
  %grad_input.7 : Double(*, *) = aten::mm(%grad_self.13, %4) # <string>:385:34
  %18 : Double(*, *) = aten::t(%grad_input.7) # <string>:247:20
  %grad_weight.1 : Double(*, *) = aten::mm(%18, %3) # <string>:386:35
  %self_size.3 : int[] = aten::size(%grad_input.7) # <string>:275:29
  %result.3 : Tensor = aten::_grad_sum_to_size(%grad_input.7, %2) # <string>:279:22
  %22 : Double(*, *) = aten::t(%grad_self.13) # <string>:247:20
  %grad_weight.5 : Double(*, *) = aten::mm(%22, %1) # <string>:386:35
  %self_size.2 : int[] = aten::size(%grad_self.13) # <string>:275:29
  %result.2 : Tensor = aten::_grad_sum_to_size(%grad_self.13, %0) # <string>:279:22
  return (%result.2, %grad_weight.5, %result.3, %grad_weight.1, %11, %10, %grad_self.13, %16, %grad_input.7, %9, %self_size.3, %22, %self_size.2)

Backward code:
graph(%0 : Tensor,
      %1 : Double(*, *),
      %2 : Tensor,
      %3 : Double(*, *),
      %4 : Double(*, *),
      %5 : Double(*, *),
      %6 : int[],
      %7 : Double(*, *),
      %8 : int[],
      %9 : Double(*, *),
      %10 : Double(*, *),
      %11 : Double(*, *),
      %12 : float,
      %13 : Double(*, *),
      %14 : int,
      %grad_self.11 : Double(*, *),
      %16 : int[]?,
      %grad_input.4 : Double(*, *),
      %18 : None,
      %self_size.20 : int[]?,
      %20 : Double(*, *),
      %self_size.18 : int[]?):
  %22 : None = prim::Constant() # <string>:281:33
  %23 : bool = prim::Constant[value=0]()
  %24 : float = prim::Constant[value=0]() # <string>:190:39
  %grad_input.3 : Tensor = prim::GradOf[name="aten::_grad_sum_to_size"](%0)
    block0():
      %26 : bool = aten::__is__(%self_size.18, %22) # <string>:281:20
      %grad_input.2 : Tensor = prim::If(%26) # <string>:281:17
        block0():
          -> (%0)
        block1():
          %self_size.12 : int[] = prim::unchecked_unwrap_optional(%self_size.18)
          %grad_input.5 : Tensor = aten::expand(%0, %self_size.12, %23) # <string>:284:34
          -> (%grad_input.5)
      -> (%grad_input.2)
  %grad_self.4 : Tensor, %grad_mat2.3 : Tensor = prim::GradOf[name="aten::mm"](%1)
    block0():
      %32 : Tensor = aten::t(%7) # <string>:192:28
      %grad_self.3 : Tensor = aten::mm(%1, %32) # <string>:192:20
      %34 : Tensor = aten::t(%20) # <string>:195:20
      %grad_mat2.2 : Tensor = aten::mm(%34, %1) # <string>:195:20
      -> (%grad_self.3, %grad_mat2.2)
  %36 : Tensor = prim::GradOf[name="aten::t"](%grad_self.4)
    block0():
      %37 : Tensor = aten::t(%grad_self.4) # <string>:245:24
      -> (%37)
  %38 : Tensor = prim::AutogradAdd(%grad_input.3, %36)
  %grad_input : Tensor = prim::GradOf[name="aten::_grad_sum_to_size"](%2)
    block0():
      %40 : bool = aten::__is__(%self_size.20, %22) # <string>:281:20
      %grad_input.6 : Tensor = prim::If(%40) # <string>:281:17
        block0():
          -> (%2)
        block1():
          %self_size.11 : int[] = prim::unchecked_unwrap_optional(%self_size.20)
          %grad_input.1 : Tensor = aten::expand(%2, %self_size.11, %23) # <string>:284:34
          -> (%grad_input.1)
      -> (%grad_input.6)
  %grad_self.6 : Tensor = prim::GradOf[name="aten::mm"](%3)
    block0():
      %45 : Tensor = aten::t(%9) # <string>:192:28
      %grad_self.5 : Tensor = aten::mm(%3, %45) # <string>:192:20
      -> (%grad_self.5)
  %47 : Tensor = prim::GradOf[name="aten::t"](%grad_self.6)
    block0():
      %48 : Tensor = aten::t(%grad_self.6) # <string>:245:24
      -> (%48)
  %49 : Tensor = prim::AutogradAdd(%grad_input, %47)
  %50 : Tensor = prim::AutogradAdd(%5, %49)
  %grad_self.8 : Tensor, %grad_mat2.1 : Tensor = prim::GradOf[name="aten::mm"](%50)
    block0():
      %53 : Tensor = aten::t(%10) # <string>:192:28
      %grad_self.7 : Tensor = aten::mm(%50, %53) # <string>:192:20
      %55 : Tensor = aten::t(%grad_self.11) # <string>:195:20
      %grad_mat2.6 : Tensor = aten::mm(%55, %50) # <string>:195:20
      -> (%grad_self.7, %grad_mat2.6)
  %57 : Tensor = prim::AutogradAdd(%38, %grad_self.8)
  %58 : Tensor = prim::AutogradAdd(%4, %57)
  %grad_other.1 : Tensor = prim::GradOf[name="aten::mul"](%58)
    block0():
      %60 : Tensor = aten::mul(%58, %13) # <string>:12:31
      %grad_other.2 : Tensor = aten::_grad_sum_to_size(%60, %16) # <string>:12:31
      -> (%grad_other.2)
  %grad_self : Tensor = prim::GradOf[name="aten::pow"](%grad_other.1)
    block0():
      %63 : float = prim::Float(%12) # <string>:190:20
      %64 : bool = aten::eq(%63, %24) # <string>:190:20
      %grad_self.12 : Tensor = prim::If(%64) # <string>:190:17
        block0():
          %grad_self.1 : Tensor = aten::zeros_like(%11) # <string>:191:33
          -> (%grad_self.1)
        block1():
          %67 : Tensor = aten::mul(%grad_other.1, %12) # <string>:193:33
          %68 : float = prim::Float(%12) # <string>:193:74
          %69 : float = aten::sub(%68, %14) # <string>:193:74
          %70 : Tensor = aten::pow(%11, %69) # <string>:193:58
          %grad_self.2 : Tensor = aten::mul(%67, %70) # <string>:193:33
          -> (%grad_self.2)
      -> (%grad_self.12)
  return (%grad_mat2.3, %grad_mat2.1, %grad_self)

End print !

Full print, with Python-side printing as well:

Tracing
foo.py:15: TracerWarning: Converting a tensor to a Python integer might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  print(x.size(), x.grad_fn, x.requires_grad)
torch.Size([1, 4]) <AddmmBackward object at 0x1210ba690> True
torch.Size([1, 4]) None False
torch.Size([1, 4]) None False
Tracing done
Forwarding into jit module
Forward code:
graph(%0 : Double(*, *),
      %1 : Double(*),
      %2 : Double(*, *),
      %3 : Double(*, *),
      %4 : Double(*)):
  %5 : int = prim::Constant[value=1]() # <string>:193:92
  %6 : int = prim::Constant[value=2]() # <string>:384:35
  %7 : Double(*, *) = aten::t(%3)
  %8 : Double(*, *) = aten::mm(%2, %7) # <string>:8:25
  %9 : Double(*, *) = aten::add(%4, %8, %5) # <string>:8:18
  %x.2 : Tensor = aten::linear(%9, %0, %1), scope: VAE/Linear[fc4] # /Users/albandes/workspace/pytorch_dev/torch/nn/functional.py:1415:0
  %x : Tensor = aten::pow(%x.2, %6), scope: VAE # foo.py:16:0
  return (%x, %9, %6, %x.2, %5)

Backward code:
graph(%0 : Tensor,
      %1 : Tensor,
      %2 : Tensor,
      %3 : Double(*, *),
      %4 : Double(*),
      %5 : Double(*, *),
      %6 : Double(*, *),
      %7 : Double(*),
      %input : Tensor,
      %9 : int,
      %x.1 : Tensor,
      %11 : int):
  %12 : None = prim::Constant() # <string>:380:32
  %13 : int = prim::Constant[value=0]() # <string>:349:24
  %14 : bool = prim::Constant[value=1]() # <string>:349:16
  %15 : int = prim::Constant[value=-2]() # <string>:340:23
  %16 : int = prim::Constant[value=-1]() # <string>:339:23
  %17 : int = prim::Constant[value=1]() # <string>:333:23
  %18 : float = prim::Constant[value=0]() # <string>:190:39
  %19 : int = prim::Constant[value=2]() # <string>:384:35
  %grad_self : Tensor = prim::GradOf[name="aten::pow"](%0)
    block0():
      %21 : float = prim::Float(%19) # <string>:190:20
      %22 : bool = aten::eq(%21, %18) # <string>:190:20
      %grad_self.3 : Tensor = prim::If(%22) # <string>:190:17
        block0():
          %grad_self.4 : Tensor = aten::zeros_like(%x.1) # <string>:191:33
          -> (%grad_self.4)
        block1():
          %25 : Tensor = aten::mul(%0, %9) # <string>:193:33
          %26 : float = prim::Float(%9) # <string>:193:74
          %27 : float = aten::sub(%26, %11) # <string>:193:74
          %28 : Tensor = aten::pow(%x.1, %27) # <string>:193:58
          %grad_self.2 : Tensor = aten::mul(%25, %28) # <string>:193:33
          -> (%grad_self.2)
      -> (%grad_self.3)
  %30 : Tensor = prim::AutogradAdd(%2, %grad_self)
  %grad_input.3 : Tensor, %grad_weight.3 : Tensor, %grad_bias.3 : Tensor? = prim::GradOf[name="aten::linear"](%30)
    block0():
      %34 : bool = aten::__isnot__(%4, %12) # <string>:380:20
      %grad_bias.2 : Tensor? = prim::If(%34) # <string>:380:17
        block0():
          %bias.6 : Tensor = prim::unchecked_unwrap_optional(%4)
          %37 : int[] = aten::size(%bias.6) # <string>:381:63
          %grad_bias.4 : Tensor = aten::_grad_sum_to_size(%30, %37) # <string>:381:33
          -> (%grad_bias.4)
        block1():
          -> (%12)
      %39 : int = aten::dim(%input) # <string>:384:20
      %40 : bool = aten::eq(%39, %19) # <string>:384:20
      %grad_input.2 : Tensor, %grad_weight.2 : Tensor = prim::If(%40) # <string>:384:17
        block0():
          %grad_input.4 : Tensor = aten::mm(%30, %3) # <string>:385:34
          %44 : Tensor = aten::t(%30) # <string>:386:35
          %grad_weight.5 : Tensor = aten::mm(%44, %input) # <string>:386:35
          -> (%grad_input.4, %grad_weight.5)
        block1():
          %46 : Tensor = aten::t(%3) # <string>:388:86
          %self_size.2 : int[] = aten::size(%input) # <string>:366:25
          %other_size.2 : int[] = aten::size(%46) # <string>:367:26
          %dim.3 : int = aten::dim(%46) # <string>:332:19
          %50 : bool = aten::eq(%dim.3, %17) # <string>:333:16
          %out.20 : Tensor = prim::If(%50) # <string>:333:13
            block0():
              -> (%46)
            block1():
              %52 : bool = aten::eq(%dim.3, %19) # <string>:335:18
              %out.21 : Tensor = prim::If(%52) # <string>:335:18
                block0():
                  %out.22 : Tensor = aten::t(%46) # <string>:336:23
                  -> (%out.22)
                block1():
                  %dims.3 : int[] = prim::rangelist(%dim.3) # <string>:338:24
                  %56 : int = aten::sub(%dim.3, %19) # <string>:339:28
                  %57 : int[] = aten::_set_item(%dims.3, %16, %56) # <string>:339:17
                  %58 : int = aten::sub(%dim.3, %17) # <string>:340:28
                  %59 : int[] = aten::_set_item(%dims.3, %15, %58) # <string>:340:17
                  %out.29 : Tensor = aten::permute(%46, %dims.3) # <string>:341:23
                  -> (%out.29)
              -> (%out.21)
          %dim1.3 : int = aten::dim(%30) # <string>:346:20
          %dim2.3 : int = aten::dim(%out.20) # <string>:347:20
          %dim_out.3 : int = aten::len(%self_size.2) # <string>:348:23
          %64 : bool = aten::eq(%dim1.3, %13) # <string>:349:16
          %65 : bool = prim::If(%64) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %66 : bool = aten::eq(%dim2.3, %13) # <string>:349:29
              -> (%66)
          %out.30 : Tensor = prim::If(%65) # <string>:349:13
            block0():
              %out.31 : Tensor = aten::mul(%30, %out.20) # <string>:350:23
              -> (%out.31)
            block1():
              %69 : int = aten::add(%dim1.3, %dim2.3) # <string>:351:18
              %70 : bool = aten::eq(%69, %dim_out.3) # <string>:351:18
              %out.32 : Tensor = prim::If(%70) # <string>:351:18
                block0():
                  %72 : bool = aten::eq(%dim2.3, %17) # <string>:352:20
                  %target_dim2.2 : int = prim::If(%72) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %74 : Tensor = aten::unsqueeze(%30, %dim1.3) # <string>:356:36
                  %75 : Tensor = aten::unsqueeze(%out.20, %target_dim2.2) # <string>:356:58
                  %out.33 : Tensor = aten::matmul(%74, %75) # <string>:356:23
                  -> (%out.33)
                block1():
                  %77 : int = aten::sub(%dim1.3, %dim2.3) # <string>:357:29
                  %78 : bool = aten::eq(%dim_out.3, %77) # <string>:357:18
                  %out.34 : Tensor = prim::If(%78) # <string>:357:18
                    block0():
                      %80 : Tensor = aten::unsqueeze(%out.20, %dim2.3) # <string>:358:42
                      %81 : Tensor = aten::matmul(%30, %80) # <string>:358:23
                      %out.35 : Tensor = aten::squeeze(%81, %16) # <string>:358:23
                      -> (%out.35)
                    block1():
                      %83 : int = aten::sub(%dim2.3, %dim1.3) # <string>:359:29
                      %84 : bool = aten::eq(%dim_out.3, %83) # <string>:359:18
                      %out.36 : Tensor = prim::If(%84) # <string>:359:18
                        block0():
                          %86 : Tensor = aten::unsqueeze(%30, %15) # <string>:360:36
                          %87 : Tensor = aten::matmul(%86, %out.20) # <string>:360:23
                          %out.37 : Tensor = aten::squeeze(%87, %15) # <string>:360:23
                          -> (%out.37)
                        block1():
                          %out.38 : Tensor = aten::matmul(%30, %out.20) # <string>:362:23
                          -> (%out.38)
                      -> (%out.36)
                  -> (%out.34)
              -> (%out.32)
          %grad_self.5 : Tensor = aten::_grad_sum_to_size(%out.30, %self_size.2) # <string>:368:25
          %dim.4 : int = aten::dim(%input) # <string>:332:19
          %92 : bool = aten::eq(%dim.4, %17) # <string>:333:16
          %out.39 : Tensor = prim::If(%92) # <string>:333:13
            block0():
              -> (%input)
            block1():
              %94 : bool = aten::eq(%dim.4, %19) # <string>:335:18
              %out.40 : Tensor = prim::If(%94) # <string>:335:18
                block0():
                  %out.41 : Tensor = aten::t(%input) # <string>:336:23
                  -> (%out.41)
                block1():
                  %dims.4 : int[] = prim::rangelist(%dim.4) # <string>:338:24
                  %98 : int = aten::sub(%dim.4, %19) # <string>:339:28
                  %99 : int[] = aten::_set_item(%dims.4, %16, %98) # <string>:339:17
                  %100 : int = aten::sub(%dim.4, %17) # <string>:340:28
                  %101 : int[] = aten::_set_item(%dims.4, %15, %100) # <string>:340:17
                  %out.42 : Tensor = aten::permute(%input, %dims.4) # <string>:341:23
                  -> (%out.42)
              -> (%out.40)
          %dim1.4 : int = aten::dim(%out.39) # <string>:346:20
          %dim2.4 : int = aten::dim(%30) # <string>:347:20
          %dim_out.4 : int = aten::len(%other_size.2) # <string>:348:23
          %106 : bool = aten::eq(%dim1.4, %13) # <string>:349:16
          %107 : bool = prim::If(%106) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %108 : bool = aten::eq(%dim2.4, %13) # <string>:349:29
              -> (%108)
          %out.43 : Tensor = prim::If(%107) # <string>:349:13
            block0():
              %out.44 : Tensor = aten::mul(%out.39, %30) # <string>:350:23
              -> (%out.44)
            block1():
              %111 : int = aten::add(%dim1.4, %dim2.4) # <string>:351:18
              %112 : bool = aten::eq(%111, %dim_out.4) # <string>:351:18
              %out.45 : Tensor = prim::If(%112) # <string>:351:18
                block0():
                  %114 : bool = aten::eq(%dim2.4, %17) # <string>:352:20
                  %target_dim2.3 : int = prim::If(%114) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %116 : Tensor = aten::unsqueeze(%out.39, %dim1.4) # <string>:356:36
                  %117 : Tensor = aten::unsqueeze(%30, %target_dim2.3) # <string>:356:58
                  %out.46 : Tensor = aten::matmul(%116, %117) # <string>:356:23
                  -> (%out.46)
                block1():
                  %119 : int = aten::sub(%dim1.4, %dim2.4) # <string>:357:29
                  %120 : bool = aten::eq(%dim_out.4, %119) # <string>:357:18
                  %out.47 : Tensor = prim::If(%120) # <string>:357:18
                    block0():
                      %122 : Tensor = aten::unsqueeze(%30, %dim2.4) # <string>:358:42
                      %123 : Tensor = aten::matmul(%out.39, %122) # <string>:358:23
                      %out.48 : Tensor = aten::squeeze(%123, %16) # <string>:358:23
                      -> (%out.48)
                    block1():
                      %125 : int = aten::sub(%dim2.4, %dim1.4) # <string>:359:29
                      %126 : bool = aten::eq(%dim_out.4, %125) # <string>:359:18
                      %out.49 : Tensor = prim::If(%126) # <string>:359:18
                        block0():
                          %128 : Tensor = aten::unsqueeze(%out.39, %15) # <string>:360:36
                          %129 : Tensor = aten::matmul(%128, %30) # <string>:360:23
                          %out.50 : Tensor = aten::squeeze(%129, %15) # <string>:360:23
                          -> (%out.50)
                        block1():
                          %out.51 : Tensor = aten::matmul(%out.39, %30) # <string>:362:23
                          -> (%out.51)
                      -> (%out.49)
                  -> (%out.47)
              -> (%out.45)
          %grad_other.2 : Tensor = aten::_grad_sum_to_size(%out.43, %other_size.2) # <string>:369:26
          %grad_weight.6 : Tensor = aten::t(%grad_other.2) # <string>:389:35
          -> (%grad_self.5, %grad_weight.6)
      -> (%grad_input.2, %grad_weight.2, %grad_bias.2)
  %134 : Tensor = prim::AutogradAdd(%1, %grad_input.3)
  %grad_weight : Tensor, %grad_bias : Tensor? = prim::GradOf[name="aten::linear"](%134)
    block0():
      %137 : bool = aten::__isnot__(%7, %12) # <string>:380:20
      %grad_bias.5 : Tensor? = prim::If(%137) # <string>:380:17
        block0():
          %bias.5 : Tensor = prim::unchecked_unwrap_optional(%7)
          %140 : int[] = aten::size(%bias.5) # <string>:381:63
          %grad_bias.1 : Tensor = aten::_grad_sum_to_size(%134, %140) # <string>:381:33
          -> (%grad_bias.1)
        block1():
          -> (%12)
      %142 : int = aten::dim(%5) # <string>:384:20
      %143 : bool = aten::eq(%142, %19) # <string>:384:20
      %grad_weight.7 : Tensor = prim::If(%143) # <string>:384:17
        block0():
          %145 : Tensor = aten::t(%134) # <string>:386:35
          %grad_weight.1 : Tensor = aten::mm(%145, %5) # <string>:386:35
          -> (%grad_weight.1)
        block1():
          %147 : Tensor = aten::t(%6) # <string>:388:86
          %other_size.1 : int[] = aten::size(%147) # <string>:367:26
          %dim.2 : int = aten::dim(%147) # <string>:332:19
          %150 : bool = aten::eq(%dim.2, %17) # <string>:333:16
           = prim::If(%150) # <string>:333:13
            block0():
              -> ()
            block1():
              %151 : bool = aten::eq(%dim.2, %19) # <string>:335:18
               = prim::If(%151) # <string>:335:18
                block0():
                  -> ()
                block1():
                  %dims.2 : int[] = prim::rangelist(%dim.2) # <string>:338:24
                  %153 : int = aten::sub(%dim.2, %19) # <string>:339:28
                  %154 : int[] = aten::_set_item(%dims.2, %16, %153) # <string>:339:17
                  %155 : int = aten::sub(%dim.2, %17) # <string>:340:28
                  %156 : int[] = aten::_set_item(%dims.2, %15, %155) # <string>:340:17
                  -> ()
              -> ()
          %dim.1 : int = aten::dim(%5) # <string>:332:19
          %158 : bool = aten::eq(%dim.1, %17) # <string>:333:16
          %out.14 : Tensor = prim::If(%158) # <string>:333:13
            block0():
              -> (%5)
            block1():
              %160 : bool = aten::eq(%dim.1, %19) # <string>:335:18
              %out.12 : Tensor = prim::If(%160) # <string>:335:18
                block0():
                  %out.15 : Tensor = aten::t(%5) # <string>:336:23
                  -> (%out.15)
                block1():
                  %dims.1 : int[] = prim::rangelist(%dim.1) # <string>:338:24
                  %164 : int = aten::sub(%dim.1, %19) # <string>:339:28
                  %165 : int[] = aten::_set_item(%dims.1, %16, %164) # <string>:339:17
                  %166 : int = aten::sub(%dim.1, %17) # <string>:340:28
                  %167 : int[] = aten::_set_item(%dims.1, %15, %166) # <string>:340:17
                  %out.16 : Tensor = aten::permute(%5, %dims.1) # <string>:341:23
                  -> (%out.16)
              -> (%out.12)
          %dim1.1 : int = aten::dim(%out.14) # <string>:346:20
          %dim2.1 : int = aten::dim(%134) # <string>:347:20
          %dim_out.1 : int = aten::len(%other_size.1) # <string>:348:23
          %172 : bool = aten::eq(%dim1.1, %13) # <string>:349:16
          %173 : bool = prim::If(%172) # <string>:349:16
            block0():
              -> (%14)
            block1():
              %174 : bool = aten::eq(%dim2.1, %13) # <string>:349:29
              -> (%174)
          %out : Tensor = prim::If(%173) # <string>:349:13
            block0():
              %out.1 : Tensor = aten::mul(%out.14, %134) # <string>:350:23
              -> (%out.1)
            block1():
              %177 : int = aten::add(%dim1.1, %dim2.1) # <string>:351:18
              %178 : bool = aten::eq(%177, %dim_out.1) # <string>:351:18
              %out.25 : Tensor = prim::If(%178) # <string>:351:18
                block0():
                  %180 : bool = aten::eq(%dim2.1, %17) # <string>:352:20
                  %target_dim2 : int = prim::If(%180) # <string>:352:17
                    block0():
                      -> (%13)
                    block1():
                      -> (%15)
                  %182 : Tensor = aten::unsqueeze(%out.14, %dim1.1) # <string>:356:36
                  %183 : Tensor = aten::unsqueeze(%134, %target_dim2) # <string>:356:58
                  %out.2 : Tensor = aten::matmul(%182, %183) # <string>:356:23
                  -> (%out.2)
                block1():
                  %185 : int = aten::sub(%dim1.1, %dim2.1) # <string>:357:29
                  %186 : bool = aten::eq(%dim_out.1, %185) # <string>:357:18
                  %out.24 : Tensor = prim::If(%186) # <string>:357:18
                    block0():
                      %188 : Tensor = aten::unsqueeze(%134, %dim2.1) # <string>:358:42
                      %189 : Tensor = aten::matmul(%out.14, %188) # <string>:358:23
                      %out.3 : Tensor = aten::squeeze(%189, %16) # <string>:358:23
                      -> (%out.3)
                    block1():
                      %191 : int = aten::sub(%dim2.1, %dim1.1) # <string>:359:29
                      %192 : bool = aten::eq(%dim_out.1, %191) # <string>:359:18
                      %out.23 : Tensor = prim::If(%192) # <string>:359:18
                        block0():
                          %194 : Tensor = aten::unsqueeze(%out.14, %15) # <string>:360:36
                          %195 : Tensor = aten::matmul(%194, %134) # <string>:360:23
                          %out.4 : Tensor = aten::squeeze(%195, %15) # <string>:360:23
                          -> (%out.4)
                        block1():
                          %out.5 : Tensor = aten::matmul(%out.14, %134) # <string>:362:23
                          -> (%out.5)
                      -> (%out.23)
                  -> (%out.24)
              -> (%out.25)
          %grad_other.1 : Tensor = aten::_grad_sum_to_size(%out, %other_size.1) # <string>:369:26
          %grad_weight.4 : Tensor = aten::t(%grad_other.1) # <string>:389:35
          -> (%grad_weight.4)
      -> (%grad_weight.7, %grad_bias.5)
  return (%grad_weight.3, %grad_bias.3, %grad_weight, %grad_bias)

End print !
Saving [4, 3]||0
Saving [4]||0
Saving [1, 2]||0
Saving [3, 2]||0
Saving [3]||0
Saving [1, 3]||1
Saving [1, 4]||1
def forward(self,
    input: Tensor) -> Tensor:
  _0 = self.fc1
  weight = _0.weight
  bias = _0.bias
  _1 = self.fc4
  weight0 = _1.weight
  bias0 = _1.bias
  input0 = torch.linear(input, weight, bias)
  x = torch.linear(input0, weight0, bias0)
  x0 = torch.pow(x, 2)
  return torch.sum(x0)

Unpacking tensor...[4, 3]
done
Unpacking tensor...[4]
done
Unpacking tensor...[1, 2]
done
Unpacking tensor...[3, 2]
done
Unpacking tensor...[3]
done
Unpacking tensor...[1, 3]
done
Unpacking tensor...[1, 4]
done
Forwarding into jit module
Forward code:
graph(%0 : int[],
      %1 : Double(*, *),
      %2 : int[],
      %3 : Double(*, *),
      %4 : Double(*, *),
      %5 : Double(*, *),
      %6 : float,
      %7 : Double(*, *),
      %8 : int):
  %9 : None = prim::Constant() # <string>:274:28
  %10 : int = prim::Constant[value=1]() # <string>:193:92
  %11 : Double(*, *) = aten::mul(%7, %8) # <string>:193:33
  %12 : Double(*, *) = aten::pow(%5, %6) # <string>:193:58
  %grad_self.13 : Double(*, *) = aten::mul(%11, %12) # <string>:193:33
  %14 : int[] = aten::size(%12) # <string>:2:94
  %15 : int[] = aten::size(%grad_self.13) # <string>:2:105
  %16 : int[]? = aten::_size_if_not_equal(%14, %15) # <string>:2:69
  %grad_input.7 : Double(*, *) = aten::mm(%grad_self.13, %4) # <string>:385:34
  %18 : Double(*, *) = aten::t(%grad_input.7) # <string>:247:20
  %grad_weight.1 : Double(*, *) = aten::mm(%18, %3) # <string>:386:35
  %self_size.3 : int[] = aten::size(%grad_input.7) # <string>:275:29
  %result.3 : Tensor = aten::_grad_sum_to_size(%grad_input.7, %2) # <string>:279:22
  %22 : Double(*, *) = aten::t(%grad_self.13) # <string>:247:20
  %grad_weight.5 : Double(*, *) = aten::mm(%22, %1) # <string>:386:35
  %self_size.2 : int[] = aten::size(%grad_self.13) # <string>:275:29
  %result.2 : Tensor = aten::_grad_sum_to_size(%grad_self.13, %0) # <string>:279:22
  return (%result.2, %grad_weight.5, %result.3, %grad_weight.1, %11, %10, %grad_self.13, %16, %grad_input.7, %9, %self_size.3, %22, %self_size.2)

Backward code:
graph(%0 : Tensor,
      %1 : Double(*, *),
      %2 : Tensor,
      %3 : Double(*, *),
      %4 : Double(*, *),
      %5 : Double(*, *),
      %6 : int[],
      %7 : Double(*, *),
      %8 : int[],
      %9 : Double(*, *),
      %10 : Double(*, *),
      %11 : Double(*, *),
      %12 : float,
      %13 : Double(*, *),
      %14 : int,
      %grad_self.11 : Double(*, *),
      %16 : int[]?,
      %grad_input.4 : Double(*, *),
      %18 : None,
      %self_size.20 : int[]?,
      %20 : Double(*, *),
      %self_size.18 : int[]?):
  %22 : None = prim::Constant() # <string>:281:33
  %23 : bool = prim::Constant[value=0]()
  %24 : float = prim::Constant[value=0]() # <string>:190:39
  %grad_input.3 : Tensor = prim::GradOf[name="aten::_grad_sum_to_size"](%0)
    block0():
      %26 : bool = aten::__is__(%self_size.18, %22) # <string>:281:20
      %grad_input.2 : Tensor = prim::If(%26) # <string>:281:17
        block0():
          -> (%0)
        block1():
          %self_size.12 : int[] = prim::unchecked_unwrap_optional(%self_size.18)
          %grad_input.5 : Tensor = aten::expand(%0, %self_size.12, %23) # <string>:284:34
          -> (%grad_input.5)
      -> (%grad_input.2)
  %grad_self.4 : Tensor, %grad_mat2.3 : Tensor = prim::GradOf[name="aten::mm"](%1)
    block0():
      %32 : Tensor = aten::t(%7) # <string>:192:28
      %grad_self.3 : Tensor = aten::mm(%1, %32) # <string>:192:20
      %34 : Tensor = aten::t(%20) # <string>:195:20
      %grad_mat2.2 : Tensor = aten::mm(%34, %1) # <string>:195:20
      -> (%grad_self.3, %grad_mat2.2)
  %36 : Tensor = prim::GradOf[name="aten::t"](%grad_self.4)
    block0():
      %37 : Tensor = aten::t(%grad_self.4) # <string>:245:24
      -> (%37)
  %38 : Tensor = prim::AutogradAdd(%grad_input.3, %36)
  %grad_input : Tensor = prim::GradOf[name="aten::_grad_sum_to_size"](%2)
    block0():
      %40 : bool = aten::__is__(%self_size.20, %22) # <string>:281:20
      %grad_input.6 : Tensor = prim::If(%40) # <string>:281:17
        block0():
          -> (%2)
        block1():
          %self_size.11 : int[] = prim::unchecked_unwrap_optional(%self_size.20)
          %grad_input.1 : Tensor = aten::expand(%2, %self_size.11, %23) # <string>:284:34
          -> (%grad_input.1)
      -> (%grad_input.6)
  %grad_self.6 : Tensor = prim::GradOf[name="aten::mm"](%3)
    block0():
      %45 : Tensor = aten::t(%9) # <string>:192:28
      %grad_self.5 : Tensor = aten::mm(%3, %45) # <string>:192:20
      -> (%grad_self.5)
  %47 : Tensor = prim::GradOf[name="aten::t"](%grad_self.6)
    block0():
      %48 : Tensor = aten::t(%grad_self.6) # <string>:245:24
      -> (%48)
  %49 : Tensor = prim::AutogradAdd(%grad_input, %47)
  %50 : Tensor = prim::AutogradAdd(%5, %49)
  %grad_self.8 : Tensor, %grad_mat2.1 : Tensor = prim::GradOf[name="aten::mm"](%50)
    block0():
      %53 : Tensor = aten::t(%10) # <string>:192:28
      %grad_self.7 : Tensor = aten::mm(%50, %53) # <string>:192:20
      %55 : Tensor = aten::t(%grad_self.11) # <string>:195:20
      %grad_mat2.6 : Tensor = aten::mm(%55, %50) # <string>:195:20
      -> (%grad_self.7, %grad_mat2.6)
  %57 : Tensor = prim::AutogradAdd(%38, %grad_self.8)
  %58 : Tensor = prim::AutogradAdd(%4, %57)
  %grad_other.1 : Tensor = prim::GradOf[name="aten::mul"](%58)
    block0():
      %60 : Tensor = aten::mul(%58, %13) # <string>:12:31
      %grad_other.2 : Tensor = aten::_grad_sum_to_size(%60, %16) # <string>:12:31
      -> (%grad_other.2)
  %grad_self : Tensor = prim::GradOf[name="aten::pow"](%grad_other.1)
    block0():
      %63 : float = prim::Float(%12) # <string>:190:20
      %64 : bool = aten::eq(%63, %24) # <string>:190:20
      %grad_self.12 : Tensor = prim::If(%64) # <string>:190:17
        block0():
          %grad_self.1 : Tensor = aten::zeros_like(%11) # <string>:191:33
          -> (%grad_self.1)
        block1():
          %67 : Tensor = aten::mul(%grad_other.1, %12) # <string>:193:33
          %68 : float = prim::Float(%12) # <string>:193:74
          %69 : float = aten::sub(%68, %14) # <string>:193:74
          %70 : Tensor = aten::pow(%11, %69) # <string>:193:58
          %grad_self.2 : Tensor = aten::mul(%67, %70) # <string>:193:33
          -> (%grad_self.2)
      -> (%grad_self.12)
  return (%grad_mat2.3, %grad_mat2.1, %grad_self)

End print !
Saving [1, 3]||0
Saving [1, 2]||0
Saving [4, 3]||0
Saving [1, 4]||0
Saving [1, 4]||1
Saving [1, 4]||1
Saving [1, 3]||1
Saving [4, 1]||1
grad torch.Size([3, 2])
grad torch.Size([3])
grad torch.Size([4, 3])
grad torch.Size([4])
FW/BW ok
Unpacking tensor...[1, 3]
done
Unpacking tensor...[1, 2]
done
Unpacking tensor...[4, 3]
done
Unpacking tensor...[1, 4]
done
Unpacking tensor...[1, 4]
done
Unpacking tensor...[1, 4]
done
Unpacking tensor...[1, 3]
done
Unpacking tensor...[4, 1]
[4, 1]
0   libtorch.dylib                      0x0000000118afc004 _ZNK5torch8autograd13SavedVariable6unpackENSt3__110shared_ptrINS0_8FunctionEEE + 2244
1   libtorch.dylib                      0x0000000118b566fe _ZN5torch3jit12_GLOBAL__N_127DifferentiableGraphBackward5applyEONSt3__16vectorINS_8autograd8VariableENS3_9allocatorIS6_EEEE + 1630
2   libtorch.dylib                      0x0000000118ae73df _ZN5torch8autograd8FunctionclEONSt3__16vectorINS0_8VariableENS2_9allocatorIS4_EEEE + 367
3   libtorch.dylib                      0x0000000118adf7fc _ZN5torch8autograd6Engine17evaluate_functionERNS0_12FunctionTaskE + 1100
4   libtorch.dylib                      0x0000000118adec79 _ZN5torch8autograd6Engine11thread_mainEPNS0_9GraphTaskE + 217
5   libtorch.dylib                      0x0000000118adeb88 _ZN5torch8autograd6Engine11thread_initEi + 136
6   libtorch_python.dylib               0x0000000115c3d05c _ZN5torch8autograd6python12PythonEngine11thread_initEi + 44
7   libtorch.dylib                      0x0000000118aead02 _ZNSt3__114__thread_proxyINS_5tupleIJNS_10unique_ptrINS_15__thread_structENS_14default_deleteIS3_EEEEMN5torch8autograd6EngineEFviEPS9_iEEEEEPvSE_ + 66
8   libsystem_pthread.dylib             0x00007fff79a2c2eb _pthread_body + 126
9   libsystem_pthread.dylib             0x00007fff79a2f249 _pthread_start + 66
10  libsystem_pthread.dylib             0x00007fff79a2b40d thread_start + 13

Traceback (most recent call last):
  File "foo.py", line 49, in <module>
    torch.autograd.grad(s, model.parameters())
  File "/Users/albandes/workspace/pytorch_dev/torch/autograd/__init__.py", line 149, in grad
    inputs, allow_unused)
RuntimeError: No grad accumulator for a saved leaf!
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment