Last active
March 24, 2021 07:02
-
-
Save debarko/161adfbad12c1ccd5301217ed05c17c7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################
# This gist is a solution of EECS 498-007 / 598-005
# Deep Learning for Computer Vision Course
# Part 1 PyTorch 101
# https://web.eecs.umich.edu/~justincj/teaching/eecs498/assignment1.html
# There can be multiple ways a problem can be solved
# and this doesn't guarantee that this is the best
# possible solution. Hence please feel free to optimise
# wherever possible.
# solutions by: debarko.de
################################################################################
import torch
print(torch.__version__)
# A rank-1 tensor; torch infers dtype int64 from the Python ints.
a = torch.tensor([1,2,3])
# NOTE(review): the result of .cuda() is discarded. tensor.cuda() returns a
# GPU *copy* and does not move `a` in place, so this line has no effect on
# the rest of the script (and raises RuntimeError on CPU-only machines).
a.cuda()
print("Here is a : ")
print(a)
print(type(a))
print(a.dim())  # number of dimensions (rank)
print(a.shape)
print(a[0])  # indexing yields a rank-0 tensor, not a Python int
type(a[0])  # bare expression: only displays in a notebook; no-op as a script
# A rank-2 tensor built from a nested Python list.
b=[[12,3,324],[1231,6787,345]]
b = torch.tensor(b)
print(b.dim())
print(b.shape)
print(a[0].item())  # .item() extracts the underlying Python scalar
print(type(a[0].item()))
# Tensors are mutable through indexed assignment.
a[1]=1000
print(a)
c = None
# Exercise: build a (3, 2) tensor of zeros from nested Python lists.
c = torch.tensor([[0] * 2 for _ in range(3)])
print('c is a tensor: ', torch.is_tensor(c))
print('Correct shape: ', c.shape == (3, 2))
print('All zeros: ', (c == 0).all().item() == 1)
# Exercise: set element (0, 1) to 10 and element (1, 0) to 100.
c[0, 1] = 10
c[1, 0] = 100
print('\nAfter mutating:')
print('Correct shape: ', c.shape == (3, 2))
print('c[0, 1] correct: ', c[0, 1] == 10)
print('c[1, 0] correct: ', c[1, 0] == 100)
print('Rest of c is still zero: ', (c == 0).sum().item() == 4)
# Built-in tensor constructors: zeros, ones, eye, rand, full.
a = torch.zeros(2, 3)
print(a)
a = torch.zeros(3, 2)
print(a)
# NOTE: the bare `b` / `c` expressions below only display values in a
# notebook; when run as a script they are no-ops.
b = torch.ones(1, 2)
b
b = torch.ones(55, 5)
b
c = torch.eye(10, 9)
c
d = torch.rand(4, 5)
print(d * 200)
e = None
# Exercise: a (2, 3, 4) tensor filled entirely with 7.
# Simplified from torch.full((2,3,4), int(7), out=None, dtype=int):
# int(7) and out=None were redundant, and dtype=int is just torch.int64.
e = torch.full((2, 3, 4), 7, dtype=torch.int64)
print('e is a tensor:', torch.is_tensor(e))
print('e has correct shape: ', e.shape == (2, 3, 4))
print('e is filled with sevens: ', (e == 7).all().item() == 1)
# Let torch choose the datatype
x0 = torch.tensor([1, 2]) # List of integers
x1 = torch.tensor([1., 2.]) # List of floats
x2 = torch.tensor([1., 2]) # Mixed list
print('dtype when torch chooses for us:')
print('List of integers:', x0.dtype)
print('List of floats:', x1.dtype)
print('Mixed list:', x2.dtype)
# Force a particular datatype
y0 = torch.tensor([1, 2], dtype=torch.float32) # 32-bit float
y1 = torch.tensor([1, 2], dtype=torch.int32) # 32-bit (signed) integer
y2 = torch.tensor([1, 2], dtype=torch.int64) # 64-bit (signed) integer
print('\ndtype when we force a datatype:')
print('32-bit float: ', y0.dtype)
print('32-bit integer: ', y1.dtype)
print('64-bit integer: ', y2.dtype)
# Other creation ops also take a dtype argument
z0 = torch.ones(1, 2) # Let torch choose for us
z1 = torch.ones(1, 2, dtype=torch.int16) # 16-bit (signed) integer
z2 = torch.ones(1, 2, dtype=torch.uint8) # 8-bit (unsigned) integer
print('\ntorch.ones with different dtypes')
print('default dtype:', z0.dtype)
print('16-bit integer:', z1.dtype)
print('8-bit unsigned integer:', z2.dtype)
x0 = torch.eye(3, dtype=torch.int64)
x1 = x0.float() # Cast to 32-bit float
x2 = x0.long() # Cast to 64-bit integer (fixed: comment previously said "float")
x3 = x0.to(torch.float32) # Alternate way to cast to 32-bit float
x4 = x0.to(torch.float64) # Alternate way to cast to 64-bit float
print('x0:', x0.dtype)
print('x1:', x1.dtype)
print('x2:', x2.dtype)
print('x3:', x3.dtype)
print('x4:', x4.dtype)
print(x0)
print(x1)
print(x2)
print(x3)
print(x4)
x = None
# Exercise: a six-element vector of 64-bit floats evenly spaced in [10, 20].
x = torch.linspace(10, 20, steps=6, dtype=torch.float64)
print('x is a tensor: ', torch.is_tensor(x))
print('x has correct shape: ', x.shape == (6,))
print('x has correct dtype: ', x.dtype == torch.float64)
y = [10, 12, 14, 16, 18, 20]
correct_vals = all(a.item() == b for a, b in zip(x, y))
# Fixed typo in the output message: 'valus' -> 'values'.
print('x has correct values: ', correct_vals)
print(x)
a = torch.tensor([0, 11, 22, 33, 44, 55, 66])
# Numbered slicing demos; each prints its index followed by the slice.
demos = [
    a,         # (0) original tensor
    a[2:5],    # (1) elements with index in [2, 5)
    a[2:],     # (2) elements from index 2 onward
    a[:5],     # (3) elements before index 5
    a[:],      # (4) every element
    a[1:5:2],  # (5) every second element in [1, 5)
    a[:-1],    # (6) everything but the last element
    a[-4::2],  # (7) every second element, starting fourth from the end
]
for i, view in enumerate(demos):
    print(i, view)
# A rank-2 tensor with shape (3, 4):
# [[ 1  2  3  4]
#  [ 5  6  7  8]
#  [ 9 10 11 12]]
a = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print('Original tensor:')
print(a)
print('shape: ', a.shape)
# Row 1 with every column; the trailing ':' may be omitted.
print('\nSingle row:')
print(a[1, :])
print(a[1])
print('shape: ', a[1].shape)
# Column 1 across every row.
print('\nSingle column:')
print(a[:, 1])
print('shape: ', a[:, 1].shape)
# Rows 0-1 crossed with the last three columns.
print('\nFirst two rows, last two columns:')
print(a[:2, -3:])
print('shape: ', a[:2, -3:].shape)
# Rows 0 and 2 crossed with columns 1 and 2.
print('\nEvery other row, middle columns:')
print(a[::2, 1:3])
print('shape: ', a[::2, 1:3].shape)
a = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print('Original tensor')
print(a)
# Integer indexing drops a dimension; slicing with length-1 ranges keeps it.
row_r1 = a[1, :]    # rank 1, shape (4,)
row_r2 = a[1:2, :]  # rank 2, shape (1, 4)
print('\nTwo ways of accessing a single row:')
print(row_r1, row_r1.shape)
print(row_r2, row_r2.shape)
# The same distinction applies to columns.
col_r1 = a[:, 1]
col_r2 = a[:, 1:2]
print('\nTwo ways of accessing a single column:')
print(col_r1, col_r1.shape)
print(col_r2, col_r2.shape)
# Bare expressions: only display in a notebook; no-ops as a script.
row_r1
row_r2
# A tensor, a slice of it, and a clone of that slice.
a = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
b = a[0, 1:]
c = a[0, 1:].clone()
print('Before mutating:')
print(a)
print(b)
print(c)
a[0, 1] = 20  # visible through b: the slice views the same storage
b[1] = 30     # visible through a for the same reason
c[2] = 40     # c owns its own data, so a and b are untouched
print('\nAfter mutating:')
print(a)
print(b)
print(c)
print(a.storage().data_ptr() == c.storage().data_ptr())
# We will use this helper function to check your results | |
def check(orig, actual, expected):
    """Return True iff `actual` equals `expected` element-wise AND shares
    storage with `orig` (i.e. it is a view, not a copy)."""
    target = torch.tensor(expected)
    values_match = (actual == target).all().item() == 1
    shares_storage = orig.storage().data_ptr() == actual.storage().data_ptr()
    return values_match and shares_storage
# Create the following rank 2 tensor of shape (3, 5)
# [[ 1  2  3  4  5]
#  [ 6  7  8  9 10]
#  [11 12 13 14 15]]
# Fixed data typo: the second row previously read [6, 7, 8, 8, 10], which
# contradicted the tensor described above. None of the graded slices below
# touch the corrected element, so every check result is unchanged.
a = torch.tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]])
b, c, d, e = None, None, None, None
# Last row of a, as a rank-1 view.
b = a[2, :]
print('b correct:', check(a, b, [11, 12, 13, 14, 15]))
# Third column of a, as a rank-2 view.
c = a[:, 2:3]
print('c correct:', check(a, c, [[3], [8], [13]]))
# First two rows crossed with first three columns.
d = a[0:2, 0:3]
print('d correct:', check(a, d, [[1, 2, 3], [6, 7, 8]]))
# Rows 0 and 2 crossed with columns 1 and 4.
e = a[0::2, 1:5:3]
print('e correct:', check(a, e, [[2, 5], [12, 15]]))
x = torch.zeros(4, 6, dtype=torch.int64)
# Exercise: fill x via slicing assignment so that it equals `expected` below.
x[:2, 2:] = 2                       # top-right quadrant of twos
x[2:, 4:] = 5                       # bottom-right block of fives
x[:2, :2] = torch.eye(2)            # identity in the top-left corner
x[2:, :4] = torch.tensor([3, 4, 3, 4])  # broadcast over the bottom rows
expected = [
    [1, 0, 2, 2, 2, 2],
    [0, 1, 2, 2, 2, 2],
    [3, 4, 3, 4, 5, 5],
    [3, 4, 3, 4, 5, 5],
]
print('correct:', x.tolist() == expected)
a = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
print('Original tensor:')
print(a)
idx = [0, 0, 2, 1, 1]  # a plain Python list of ints works as an index array
print('\nReordered rows:')
print(a[idx])
idx = torch.tensor([3, 2, 1, 0])  # so does an int64 tensor
print('\nReordered columns:')
print(a[:, idx])
a = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print('Original tensor:')
print(a)
idx = [0, 1, 2]
# Pairing identical row and column index arrays selects the diagonal.
print('\nGet the diagonal:')
print(a[idx, idx])
# The same index pair can be an assignment target.
a[idx, idx] = torch.tensor([11, 22, 33])
print('\nAfter setting the diagonal:')
print(a)
# Select one element from each row using a pair of index arrays.
a = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
print('Original tensor:')
print(a)
# Pick entries (0,1), (1,2), (2,1) and (3,0).
idx0 = torch.arange(a.shape[0])  # [0, 1, 2, 3]
idx1 = torch.tensor([1, 2, 1, 0])
print('\nSelect one element from each row:')
print(a[idx0, idx1])
# The same index pair works for assignment: zero those entries.
a[idx0, idx1] = 0
print('\nAfter modifying one element from each row:')
print(a)
# Build a tensor of shape (4, 3):
# [[ 1,  2,  3],
#  [ 4,  5,  6],
#  [ 7,  8,  9],
#  [10, 11, 12]]
a = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
print('Here is a:')
print(a)
b, c, d = None, None, None
# Columns of a reordered as (first, first, third, second).
# The dead `b = torch.zeros(...)` that was immediately overwritten by the
# indexing expression has been removed.
idx0 = [0, 0, 2, 1]
b = a[:, idx0]
print('\nHere is b:')
print(b)
expected = [[1, 1, 3, 2], [4, 4, 6, 5], [7, 7, 9, 8], [10, 10, 12, 11]]
print('b correct:', b.tolist() == expected)
# Rows of a in reverse order.
idx1 = [3, 2, 1, 0]
c = a[idx1]
print('\nHere is c:')
print(c)
expected = [[10, 11, 12], [7, 8, 9], [4, 5, 6], [1, 2, 3]]
print('c correct:', c.tolist() == expected)
# One element per column: row 1 of col 0, row 0 of col 1, row 3 of col 2.
idx2 = torch.arange(a.shape[1])
idx3 = torch.tensor([1, 0, 3])
d = a[idx3, idx2]
print('\nHere is d:')
print(d)
expected = [4, 2, 12]
print('d correct:', d.tolist() == expected)
a = torch.tensor([[1, 2], [3, 4], [5, 6]])
print('Original tensor:')
print(a)
# Boolean mask with the same shape as a: True wherever the entry exceeds 3.
mask = (a > 3)
print('\nMask tensor:')
print(mask)
# Indexing with a boolean mask yields a rank-1 tensor of the selected entries.
print('\nSelecting elements with the mask:')
print(a[mask])
# Masks can also be assignment targets: zero out everything <= 3.
a[a <= 3] = 0
print('\nAfter modifying with a mask:')
print(a)
def num_negative(x):
    """
    Return the number of negative values in the tensor x.

    Inputs:
    - x: A tensor of any shape

    Returns:
    - num_neg: Number of negative values in x (a Python int)
    """
    # Count True entries of the boolean mask directly; this replaces the
    # roundabout list(x[mask].shape)[0] of the original.
    num_neg = (x < 0).sum().item()
    return num_neg
# Sanity-check num_negative on hand-built and seeded-random inputs.
torch.manual_seed(598)
x0 = torch.tensor([[-1, -1, 0], [0, 1, 2], [3, 4, 5]])
x1 = torch.tensor([0, 1, 2, 3])
x2 = torch.randn(100, 100)
for tensor_in, want in ((x0, 2), (x1, 0), (x2, 4984)):
    assert num_negative(tensor_in) == want
print('num_negative seems to be correct!')
def make_one_hot(x):
    """
    Construct a tensor of one-hot-vectors from a list of Python integers.

    Input:
    - x: A list of N ints

    Returns:
    - y: A tensor of shape (N, C) where C = 1 + max(x) is one more than the max
      value in x. The nth row of y is a one-hot-vector representation of x[n]:
      if x[n] = c then y[n, c] = 1 and every other element of y is zero.
    """
    y = None
    num_classes = max(x) + 1
    # Start from all zeros, then light up one entry per row with an
    # (arange, labels) integer index pair.
    out = torch.zeros(len(x), num_classes, dtype=torch.int64)
    rows = torch.arange(len(x))
    cols = torch.tensor(x)
    out[rows, cols] = 1
    y = out
    return y
def check_one_hot(x, y):
    """Return True iff y is a valid one-hot encoding of the label list x."""
    C = y.shape[1]
    for row, label in enumerate(x):
        # A label outside [0, C) cannot be represented in C columns.
        if label >= C:
            return False
        # Row must be exactly 1.0 at `label` and 0.0 everywhere else.
        if any(y[row, col].item() != (1.0 if col == label else 0.0)
               for col in range(C)):
            return False
    return True
# Exercise make_one_hot on two label lists and validate each result.
x0 = [1, 4, 3, 2]
y0 = make_one_hot(x0)
print(f'Here is y0:\n{y0}')
assert check_one_hot(x0, y0), 'y0 is wrong'
x1 = [1, 3, 5, 7, 6, 2]
y1 = make_one_hot(x1)
print(f'\nHere is y1:\n{y1}')
assert check_one_hot(x1, y1), 'y1 is wrong'
print('all checks pass!')
x0 = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]])
print('Original tensor:')
print(x0)
print('shape:', x0.shape)
# view() reinterprets the same 8 elements under different shapes.
x1 = x0.view(8)        # rank-1 vector, shape (8,)
x2 = x1.view(1, 8)     # rank-2 row vector
x3 = x1.view(8, 1)     # rank-2 column vector
x4 = x1.view(2, 2, 2)  # rank-3 tensor
for label, t in (('\nFlattened tensor:', x1),
                 ('\nRow vector:', x2),
                 ('\nColumn vector:', x3),
                 ('\nRank 3 tensor:', x4)):
    print(label)
    print(t)
    print('shape:', t.shape)
# We can reuse these functions for tensors of different shapes
def flatten(x):
    """Collapse x into a rank-1 view containing the same elements."""
    flat_view = x.view(-1)
    return flat_view
def make_row_vec(x):
    """Reshape x into a rank-2 row vector of shape (1, numel)."""
    row_view = x.view(1, -1)
    return row_view
def _show(label, t):
    # Print a label on one line and the tensor on the next.
    print(label)
    print(t)

# Apply the reshape helpers to tensors of two different shapes.
x0 = torch.tensor([[1, 2, 3], [4, 5, 6]])
x0_flat = flatten(x0)
x0_row = make_row_vec(x0)
_show('x0:', x0)
_show('x0_flat:', x0_flat)
_show('x0_row:', x0_row)
x1 = torch.tensor([[1, 2], [3, 4]])
x1_flat = flatten(x1)
x1_row = make_row_vec(x1)
_show('\nx1:', x1)
_show('x1_flat:', x1_flat)
_show('x1_row:', x1_row)
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
x_flat = x.view(-1)
print('x before modifying:')
print(x)
print('x_flat before modifying:')
print(x_flat)
# A view shares storage with its base tensor, so a write through either
# name is visible through both.
x[0, 0] = 10
x_flat[1] = 20
print('\nx after modifying:')
print(x)
print('x_flat after modifying:')
print(x_flat)
x = torch.tensor([[1, 2, 3], [4, 5, 6]])
print('Original matrix:')
print(x)
# view() only regroups elements in storage order -- it cannot transpose.
print('\nTransposing with view DOES NOT WORK!')
print(x.view(3, 2))
print('\nTransposed matrix:')
print(torch.t(x))
print(x.t())
# A rank-3 tensor of shape (2, 3, 4).
x0 = torch.tensor([
    [[1, 2, 3, 4],
     [5, 6, 7, 8],
     [9, 10, 11, 12]],
    [[13, 14, 15, 16],
     [17, 18, 19, 20],
     [21, 22, 23, 24]]])
print('Original tensor:')
print(x0)
print('shape:', x0.shape)
# Exchange the last two axes; result has shape (2, 4, 3).
x1 = x0.transpose(1, 2)
print('\nSwap axes 1 and 2:')
print(x1)
print(x1.shape)
# permute(1, 2, 0) moves old axis 1 to position 0, old axis 2 to
# position 1, and old axis 0 to position 2, giving shape (3, 4, 2).
x2 = x0.permute(1, 2, 0)
print('\nPermute axes')
print(x2)
print('shape:', x2.shape)
x0 = torch.arange(24)
print('Here is x0:')
print(x0)
x1 = None
# Exercise: reshape x0 into the 3x8 tensor `expected` below.
# View as two (3, 4) blocks, move the block axis after the row axis so the
# two blocks' matching rows become adjacent, then flatten each row pair.
# (Simpler than the original view/transpose/reshape/transpose chain, and
# the leftover "please review" comments have been removed.)
x1 = x0.view(2, 3, 4).permute(1, 0, 2).reshape(3, 8)
print('\nHere is x1:')
print(x1)
expected = [
    [0, 1, 2, 3, 12, 13, 14, 15],
    [4, 5, 6, 7, 16, 17, 18, 19],
    [8, 9, 10, 11, 20, 21, 22, 23]]
print('Correct:', x1.tolist() == expected)
x = torch.tensor([[1, 2, 3],
                  [4, 5, 6]], dtype=torch.float32)
print('Original tensor:')
print(x)
# Summing with no dim reduces the whole tensor to a scalar.
print('\nSum over entire tensor:')
print(torch.sum(x))
print(x.sum())
# dim=0 reduces *over* the rows, producing one sum per column.
# (The printed labels follow the original notebook's wording.)
print('\nSum of each row:')
print(torch.sum(x, dim=0))
print(x.sum(dim=0))
# dim=1 reduces over the columns, producing one sum per row.
print('\nSum of each column:')
print(torch.sum(x, dim=1))
print(x.sum(dim=1))
x = torch.tensor([[2, 4, 3, 5], [3, 3, 5, 2]], dtype=torch.float32)
print('Original tensor:')
print(x, x.shape)
# With no dim, min reduces the whole tensor to a single scalar.
overall_min = x.min()
print('\nOverall minimum: ', overall_min)
# With a dim, min returns both the values and their index positions.
# dim=0 reduces over rows: one (value, index) pair per column.
col_min_vals, col_min_idxs = x.min(dim=0)
print('\nMinimum along each column:')
print('values:', col_min_vals)
print('idxs:', col_min_idxs)
print(x.min(dim=0))
# dim=1 reduces over columns: one (value, index) pair per row.
row_min_vals, row_min_idxs = x.min(dim=1)
print('\nMinimum along each row:')
print('values:', row_min_vals)
print('idxs:', row_min_idxs)
# Create a tensor of shape (2, 3, 3, 2, 3).
# (Comments fixed: they previously described the course example's
# (128, 10, 3, 64, 64) shapes, not the shapes this code actually uses.)
x = torch.randn(2, 3, 3, 2, 3)
print(x)
# Take the mean over dimension 1; shape is now (2, 3, 2, 3)
x = x.mean(dim=1)
print(x)
# Take the sum over dimension 2; shape is now (2, 3, 3)
x = x.sum(dim=2)
print(x)
# Take the mean over dimension 1, but keep the dimension from being eliminated
# by passing keepdim=True; shape is now (2, 1, 3)
x = x.mean(dim=1, keepdim=True)
print(x)
def zero_row_min(x):
    """
    Return a copy of x, where the minimum value along each row has been set to 0.

    For example, if x is:
    x = torch.tensor([[
          [10, 20, 30],
          [ 2, 5, 1]
        ]])
    Then y = zero_row_min(x) should be:
    torch.tensor([
      [0, 20, 30],
      [2, 5, 0]
    ])

    Inputs:
    - x: Tensor of rank 2 with shape (N, M)

    Returns:
    - y: Tensor of rank 2 that is a copy of x, except the minimum value along
      each row is replaced with 0.
    """
    y = x.clone()
    # min(dim=1) gives the column index of each row's (first) minimum;
    # pairing it with arange zeroes one entry per row in a single
    # vectorized assignment, replacing the original Python loop.
    _, min_cols = x.min(dim=1)
    y[torch.arange(x.shape[0]), min_cols] = 0
    return y
# Spot-check zero_row_min on two small examples.
x0 = torch.tensor([[10, 20, 30], [2, 5, 1]])
print(f'Here is x0:\n{x0}')
y0 = zero_row_min(x0)
print(f'Here is y0:\n{y0}')
assert y0.tolist() == [[0, 20, 30], [2, 5, 0]]
x1 = torch.tensor([[2, 5, 10, -1], [1, 3, 2, 4], [5, 6, 2, 10]])
print(f'\nHere is x1:\n{x1}')
y1 = zero_row_min(x1)
print(f'Here is y1:\n{y1}')
assert y1.tolist() == [[2, 5, 10, 0], [0, 3, 2, 4], [5, 6, 0, 10]]
print('\nSimple tests pass!')
v = torch.tensor([9, 10], dtype=torch.float32)
w = torch.tensor([11, 12], dtype=torch.float32)
# Inner product of two vectors, in function and method form.
print('Dot products:')
print(torch.dot(v, w))
print(v.dot(w))
# dot is defined only for rank-1 tensors; higher ranks raise RuntimeError.
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
y = torch.tensor([[5, 6], [7, 8]], dtype=torch.float32)
print(x)
print(y)
try:
    print(x.dot(y))
except RuntimeError as e:
    print(e)
# Matrix-matrix products use mm instead.
print('\nMatrix-matrix product:')
print(torch.mm(x, y))
print(x.mm(y))
B, N, M, P = 3, 2, 5, 4
x = torch.rand(B, N, M)  # random batch of (N, M) matrices
y = torch.rand(B, M, P)  # random batch of (M, P) matrices
# Batched matrix multiply with an explicit (slow) Python loop.
z1 = torch.empty(B, N, P)
for i, (xi, yi) in enumerate(zip(x, y)):
    z1[i] = xi.mm(yi)
print('Here is the result of batched matrix multiply with a loop:')
print(z1)
z2 = None
# Same computation in a single fused bmm call.
z2 = torch.bmm(x, y)
print('\nHere is the result of batched matrix multiply with bmm:')
print(z2)
# Different linear-algebra code paths can disagree by tiny floating-point
# amounts, so compare with a tolerance rather than exact equality.
diff = (z1 - z2).abs().max().item()
print('\nDifference:', diff)
print('Difference within threshold:', diff < 1e-6)
def normalize_columns(x):
    """
    Normalize the columns of a matrix by subtracting the mean and dividing by
    the standard deviation.

    Inputs:
    - x: Tensor of shape (N, M); not modified.

    Returns:
    - y: Tensor of shape (N, M) which is a copy of x with normalized columns.
    """
    # Per-column statistics broadcast against x's rows; no loops needed.
    # The original's debug prints (including a hard-coded "expected" tensor
    # that was only valid for one particular input) have been removed.
    col_mean = x.mean(dim=0)
    col_std = x.std(dim=0)
    y = (x - col_mean) / col_std
    return y
# Verify normalize_columns output and confirm it does not mutate its input.
x0 = torch.tensor([[0., 30., 600.], [1., 10., 200.], [-1., 20., 400.]])
y0 = normalize_columns(x0)
print(f'Here is x0:\n{x0}')
print(f'Here is y0:\n{y0}')
assert y0.tolist() == [[0., 1., 1.], [1., -1., -1.], [-1., 0., 0.]]
assert x0.tolist() == [[0., 30., 600.], [1., 10., 200.], [-1., 20., 400.]]
# Std/mean of a small row vector, for comparison.
p = torch.tensor([[30., 10., 20.]])
print(torch.std(p))
print(torch.mean(p))
# BUG FIX: the original tested `torch.cuda.is_available` -- the bound method
# object itself, which is always truthy -- so the GPU branch printed even on
# CPU-only machines. The method must be *called*.
if torch.cuda.is_available():
    print('PyTorch can use GPUs!')
else:
    print('PyTorch cannot use GPUs.')
import time
# NOTE(review): this section requires a CUDA device -- the .cuda() calls
# below raise RuntimeError on CPU-only machines.
a_cpu = torch.randn(10000, 10000, dtype=torch.float32)
b_cpu = torch.randn(10000, 10000, dtype=torch.float32)
# GPU copies of both operands.
a_gpu = a_cpu.cuda()
b_gpu = b_cpu.cuda()
# CUDA kernels launch asynchronously; synchronize so the timers measure
# completed work rather than just the kernel launch.
torch.cuda.synchronize()
t0 = time.time()
c_cpu = a_cpu + b_cpu
t1 = time.time()
c_gpu = a_gpu + b_gpu
torch.cuda.synchronize()
t2 = time.time()
# Check that they computed the same thing
diff = (c_gpu.cpu() - c_cpu).abs().max().item()
print('Max difference between c_gpu and c_cpu:', diff)
cpu_time = 1000.0 * (t1 - t0)
gpu_time = 1000.0 * (t2 - t1)
print('CPU time: %.2f ms' % cpu_time)
print('GPU time: %.2f ms' % gpu_time)
print('GPU speedup: %.2f x' % (cpu_time / gpu_time))
import time
x = torch.rand(512, 4096)
w = torch.rand(4096, 4096)
# Time the matrix multiply on the CPU.
t0 = time.time()
y0 = x.mm(w)
t1 = time.time()
y1 = None
##############################################################################
# TODO: Write a bit of code that performs matrix multiplication of x and w   #
# on the GPU, and then moves the result back to the CPU. Store the result    #
# in y1.                                                                     #
##############################################################################
# NOTE(review): requires a CUDA device; .cuda() raises on CPU-only machines.
# The timed GPU region below also includes the host->device copies of x and
# w and the device->host copy of the result, so gpu_time measures transfers
# plus the matmul, not the matmul alone.
x = x.cuda()
w = w.cuda()
y = torch.matmul(x,w)
y1 = y.cpu()
##############################################################################
#                            END OF YOUR CODE                                #
##############################################################################
torch.cuda.synchronize()
t2 = time.time()
print('y1 on CPU:', y1.device == torch.device('cpu'))
diff = (y0 - y1).abs().max().item()
print('Max difference between y0 and y1:', diff)
print('Difference within tolerance:', diff < 5e-2)
cpu_time = 1000.0 * (t1 - t0)
gpu_time = 1000.0 * (t2 - t1)
print('CPU time: %.2f ms' % cpu_time)
print('GPU time: %.2f ms' % gpu_time)
print('GPU speedup: %.2f x' % (cpu_time / gpu_time))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment