|
from builtins import range
|
|
import numpy as np
|
|
|
|
|
|
def affine_forward(x, w, b):
|
|
"""
|
|
    Computes the forward pass for an affine (fully-connected) layer.

    The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
    examples, where each example x[i] has shape (d_1, ..., d_k). We will
    reshape each input into a vector of dimension D = d_1 * ... * d_k, and
    then transform it to an output vector of dimension M.
|
|
|
|
Inputs:
|
|
- x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
|
|
- w: A numpy array of weights, of shape (D, M)
|
|
- b: A numpy array of biases, of shape (M,)
|
|
|
|
Returns a tuple of:
|
|
- out: output, of shape (N, M)
|
|
- cache: (x, w, b)
|
|
"""
|
|
out = None
|
|
###########################################################################
|
|
# TODO: Implement the affine forward pass. Store the result in out. You #
|
|
# will need to reshape the input into rows. #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
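    # A minimal sketch of one possible implementation (the `pass` above is
    # just the original placeholder): flatten each example into a row, then
    # apply a single matrix multiply plus the bias.
    x_rows = x.reshape(x.shape[0], -1)  # (N, D)
    out = x_rows.dot(w) + b             # (N, M)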
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
cache = (x, w, b)
|
|
return out, cache
|
|
|
|
|
|
def affine_backward(dout, cache):
|
|
"""
|
|
    Computes the backward pass for an affine (fully-connected) layer.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivative, of shape (N, M)
|
|
- cache: Tuple of:
|
|
- x: Input data, of shape (N, d_1, ... d_k)
|
|
- w: Weights, of shape (D, M)
|
|
- b: Biases, of shape (M,)
|
|
|
|
Returns a tuple of:
|
|
    - dx: Gradient with respect to x, of shape (N, d_1, ..., d_k)
|
|
- dw: Gradient with respect to w, of shape (D, M)
|
|
- db: Gradient with respect to b, of shape (M,)
|
|
"""
|
|
x, w, b = cache
|
|
dx, dw, db = None, None, None
|
|
###########################################################################
|
|
# TODO: Implement the affine backward pass. #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
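    # A minimal sketch of one possible implementation: reuse the flattened
    # view of x and route the upstream gradient back through the matrix
    # multiply and the bias addition.
    x_rows = x.reshape(x.shape[0], -1)   # (N, D)
    dx = dout.dot(w.T).reshape(x.shape)  # back to (N, d_1, ..., d_k)
    dw = x_rows.T.dot(dout)              # (D, M)
    db = np.sum(dout, axis=0)            # (M,)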
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx, dw, db
|
|
|
|
|
|
def relu_forward(x):
|
|
"""
|
|
    Computes the forward pass for a layer of rectified linear units (ReLUs).
|
|
|
|
Input:
|
|
- x: Inputs, of any shape
|
|
|
|
Returns a tuple of:
|
|
- out: Output, of the same shape as x
|
|
- cache: x
|
|
"""
|
|
out = None
|
|
###########################################################################
|
|
# TODO: Implement the ReLU forward pass. #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
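    # A minimal sketch: an elementwise max with zero.
    out = np.maximum(0, x)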
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
cache = x
|
|
return out, cache
|
|
|
|
|
|
def relu_backward(dout, cache):
|
|
"""
|
|
Computes the backward pass for a layer of rectified linear units (ReLUs).
|
|
|
|
Input:
|
|
- dout: Upstream derivatives, of any shape
|
|
- cache: Input x, of same shape as dout
|
|
|
|
Returns:
|
|
- dx: Gradient with respect to x
|
|
"""
|
|
dx, x = None, cache
|
|
###########################################################################
|
|
# TODO: Implement the ReLU backward pass. #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
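    # A minimal sketch: pass the upstream gradient through only where the
    # cached input was positive.
    dx = dout * (x > 0)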
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx
|
|
|
|
|
|
def batchnorm_forward(x, gamma, beta, bn_param):
|
|
"""
|
|
Forward pass for batch normalization.
|
|
|
|
During training the sample mean and (uncorrected) sample variance are
|
|
computed from minibatch statistics and used to normalize the incoming data.
|
|
During training we also keep an exponentially decaying running mean of the
|
|
mean and variance of each feature, and these averages are used to normalize
|
|
data at test-time.
|
|
|
|
At each timestep we update the running averages for mean and variance using
|
|
an exponential decay based on the momentum parameter:
|
|
|
|
running_mean = momentum * running_mean + (1 - momentum) * sample_mean
|
|
running_var = momentum * running_var + (1 - momentum) * sample_var
|
|
|
|
Note that the batch normalization paper suggests a different test-time
|
|
behavior: they compute sample mean and variance for each feature using a
|
|
large number of training images rather than using a running average. For
|
|
this implementation we have chosen to use running averages instead since
|
|
they do not require an additional estimation step; the torch7
|
|
implementation of batch normalization also uses running averages.
|
|
|
|
Input:
|
|
- x: Data of shape (N, D)
|
|
- gamma: Scale parameter of shape (D,)
|
|
    - beta: Shift parameter of shape (D,)
|
|
- bn_param: Dictionary with the following keys:
|
|
- mode: 'train' or 'test'; required
|
|
- eps: Constant for numeric stability
|
|
- momentum: Constant for running mean / variance.
|
|
- running_mean: Array of shape (D,) giving running mean of features
|
|
      - running_var: Array of shape (D,) giving running variance of features
|
|
|
|
Returns a tuple of:
|
|
- out: of shape (N, D)
|
|
- cache: A tuple of values needed in the backward pass
|
|
"""
|
|
mode = bn_param['mode']
|
|
eps = bn_param.get('eps', 1e-5)
|
|
momentum = bn_param.get('momentum', 0.9)
|
|
|
|
N, D = x.shape
|
|
running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
|
|
running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))
|
|
|
|
out, cache = None, None
|
|
if mode == 'train':
|
|
#######################################################################
|
|
# TODO: Implement the training-time forward pass for batch norm. #
|
|
# Use minibatch statistics to compute the mean and variance, use #
|
|
# these statistics to normalize the incoming data, and scale and #
|
|
# shift the normalized data using gamma and beta. #
|
|
# #
|
|
# You should store the output in the variable out. Any intermediates #
|
|
# that you need for the backward pass should be stored in the cache #
|
|
# variable. #
|
|
# #
|
|
# You should also use your computed sample mean and variance together #
|
|
# with the momentum variable to update the running mean and running #
|
|
# variance, storing your result in the running_mean and running_var #
|
|
# variables. #
|
|
# #
|
|
# Note that though you should be keeping track of the running #
|
|
# variance, you should normalize the data based on the standard #
|
|
# deviation (square root of variance) instead! #
|
|
# Referencing the original paper (https://arxiv.org/abs/1502.03167) #
|
|
# might prove to be helpful. #
|
|
#######################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
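        # A minimal sketch of one possible training-time implementation.
        # The cache layout used here is an assumption of this sketch and is
        # what the batchnorm_backward sketches below expect.
        sample_mean = x.mean(axis=0)  # (D,)
        sample_var = x.var(axis=0)    # (D,), uncorrected
        x_hat = (x - sample_mean) / np.sqrt(sample_var + eps)
        out = gamma * x_hat + beta
        cache = (x, x_hat, sample_mean, sample_var, eps, gamma, beta)

        # Exponentially decaying running averages, as described above.
        running_mean = momentum * running_mean + (1 - momentum) * sample_mean
        running_var = momentum * running_var + (1 - momentum) * sample_var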
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
#######################################################################
|
|
# END OF YOUR CODE #
|
|
#######################################################################
|
|
elif mode == 'test':
|
|
#######################################################################
|
|
# TODO: Implement the test-time forward pass for batch normalization. #
|
|
# Use the running mean and variance to normalize the incoming data, #
|
|
# then scale and shift the normalized data using gamma and beta. #
|
|
# Store the result in the out variable. #
|
|
#######################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
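        # A minimal sketch: at test time, normalize with the running
        # statistics instead of the batch statistics (cache stays None).
        x_hat = (x - running_mean) / np.sqrt(running_var + eps)
        out = gamma * x_hat + beta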
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
#######################################################################
|
|
# END OF YOUR CODE #
|
|
#######################################################################
|
|
else:
|
|
raise ValueError('Invalid forward batchnorm mode "%s"' % mode)
|
|
|
|
# Store the updated running means back into bn_param
|
|
bn_param['running_mean'] = running_mean
|
|
bn_param['running_var'] = running_var
|
|
|
|
return out, cache
|
|
|
|
|
|
def batchnorm_backward(dout, cache):
|
|
"""
|
|
Backward pass for batch normalization.
|
|
|
|
For this implementation, you should write out a computation graph for
|
|
batch normalization on paper and propagate gradients backward through
|
|
intermediate nodes.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives, of shape (N, D)
|
|
- cache: Variable of intermediates from batchnorm_forward.
|
|
|
|
Returns a tuple of:
|
|
- dx: Gradient with respect to inputs x, of shape (N, D)
|
|
- dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
|
|
- dbeta: Gradient with respect to shift parameter beta, of shape (D,)
|
|
"""
|
|
dx, dgamma, dbeta = None, None, None
|
|
###########################################################################
|
|
# TODO: Implement the backward pass for batch normalization. Store the #
|
|
# results in the dx, dgamma, and dbeta variables. #
|
|
# Referencing the original paper (https://arxiv.org/abs/1502.03167) #
|
|
# might prove to be helpful. #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
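    # A minimal computation-graph sketch. It assumes the cache layout
    # produced by the batchnorm_forward sketch above:
    # (x, x_hat, mean, var, eps, gamma, beta).
    x, x_hat, mean, var, eps, gamma, beta = cache
    N = x.shape[0]
    std = np.sqrt(var + eps)

    dbeta = np.sum(dout, axis=0)
    dgamma = np.sum(dout * x_hat, axis=0)

    dx_hat = dout * gamma
    dvar = np.sum(dx_hat * (x - mean) * -0.5 * std ** -3, axis=0)
    dmean = np.sum(-dx_hat / std, axis=0) + dvar * np.mean(-2 * (x - mean), axis=0)
    dx = dx_hat / std + dvar * 2 * (x - mean) / N + dmean / N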
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
|
|
return dx, dgamma, dbeta
|
|
|
|
|
|
def batchnorm_backward_alt(dout, cache):
|
|
"""
|
|
Alternative backward pass for batch normalization.
|
|
|
|
For this implementation you should work out the derivatives for the batch
|
|
    normalization backward pass on paper and simplify as much as possible. You
|
|
should be able to derive a simple expression for the backward pass.
|
|
See the jupyter notebook for more hints.
|
|
|
|
Note: This implementation should expect to receive the same cache variable
|
|
as batchnorm_backward, but might not use all of the values in the cache.
|
|
|
|
Inputs / outputs: Same as batchnorm_backward
|
|
"""
|
|
dx, dgamma, dbeta = None, None, None
|
|
###########################################################################
|
|
# TODO: Implement the backward pass for batch normalization. Store the #
|
|
# results in the dx, dgamma, and dbeta variables. #
|
|
# #
|
|
# After computing the gradient with respect to the centered inputs, you #
|
|
# should be able to compute gradients with respect to the inputs in a #
|
|
# single statement; our implementation fits on a single 80-character line.#
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
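    # A minimal sketch using the simplified closed-form expression; it also
    # assumes the cache layout from the batchnorm_forward sketch above.
    x, x_hat, mean, var, eps, gamma, beta = cache
    N = dout.shape[0]

    dbeta = np.sum(dout, axis=0)
    dgamma = np.sum(dout * x_hat, axis=0)
    dx = (gamma / (N * np.sqrt(var + eps))) * (
        N * dout - dbeta - x_hat * dgamma)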
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
|
|
return dx, dgamma, dbeta
|
|
|
|
|
|
def layernorm_forward(x, gamma, beta, ln_param):
|
|
"""
|
|
Forward pass for layer normalization.
|
|
|
|
During both training and test-time, the incoming data is normalized per data-point,
|
|
before being scaled by gamma and beta parameters identical to that of batch normalization.
|
|
|
|
Note that in contrast to batch normalization, the behavior during train and test-time for
|
|
    layer normalization is identical, and we do not need to keep track of running averages
|
|
of any sort.
|
|
|
|
Input:
|
|
- x: Data of shape (N, D)
|
|
- gamma: Scale parameter of shape (D,)
|
|
    - beta: Shift parameter of shape (D,)
|
|
- ln_param: Dictionary with the following keys:
|
|
- eps: Constant for numeric stability
|
|
|
|
Returns a tuple of:
|
|
- out: of shape (N, D)
|
|
- cache: A tuple of values needed in the backward pass
|
|
"""
|
|
out, cache = None, None
|
|
eps = ln_param.get('eps', 1e-5)
|
|
###########################################################################
|
|
# TODO: Implement the training-time forward pass for layer norm. #
|
|
# Normalize the incoming data, and scale and shift the normalized data #
|
|
# using gamma and beta. #
|
|
# HINT: this can be done by slightly modifying your training-time #
|
|
# implementation of batch normalization, and inserting a line or two of #
|
|
# well-placed code. In particular, can you think of any matrix #
|
|
# transformations you could perform, that would enable you to copy over #
|
|
# the batch norm code and leave it almost unchanged? #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
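    # A minimal sketch: normalize each row (data point) over its features,
    # then scale and shift per feature. The cache layout is an assumption
    # shared with the layernorm_backward sketch below.
    mean = x.mean(axis=1, keepdims=True)  # (N, 1)
    var = x.var(axis=1, keepdims=True)    # (N, 1)
    x_hat = (x - mean) / np.sqrt(var + eps)
    out = gamma * x_hat + beta
    cache = (x_hat, var, eps, gamma)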
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return out, cache
|
|
|
|
|
|
def layernorm_backward(dout, cache):
|
|
"""
|
|
Backward pass for layer normalization.
|
|
|
|
For this implementation, you can heavily rely on the work you've done already
|
|
for batch normalization.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives, of shape (N, D)
|
|
- cache: Variable of intermediates from layernorm_forward.
|
|
|
|
Returns a tuple of:
|
|
- dx: Gradient with respect to inputs x, of shape (N, D)
|
|
- dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
|
|
- dbeta: Gradient with respect to shift parameter beta, of shape (D,)
|
|
"""
|
|
dx, dgamma, dbeta = None, None, None
|
|
###########################################################################
|
|
# TODO: Implement the backward pass for layer norm. #
|
|
# #
|
|
# HINT: this can be done by slightly modifying your training-time #
|
|
# implementation of batch normalization. The hints to the forward pass #
|
|
# still apply! #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
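    # A minimal sketch assuming the cache layout from the layernorm_forward
    # sketch above: (x_hat, var, eps, gamma). It is the per-row analogue of
    # the simplified batch norm backward pass.
    x_hat, var, eps, gamma = cache
    D = dout.shape[1]

    dbeta = np.sum(dout, axis=0)
    dgamma = np.sum(dout * x_hat, axis=0)

    dx_hat = dout * gamma
    std = np.sqrt(var + eps)  # (N, 1)
    dx = (D * dx_hat
          - np.sum(dx_hat, axis=1, keepdims=True)
          - x_hat * np.sum(dx_hat * x_hat, axis=1, keepdims=True)) / (D * std)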
|
|
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx, dgamma, dbeta
|
|
|
|
|
|
def dropout_forward(x, dropout_param):
|
|
"""
|
|
Performs the forward pass for (inverted) dropout.
|
|
|
|
Inputs:
|
|
- x: Input data, of any shape
|
|
- dropout_param: A dictionary with the following keys:
|
|
- p: Dropout parameter. We keep each neuron output with probability p.
|
|
- mode: 'test' or 'train'. If the mode is train, then perform dropout;
|
|
if the mode is test, then just return the input.
|
|
- seed: Seed for the random number generator. Passing seed makes this
|
|
function deterministic, which is needed for gradient checking but not
|
|
in real networks.
|
|
|
|
Outputs:
|
|
- out: Array of the same shape as x.
|
|
- cache: tuple (dropout_param, mask). In training mode, mask is the dropout
|
|
mask that was used to multiply the input; in test mode, mask is None.
|
|
|
|
NOTE: Please implement **inverted** dropout, not the vanilla version of dropout.
|
|
See http://cs231n.github.io/neural-networks-2/#reg for more details.
|
|
|
|
    NOTE 2: Keep in mind that p is the probability of **keeping** a neuron
|
|
output; this might be contrary to some sources, where it is referred to
|
|
as the probability of dropping a neuron output.
|
|
"""
|
|
p, mode = dropout_param['p'], dropout_param['mode']
|
|
if 'seed' in dropout_param:
|
|
np.random.seed(dropout_param['seed'])
|
|
|
|
mask = None
|
|
out = None
|
|
|
|
if mode == 'train':
|
|
#######################################################################
|
|
        # TODO: Implement the training phase forward pass for inverted dropout.
        # Store the dropout mask in the mask variable.
|
|
#######################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
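        # A minimal sketch of inverted dropout: keep each unit with
        # probability p and rescale by 1 / p at train time.
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask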
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
#######################################################################
|
|
# END OF YOUR CODE #
|
|
#######################################################################
|
|
elif mode == 'test':
|
|
#######################################################################
|
|
        # TODO: Implement the test phase forward pass for inverted dropout.
|
|
#######################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
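        # A minimal sketch: with inverted dropout, test time is the identity.
        out = x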
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
#######################################################################
|
|
# END OF YOUR CODE #
|
|
#######################################################################
|
|
|
|
cache = (dropout_param, mask)
|
|
out = out.astype(x.dtype, copy=False)
|
|
|
|
return out, cache
|
|
|
|
|
|
def dropout_backward(dout, cache):
|
|
"""
|
|
Perform the backward pass for (inverted) dropout.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives, of any shape
|
|
- cache: (dropout_param, mask) from dropout_forward.
|
|
"""
|
|
dropout_param, mask = cache
|
|
mode = dropout_param['mode']
|
|
|
|
dx = None
|
|
if mode == 'train':
|
|
#######################################################################
|
|
        # TODO: Implement the training phase backward pass for inverted dropout.
|
|
#######################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
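        # A minimal sketch: reapply the mask saved during the forward pass.
        dx = dout * mask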
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
#######################################################################
|
|
# END OF YOUR CODE #
|
|
#######################################################################
|
|
elif mode == 'test':
|
|
dx = dout
|
|
return dx
|
|
|
|
|
|
def conv_forward_naive(x, w, b, conv_param):
|
|
"""
|
|
A naive implementation of the forward pass for a convolutional layer.
|
|
|
|
The input consists of N data points, each with C channels, height H and
|
|
width W. We convolve each input with F different filters, where each filter
|
|
spans all C channels and has height HH and width WW.
|
|
|
|
Input:
|
|
- x: Input data of shape (N, C, H, W)
|
|
- w: Filter weights of shape (F, C, HH, WW)
|
|
- b: Biases, of shape (F,)
|
|
- conv_param: A dictionary with the following keys:
|
|
- 'stride': The number of pixels between adjacent receptive fields in the
|
|
horizontal and vertical directions.
|
|
- 'pad': The number of pixels that will be used to zero-pad the input.
|
|
|
|
|
|
    During padding, 'pad' zeros should be placed symmetrically (i.e., equally on both sides)
|
|
    along the height and width axes of the input. Be careful not to modify the original
|
|
input x directly.
|
|
|
|
Returns a tuple of:
|
|
- out: Output data, of shape (N, F, H', W') where H' and W' are given by
|
|
H' = 1 + (H + 2 * pad - HH) / stride
|
|
W' = 1 + (W + 2 * pad - WW) / stride
|
|
- cache: (x, w, b, conv_param)
|
|
"""
|
|
out = None
|
|
###########################################################################
|
|
    # TODO: Implement the convolutional forward pass.
    # Hint: you can use the function np.pad for padding.
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
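    # A minimal (slow, loop-based) sketch of the forward pass; a vectorized
    # version is possible but kept out of this illustration.
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']
    H_out = 1 + (H + 2 * pad - HH) // stride
    W_out = 1 + (W + 2 * pad - WW) // stride

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    out = np.zeros((N, F, H_out, W_out), dtype=x.dtype)
    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x_pad[n, :, hs:hs + HH, ws:ws + WW]
                    out[n, f, i, j] = np.sum(window * w[f]) + b[f]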
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
cache = (x, w, b, conv_param)
|
|
return out, cache
|
|
|
|
|
|
def conv_backward_naive(dout, cache):
|
|
"""
|
|
A naive implementation of the backward pass for a convolutional layer.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives.
|
|
- cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
|
|
|
|
Returns a tuple of:
|
|
- dx: Gradient with respect to x
|
|
- dw: Gradient with respect to w
|
|
- db: Gradient with respect to b
|
|
"""
|
|
dx, dw, db = None, None, None
|
|
###########################################################################
|
|
    # TODO: Implement the convolutional backward pass.
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
pass
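    # A minimal (slow, loop-based) sketch: each output position distributes
    # gradient to the padded-input window and the filter that produced it.
    x, w, b, conv_param = cache
    N, C, H, W = x.shape
    F, _, HH, WW = w.shape
    stride, pad = conv_param['stride'], conv_param['pad']
    _, _, H_out, W_out = dout.shape

    x_pad = np.pad(x, ((0, 0), (0, 0), (pad, pad), (pad, pad)), mode='constant')
    dx_pad = np.zeros_like(x_pad)
    dw = np.zeros_like(w)
    db = dout.sum(axis=(0, 2, 3))
    for n in range(N):
        for f in range(F):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x_pad[n, :, hs:hs + HH, ws:ws + WW]
                    dw[f] += window * dout[n, f, i, j]
                    dx_pad[n, :, hs:hs + HH, ws:ws + WW] += w[f] * dout[n, f, i, j]
    dx = dx_pad[:, :, pad:pad + H, pad:pad + W]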
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx, dw, db
|
|
|
|
|
|
def max_pool_forward_naive(x, pool_param):
|
|
"""
|
|
A naive implementation of the forward pass for a max-pooling layer.
|
|
|
|
Inputs:
|
|
- x: Input data, of shape (N, C, H, W)
|
|
- pool_param: dictionary with the following keys:
|
|
- 'pool_height': The height of each pooling region
|
|
- 'pool_width': The width of each pooling region
|
|
- 'stride': The distance between adjacent pooling regions
|
|
|
|
    No padding is necessary here. The output size is given by the formulas below.
|
|
|
|
Returns a tuple of:
|
|
- out: Output data, of shape (N, C, H', W') where H' and W' are given by
|
|
H' = 1 + (H - pool_height) / stride
|
|
W' = 1 + (W - pool_width) / stride
|
|
- cache: (x, pool_param)
|
|
"""
|
|
out = None
|
|
###########################################################################
|
|
    # TODO: Implement the max-pooling forward pass                            #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
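    # A minimal (slow, loop-based) sketch: take the max over each window.
    N, C, H, W = x.shape
    ph, pw = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    H_out = 1 + (H - ph) // stride
    W_out = 1 + (W - pw) // stride

    out = np.zeros((N, C, H_out, W_out), dtype=x.dtype)
    for i in range(H_out):
        for j in range(W_out):
            window = x[:, :, i * stride:i * stride + ph,
                       j * stride:j * stride + pw]
            out[:, :, i, j] = window.max(axis=(2, 3))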
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
cache = (x, pool_param)
|
|
return out, cache
|
|
|
|
|
|
def max_pool_backward_naive(dout, cache):
|
|
"""
|
|
A naive implementation of the backward pass for a max-pooling layer.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives
|
|
- cache: A tuple of (x, pool_param) as in the forward pass.
|
|
|
|
Returns:
|
|
- dx: Gradient with respect to x
|
|
"""
|
|
dx = None
|
|
###########################################################################
|
|
    # TODO: Implement the max-pooling backward pass                           #
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
pass
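    # A minimal (slow, loop-based) sketch: route each upstream gradient to
    # the position(s) that achieved the max within its pooling window.
    x, pool_param = cache
    N, C, H, W = x.shape
    ph, pw = pool_param['pool_height'], pool_param['pool_width']
    stride = pool_param['stride']
    _, _, H_out, W_out = dout.shape

    dx = np.zeros_like(x)
    for n in range(N):
        for c in range(C):
            for i in range(H_out):
                for j in range(W_out):
                    hs, ws = i * stride, j * stride
                    window = x[n, c, hs:hs + ph, ws:ws + pw]
                    dx[n, c, hs:hs + ph, ws:ws + pw] += \
                        (window == window.max()) * dout[n, c, i, j]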
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx
|
|
|
|
|
|
def spatial_batchnorm_forward(x, gamma, beta, bn_param):
|
|
"""
|
|
Computes the forward pass for spatial batch normalization.
|
|
|
|
Inputs:
|
|
- x: Input data of shape (N, C, H, W)
|
|
- gamma: Scale parameter, of shape (C,)
|
|
- beta: Shift parameter, of shape (C,)
|
|
- bn_param: Dictionary with the following keys:
|
|
- mode: 'train' or 'test'; required
|
|
- eps: Constant for numeric stability
|
|
- momentum: Constant for running mean / variance. momentum=0 means that
|
|
old information is discarded completely at every time step, while
|
|
momentum=1 means that new information is never incorporated. The
|
|
default of momentum=0.9 should work well in most situations.
|
|
      - running_mean: Array of shape (C,) giving running mean of features
|
|
      - running_var: Array of shape (C,) giving running variance of features
|
|
|
|
Returns a tuple of:
|
|
- out: Output data, of shape (N, C, H, W)
|
|
- cache: Values needed for the backward pass
|
|
"""
|
|
out, cache = None, None
|
|
|
|
###########################################################################
|
|
    # TODO: Implement the forward pass for spatial batch normalization.
    #
    # HINT: You can implement spatial batch normalization by calling the
    # vanilla version of batch normalization you implemented above.
    # Your implementation should be very short; ours is less than five lines.
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
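    # A minimal sketch: fold N, H, W into one axis so that each channel acts
    # as a feature, then reuse the vanilla batchnorm_forward defined above.
    N, C, H, W = x.shape
    x_flat = x.transpose(0, 2, 3, 1).reshape(-1, C)
    out_flat, cache = batchnorm_forward(x_flat, gamma, beta, bn_param)
    out = out_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)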
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
|
|
return out, cache
|
|
|
|
|
|
def spatial_batchnorm_backward(dout, cache):
|
|
"""
|
|
Computes the backward pass for spatial batch normalization.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives, of shape (N, C, H, W)
|
|
- cache: Values from the forward pass
|
|
|
|
Returns a tuple of:
|
|
- dx: Gradient with respect to inputs, of shape (N, C, H, W)
|
|
- dgamma: Gradient with respect to scale parameter, of shape (C,)
|
|
- dbeta: Gradient with respect to shift parameter, of shape (C,)
|
|
"""
|
|
dx, dgamma, dbeta = None, None, None
|
|
|
|
###########################################################################
|
|
    # TODO: Implement the backward pass for spatial batch normalization.
    #
    # HINT: You can implement spatial batch normalization by calling the
    # vanilla version of batch normalization you implemented above.
    # Your implementation should be very short; ours is less than five lines.
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
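    # A minimal sketch: reshape exactly as in the forward pass and reuse the
    # vanilla batchnorm_backward defined above.
    N, C, H, W = dout.shape
    dout_flat = dout.transpose(0, 2, 3, 1).reshape(-1, C)
    dx_flat, dgamma, dbeta = batchnorm_backward(dout_flat, cache)
    dx = dx_flat.reshape(N, H, W, C).transpose(0, 3, 1, 2)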
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
|
|
return dx, dgamma, dbeta
|
|
|
|
|
|
def spatial_groupnorm_forward(x, gamma, beta, G, gn_param):
|
|
"""
|
|
Computes the forward pass for spatial group normalization.
|
|
In contrast to layer normalization, group normalization splits each entry
|
|
in the data into G contiguous pieces, which it then normalizes independently.
|
|
    Per-feature shifting and scaling are then applied to the data in a manner
    identical to that of batch normalization and layer normalization.
|
|
|
|
Inputs:
|
|
- x: Input data of shape (N, C, H, W)
|
|
- gamma: Scale parameter, of shape (C,)
|
|
- beta: Shift parameter, of shape (C,)
|
|
    - G: Integer number of groups to split into, should be a divisor of C
|
|
- gn_param: Dictionary with the following keys:
|
|
- eps: Constant for numeric stability
|
|
|
|
Returns a tuple of:
|
|
- out: Output data, of shape (N, C, H, W)
|
|
- cache: Values needed for the backward pass
|
|
"""
|
|
out, cache = None, None
|
|
    eps = gn_param.get('eps', 1e-5)
|
|
###########################################################################
|
|
    # TODO: Implement the forward pass for spatial group normalization.
    # This will be extremely similar to the layer norm implementation.
    # In particular, think about how you could transform the matrix so that
    # the bulk of the code is similar to both train-time batch normalization
    # and layer normalization!
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
pass
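    # A minimal sketch: split the channels into G groups, normalize each
    # (sample, group) block over its own entries, then scale and shift per
    # channel. The cache layout is an assumption shared with the backward
    # sketch below.
    N, C, H, W = x.shape
    x_group = x.reshape(N, G, C // G, H, W)
    mean = x_group.mean(axis=(2, 3, 4), keepdims=True)
    var = x_group.var(axis=(2, 3, 4), keepdims=True)
    x_hat = ((x_group - mean) / np.sqrt(var + eps)).reshape(N, C, H, W)
    gamma_r = gamma.reshape(1, C, 1, 1)
    beta_r = beta.reshape(1, C, 1, 1)
    out = gamma_r * x_hat + beta_r
    cache = (x_hat, var, eps, gamma_r, G)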
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return out, cache
|
|
|
|
|
|
def spatial_groupnorm_backward(dout, cache):
|
|
"""
|
|
Computes the backward pass for spatial group normalization.
|
|
|
|
Inputs:
|
|
- dout: Upstream derivatives, of shape (N, C, H, W)
|
|
- cache: Values from the forward pass
|
|
|
|
Returns a tuple of:
|
|
- dx: Gradient with respect to inputs, of shape (N, C, H, W)
|
|
- dgamma: Gradient with respect to scale parameter, of shape (C,)
|
|
- dbeta: Gradient with respect to shift parameter, of shape (C,)
|
|
"""
|
|
dx, dgamma, dbeta = None, None, None
|
|
|
|
###########################################################################
|
|
    # TODO: Implement the backward pass for spatial group normalization.
    # This will be extremely similar to the layer norm implementation.
|
|
###########################################################################
|
|
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
|
|
pass
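    # A minimal sketch assuming the cache layout from the
    # spatial_groupnorm_forward sketch above: (x_hat, var, eps, gamma_r, G).
    # dgamma and dbeta are returned with shape (C,), as in the docstring.
    x_hat, var, eps, gamma_r, G = cache
    N, C, H, W = dout.shape
    m = (C // G) * H * W  # number of entries normalized together per group

    dbeta = np.sum(dout, axis=(0, 2, 3))
    dgamma = np.sum(dout * x_hat, axis=(0, 2, 3))

    dx_hat = (dout * gamma_r).reshape(N, G, C // G, H, W)
    x_hat_g = x_hat.reshape(N, G, C // G, H, W)
    std = np.sqrt(var + eps)  # (N, G, 1, 1, 1)
    dx_g = (m * dx_hat
            - np.sum(dx_hat, axis=(2, 3, 4), keepdims=True)
            - x_hat_g * np.sum(dx_hat * x_hat_g, axis=(2, 3, 4), keepdims=True)
            ) / (m * std)
    dx = dx_g.reshape(N, C, H, W)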
|
|
|
|
|
|
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
|
|
###########################################################################
|
|
# END OF YOUR CODE #
|
|
###########################################################################
|
|
return dx, dgamma, dbeta
|
|
|
|
|
|
def svm_loss(x, y):
|
|
"""
|
|
    Computes the loss and gradient for multiclass SVM classification.
|
|
|
|
Inputs:
|
|
- x: Input data, of shape (N, C) where x[i, j] is the score for the jth
|
|
class for the ith input.
|
|
- y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
|
|
0 <= y[i] < C
|
|
|
|
Returns a tuple of:
|
|
- loss: Scalar giving the loss
|
|
- dx: Gradient of the loss with respect to x
|
|
"""
|
|
N = x.shape[0]
|
|
correct_class_scores = x[np.arange(N), y]
|
|
margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
|
|
margins[np.arange(N), y] = 0
|
|
loss = np.sum(margins) / N
|
|
num_pos = np.sum(margins > 0, axis=1)
|
|
dx = np.zeros_like(x)
|
|
dx[margins > 0] = 1
|
|
dx[np.arange(N), y] -= num_pos
|
|
dx /= N
|
|
return loss, dx
|
|
|
|
|
|
def softmax_loss(x, y):
|
|
"""
|
|
Computes the loss and gradient for softmax classification.
|
|
|
|
Inputs:
|
|
- x: Input data, of shape (N, C) where x[i, j] is the score for the jth
|
|
class for the ith input.
|
|
- y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
|
|
0 <= y[i] < C
|
|
|
|
Returns a tuple of:
|
|
- loss: Scalar giving the loss
|
|
- dx: Gradient of the loss with respect to x
|
|
"""
|
|
shifted_logits = x - np.max(x, axis=1, keepdims=True)
|
|
Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
|
|
log_probs = shifted_logits - np.log(Z)
|
|
probs = np.exp(log_probs)
|
|
N = x.shape[0]
|
|
loss = -np.sum(log_probs[np.arange(N), y]) / N
|
|
dx = probs.copy()
|
|
dx[np.arange(N), y] -= 1
|
|
dx /= N
|
|
return loss, dx
|