From 0643586cb7aab37a852800f185b5ab9d17aa8678 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=AD=99=E7=A7=8B=E5=AE=9E?= <10185501402@stu.ecnu.edu.cn> Date: Sun, 5 Sep 2021 21:56:13 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?= =?UTF-8?q?=20'assignment1/classifiers'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- assignment1/classifiers/__init__.py | 2 + assignment1/classifiers/k_nearest_neighbor.py | 186 ++++++++++++++++++++++ assignment1/classifiers/linear_classifier.py | 136 ++++++++++++++++ assignment1/classifiers/linear_svm.py | 97 ++++++++++++ assignment1/classifiers/neural_net.py | 220 ++++++++++++++++++++++++++ assignment1/classifiers/softmax.py | 65 ++++++++ 6 files changed, 706 insertions(+) create mode 100644 assignment1/classifiers/__init__.py create mode 100644 assignment1/classifiers/k_nearest_neighbor.py create mode 100644 assignment1/classifiers/linear_classifier.py create mode 100644 assignment1/classifiers/linear_svm.py create mode 100644 assignment1/classifiers/neural_net.py create mode 100644 assignment1/classifiers/softmax.py diff --git a/assignment1/classifiers/__init__.py b/assignment1/classifiers/__init__.py new file mode 100644 index 0000000..1c3a5c5 --- /dev/null +++ b/assignment1/classifiers/__init__.py @@ -0,0 +1,2 @@ +from daseCV.classifiers.k_nearest_neighbor import * +from daseCV.classifiers.linear_classifier import * diff --git a/assignment1/classifiers/k_nearest_neighbor.py b/assignment1/classifiers/k_nearest_neighbor.py new file mode 100644 index 0000000..fecf995 --- /dev/null +++ b/assignment1/classifiers/k_nearest_neighbor.py @@ -0,0 +1,186 @@ +from builtins import range +from builtins import object +import numpy as np +from past.builtins import xrange + + +class KNearestNeighbor(object): + """ a kNN classifier with L2 distance """ + + def __init__(self): + pass + + def train(self, X, y): + """ + Train the classifier. For k-nearest neighbors this is just + memorizing the training data. + + Inputs: + - X: A numpy array of shape (num_train, D) containing the training data + consisting of num_train samples each of dimension D. + - y: A numpy array of shape (N,) containing the training labels, where + y[i] is the label for X[i]. + """ + self.X_train = X + self.y_train = y + + def predict(self, X, k=1, num_loops=0): + """ + Predict labels for test data using this classifier. + + Inputs: + - X: A numpy array of shape (num_test, D) containing test data consisting + of num_test samples each of dimension D. + - k: The number of nearest neighbors that vote for the predicted labels. + - num_loops: Determines which implementation to use to compute distances + between training points and testing points. + + Returns: + - y: A numpy array of shape (num_test,) containing predicted labels for the + test data, where y[i] is the predicted label for the test point X[i]. + """ + if num_loops == 0: + dists = self.compute_distances_no_loops(X) + elif num_loops == 1: + dists = self.compute_distances_one_loop(X) + elif num_loops == 2: + dists = self.compute_distances_two_loops(X) + else: + raise ValueError('Invalid value %d for num_loops' % num_loops) + + return self.predict_labels(dists, k=k) + + def compute_distances_two_loops(self, X): + """ + Compute the distance between each test point in X and each training point + in self.X_train using a nested loop over both the training data and the + test data. + + Inputs: + - X: A numpy array of shape (num_test, D) containing test data. 
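+
+        As a sketch, the entry dists[i, j] should hold the L2 (Euclidean) distance
+
+            np.sqrt(np.sum((X[i] - self.X_train[j]) ** 2))
+
+        built from elementwise array operations only (np.linalg.norm() is not
+        allowed in the TODO below).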
+
+        Returns:
+        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
+          is the Euclidean distance between the ith test point and the jth training
+          point.
+        """
+        num_test = X.shape[0]
+        num_train = self.X_train.shape[0]
+        dists = np.zeros((num_test, num_train))
+        for i in range(num_test):
+            for j in range(num_train):
+                #####################################################################
+                # TODO:
+                # Compute the L2 distance between the ith test point and the jth
+                # training point, and store the result in dists[i, j]. You should
+                # not use a loop over dimensions or np.linalg.norm().
+                #####################################################################
+                # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+                pass
+
+                # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+        return dists
+
+    def compute_distances_one_loop(self, X):
+        """
+        Compute the distance between each test point in X and each training point
+        in self.X_train using a single loop over the test data.
+
+        Input / Output: Same as compute_distances_two_loops
+        """
+        num_test = X.shape[0]
+        num_train = self.X_train.shape[0]
+        dists = np.zeros((num_test, num_train))
+        for i in range(num_test):
+            #######################################################################
+            # TODO:
+            # Compute the L2 distance between the ith test point and all training
+            # points, and store the result in dists[i, :]. Do not use
+            # np.linalg.norm().
+            #######################################################################
+            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+            # Note: np.sum needs axis=1 here to reduce over the feature dimension
+            # and produce one distance per training point.
+            # An introduction to the axis argument:
+            # https://zhuanlan.zhihu.com/p/30960190
+            # and the np.sum documentation:
+            # https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html
+
+            # self.X_train (5000, 3072); X[i] (1, 3072); (self.X_train - X[i]) (5000, 3072)
+            pass
+
+            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+        return dists
+
+    def compute_distances_no_loops(self, X):
+        """
+        Compute the distance between each test point in X and each training point
+        in self.X_train using no explicit loops.
+
+        Input / Output: Same as compute_distances_two_loops
+        """
+        num_test = X.shape[0]
+        num_train = self.X_train.shape[0]
+        dists = np.zeros((num_test, num_train))
+        #########################################################################
+        # TODO:
+        # Compute the L2 distance between all test points and all training
+        # points without using any explicit loops, and store the result in
+        # dists.
+        # You should implement this using only basic array operations; you may
+        # not use functions from scipy or the function np.linalg.norm().
+        #
+        # HINT: Try to formulate the L2 distance using matrix multiplication
+        # and two broadcast sums.
+        #########################################################################
+        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        # (x - y)^2 = x^2 + y^2 - 2xy
+        # The reshape gives one of the matrices a singleton dimension so that
+        # broadcasting applies.
+        pass
+
+        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+        return dists
+
+    def predict_labels(self, dists, k=1):
+        """
+        Given a matrix of distances between test points and training points,
+        predict a label for each test point.
+
+        Inputs:
+        - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
+          gives the distance between the ith test point and the jth training point.
+
+        Returns:
+        - y: A numpy array of shape (num_test,) containing predicted labels for the
+          test data, where y[i] is the predicted label for the test point X[i].
+        """
+        num_test = dists.shape[0]
+        y_pred = np.zeros(num_test)
+        for i in range(num_test):
+            # A list of length k storing the labels of the k nearest neighbors to
+            # the ith test point.
+            closest_y = []
+            #########################################################################
+            # TODO:
+            # Use the distance matrix to find the k nearest neighbors of the ith
+            # test point, and use self.y_train to find the labels of these
+            # neighbors. Store these labels in closest_y.
+            #
+            # HINT: Look up the function numpy.argsort.
+            #########################################################################
+            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+            # numpy.argsort returns the indices that would sort the row, so the
+            # first k indices give the k nearest training points.
+            closest_y = self.y_train[np.argsort(dists[i])[:k]]
+
+            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+            #########################################################################
+            # TODO:
+            # Now that you have found the labels of the k nearest neighbors, you
+            # need to find the most common label in closest_y. Store this label
+            # in y_pred[i]. Break ties by choosing the smaller label.
+            #########################################################################
+            # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+            # np.bincount counts label occurrences; argmax returns the smallest
+            # index on ties, matching the tie-breaking rule above.
+            y_pred[i] = np.bincount(closest_y).argmax()
+
+            # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        return y_pred
diff --git a/assignment1/classifiers/linear_classifier.py b/assignment1/classifiers/linear_classifier.py
new file mode 100644
index 0000000..5280792
--- /dev/null
+++ b/assignment1/classifiers/linear_classifier.py
@@ -0,0 +1,136 @@
+from __future__ import print_function
+
+from builtins import range
+from builtins import object
+import numpy as np
+from daseCV.classifiers.linear_svm import *
+from daseCV.classifiers.softmax import *
+from past.builtins import xrange
+
+
+class LinearClassifier(object):
+
+    def __init__(self):
+        self.W = None
+
+    def train(self, X, y, learning_rate=1e-3, reg=1e-5, num_iters=100,
+              batch_size=200, verbose=False):
+        """
+        Train this linear classifier using stochastic gradient descent.
+
+        Inputs:
+        - X: A numpy array of shape (N, D) containing training data; there are N
+          training samples each of dimension D.
+        - y: A numpy array of shape (N,) containing training labels; y[i] = c
+          means that X[i] has label 0 <= c < C for C classes.
+        - learning_rate: (float) learning rate for optimization.
+        - reg: (float) regularization strength.
+        - num_iters: (integer) number of steps to take when optimizing.
+        - batch_size: (integer) number of training examples to use at each step.
+        - verbose: (boolean) If true, print progress during optimization.
+
+        Outputs:
+        A list containing the value of the loss function at each training iteration.
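+
+        Usage sketch (the arrays and hyperparameter values here are only
+        placeholders):
+
+            svm = LinearSVM()
+            loss_hist = svm.train(X_train, y_train, learning_rate=1e-7,
+                                  reg=2.5e4, num_iters=1500, verbose=True)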
+ """ + num_train, dim = X.shape + num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes + if self.W is None: + # lazily initialize W + self.W = 0.001 * np.random.randn(dim, num_classes) + + # Run stochastic gradient descent to optimize W + loss_history = [] + for it in range(num_iters): + X_batch = None + y_batch = None + + ######################################################################### + # TODO: + # 从训练数据及其相应的标签中采样batch_size大小的样本,以用于本轮梯度下降。 + # 将数据存储在X_batch中,并将其相应的标签存储在y_batch中: + # 采样后,X_batch的形状为(batch_size,dim),y_batch的形状(batch_size,) + # + # 提示:使用np.random.choice生成索引。 可重复的采样比不可重复的采样要快一点。 + ######################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + # evaluate loss and gradient + loss, grad = self.loss(X_batch, y_batch, reg) + loss_history.append(loss) + + # perform parameter update + ######################################################################### + # TODO: + # 使用梯度和学习率更新权重。 + ######################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + if verbose and it % 100 == 0: + print('iteration %d / %d: loss %f' % (it, num_iters, loss)) + + return loss_history + + def predict(self, X): + """ + Use the trained weights of this linear classifier to predict labels for + data points. + + Inputs: + - X: A numpy array of shape (N, D) containing training data; there are N + training samples each of dimension D. + + Returns: + - y_pred: Predicted labels for the data in X. y_pred is a 1-dimensional + array of length N, and each element is an integer giving the predicted + class. + """ + y_pred = np.zeros(X.shape[0]) + ########################################################################### + # TODO: + # 实现此方法。将预测的标签存储在y_pred中。 + ########################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + return y_pred + + def loss(self, X_batch, y_batch, reg): + """ + Compute the loss function and its derivative. + Subclasses will override this. + + Inputs: + - X_batch: A numpy array of shape (N, D) containing a minibatch of N + data points; each point has dimension D. + - y_batch: A numpy array of shape (N,) containing labels for the minibatch. + - reg: (float) regularization strength. 
+ + Returns: A tuple containing: + - loss as a single float + - gradient with respect to self.W; an array of the same shape as W + """ + pass + + +class LinearSVM(LinearClassifier): + """ A subclass that uses the Multiclass SVM loss function """ + + def loss(self, X_batch, y_batch, reg): + return svm_loss_vectorized(self.W, X_batch, y_batch, reg) + + +class Softmax(LinearClassifier): + """ A subclass that uses the Softmax + Cross-entropy loss function """ + + def loss(self, X_batch, y_batch, reg): + return softmax_loss_vectorized(self.W, X_batch, y_batch, reg) diff --git a/assignment1/classifiers/linear_svm.py b/assignment1/classifiers/linear_svm.py new file mode 100644 index 0000000..f01189d --- /dev/null +++ b/assignment1/classifiers/linear_svm.py @@ -0,0 +1,97 @@ +from builtins import range +import numpy as np +from random import shuffle +from past.builtins import xrange + +def svm_loss_naive(W, X, y, reg): + """ + Structured SVM loss function, naive implementation (with loops). + + Inputs have dimension D, there are C classes, and we operate on minibatches + of N examples. + + Inputs: + - W: A numpy array of shape (D, C) containing weights. + - X: A numpy array of shape (N, D) containing a minibatch of data. + - y: A numpy array of shape (N,) containing training labels; y[i] = c means + that X[i] has label c, where 0 <= c < C. + - reg: (float) regularization strength + + Returns a tuple of: + - loss as single float + - gradient with respect to weights W; an array of same shape as W + """ + dW = np.zeros(W.shape) # initialize the gradient as zero + + # compute the loss and the gradient + num_classes = W.shape[1] + num_train = X.shape[0] + loss = 0.0 + for i in range(num_train): + scores = X[i].dot(W) + correct_class_score = scores[y[i]] + for j in range(num_classes): + if j == y[i]: + continue + margin = scores[j] - correct_class_score + 1 # note delta = 1 + if margin > 0: + loss += margin + dW[:,j] += X[i] # dW计算 + dW[:,y[i]] += -X[i] # dW计算 + + # Right now the loss is a sum over all training examples, but we want it + # to be an average instead so we divide by num_train. + loss /= num_train + + # Add regularization to the loss. + loss += reg * np.sum(W * W) + + ############################################################################# + # TODO: + # 计算损失函数的梯度并将其存储为dW。 + # 与其先计算损失再计算梯度,还不如在计算损失的同时计算梯度更简单。 + # 因此,您可能需要修改上面的一些代码来计算梯度。 + ############################################################################# + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + return loss, dW + + + +def svm_loss_vectorized(W, X, y, reg): + """ + Structured SVM loss function, vectorized implementation. + + Inputs and outputs are the same as svm_loss_naive. 
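+
+    One common formulation (a sketch, not the only valid approach), with
+    N = X.shape[0] and scores = X.dot(W) of shape (N, C):
+
+        margins = np.maximum(0, scores - scores[np.arange(N), y][:, None] + 1)
+        margins[np.arange(N), y] = 0
+        loss = margins.sum() / N + reg * np.sum(W * W)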
+ """ + loss = 0.0 + dW = np.zeros(W.shape) # initialize the gradient as zero + + ############################################################################# + # TODO: + # 实现一个向量化SVM损失计算方法,并将结果存储到loss中 + ############################################################################# + + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + ############################################################################# + # TODO: + # 实现一个向量化的梯度计算方法,并将结果存储到dW中 + # + # 提示:与其从头计算梯度,不如利用一些计算loss时的中间变量 + ############################################################################# + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + return loss, dW diff --git a/assignment1/classifiers/neural_net.py b/assignment1/classifiers/neural_net.py new file mode 100644 index 0000000..d0f9018 --- /dev/null +++ b/assignment1/classifiers/neural_net.py @@ -0,0 +1,220 @@ +from __future__ import print_function + +from builtins import range +from builtins import object +import numpy as np +import matplotlib.pyplot as plt +from past.builtins import xrange + +class TwoLayerNet(object): + """ + A two-layer fully-connected neural network. The net has an input dimension of + N, a hidden layer dimension of H, and performs classification over C classes. + We train the network with a softmax loss function and L2 regularization on the + weight matrices. The network uses a ReLU nonlinearity after the first fully + connected layer. + + In other words, the network has the following architecture: + + input - fully connected layer - ReLU - fully connected layer - softmax + + The outputs of the second fully-connected layer are the scores for each class. + """ + + def __init__(self, input_size, hidden_size, output_size, std=1e-4): + """ + Initialize the model. Weights are initialized to small random values and + biases are initialized to zero. Weights and biases are stored in the + variable self.params, which is a dictionary with the following keys: + + W1: First layer weights; has shape (D, H) + b1: First layer biases; has shape (H,) + W2: Second layer weights; has shape (H, C) + b2: Second layer biases; has shape (C,) + + Inputs: + - input_size: The dimension D of the input data. + - hidden_size: The number of neurons H in the hidden layer. + - output_size: The number of classes C. + """ + self.params = {} + self.params['W1'] = std * np.random.randn(input_size, hidden_size) + self.params['b1'] = np.zeros(hidden_size) + self.params['W2'] = std * np.random.randn(hidden_size, output_size) + self.params['b2'] = np.zeros(output_size) + + def loss(self, X, y=None, reg=0.0): + """ + Compute the loss and gradients for a two layer fully connected neural + network. + + Inputs: + - X: Input data of shape (N, D). Each X[i] is a training sample. + - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is + an integer in the range 0 <= y[i] < C. This parameter is optional; if it + is not passed then we only return scores, and if it is passed then we + instead return the loss and gradients. + - reg: Regularization strength. + + Returns: + If y is None, return a matrix scores of shape (N, C) where scores[i, c] is + the score for class c on input X[i]. + + If y is not None, instead return a tuple of: + - loss: Loss (data loss and regularization loss) for this batch of training + samples. 
+        - grads: Dictionary mapping parameter names to gradients of those parameters
+          with respect to the loss function; has the same keys as self.params.
+        """
+        # Unpack variables from the params dictionary
+        W1, b1 = self.params['W1'], self.params['b1']
+        W2, b2 = self.params['W2'], self.params['b2']
+        N, D = X.shape
+
+        # Compute the forward pass
+        scores = None
+        #############################################################################
+        # TODO: Perform the forward pass, computing the class scores for the input
+        # data. Store the result in the scores variable, which should be an array
+        # of shape (N, C).
+        #############################################################################
+        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        pass
+
+        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        # If the targets are not given then jump out, we're done
+        if y is None:
+            return scores
+
+        # Compute the loss
+        loss = None
+        #############################################################################
+        # TODO: Finish the forward pass and compute the loss. This should include
+        # both the data loss and the L2 regularization terms for W1 and W2. Store
+        # the result in the variable loss, which should be a scalar. Use the
+        # Softmax loss function.
+        #############################################################################
+        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        pass
+
+        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        # Backward pass: compute gradients
+        grads = {}
+        #############################################################################
+        # TODO: Compute the backward pass, computing the gradients of the weights
+        # and biases. Store the results in the grads dictionary. For example,
+        # grads['W1'] should hold the gradient of W1, a matrix of the same size
+        # as W1.
+        #############################################################################
+        # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        pass
+
+        # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+        return loss, grads
+
+    def train(self, X, y, X_val, y_val,
+              learning_rate=1e-3, learning_rate_decay=0.95,
+              reg=5e-6, num_iters=100,
+              batch_size=200, verbose=False):
+        """
+        Train this neural network using stochastic gradient descent.
+
+        Inputs:
+        - X: A numpy array of shape (N, D) giving training data.
+        - y: A numpy array of shape (N,) giving training labels; y[i] = c means that
+          X[i] has label c, where 0 <= c < C.
+        - X_val: A numpy array of shape (N_val, D) giving validation data.
+        - y_val: A numpy array of shape (N_val,) giving validation labels.
+        - learning_rate: Scalar giving learning rate for optimization.
+        - learning_rate_decay: Scalar giving factor used to decay the learning rate
+          after each epoch.
+        - reg: Scalar giving regularization strength.
+        - num_iters: Number of steps to take when optimizing.
+        - batch_size: Number of training examples to use per step.
+        - verbose: boolean; if true print progress during optimization.
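+
+        Usage sketch (the arrays and hyperparameter values here are only
+        placeholders):
+
+            net = TwoLayerNet(input_size=3072, hidden_size=50, output_size=10)
+            stats = net.train(X_train, y_train, X_val, y_val,
+                              num_iters=1000, batch_size=200,
+                              learning_rate=1e-4, learning_rate_decay=0.95,
+                              reg=0.25, verbose=True)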
+ """ + num_train = X.shape[0] + iterations_per_epoch = max(num_train / batch_size, 1) + + # Use SGD to optimize the parameters in self.model + loss_history = [] + train_acc_history = [] + val_acc_history = [] + + for it in range(num_iters): + X_batch = None + y_batch = None + + ######################################################################### + # TODO: 创建一个随机的数据和标签的mini-batch,存储在X_batch和y_batch中。 + ######################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + # Compute loss and gradients using the current minibatch + loss, grads = self.loss(X_batch, y=y_batch, reg=reg) + loss_history.append(loss) + + ######################################################################### + # TODO: 使用grads字典中的梯度来更新网络参数(参数存储在字典self.params中) + # 使用随机梯度下降法。 + ######################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + if verbose and it % 100 == 0: + print('iteration %d / %d: loss %f' % (it, num_iters, loss)) + + # Every epoch, check train and val accuracy and decay learning rate. + if it % iterations_per_epoch == 0: + # Check accuracy + train_acc = (self.predict(X_batch) == y_batch).mean() + val_acc = (self.predict(X_val) == y_val).mean() + train_acc_history.append(train_acc) + val_acc_history.append(val_acc) + + # Decay learning rate + learning_rate *= learning_rate_decay + + return { + 'loss_history': loss_history, + 'train_acc_history': train_acc_history, + 'val_acc_history': val_acc_history, + } + + def predict(self, X): + """ + Use the trained weights of this two-layer network to predict labels for + data points. For each data point we predict scores for each of the C + classes, and assign each data point to the class with the highest score. + + Inputs: + - X: A numpy array of shape (N, D) giving N D-dimensional data points to + classify. + + Returns: + - y_pred: A numpy array of shape (N,) giving predicted labels for each of + the elements of X. For all i, y_pred[i] = c means that X[i] is predicted + to have class c, where 0 <= c < C. + """ + y_pred = None + + ########################################################################### + # TODO: Implement this function; it should be VERY simple! # + ########################################################################### + # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + pass + + # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)***** + + return y_pred diff --git a/assignment1/classifiers/softmax.py b/assignment1/classifiers/softmax.py new file mode 100644 index 0000000..7908a67 --- /dev/null +++ b/assignment1/classifiers/softmax.py @@ -0,0 +1,65 @@ +from builtins import range +import numpy as np +from random import shuffle +from past.builtins import xrange + +def softmax_loss_naive(W, X, y, reg): + """ + Softmax loss function, naive implementation (with loops) + + Inputs have dimension D, there are C classes, and we operate on minibatches + of N examples. + + Inputs: + - W: A numpy array of shape (D, C) containing weights. + - X: A numpy array of shape (N, D) containing a minibatch of data. + - y: A numpy array of shape (N,) containing training labels; y[i] = c means + that X[i] has label c, where 0 <= c < C. 
+    - reg: (float) regularization strength
+
+    Returns a tuple of:
+    - loss as single float
+    - gradient with respect to weights W; an array of same shape as W
+    """
+    # Initialize the loss and gradient to zero.
+    loss = 0.0
+    dW = np.zeros_like(W)
+
+    #############################################################################
+    # TODO: Compute the softmax loss and its gradient using explicit loops.
+    # Store the loss in loss and the gradient in dW.
+    # If you are not careful here, it is easy to run into numeric instability.
+    # Don't forget the regularization!
+    #############################################################################
+    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    pass
+
+    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    return loss, dW
+
+
+def softmax_loss_vectorized(W, X, y, reg):
+    """
+    Softmax loss function, vectorized version.
+
+    Inputs and outputs are the same as softmax_loss_naive.
+    """
+    # Initialize the loss and gradient to zero.
+    loss = 0.0
+    dW = np.zeros_like(W)
+
+    #############################################################################
+    # TODO: Compute the softmax loss and its gradient without explicit loops.
+    # Store the loss in loss and the gradient in dW.
+    # If you are not careful here, it is easy to run into numeric instability.
+    # Don't forget the regularization!
+    #############################################################################
+    # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    pass
+
+    # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
+
+    return loss, dW
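+
+
+# One common vectorized formulation, kept here as a commented sketch (assumes
+# integer class labels in y):
+#
+#     N = X.shape[0]
+#     scores = X.dot(W)
+#     scores -= scores.max(axis=1, keepdims=True)   # for numeric stability
+#     probs = np.exp(scores)
+#     probs /= probs.sum(axis=1, keepdims=True)
+#     loss = -np.log(probs[np.arange(N), y]).sum() / N + reg * np.sum(W * W)
+#     dscores = probs
+#     dscores[np.arange(N), y] -= 1
+#     dW = X.T.dot(dscores) / N + 2 * reg * W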