DaSE-Computer-Vision-2021
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

186 lines
8.1 KiB

  1. from builtins import range
  2. from builtins import object
  3. import numpy as np
  4. from past.builtins import xrange
  5. class KNearestNeighbor(object):
  6. """ a kNN classifier with L2 distance """
  7. def __init__(self):
  8. pass
  9. def train(self, X, y):
  10. """
  11. Train the classifier. For k-nearest neighbors this is just
  12. memorizing the training data.
  13. Inputs:
  14. - X: A numpy array of shape (num_train, D) containing the training data
  15. consisting of num_train samples each of dimension D.
  16. - y: A numpy array of shape (N,) containing the training labels, where
  17. y[i] is the label for X[i].
  18. """
  19. self.X_train = X
  20. self.y_train = y
  21. def predict(self, X, k=1, num_loops=0):
  22. """
  23. Predict labels for test data using this classifier.
  24. Inputs:
  25. - X: A numpy array of shape (num_test, D) containing test data consisting
  26. of num_test samples each of dimension D.
  27. - k: The number of nearest neighbors that vote for the predicted labels.
  28. - num_loops: Determines which implementation to use to compute distances
  29. between training points and testing points.
  30. Returns:
  31. - y: A numpy array of shape (num_test,) containing predicted labels for the
  32. test data, where y[i] is the predicted label for the test point X[i].
  33. """
  34. if num_loops == 0:
  35. dists = self.compute_distances_no_loops(X)
  36. elif num_loops == 1:
  37. dists = self.compute_distances_one_loop(X)
  38. elif num_loops == 2:
  39. dists = self.compute_distances_two_loops(X)
  40. else:
  41. raise ValueError('Invalid value %d for num_loops' % num_loops)
  42. return self.predict_labels(dists, k=k)
  43. def compute_distances_two_loops(self, X):
  44. """
  45. Compute the distance between each test point in X and each training point
  46. in self.X_train using a nested loop over both the training data and the
  47. test data.
  48. Inputs:
  49. - X: A numpy array of shape (num_test, D) containing test data.
  50. Returns:
  51. - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
  52. is the Euclidean distance between the ith test point and the jth training
  53. point.
  54. """
  55. num_test = X.shape[0]
  56. num_train = self.X_train.shape[0]
  57. dists = np.zeros((num_test, num_train))
  58. for i in range(num_test):
  59. for j in range(num_train):
  60. #####################################################################
  61. # TODO:
  62. #计算第i个测试点与第j个训练点之间的l2距离,并将结果存储在dists[i,j]中。
  63. #你不应使用循环和np.linalg.norm()函数。
  64. #####################################################################
  65. # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  66. pass
  67. # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  68. return dists
  69. def compute_distances_one_loop(self, X):
  70. """
  71. Compute the distance between each test point in X and each training point
  72. in self.X_train using a single loop over the test data.
  73. Input / Output: Same as compute_distances_two_loops
  74. """
  75. num_test = X.shape[0]
  76. num_train = self.X_train.shape[0]
  77. dists = np.zeros((num_test, num_train))
  78. for i in range(num_test):
  79. #######################################################################
  80. # TODO:
  81. #计算第i个测试点与所有训练点之间的l2距离,并将结果存储在dists[i,:]中。
  82. #不要使用np.linalg.norm()。
  83. #######################################################################
  84. # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  85. # 注意np.sum中要加上维度axis=1才能得出正确的结果
  86. # 关于axis的介绍
  87. # https://zhuanlan.zhihu.com/p/30960190
  88. # 以及np.sum的介绍
  89. # https://docs.scipy.org/doc/numpy/reference/generated/numpy.sum.html
  90. # self.X_train (5000,3072) X[i] (1,3072) (self.X_train - X[i]) (5000,3072)
  91. pass
  92. # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  93. return dists
  94. def compute_distances_no_loops(self, X):
  95. """
  96. Compute the distance between each test point in X and each training point
  97. in self.X_train using no explicit loops.
  98. Input / Output: Same as compute_distances_two_loops
  99. """
  100. num_test = X.shape[0]
  101. num_train = self.X_train.shape[0]
  102. dists = np.zeros((num_test, num_train))
  103. #########################################################################
  104. # TODO:
  105. #在不使用任何显式循环的情况下,计算所有测试点和所有训练点之间的l2距离,
  106. #并将结果存储在dists中。
  107. #您应该仅使用基本的数组操作来实现此功能。
  108. #不可以使用scipy中的函数以及函数np.linalg.norm()。
  109. #
  110. #提示:尝试使用矩阵乘法和广播总和来计算l2距离。
  111. #########################################################################
  112. # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  113. # (x-y)^2 = x^2 + y^2 - 2xy
  114. # reshape是为了让两个矩阵有个维度为1,这样子便可进行广播
  115. pass
  116. # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  117. return dists
  118. def predict_labels(self, dists, k=1):
  119. """
  120. Given a matrix of distances between test points and training points,
  121. predict a label for each test point.
  122. Inputs:
  123. - dists: A numpy array of shape (num_test, num_train) where dists[i, j]
  124. gives the distance betwen the ith test point and the jth training point.
  125. Returns:
  126. - y: A numpy array of shape (num_test,) containing predicted labels for the
  127. test data, where y[i] is the predicted label for the test point X[i].
  128. """
  129. num_test = dists.shape[0]
  130. y_pred = np.zeros(num_test)
  131. for i in range(num_test):
  132. # A list of length k storing the labels of the k nearest neighbors to
  133. # the ith test point.
  134. closest_y = []
  135. #########################################################################
  136. # TODO:
  137. #使用距离矩阵查找第i个测试点的k个最近邻居,
  138. #并使用self.y_train查找这些邻居的标签。
  139. #将这些标签存储在closest_y中。
  140. #
  141. #提示:查阅函数numpy.argsort。
  142. #########################################################################
  143. # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  144. # numpy.argsort 返回排序好的数列的索引
  145. pass
  146. # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  147. #########################################################################
  148. # TODO:
  149. #
  150. #现在,你已经找到了k个最近邻的标签,接着需要在closest_y中找到最可能的标签。 #将此标签存储在y_pred [i]中。如果有两个标签可能性一样的话选择索引更小的那个。
  151. #########################################################################
  152. # *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  153. y_pred[i] = np.bincount(closest_y).argmax()
  154. # *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
  155. return y_pred