Upload files to 'assignment2/notebook_images/daseCV'

master
孙秋实 3 years ago
parent
commit
7cffad01fa
12 changed files with 2302 additions and 0 deletions
  1. BIN       assignment2/notebook_images/daseCV/__init__.py
  2. +262 -0   assignment2/notebook_images/daseCV/data_utils.py
  3. +293 -0   assignment2/notebook_images/daseCV/fast_layers.py
  4. +129 -0   assignment2/notebook_images/daseCV/gradient_check.py
  5. +54 -0    assignment2/notebook_images/daseCV/im2col.py
  6. +121 -0   assignment2/notebook_images/daseCV/im2col_cython.pyx
  7. +105 -0   assignment2/notebook_images/daseCV/layer_utils.py
  8. +793 -0   assignment2/notebook_images/daseCV/layers.py
  9. +159 -0   assignment2/notebook_images/daseCV/optim.py
  10. +14 -0   assignment2/notebook_images/daseCV/setup.py
  11. +299 -0  assignment2/notebook_images/daseCV/solver.py
  12. +73 -0   assignment2/notebook_images/daseCV/vis_utils.py

BIN
assignment2/notebook_images/daseCV/__init__.py View File


+ 262
- 0
assignment2/notebook_images/daseCV/data_utils.py View File

@@ -0,0 +1,262 @@
from __future__ import print_function
from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform
def load_pickle(f):
version = platform.python_version_tuple()
if version[0] == '2':
return pickle.load(f)
elif version[0] == '3':
return pickle.load(f, encoding='latin1')
raise ValueError("invalid python version: {}".format(version))
def load_CIFAR_batch(filename):
""" load single batch of cifar """
with open(filename, 'rb') as f:
datadict = load_pickle(f)
X = datadict['data']
Y = datadict['labels']
X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
Y = np.array(Y)
return X, Y
def load_CIFAR10(ROOT):
""" load all of cifar """
xs = []
ys = []
for b in range(1,6):
f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
X, Y = load_CIFAR_batch(f)
xs.append(X)
ys.append(Y)
Xtr = np.concatenate(xs)
Ytr = np.concatenate(ys)
del X, Y
Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
return Xtr, Ytr, Xte, Yte
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
subtract_mean=True):
"""
Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
it for classifiers. These are the same steps as we used for the SVM, but
condensed to a single function.
"""
# Load the raw CIFAR-10 data
cifar10_dir = 'daseCV/datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# Subsample the data
mask = list(range(num_training, num_training + num_validation))
X_val = X_train[mask]
y_val = y_train[mask]
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
# Normalize the data: subtract the mean image
if subtract_mean:
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
# Transpose so that channels come first
X_train = X_train.transpose(0, 3, 1, 2).copy()
X_val = X_val.transpose(0, 3, 1, 2).copy()
X_test = X_test.transpose(0, 3, 1, 2).copy()
# Package data into a dictionary
return {
'X_train': X_train, 'y_train': y_train,
'X_val': X_val, 'y_val': y_val,
'X_test': X_test, 'y_test': y_test,
}
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
"""
Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
TinyImageNet-200 have the same directory structure, so this can be used
to load any of them.
Inputs:
- path: String giving path to the directory to load.
- dtype: numpy datatype used to load the data.
- subtract_mean: Whether to subtract the mean training image.
Returns: A dictionary with the following entries:
- class_names: A list where class_names[i] is a list of strings giving the
WordNet names for class i in the loaded dataset.
- X_train: (N_tr, 3, 64, 64) array of training images
- y_train: (N_tr,) array of training labels
- X_val: (N_val, 3, 64, 64) array of validation images
- y_val: (N_val,) array of validation labels
- X_test: (N_test, 3, 64, 64) array of testing images.
- y_test: (N_test,) array of test labels; if test labels are not available
(such as in student code) then y_test will be None.
- mean_image: (3, 64, 64) array giving mean training image
"""
# First load wnids
with open(os.path.join(path, 'wnids.txt'), 'r') as f:
wnids = [x.strip() for x in f]
# Map wnids to integer labels
wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
# Use words.txt to get names for each class
with open(os.path.join(path, 'words.txt'), 'r') as f:
wnid_to_words = dict(line.split('\t') for line in f)
for wnid, words in wnid_to_words.items():
wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
class_names = [wnid_to_words[wnid] for wnid in wnids]
# Next load training data.
X_train = []
y_train = []
for i, wnid in enumerate(wnids):
if (i + 1) % 20 == 0:
print('loading training data for synset %d / %d'
% (i + 1, len(wnids)))
# To figure out the filenames we need to open the boxes file
boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
with open(boxes_file, 'r') as f:
filenames = [x.split('\t')[0] for x in f]
num_images = len(filenames)
X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
y_train_block = wnid_to_label[wnid] * \
np.ones(num_images, dtype=np.int64)
for j, img_file in enumerate(filenames):
img_file = os.path.join(path, 'train', wnid, 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
## grayscale file
img.shape = (64, 64, 1)
X_train_block[j] = img.transpose(2, 0, 1)
X_train.append(X_train_block)
y_train.append(y_train_block)
# We need to concatenate all training data
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)
# Next load validation data
with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
img_files = []
val_wnids = []
for line in f:
img_file, wnid = line.split('\t')[:2]
img_files.append(img_file)
val_wnids.append(wnid)
num_val = len(img_files)
y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'val', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_val[i] = img.transpose(2, 0, 1)
# Next load test images
# Students won't have test labels, so we need to iterate over files in the
# images directory.
img_files = os.listdir(os.path.join(path, 'test', 'images'))
X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'test', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_test[i] = img.transpose(2, 0, 1)
y_test = None
y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
if os.path.isfile(y_test_file):
with open(y_test_file, 'r') as f:
img_file_to_wnid = {}
for line in f:
line = line.split('\t')
img_file_to_wnid[line[0]] = line[1]
y_test = [wnid_to_label[img_file_to_wnid[img_file]]
for img_file in img_files]
y_test = np.array(y_test)
mean_image = X_train.mean(axis=0)
if subtract_mean:
X_train -= mean_image[None]
X_val -= mean_image[None]
X_test -= mean_image[None]
return {
'class_names': class_names,
'X_train': X_train,
'y_train': y_train,
'X_val': X_val,
'y_val': y_val,
'X_test': X_test,
'y_test': y_test,
'mean_image': mean_image,
}
def load_models(models_dir):
"""
Load saved models from disk. This will attempt to unpickle all files in a
directory; any files that give errors on unpickling (such as README.txt)
will be skipped.
Inputs:
- models_dir: String giving the path to a directory containing model files.
Each model file is a pickled dictionary with a 'model' field.
Returns:
A dictionary mapping model file names to models.
"""
models = {}
for model_file in os.listdir(models_dir):
with open(os.path.join(models_dir, model_file), 'rb') as f:
try:
models[model_file] = load_pickle(f)['model']
except pickle.UnpicklingError:
continue
return models
def load_imagenet_val(num=None):
"""Load a handful of validation images from ImageNet.
Inputs:
- num: Number of images to load (max of 25)
Returns:
- X: numpy array with shape [num, 224, 224, 3]
- y: numpy array of integer image labels, shape [num]
- class_names: dict mapping integer label to class name
"""
imagenet_fn = 'daseCV/datasets/imagenet_val_25.npz'
if not os.path.isfile(imagenet_fn):
print('file %s not found' % imagenet_fn)
print('Run the following:')
print('cd daseCV/datasets')
print('bash get_imagenet_val.sh')
assert False, 'Need to download imagenet_val_25.npz'
f = np.load(imagenet_fn)
X = f['X']
y = f['y']
class_names = f['label_map'].item()
if num is not None:
X = X[:num]
y = y[:num]
return X, y, class_names
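
For anyone skimming this commit, a minimal usage sketch of the loader above (a sketch only: it assumes the CIFAR-10 batches have already been downloaded to daseCV/datasets/cifar-10-batches-py and that the code is run from the assignment root so the relative path resolves):

    from daseCV.data_utils import get_CIFAR10_data

    # Loads the train/val/test splits, subtracts the mean image, and moves channels first.
    data = get_CIFAR10_data()
    for k, v in data.items():
        print(k, v.shape)  # e.g. X_train (49000, 3, 32, 32), y_train (49000,)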

+ 293
- 0
assignment2/notebook_images/daseCV/fast_layers.py View File

@@ -0,0 +1,293 @@
from __future__ import print_function
import numpy as np
import torch
import torch.nn as nn
try:
from daseCV.im2col_cython import col2im_cython, im2col_cython
from daseCV.im2col_cython import col2im_6d_cython
except ImportError:
print('run the following from the daseCV directory and try again:')
print('python setup.py build_ext --inplace')
print('You may also need to restart your iPython kernel')
from daseCV.im2col import *
def conv_forward_im2col(x, w, b, conv_param):
"""
A fast implementation of the forward pass for a convolutional layer
based on im2col and col2im.
"""
N, C, H, W = x.shape
num_filters, _, filter_height, filter_width = w.shape
stride, pad = conv_param['stride'], conv_param['pad']
# Check dimensions
assert (W + 2 * pad - filter_width) % stride == 0, 'width does not work'
assert (H + 2 * pad - filter_height) % stride == 0, 'height does not work'
# Create output
out_height = (H + 2 * pad - filter_height) // stride + 1
out_width = (W + 2 * pad - filter_width) // stride + 1
out = np.zeros((N, num_filters, out_height, out_width), dtype=x.dtype)
# x_cols = im2col_indices(x, w.shape[2], w.shape[3], pad, stride)
x_cols = im2col_cython(x, w.shape[2], w.shape[3], pad, stride)
res = w.reshape((w.shape[0], -1)).dot(x_cols) + b.reshape(-1, 1)
out = res.reshape(w.shape[0], out.shape[2], out.shape[3], x.shape[0])
out = out.transpose(3, 0, 1, 2)
cache = (x, w, b, conv_param, x_cols)
return out, cache
def conv_forward_pytorch(x, w, b, conv_param):
N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride, pad = conv_param['stride'], conv_param['pad']
layer = nn.Conv2d(C, F, (HH, WW), stride=stride, padding=pad)
layer.weight = nn.Parameter(torch.tensor(w))
layer.bias = nn.Parameter(torch.tensor(b))
tx = torch.tensor(x, requires_grad=True)
out = layer(tx)
cache = (x, w, b, conv_param, tx, out, layer)
return out, cache
def conv_backward_pytorch(dout, cache):
x, _, _, _, tx, out, layer = cache
out.backward(torch.tensor(dout))
dx = tx.grad.detach().numpy()
dw = layer.weight.grad.detach().numpy()
db = layer.bias.grad.detach().numpy()
return dx, dw, db
def conv_forward_strides(x, w, b, conv_param):
N, C, H, W = x.shape
F, _, HH, WW = w.shape
stride, pad = conv_param['stride'], conv_param['pad']
# Check dimensions
#assert (W + 2 * pad - WW) % stride == 0, 'width does not work'
#assert (H + 2 * pad - HH) % stride == 0, 'height does not work'
# Pad the input
p = pad
x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
# Figure out output dimensions
H += 2 * pad
W += 2 * pad
out_h = (H - HH) // stride + 1
out_w = (W - WW) // stride + 1
# Perform an im2col operation by picking clever strides
shape = (C, HH, WW, N, out_h, out_w)
strides = (H * W, W, 1, C * H * W, stride * W, stride)
strides = x.itemsize * np.array(strides)
x_stride = np.lib.stride_tricks.as_strided(x_padded,
shape=shape, strides=strides)
x_cols = np.ascontiguousarray(x_stride)
x_cols.shape = (C * HH * WW, N * out_h * out_w)
# Now all our convolutions are a big matrix multiply
res = w.reshape(F, -1).dot(x_cols) + b.reshape(-1, 1)
# Reshape the output
res.shape = (F, N, out_h, out_w)
out = res.transpose(1, 0, 2, 3)
# Be nice and return a contiguous array
# The old version of conv_forward_fast doesn't do this, so for a fair
# comparison we won't either
out = np.ascontiguousarray(out)
cache = (x, w, b, conv_param, x_cols)
return out, cache
def conv_backward_strides(dout, cache):
x, w, b, conv_param, x_cols = cache
stride, pad = conv_param['stride'], conv_param['pad']
N, C, H, W = x.shape
F, _, HH, WW = w.shape
_, _, out_h, out_w = dout.shape
db = np.sum(dout, axis=(0, 2, 3))
dout_reshaped = dout.transpose(1, 0, 2, 3).reshape(F, -1)
dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)
dx_cols = w.reshape(F, -1).T.dot(dout_reshaped)
dx_cols.shape = (C, HH, WW, N, out_h, out_w)
dx = col2im_6d_cython(dx_cols, N, C, H, W, HH, WW, pad, stride)
return dx, dw, db
def conv_backward_im2col(dout, cache):
"""
A fast implementation of the backward pass for a convolutional layer
based on im2col and col2im.
"""
x, w, b, conv_param, x_cols = cache
stride, pad = conv_param['stride'], conv_param['pad']
db = np.sum(dout, axis=(0, 2, 3))
num_filters, _, filter_height, filter_width = w.shape
dout_reshaped = dout.transpose(1, 2, 3, 0).reshape(num_filters, -1)
dw = dout_reshaped.dot(x_cols.T).reshape(w.shape)
dx_cols = w.reshape(num_filters, -1).T.dot(dout_reshaped)
# dx = col2im_indices(dx_cols, x.shape, filter_height, filter_width, pad, stride)
dx = col2im_cython(dx_cols, x.shape[0], x.shape[1], x.shape[2], x.shape[3],
filter_height, filter_width, pad, stride)
return dx, dw, db
conv_forward_fast = conv_forward_strides
conv_backward_fast = conv_backward_strides
def max_pool_forward_fast(x, pool_param):
"""
A fast implementation of the forward pass for a max pooling layer.
This chooses between the reshape method and the im2col method. If the pooling
regions are square and tile the input image, then we can use the reshape
method which is very fast. Otherwise we fall back on the im2col method, which
is not much faster than the naive method.
"""
N, C, H, W = x.shape
pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
stride = pool_param['stride']
same_size = pool_height == pool_width == stride
tiles = H % pool_height == 0 and W % pool_width == 0
if same_size and tiles:
out, reshape_cache = max_pool_forward_reshape(x, pool_param)
cache = ('reshape', reshape_cache)
else:
out, im2col_cache = max_pool_forward_im2col(x, pool_param)
cache = ('im2col', im2col_cache)
return out, cache
def max_pool_backward_fast(dout, cache):
"""
A fast implementation of the backward pass for a max pooling layer.
This switches between the reshape method and the im2col method depending on
which method was used to generate the cache.
"""
method, real_cache = cache
if method == 'reshape':
return max_pool_backward_reshape(dout, real_cache)
elif method == 'im2col':
return max_pool_backward_im2col(dout, real_cache)
else:
raise ValueError('Unrecognized method "%s"' % method)
def max_pool_forward_reshape(x, pool_param):
"""
A fast implementation of the forward pass for the max pooling layer that uses
some clever reshaping.
This can only be used for square pooling regions that tile the input.
"""
N, C, H, W = x.shape
pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
stride = pool_param['stride']
assert pool_height == pool_width == stride, 'Invalid pool params'
assert H % pool_height == 0
assert W % pool_width == 0
x_reshaped = x.reshape(N, C, H // pool_height, pool_height,
W // pool_width, pool_width)
out = x_reshaped.max(axis=3).max(axis=4)
cache = (x, x_reshaped, out)
return out, cache
def max_pool_backward_reshape(dout, cache):
"""
A fast implementation of the backward pass for the max pooling layer that
uses some clever broadcasting and reshaping.
This can only be used if the forward pass was computed using
max_pool_forward_reshape.
NOTE: If there are multiple argmaxes, this method will assign gradient to
ALL argmax elements of the input rather than picking one. In this case the
gradient will actually be incorrect. However this is unlikely to occur in
practice, so it shouldn't matter much. One possible solution is to split the
upstream gradient equally among all argmax elements; this should result in a
valid subgradient. You can make this happen by uncommenting the line below;
however this results in a significant performance penalty (about 40% slower)
and is unlikely to matter in practice so we don't do it.
"""
x, x_reshaped, out = cache
dx_reshaped = np.zeros_like(x_reshaped)
out_newaxis = out[:, :, :, np.newaxis, :, np.newaxis]
mask = (x_reshaped == out_newaxis)
dout_newaxis = dout[:, :, :, np.newaxis, :, np.newaxis]
dout_broadcast, _ = np.broadcast_arrays(dout_newaxis, dx_reshaped)
dx_reshaped[mask] = dout_broadcast[mask]
# dx_reshaped /= np.sum(mask, axis=(3, 5), keepdims=True)  # optional: split gradient among ties (see note above)
dx = dx_reshaped.reshape(x.shape)
return dx
def max_pool_forward_im2col(x, pool_param):
"""
An implementation of the forward pass for max pooling based on im2col.
This isn't much faster than the naive version, so it should be avoided if
possible.
"""
N, C, H, W = x.shape
pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
stride = pool_param['stride']
assert (H - pool_height) % stride == 0, 'Invalid height'
assert (W - pool_width) % stride == 0, 'Invalid width'
out_height = (H - pool_height) // stride + 1
out_width = (W - pool_width) // stride + 1
x_split = x.reshape(N * C, 1, H, W)
x_cols = im2col(x_split, pool_height, pool_width, padding=0, stride=stride)
x_cols_argmax = np.argmax(x_cols, axis=0)
x_cols_max = x_cols[x_cols_argmax, np.arange(x_cols.shape[1])]
out = x_cols_max.reshape(out_height, out_width, N, C).transpose(2, 3, 0, 1)
cache = (x, x_cols, x_cols_argmax, pool_param)
return out, cache
def max_pool_backward_im2col(dout, cache):
"""
An implementation of the backward pass for max pooling based on im2col.
This isn't much faster than the naive version, so it should be avoided if
possible.
"""
x, x_cols, x_cols_argmax, pool_param = cache
N, C, H, W = x.shape
pool_height, pool_width = pool_param['pool_height'], pool_param['pool_width']
stride = pool_param['stride']
dout_reshaped = dout.transpose(2, 3, 0, 1).flatten()
dx_cols = np.zeros_like(x_cols)
dx_cols[x_cols_argmax, np.arange(dx_cols.shape[1])] = dout_reshaped
dx = col2im_indices(dx_cols, (N * C, 1, H, W), pool_height, pool_width,
padding=0, stride=stride)
dx = dx.reshape(x.shape)
return dx
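
A quick shape check of the strided convolution above (a sketch; it assumes torch is installed, since fast_layers.py imports it at module level, and note that the backward pass additionally needs the compiled Cython extension):

    import numpy as np
    from daseCV.fast_layers import conv_forward_strides

    x = np.random.randn(2, 3, 8, 8)   # N, C, H, W
    w = np.random.randn(4, 3, 3, 3)   # F filters of size 3x3
    b = np.random.randn(4)
    out, _ = conv_forward_strides(x, w, b, {'stride': 1, 'pad': 1})
    print(out.shape)                  # (2, 4, 8, 8): 3x3 filters with pad=1 keep H and W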

+ 129
- 0
assignment2/notebook_images/daseCV/gradient_check.py View File

@@ -0,0 +1,129 @@
from __future__ import print_function
from builtins import range
from past.builtins import xrange
import numpy as np
from random import randrange
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
"""
a naive implementation of numerical gradient of f at x
- f should be a function that takes a single argument
- x is the point (numpy array) to evaluate the gradient at
"""
fx = f(x) # evaluate function value at original point
grad = np.zeros_like(x)
# iterate over all indexes in x
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
# evaluate function at x+h
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evaluate f(x + h)
x[ix] = oldval - h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # restore
# compute the partial derivative with centered formula
grad[ix] = (fxph - fxmh) / (2 * h) # the slope
if verbose:
print(ix, grad[ix])
it.iternext() # step to next dimension
return grad
def eval_numerical_gradient_array(f, x, df, h=1e-5):
"""
Evaluate a numeric gradient for a function that accepts a numpy
array and returns a numpy array.
"""
grad = np.zeros_like(x)
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h
pos = f(x).copy()
x[ix] = oldval - h
neg = f(x).copy()
x[ix] = oldval
grad[ix] = np.sum((pos - neg) * df) / (2 * h)
it.iternext()
return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
"""
Compute numeric gradients for a function that operates on input
and output blobs.
We assume that f accepts several input blobs as arguments, followed by a
blob where outputs will be written. For example, f might be called like:
f(x, w, out)
where x and w are input Blobs, and the result of f will be written to out.
Inputs:
- f: function
- inputs: tuple of input blobs
- output: output blob
- h: step size
"""
numeric_diffs = []
for input_blob in inputs:
diff = np.zeros_like(input_blob.diffs)
it = np.nditer(input_blob.vals, flags=['multi_index'],
op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
orig = input_blob.vals[idx]
input_blob.vals[idx] = orig + h
f(*(inputs + (output,)))
pos = np.copy(output.vals)
input_blob.vals[idx] = orig - h
f(*(inputs + (output,)))
neg = np.copy(output.vals)
input_blob.vals[idx] = orig
diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
it.iternext()
numeric_diffs.append(diff)
return numeric_diffs
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
return eval_numerical_gradient_blobs(lambda *args: net.forward(),
inputs, output, h=h)
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
"""
Sample a few random elements and only return the numerical gradient
in these dimensions.
"""
for i in range(num_checks):
ix = tuple([randrange(m) for m in x.shape])
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evaluate f(x + h)
x[ix] = oldval - h # decrement by h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # reset
grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = (abs(grad_numerical - grad_analytic) /
(abs(grad_numerical) + abs(grad_analytic)))
print('numerical: %f analytic: %f, relative error: %e'
%(grad_numerical, grad_analytic, rel_error))
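
The checkers above are what the assignment notebooks use to validate analytic gradients. As a hedged illustration, they can be exercised right away against softmax_loss from layers.py, which is already implemented in this commit:

    import numpy as np
    from daseCV.gradient_check import eval_numerical_gradient
    from daseCV.layers import softmax_loss

    np.random.seed(0)
    x = 0.001 * np.random.randn(10, 5)   # scores for 10 inputs, 5 classes
    y = np.random.randint(5, size=10)

    loss, dx = softmax_loss(x, y)
    dx_num = eval_numerical_gradient(lambda x: softmax_loss(x, y)[0], x, verbose=False)
    print(np.max(np.abs(dx - dx_num)))   # should be tiny (roughly 1e-9 or so)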

+ 54
- 0
assignment2/notebook_images/daseCV/im2col.py View File

@@ -0,0 +1,54 @@
from builtins import range
import numpy as np
def get_im2col_indices(x_shape, field_height, field_width, padding=1, stride=1):
# First figure out what the size of the output should be
N, C, H, W = x_shape
assert (H + 2 * padding - field_height) % stride == 0
assert (W + 2 * padding - field_width) % stride == 0
out_height = (H + 2 * padding - field_height) // stride + 1
out_width = (W + 2 * padding - field_width) // stride + 1
i0 = np.repeat(np.arange(field_height), field_width)
i0 = np.tile(i0, C)
i1 = stride * np.repeat(np.arange(out_height), out_width)
j0 = np.tile(np.arange(field_width), field_height * C)
j1 = stride * np.tile(np.arange(out_width), out_height)
i = i0.reshape(-1, 1) + i1.reshape(1, -1)
j = j0.reshape(-1, 1) + j1.reshape(1, -1)
k = np.repeat(np.arange(C), field_height * field_width).reshape(-1, 1)
return (k, i, j)
def im2col_indices(x, field_height, field_width, padding=1, stride=1):
""" An implementation of im2col based on some fancy indexing """
# Zero-pad the input
p = padding
x_padded = np.pad(x, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
k, i, j = get_im2col_indices(x.shape, field_height, field_width, padding,
stride)
cols = x_padded[:, k, i, j]
C = x.shape[1]
cols = cols.transpose(1, 2, 0).reshape(field_height * field_width * C, -1)
return cols
def col2im_indices(cols, x_shape, field_height=3, field_width=3, padding=1,
stride=1):
""" An implementation of col2im based on fancy indexing and np.add.at """
N, C, H, W = x_shape
H_padded, W_padded = H + 2 * padding, W + 2 * padding
x_padded = np.zeros((N, C, H_padded, W_padded), dtype=cols.dtype)
k, i, j = get_im2col_indices(x_shape, field_height, field_width, padding,
stride)
cols_reshaped = cols.reshape(C * field_height * field_width, -1, N)
cols_reshaped = cols_reshaped.transpose(2, 0, 1)
np.add.at(x_padded, (slice(None), k, i, j), cols_reshaped)
if padding == 0:
return x_padded
return x_padded[:, :, padding:-padding, padding:-padding]
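
A small round-trip check for the fancy-indexing im2col/col2im pair above (illustrative only: because col2im_indices sums contributions from overlapping patches, an exact inverse is only expected when the patches tile the input, i.e. stride equals the field size and padding is 0):

    import numpy as np
    from daseCV.im2col import im2col_indices, col2im_indices

    x = np.random.randn(2, 3, 8, 8)
    cols = im2col_indices(x, field_height=2, field_width=2, padding=0, stride=2)
    print(cols.shape)              # (3 * 2 * 2, 2 * 4 * 4) = (12, 32)
    x_back = col2im_indices(cols, x.shape, field_height=2, field_width=2, padding=0, stride=2)
    print(np.allclose(x, x_back))  # True for non-overlapping 2x2 patches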

+ 121
- 0
assignment2/notebook_images/daseCV/im2col_cython.pyx View File

@@ -0,0 +1,121 @@
import numpy as np
cimport numpy as np
cimport cython
# DTYPE = np.float64
# ctypedef np.float64_t DTYPE_t
ctypedef fused DTYPE_t:
np.float32_t
np.float64_t
def im2col_cython(np.ndarray[DTYPE_t, ndim=4] x, int field_height,
int field_width, int padding, int stride):
cdef int N = x.shape[0]
cdef int C = x.shape[1]
cdef int H = x.shape[2]
cdef int W = x.shape[3]
cdef int HH = (H + 2 * padding - field_height) / stride + 1
cdef int WW = (W + 2 * padding - field_width) / stride + 1
cdef int p = padding
cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.pad(x,
((0, 0), (0, 0), (p, p), (p, p)), mode='constant')
cdef np.ndarray[DTYPE_t, ndim=2] cols = np.zeros(
(C * field_height * field_width, N * HH * WW),
dtype=x.dtype)
# Moving the inner loop to a C function with no bounds checking works, but does
# not seem to help performance in any measurable way.
im2col_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
field_height, field_width, padding, stride)
return cols
@cython.boundscheck(False)
cdef int im2col_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
np.ndarray[DTYPE_t, ndim=4] x_padded,
int N, int C, int H, int W, int HH, int WW,
int field_height, int field_width, int padding, int stride) except? -1:
cdef int c, ii, jj, row, yy, xx, i, col
for c in range(C):
for yy in range(HH):
for xx in range(WW):
for ii in range(field_height):
for jj in range(field_width):
row = c * field_width * field_height + ii * field_height + jj
for i in range(N):
col = yy * WW * N + xx * N + i
cols[row, col] = x_padded[i, c, stride * yy + ii, stride * xx + jj]
def col2im_cython(np.ndarray[DTYPE_t, ndim=2] cols, int N, int C, int H, int W,
int field_height, int field_width, int padding, int stride):
cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
cdef int HH = (H + 2 * padding - field_height) / stride + 1
cdef int WW = (W + 2 * padding - field_width) / stride + 1
cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * padding, W + 2 * padding),
dtype=cols.dtype)
# Moving the inner loop to a C-function with no bounds checking improves
# performance quite a bit for col2im.
col2im_cython_inner(cols, x_padded, N, C, H, W, HH, WW,
field_height, field_width, padding, stride)
if padding > 0:
return x_padded[:, :, padding:-padding, padding:-padding]
return x_padded
@cython.boundscheck(False)
cdef int col2im_cython_inner(np.ndarray[DTYPE_t, ndim=2] cols,
np.ndarray[DTYPE_t, ndim=4] x_padded,
int N, int C, int H, int W, int HH, int WW,
int field_height, int field_width, int padding, int stride) except? -1:
cdef int c, ii, jj, row, yy, xx, i, col
for c in range(C):
for ii in range(field_height):
for jj in range(field_width):
row = c * field_width * field_height + ii * field_height + jj
for yy in range(HH):
for xx in range(WW):
for i in range(N):
col = yy * WW * N + xx * N + i
x_padded[i, c, stride * yy + ii, stride * xx + jj] += cols[row, col]
@cython.boundscheck(False)
@cython.wraparound(False)
cdef col2im_6d_cython_inner(np.ndarray[DTYPE_t, ndim=6] cols,
np.ndarray[DTYPE_t, ndim=4] x_padded,
int N, int C, int H, int W, int HH, int WW,
int out_h, int out_w, int pad, int stride):
cdef int c, hh, ww, n, h, w
for n in range(N):
for c in range(C):
for hh in range(HH):
for ww in range(WW):
for h in range(out_h):
for w in range(out_w):
x_padded[n, c, stride * h + hh, stride * w + ww] += cols[c, hh, ww, n, h, w]
def col2im_6d_cython(np.ndarray[DTYPE_t, ndim=6] cols, int N, int C, int H, int W,
int HH, int WW, int pad, int stride):
cdef np.ndarray x = np.empty((N, C, H, W), dtype=cols.dtype)
cdef int out_h = (H + 2 * pad - HH) / stride + 1
cdef int out_w = (W + 2 * pad - WW) / stride + 1
cdef np.ndarray[DTYPE_t, ndim=4] x_padded = np.zeros((N, C, H + 2 * pad, W + 2 * pad),
dtype=cols.dtype)
col2im_6d_cython_inner(cols, x_padded, N, C, H, W, HH, WW, out_h, out_w, pad, stride)
if pad > 0:
return x_padded[:, :, pad:-pad, pad:-pad]
return x_padded
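
The .pyx module above is only importable after compilation; fast_layers.py already prints the build command (python setup.py build_ext --inplace, run from the daseCV directory) when the import fails. Assuming the extension has been built, a hedged consistency check against the pure-NumPy path for a square filter:

    import numpy as np
    from daseCV.im2col import im2col_indices
    from daseCV.im2col_cython import im2col_cython   # requires the compiled extension

    x = np.random.randn(2, 3, 7, 7)
    cols_np = im2col_indices(x, 3, 3, padding=1, stride=1)
    cols_cy = im2col_cython(x, 3, 3, 1, 1)
    print(np.allclose(cols_np, cols_cy))             # the two layouts should agree for square filters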

+ 105
- 0
assignment2/notebook_images/daseCV/layer_utils.py View File

@@ -0,0 +1,105 @@
from daseCV.layers import *
from daseCV.fast_layers import *
def affine_relu_forward(x, w, b):
"""
Convenience layer that performs an affine transform followed by a ReLU
Inputs:
- x: Input to the affine layer
- w, b: Weights for the affine layer
Returns a tuple of:
- out: Output from the ReLU
- cache: Object to give to the backward pass
"""
a, fc_cache = affine_forward(x, w, b)
out, relu_cache = relu_forward(a)
cache = (fc_cache, relu_cache)
return out, cache
def affine_relu_backward(dout, cache):
"""
Backward pass for the affine-relu convenience layer
"""
fc_cache, relu_cache = cache
da = relu_backward(dout, relu_cache)
dx, dw, db = affine_backward(da, fc_cache)
return dx, dw, db
def conv_relu_forward(x, w, b, conv_param):
"""
A convenience layer that performs a convolution followed by a ReLU.
Inputs:
- x: Input to the convolutional layer
- w, b, conv_param: Weights and parameters for the convolutional layer
Returns a tuple of:
- out: Output from the ReLU
- cache: Object to give to the backward pass
"""
a, conv_cache = conv_forward_fast(x, w, b, conv_param)
out, relu_cache = relu_forward(a)
cache = (conv_cache, relu_cache)
return out, cache
def conv_relu_backward(dout, cache):
"""
Backward pass for the conv-relu convenience layer.
"""
conv_cache, relu_cache = cache
da = relu_backward(dout, relu_cache)
dx, dw, db = conv_backward_fast(da, conv_cache)
return dx, dw, db
def conv_bn_relu_forward(x, w, b, gamma, beta, conv_param, bn_param):
a, conv_cache = conv_forward_fast(x, w, b, conv_param)
an, bn_cache = spatial_batchnorm_forward(a, gamma, beta, bn_param)
out, relu_cache = relu_forward(an)
cache = (conv_cache, bn_cache, relu_cache)
return out, cache
def conv_bn_relu_backward(dout, cache):
conv_cache, bn_cache, relu_cache = cache
dan = relu_backward(dout, relu_cache)
da, dgamma, dbeta = spatial_batchnorm_backward(dan, bn_cache)
dx, dw, db = conv_backward_fast(da, conv_cache)
return dx, dw, db, dgamma, dbeta
def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
"""
Convenience layer that performs a convolution, a ReLU, and a pool.
Inputs:
- x: Input to the convolutional layer
- w, b, conv_param: Weights and parameters for the convolutional layer
- pool_param: Parameters for the pooling layer
Returns a tuple of:
- out: Output from the pooling layer
- cache: Object to give to the backward pass
"""
a, conv_cache = conv_forward_fast(x, w, b, conv_param)
s, relu_cache = relu_forward(a)
out, pool_cache = max_pool_forward_fast(s, pool_param)
cache = (conv_cache, relu_cache, pool_cache)
return out, cache
def conv_relu_pool_backward(dout, cache):
"""
Backward pass for the conv-relu-pool convenience layer
"""
conv_cache, relu_cache, pool_cache = cache
ds = max_pool_backward_fast(dout, pool_cache)
da = relu_backward(ds, relu_cache)
dx, dw, db = conv_backward_fast(da, conv_cache)
return dx, dw, db
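
The convenience layers above only become useful once the corresponding TODO blocks in layers.py are filled in. As a sketch of the intended workflow (it assumes affine_* and relu_* have been implemented, and that torch is installed because fast_layers.py is imported transitively), affine_relu_backward can be checked numerically like this:

    import numpy as np
    from daseCV.layer_utils import affine_relu_forward, affine_relu_backward
    from daseCV.gradient_check import eval_numerical_gradient_array

    np.random.seed(0)
    x = np.random.randn(2, 3, 4)   # flattened to (2, 12) inside affine_forward
    w = np.random.randn(12, 10)
    b = np.random.randn(10)
    dout = np.random.randn(2, 10)

    out, cache = affine_relu_forward(x, w, b)
    dx, dw, db = affine_relu_backward(dout, cache)
    dx_num = eval_numerical_gradient_array(lambda x: affine_relu_forward(x, w, b)[0], x, dout)
    print(np.max(np.abs(dx - dx_num)))   # should be very small once the layers are implemented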

+ 793
- 0
assignment2/notebook_images/daseCV/layers.py View File

@@ -0,0 +1,793 @@
from builtins import range
import numpy as np
def affine_forward(x, w, b):
"""
Computes the forward pass for an affine (fully connected) layer.
The input x has shape (N, d_1, ..., d_k) and contains a minibatch of N
examples, where each example x[i] has shape (d_1, ..., d_k). We reshape
each input into a vector of dimension D = d_1 * ... * d_k and then
transform it into an output vector of dimension M.
Inputs:
- x: A numpy array containing input data, of shape (N, d_1, ..., d_k)
- w: A numpy array of weights, of shape (D, M)
- b: A numpy array of biases, of shape (M,)
Returns a tuple of:
- out: output, of shape (N, M)
- cache: (x, w, b)
"""
out = None
###########################################################################
# TODO: Implement the affine forward pass. Store the result in out. You #
# will need to reshape the input into rows. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
cache = (x, w, b)
return out, cache
def affine_backward(dout, cache):
"""
Computes the backward pass for an affine (fully connected) layer.
Inputs:
- dout: Upstream derivative, of shape (N, M)
- cache: Tuple of:
- x: Input data, of shape (N, d_1, ... d_k)
- w: Weights, of shape (D, M)
- b: Biases, of shape (M,)
Returns a tuple of:
- dx: Gradient with respect to x, of shape (N, d1, ..., d_k)
- dw: Gradient with respect to w, of shape (D, M)
- db: Gradient with respect to b, of shape (M,)
"""
x, w, b = cache
dx, dw, db = None, None, None
###########################################################################
# TODO: Implement the affine backward pass. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dw, db
def relu_forward(x):
"""
Computes the forward pass for a layer of rectified linear units (ReLUs).
Input:
- x: Inputs, of any shape
Returns a tuple of:
- out: Output, of the same shape as x
- cache: x
"""
out = None
###########################################################################
# TODO: Implement the ReLU forward pass. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
cache = x
return out, cache
def relu_backward(dout, cache):
"""
Computes the backward pass for a layer of rectified linear units (ReLUs).
Input:
- dout: Upstream derivatives, of any shape
- cache: Input x, of same shape as dout
Returns:
- dx: Gradient with respect to x
"""
dx, x = None, cache
###########################################################################
# TODO: Implement the ReLU backward pass. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx
def batchnorm_forward(x, gamma, beta, bn_param):
"""
Forward pass for batch normalization.
During training the sample mean and (uncorrected) sample variance are
computed from minibatch statistics and used to normalize the incoming data.
During training we also keep an exponentially decaying running mean of the
mean and variance of each feature, and these averages are used to normalize
data at test-time.
At each timestep we update the running averages for mean and variance using
an exponential decay based on the momentum parameter:
running_mean = momentum * running_mean + (1 - momentum) * sample_mean
running_var = momentum * running_var + (1 - momentum) * sample_var
Note that the batch normalization paper suggests a different test-time
behavior: they compute sample mean and variance for each feature using a
large number of training images rather than using a running average. For
this implementation we have chosen to use running averages instead since
they do not require an additional estimation step; the torch7
implementation of batch normalization also uses running averages.
Input:
- x: Data of shape (N, D)
- gamma: Scale parameter of shape (D,)
- beta: Shift parameter of shape (D,)
- bn_param: Dictionary with the following keys:
- mode: 'train' or 'test'; required
- eps: Constant for numeric stability
- momentum: Constant for running mean / variance.
- running_mean: Array of shape (D,) giving running mean of features
- running_var Array of shape (D,) giving running variance of features
Returns a tuple of:
- out: of shape (N, D)
- cache: A tuple of values needed in the backward pass
"""
mode = bn_param['mode']
eps = bn_param.get('eps', 1e-5)
momentum = bn_param.get('momentum', 0.9)
N, D = x.shape
running_mean = bn_param.get('running_mean', np.zeros(D, dtype=x.dtype))
running_var = bn_param.get('running_var', np.zeros(D, dtype=x.dtype))
out, cache = None, None
if mode == 'train':
#######################################################################
# TODO: Implement the training-time forward pass for batch norm. #
# Use minibatch statistics to compute the mean and variance, use #
# these statistics to normalize the incoming data, and scale and #
# shift the normalized data using gamma and beta. #
# #
# You should store the output in the variable out. Any intermediates #
# that you need for the backward pass should be stored in the cache #
# variable. #
# #
# You should also use your computed sample mean and variance together #
# with the momentum variable to update the running mean and running #
# variance, storing your result in the running_mean and running_var #
# variables. #
# #
# Note that though you should be keeping track of the running #
# variance, you should normalize the data based on the standard #
# deviation (square root of variance) instead! #
# Referencing the original paper (https://arxiv.org/abs/1502.03167) #
# might prove to be helpful. #
#######################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#######################################################################
# END OF YOUR CODE #
#######################################################################
elif mode == 'test':
#######################################################################
# TODO: Implement the test-time forward pass for batch normalization. #
# Use the running mean and variance to normalize the incoming data, #
# then scale and shift the normalized data using gamma and beta. #
# Store the result in the out variable. #
#######################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#######################################################################
# END OF YOUR CODE #
#######################################################################
else:
raise ValueError('Invalid forward batchnorm mode "%s"' % mode)
# Store the updated running means back into bn_param
bn_param['running_mean'] = running_mean
bn_param['running_var'] = running_var
return out, cache
def batchnorm_backward(dout, cache):
"""
Backward pass for batch normalization.
For this implementation, you should write out a computation graph for
batch normalization on paper and propagate gradients backward through
intermediate nodes.
Inputs:
- dout: Upstream derivatives, of shape (N, D)
- cache: Variable of intermediates from batchnorm_forward.
Returns a tuple of:
- dx: Gradient with respect to inputs x, of shape (N, D)
- dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
- dbeta: Gradient with respect to shift parameter beta, of shape (D,)
"""
dx, dgamma, dbeta = None, None, None
###########################################################################
# TODO: Implement the backward pass for batch normalization. Store the #
# results in the dx, dgamma, and dbeta variables. #
# Referencing the original paper (https://arxiv.org/abs/1502.03167) #
# might prove to be helpful. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dgamma, dbeta
def batchnorm_backward_alt(dout, cache):
"""
Alternative backward pass for batch normalization.
For this implementation you should work out the derivatives for the batch
normalizaton backward pass on paper and simplify as much as possible. You
should be able to derive a simple expression for the backward pass.
See the jupyter notebook for more hints.
Note: This implementation should expect to receive the same cache variable
as batchnorm_backward, but might not use all of the values in the cache.
Inputs / outputs: Same as batchnorm_backward
"""
dx, dgamma, dbeta = None, None, None
###########################################################################
# TODO: Implement the backward pass for batch normalization. Store the #
# results in the dx, dgamma, and dbeta variables. #
# #
# After computing the gradient with respect to the centered inputs, you #
# should be able to compute gradients with respect to the inputs in a #
# single statement; our implementation fits on a single 80-character line.#
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dgamma, dbeta
def layernorm_forward(x, gamma, beta, ln_param):
"""
Forward pass for layer normalization.
During both training and test-time, the incoming data is normalized per data-point,
before being scaled by gamma and beta parameters identical to that of batch normalization.
Note that in contrast to batch normalization, the behavior during train and test-time for
layer normalization are identical, and we do not need to keep track of running averages
of any sort.
Input:
- x: Data of shape (N, D)
- gamma: Scale parameter of shape (D,)
- beta: Shift parameter of shape (D,)
- ln_param: Dictionary with the following keys:
- eps: Constant for numeric stability
Returns a tuple of:
- out: of shape (N, D)
- cache: A tuple of values needed in the backward pass
"""
out, cache = None, None
eps = ln_param.get('eps', 1e-5)
###########################################################################
# TODO: Implement the training-time forward pass for layer norm. #
# Normalize the incoming data, and scale and shift the normalized data #
# using gamma and beta. #
# HINT: this can be done by slightly modifying your training-time #
# implementation of batch normalization, and inserting a line or two of #
# well-placed code. In particular, can you think of any matrix #
# transformations you could perform, that would enable you to copy over #
# the batch norm code and leave it almost unchanged? #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return out, cache
def layernorm_backward(dout, cache):
"""
Backward pass for layer normalization.
For this implementation, you can heavily rely on the work you've done already
for batch normalization.
Inputs:
- dout: Upstream derivatives, of shape (N, D)
- cache: Variable of intermediates from layernorm_forward.
Returns a tuple of:
- dx: Gradient with respect to inputs x, of shape (N, D)
- dgamma: Gradient with respect to scale parameter gamma, of shape (D,)
- dbeta: Gradient with respect to shift parameter beta, of shape (D,)
"""
dx, dgamma, dbeta = None, None, None
###########################################################################
# TODO: Implement the backward pass for layer norm. #
# #
# HINT: this can be done by slightly modifying your training-time #
# implementation of batch normalization. The hints to the forward pass #
# still apply! #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dgamma, dbeta
def dropout_forward(x, dropout_param):
"""
Performs the forward pass for (inverted) dropout.
Inputs:
- x: Input data, of any shape
- dropout_param: A dictionary with the following keys:
- p: Dropout parameter. We keep each neuron output with probability p.
- mode: 'test' or 'train'. If the mode is train, then perform dropout;
if the mode is test, then just return the input.
- seed: Seed for the random number generator. Passing seed makes this
function deterministic, which is needed for gradient checking but not
in real networks.
Outputs:
- out: Array of the same shape as x.
- cache: tuple (dropout_param, mask). In training mode, mask is the dropout
mask that was used to multiply the input; in test mode, mask is None.
NOTE: Please implement **inverted** dropout, not the vanilla version of dropout.
See http://cs231n.github.io/neural-networks-2/#reg for more details.
NOTE 2: Keep in mind that p is the probability of **keeping** a neuron
output; this might be contrary to some sources, where it is referred to
as the probability of dropping a neuron output.
"""
p, mode = dropout_param['p'], dropout_param['mode']
if 'seed' in dropout_param:
np.random.seed(dropout_param['seed'])
mask = None
out = None
if mode == 'train':
#######################################################################
# TODO: Implement the training-phase forward pass for inverted dropout.
# Store the dropout mask in the mask variable.
#######################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#######################################################################
# END OF YOUR CODE #
#######################################################################
elif mode == 'test':
#######################################################################
# TODO: Implement the test-phase forward pass for inverted dropout.
#######################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#######################################################################
# END OF YOUR CODE #
#######################################################################
cache = (dropout_param, mask)
out = out.astype(x.dtype, copy=False)
return out, cache
def dropout_backward(dout, cache):
"""
Perform the backward pass for (inverted) dropout.
Inputs:
- dout: Upstream derivatives, of any shape
- cache: (dropout_param, mask) from dropout_forward.
"""
dropout_param, mask = cache
mode = dropout_param['mode']
dx = None
if mode == 'train':
#######################################################################
# TODO: Implement the training-phase backward pass for inverted dropout.
#######################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
#######################################################################
# END OF YOUR CODE #
#######################################################################
elif mode == 'test':
dx = dout
return dx
def conv_forward_naive(x, w, b, conv_param):
"""
A naive implementation of the forward pass for a convolutional layer.
The input consists of N data points, each with C channels, height H and
width W. We convolve each input with F different filters, where each filter
spans all C channels and has height HH and width WW.
Input:
- x: Input data of shape (N, C, H, W)
- w: Filter weights of shape (F, C, HH, WW)
- b: Biases, of shape (F,)
- conv_param: A dictionary with the following keys:
- 'stride': The number of pixels between adjacent receptive fields in the
horizontal and vertical directions.
- 'pad': The number of pixels that will be used to zero-pad the input.
During padding, 'pad' zeros should be placed symmetrically (i.e. equally on both sides)
along the height and width axes of the input. Be careful not to modify the original
input x directly.
Returns a tuple of:
- out: Output data, of shape (N, F, H', W') where H' and W' are given by
H' = 1 + (H + 2 * pad - HH) / stride
W' = 1 + (W + 2 * pad - WW) / stride
- cache: (x, w, b, conv_param)
"""
out = None
###########################################################################
# TODO: Implement the convolutional forward pass.
# Hint: you can use the np.pad function for padding.
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
cache = (x, w, b, conv_param)
return out, cache
def conv_backward_naive(dout, cache):
"""
A naive implementation of the backward pass for a convolutional layer.
Inputs:
- dout: Upstream derivatives.
- cache: A tuple of (x, w, b, conv_param) as in conv_forward_naive
Returns a tuple of:
- dx: Gradient with respect to x
- dw: Gradient with respect to w
- db: Gradient with respect to b
"""
dx, dw, db = None, None, None
###########################################################################
# TODO: Implement the convolutional backward pass.
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dw, db
def max_pool_forward_naive(x, pool_param):
"""
A naive implementation of the forward pass for a max-pooling layer.
Inputs:
- x: Input data, of shape (N, C, H, W)
- pool_param: dictionary with the following keys:
- 'pool_height': The height of each pooling region
- 'pool_width': The width of each pooling region
- 'stride': The distance between adjacent pooling regions
No padding is necessary here; the output size is given below.
Returns a tuple of:
- out: Output data, of shape (N, C, H', W') where H' and W' are given by
H' = 1 + (H - pool_height) / stride
W' = 1 + (W - pool_width) / stride
- cache: (x, pool_param)
"""
out = None
###########################################################################
# TODO: Implement the forward pass for max pooling.                      #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
cache = (x, pool_param)
return out, cache
def max_pool_backward_naive(dout, cache):
"""
A naive implementation of the backward pass for a max-pooling layer.
Inputs:
- dout: Upstream derivatives
- cache: A tuple of (x, pool_param) as in the forward pass.
Returns:
- dx: Gradient with respect to x
"""
dx = None
###########################################################################
# TODO: Implement the backward pass for max pooling.                     #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx
def spatial_batchnorm_forward(x, gamma, beta, bn_param):
"""
Computes the forward pass for spatial batch normalization.
Inputs:
- x: Input data of shape (N, C, H, W)
- gamma: Scale parameter, of shape (C,)
- beta: Shift parameter, of shape (C,)
- bn_param: Dictionary with the following keys:
- mode: 'train' or 'test'; required
- eps: Constant for numeric stability
- momentum: Constant for running mean / variance. momentum=0 means that
old information is discarded completely at every time step, while
momentum=1 means that new information is never incorporated. The
default of momentum=0.9 should work well in most situations.
- running_mean: Array of shape (D,) giving running mean of features
- running_var Array of shape (D,) giving running variance of features
Returns a tuple of:
- out: Output data, of shape (N, C, H, W)
- cache: Values needed for the backward pass
"""
out, cache = None, None
###########################################################################
# TODO: Implement the forward pass for spatial batch normalization.
#
# HINT: You can implement spatial batch normalization by calling the vanilla
# version of batch normalization you implemented above.
# Your implementation should be very short; ours is under five lines.
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return out, cache
def spatial_batchnorm_backward(dout, cache):
"""
Computes the backward pass for spatial batch normalization.
Inputs:
- dout: Upstream derivatives, of shape (N, C, H, W)
- cache: Values from the forward pass
Returns a tuple of:
- dx: Gradient with respect to inputs, of shape (N, C, H, W)
- dgamma: Gradient with respect to scale parameter, of shape (C,)
- dbeta: Gradient with respect to shift parameter, of shape (C,)
"""
dx, dgamma, dbeta = None, None, None
###########################################################################
# TODO: Implement the backward pass for spatial batch normalization.
#
# HINT: You can implement spatial batch normalization by calling the vanilla
# version of batch normalization you implemented above.
# Your implementation should be very short; ours is under five lines.
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dgamma, dbeta
def spatial_groupnorm_forward(x, gamma, beta, G, gn_param):
"""
Computes the forward pass for spatial group normalization.
In contrast to layer normalization, group normalization splits each entry
in the data into G contiguous pieces, which it then normalizes independently.
Per feature shifting and scaling are then applied to the data, in a manner identical to that of batch normalization and layer normalization.
Inputs:
- x: Input data of shape (N, C, H, W)
- gamma: Scale parameter, of shape (C,)
- beta: Shift parameter, of shape (C,)
- G: Integer number of groups to split into, should be a divisor of C
- gn_param: Dictionary with the following keys:
- eps: Constant for numeric stability
Returns a tuple of:
- out: Output data, of shape (N, C, H, W)
- cache: Values needed for the backward pass
"""
out, cache = None, None
eps = gn_param.get('eps',1e-5)
###########################################################################
# TODO: Implement the forward pass for spatial group normalization.
# This is extremely similar to the layer normalization implementation.
# In particular, think about how you could transform the matrices so that the
# bulk of the code is similar to both train-time batch normalization and
# layer normalization!
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return out, cache
def spatial_groupnorm_backward(dout, cache):
"""
Computes the backward pass for spatial group normalization.
Inputs:
- dout: Upstream derivatives, of shape (N, C, H, W)
- cache: Values from the forward pass
Returns a tuple of:
- dx: Gradient with respect to inputs, of shape (N, C, H, W)
- dgamma: Gradient with respect to scale parameter, of shape (C,)
- dbeta: Gradient with respect to shift parameter, of shape (C,)
"""
dx, dgamma, dbeta = None, None, None
###########################################################################
# TODO: Implement the backward pass for spatial group normalization.
# This will be extremely similar to the layer normalization implementation.
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return dx, dgamma, dbeta
def svm_loss(x, y):
"""
Computes the loss and gradient for multiclass SVM classification.
Inputs:
- x: Input data, of shape (N, C) where x[i, j] is the score for the jth
class for the ith input.
- y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
0 <= y[i] < C
Returns a tuple of:
- loss: Scalar giving the loss
- dx: Gradient of the loss with respect to x
"""
N = x.shape[0]
correct_class_scores = x[np.arange(N), y]
margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
margins[np.arange(N), y] = 0
loss = np.sum(margins) / N
num_pos = np.sum(margins > 0, axis=1)
dx = np.zeros_like(x)
dx[margins > 0] = 1
dx[np.arange(N), y] -= num_pos
dx /= N
return loss, dx
def softmax_loss(x, y):
"""
Computes the loss and gradient for softmax classification.
Inputs:
- x: Input data, of shape (N, C) where x[i, j] is the score for the jth
class for the ith input.
- y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
0 <= y[i] < C
Returns a tuple of:
- loss: Scalar giving the loss
- dx: Gradient of the loss with respect to x
"""
shifted_logits = x - np.max(x, axis=1, keepdims=True)
Z = np.sum(np.exp(shifted_logits), axis=1, keepdims=True)
log_probs = shifted_logits - np.log(Z)
probs = np.exp(log_probs)
N = x.shape[0]
loss = -np.sum(log_probs[np.arange(N), y]) / N
dx = probs.copy()
dx[np.arange(N), y] -= 1
dx /= N
return loss, dx
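
# A small usage sketch (hypothetical data, not part of the assignment): both
# losses above expect raw class scores of shape (N, C) and integer labels of
# shape (N,), and return (loss, dscores) with dscores the same shape as the
# scores.
if __name__ == '__main__':
    rng = np.random.RandomState(0)
    scores = rng.randn(5, 3)            # N=5 samples, C=3 classes
    labels = rng.randint(3, size=5)
    print('svm loss:', svm_loss(scores, labels)[0])
    print('softmax loss:', softmax_loss(scores, labels)[0])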

+ 159
- 0
assignment2/notebook_images/daseCV/optim.py 查看文件

@ -0,0 +1,159 @@
import numpy as np
"""
This file implements various first-order update rules that are commonly used
for training neural networks. Each update rule accepts current weights and the
gradient of the loss with respect to those weights and produces the next set of
weights. Each update rule has the same interface:
def update(w, dw, config=None):
Inputs:
- w: A numpy array giving the current weights.
- dw: A numpy array of the same shape as w giving the gradient of the
loss with respect to w.
- config: A dictionary containing hyperparameter values such as learning
rate, momentum, etc. If the update rule requires caching values over many
iterations, then config will also hold these cached values.
Returns:
- next_w: The next point after the update.
- config: The config dictionary to be passed to the next iteration of the
update rule.
NOTE: For most update rules, the default learning rate will probably not
perform well; however the default values of the other hyperparameters should
work well for a variety of different problems.
For efficiency, update rules may perform in-place updates, mutating w and
setting next_w equal to w.
"""
def sgd(w, dw, config=None):
"""
Performs vanilla stochastic gradient descent.
config format:
- learning_rate: Scalar learning rate.
"""
if config is None: config = {}
config.setdefault('learning_rate', 1e-2)
w -= config['learning_rate'] * dw
return w, config
def sgd_momentum(w, dw, config=None):
"""
Performs stochastic gradient descent with momentum.
config format:
- learning_rate: Scalar learning rate.
- momentum: Scalar between 0 and 1 giving the momentum value.
Setting momentum = 0 reduces to sgd.
- velocity: A numpy array of the same shape as w and dw used to store a
moving average of the gradients.
"""
if config is None: config = {}
config.setdefault('learning_rate', 1e-2)
config.setdefault('momentum', 0.9)
v = config.get('velocity', np.zeros_like(w))
next_w = None
###########################################################################
# TODO: Implement the momentum update formula. Store the updated value in #
# the next_w variable. You should also use and update the velocity v. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
config['velocity'] = v
return next_w, config
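
# A hedged sketch (not part of the scaffold above) of the classic momentum
# update: keep a running velocity and step along it.
def _sgd_momentum_sketch(w, dw, config):
    v = config.get('velocity', np.zeros_like(w))
    v = config['momentum'] * v - config['learning_rate'] * dw  # update velocity
    next_w = w + v                                             # step along it
    config['velocity'] = v
    return next_w, config
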
def rmsprop(w, dw, config=None):
"""
Uses the RMSProp update rule, which uses a moving average of squared
gradient values to set adaptive per-parameter learning rates.
config format:
- learning_rate: Scalar learning rate.
- decay_rate: Scalar between 0 and 1 giving the decay rate for the squared
gradient cache.
- epsilon: Small scalar used for smoothing to avoid dividing by zero.
- cache: Moving average of second moments of gradients.
"""
if config is None: config = {}
config.setdefault('learning_rate', 1e-2)
config.setdefault('decay_rate', 0.99)
config.setdefault('epsilon', 1e-8)
config.setdefault('cache', np.zeros_like(w))
next_w = None
###########################################################################
# TODO: Implement the RMSprop update formula, storing the next value of w #
    # in the next_w variable. Don't forget to update the cache value stored   #
    # in config['cache'].                                                     #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return next_w, config
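
# A hedged sketch (not part of the scaffold above) of the RMSProp update:
# keep an exponential moving average of squared gradients and divide the
# step by its square root.
def _rmsprop_sketch(w, dw, config):
    cache = (config['decay_rate'] * config['cache']
             + (1 - config['decay_rate']) * dw ** 2)
    next_w = w - config['learning_rate'] * dw / (np.sqrt(cache) + config['epsilon'])
    config['cache'] = cache
    return next_w, config
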
def adam(w, dw, config=None):
"""
Uses the Adam update rule, which incorporates moving averages of both the
gradient and its square and a bias correction term.
config format:
- learning_rate: Scalar learning rate.
- beta1: Decay rate for moving average of first moment of gradient.
- beta2: Decay rate for moving average of second moment of gradient.
- epsilon: Small scalar used for smoothing to avoid dividing by zero.
- m: Moving average of gradient.
- v: Moving average of squared gradient.
- t: Iteration number.
"""
if config is None: config = {}
config.setdefault('learning_rate', 1e-3)
config.setdefault('beta1', 0.9)
config.setdefault('beta2', 0.999)
config.setdefault('epsilon', 1e-8)
config.setdefault('m', np.zeros_like(w))
config.setdefault('v', np.zeros_like(w))
config.setdefault('t', 0)
next_w = None
###########################################################################
# TODO: Implement the Adam update formula, storing the next value of w in #
# the next_w variable. Don't forget to update the m, v, and t variables #
# stored in config. #
# #
# NOTE: In order to match the reference output, please modify t _before_ #
# using it in any calculations. #
###########################################################################
# *****START OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
pass
# *****END OF YOUR CODE (DO NOT DELETE/MODIFY THIS LINE)*****
###########################################################################
# END OF YOUR CODE #
###########################################################################
return next_w, config
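
# A hedged sketch (not part of the scaffold above) of the Adam update with
# bias correction; note that t is incremented before it is used, as the TODO
# above asks.
def _adam_sketch(w, dw, config):
    lr, b1, b2, eps = (config['learning_rate'], config['beta1'],
                       config['beta2'], config['epsilon'])
    t = config['t'] + 1
    m = b1 * config['m'] + (1 - b1) * dw         # first-moment moving average
    v = b2 * config['v'] + (1 - b2) * dw ** 2    # second-moment moving average
    m_hat = m / (1 - b1 ** t)                    # bias-corrected moments
    v_hat = v / (1 - b2 ** t)
    next_w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    config['m'], config['v'], config['t'] = m, v, t
    return next_w, config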

+ 14
- 0
assignment2/notebook_images/daseCV/setup.py 查看文件

@ -0,0 +1,14 @@
from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import numpy
extensions = [
Extension('im2col_cython', ['im2col_cython.pyx'],
include_dirs = [numpy.get_include()]
),
]
setup(
ext_modules = cythonize(extensions),
)
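
# Typical usage to compile the im2col Cython extension in place (run from
# this directory):
#   python setup.py build_ext --inplace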

+ 299
- 0
assignment2/notebook_images/daseCV/solver.py 查看文件

@ -0,0 +1,299 @@
from __future__ import print_function, division
from future import standard_library
standard_library.install_aliases()
from builtins import range
from builtins import object
import os
import pickle as pickle
import numpy as np
from daseCV import optim
class Solver(object):
"""
    A Solver encapsulates all the logic necessary for training models.
    The Solver performs stochastic gradient descent using the different
    update rules defined in optim.py.

    The solver also accepts training and validation data and labels, so it
    can periodically check classification accuracy on both the training and
    validation sets to watch out for overfitting.

    To train a model, you first construct a Solver instance, passing the
    model, the dataset, and various options (learning rate, batch size, etc.)
    to the constructor. You then call the train() method to run the
    optimization procedure and train the model.

    After train() returns, model.params will contain the parameters that
    performed best on the validation set over the course of training. In
    addition, solver.loss_history will contain a list of all losses
    encountered during training, while solver.train_acc_history and
    solver.val_acc_history will record the model's accuracy on the training
    and validation sets at each epoch.

    Example usage might look something like this:
data = {
'X_train': # training data
'y_train': # training labels
'X_val': # validation data
'y_val': # validation labels
}
model = MyAwesomeModel(hidden_size=100, reg=10)
solver = Solver(model, data,
update_rule='sgd',
optim_config={
'learning_rate': 1e-3,
},
lr_decay=0.95,
num_epochs=10, batch_size=100,
print_every=100)
solver.train()
A Solver works on a model object that must conform to the following API:
- model.params must be a dictionary mapping string parameter names to numpy
arrays containing parameter values.
- model.loss(X, y) must be a function that computes training-time loss and
gradients, and test-time classification scores, with the following inputs
and outputs:
Inputs:
- X: Array giving a minibatch of input data of shape (N, d_1, ..., d_k)
- y: Array of labels, of shape (N,) giving labels for X where y[i] is the
label for X[i].
Returns:
If y is None, run a test-time forward pass and return:
- scores: Array of shape (N, C) giving classification scores for X where
scores[i, c] gives the score of class c for X[i].
If y is not None, run a training time forward and backward pass and
return a tuple of:
- loss: Scalar giving the loss
- grads: Dictionary with the same keys as self.params mapping parameter
names to gradients of the loss with respect to those parameters.
"""
def __init__(self, model, data, **kwargs):
"""
Construct a new Solver instance.
Required arguments:
- model: A model object conforming to the API described above
- data: A dictionary of training and validation data containing:
'X_train': Array, shape (N_train, d_1, ..., d_k) of training images
'X_val': Array, shape (N_val, d_1, ..., d_k) of validation images
'y_train': Array, shape (N_train,) of labels for training images
'y_val': Array, shape (N_val,) of labels for validation images
Optional arguments:
- update_rule: A string giving the name of an update rule in optim.py.
Default is 'sgd'.
- optim_config: A dictionary containing hyperparameters that will be
passed to the chosen update rule. Each update rule requires different
hyperparameters (see optim.py) but all update rules require a
'learning_rate' parameter so that should always be present.
- lr_decay: A scalar for learning rate decay; after each epoch the
learning rate is multiplied by this value.
- batch_size: Size of minibatches used to compute loss and gradient
during training.
- num_epochs: The number of epochs to run for during training.
- print_every: Integer; training losses will be printed every
print_every iterations.
- verbose: Boolean; if set to false then no output will be printed
during training.
- num_train_samples: Number of training samples used to check training
accuracy; default is 1000; set to None to use entire training set.
- num_val_samples: Number of validation samples to use to check val
accuracy; default is None, which uses the entire validation set.
- checkpoint_name: If not None, then save model checkpoints here every
epoch.
"""
self.model = model
self.X_train = data['X_train']
self.y_train = data['y_train']
self.X_val = data['X_val']
self.y_val = data['y_val']
# Unpack keyword arguments
self.update_rule = kwargs.pop('update_rule', 'sgd')
self.optim_config = kwargs.pop('optim_config', {})
self.lr_decay = kwargs.pop('lr_decay', 1.0)
self.batch_size = kwargs.pop('batch_size', 100)
self.num_epochs = kwargs.pop('num_epochs', 10)
self.num_train_samples = kwargs.pop('num_train_samples', 1000)
self.num_val_samples = kwargs.pop('num_val_samples', None)
self.checkpoint_name = kwargs.pop('checkpoint_name', None)
self.print_every = kwargs.pop('print_every', 10)
self.verbose = kwargs.pop('verbose', True)
# Throw an error if there are extra keyword arguments
if len(kwargs) > 0:
extra = ', '.join('"%s"' % k for k in list(kwargs.keys()))
raise ValueError('Unrecognized arguments %s' % extra)
# Make sure the update rule exists, then replace the string
# name with the actual function
if not hasattr(optim, self.update_rule):
raise ValueError('Invalid update_rule "%s"' % self.update_rule)
self.update_rule = getattr(optim, self.update_rule)
self._reset()
def _reset(self):
"""
Set up some book-keeping variables for optimization. Don't call this
manually.
"""
# Set up some variables for book-keeping
self.epoch = 0
self.best_val_acc = 0
self.best_params = {}
self.loss_history = []
self.train_acc_history = []
self.val_acc_history = []
# Make a deep copy of the optim_config for each parameter
self.optim_configs = {}
for p in self.model.params:
d = {k: v for k, v in self.optim_config.items()}
self.optim_configs[p] = d
def _step(self):
"""
Make a single gradient update. This is called by train() and should not
be called manually.
"""
# Make a minibatch of training data
num_train = self.X_train.shape[0]
batch_mask = np.random.choice(num_train, self.batch_size)
X_batch = self.X_train[batch_mask]
y_batch = self.y_train[batch_mask]
# Compute loss and gradient
loss, grads = self.model.loss(X_batch, y_batch)
self.loss_history.append(loss)
# Perform a parameter update
for p, w in self.model.params.items():
dw = grads[p]
config = self.optim_configs[p]
next_w, next_config = self.update_rule(w, dw, config)
self.model.params[p] = next_w
self.optim_configs[p] = next_config
def _save_checkpoint(self):
if self.checkpoint_name is None: return
checkpoint = {
'model': self.model,
'update_rule': self.update_rule,
'lr_decay': self.lr_decay,
'optim_config': self.optim_config,
'batch_size': self.batch_size,
'num_train_samples': self.num_train_samples,
'num_val_samples': self.num_val_samples,
'epoch': self.epoch,
'loss_history': self.loss_history,
'train_acc_history': self.train_acc_history,
'val_acc_history': self.val_acc_history,
}
filename = '%s_epoch_%d.pkl' % (self.checkpoint_name, self.epoch)
if self.verbose:
print('Saving checkpoint to "%s"' % filename)
with open(filename, 'wb') as f:
pickle.dump(checkpoint, f)
def check_accuracy(self, X, y, num_samples=None, batch_size=100):
"""
Check accuracy of the model on the provided data.
Inputs:
- X: Array of data, of shape (N, d_1, ..., d_k)
- y: Array of labels, of shape (N,)
- num_samples: If not None, subsample the data and only test the model
on num_samples datapoints.
- batch_size: Split X and y into batches of this size to avoid using
too much memory.
Returns:
- acc: Scalar giving the fraction of instances that were correctly
classified by the model.
"""
# Maybe subsample the data
N = X.shape[0]
if num_samples is not None and N > num_samples:
mask = np.random.choice(N, num_samples)
N = num_samples
X = X[mask]
y = y[mask]
# Compute predictions in batches
num_batches = N // batch_size
if N % batch_size != 0:
num_batches += 1
y_pred = []
for i in range(num_batches):
start = i * batch_size
end = (i + 1) * batch_size
scores = self.model.loss(X[start:end])
y_pred.append(np.argmax(scores, axis=1))
y_pred = np.hstack(y_pred)
acc = np.mean(y_pred == y)
return acc
def train(self):
"""
Run optimization to train the model.
"""
num_train = self.X_train.shape[0]
iterations_per_epoch = max(num_train // self.batch_size, 1)
num_iterations = self.num_epochs * iterations_per_epoch
for t in range(num_iterations):
self._step()
# Maybe print training loss
if self.verbose and t % self.print_every == 0:
print('(Iteration %d / %d) loss: %f' % (
t + 1, num_iterations, self.loss_history[-1]))
# At the end of every epoch, increment the epoch counter and decay
# the learning rate.
epoch_end = (t + 1) % iterations_per_epoch == 0
if epoch_end:
self.epoch += 1
for k in self.optim_configs:
self.optim_configs[k]['learning_rate'] *= self.lr_decay
# Check train and val accuracy on the first iteration, the last
# iteration, and at the end of each epoch.
first_it = (t == 0)
last_it = (t == num_iterations - 1)
if first_it or last_it or epoch_end:
train_acc = self.check_accuracy(self.X_train, self.y_train,
num_samples=self.num_train_samples)
val_acc = self.check_accuracy(self.X_val, self.y_val,
num_samples=self.num_val_samples)
self.train_acc_history.append(train_acc)
self.val_acc_history.append(val_acc)
self._save_checkpoint()
if self.verbose:
print('(Epoch %d / %d) train acc: %f; val_acc: %f' % (
self.epoch, self.num_epochs, train_acc, val_acc))
# Keep track of the best model
if val_acc > self.best_val_acc:
self.best_val_acc = val_acc
self.best_params = {}
for k, v in self.model.params.items():
self.best_params[k] = v.copy()
# At the end of training swap the best params into the model
self.model.params = self.best_params

+ 73
- 0
assignment2/notebook_images/daseCV/vis_utils.py 查看文件

@ -0,0 +1,73 @@
from builtins import range
from past.builtins import xrange
from math import sqrt, ceil
import numpy as np
def visualize_grid(Xs, ubound=255.0, padding=1):
"""
Reshape a 4D tensor of image data to a grid for easy visualization.
Inputs:
- Xs: Data of shape (N, H, W, C)
- ubound: Output grid will have values scaled to the range [0, ubound]
- padding: The number of blank pixels between elements of the grid
"""
(N, H, W, C) = Xs.shape
grid_size = int(ceil(sqrt(N)))
grid_height = H * grid_size + padding * (grid_size - 1)
grid_width = W * grid_size + padding * (grid_size - 1)
grid = np.zeros((grid_height, grid_width, C))
next_idx = 0
y0, y1 = 0, H
for y in range(grid_size):
x0, x1 = 0, W
for x in range(grid_size):
if next_idx < N:
img = Xs[next_idx]
low, high = np.min(img), np.max(img)
grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
# grid[y0:y1, x0:x1] = Xs[next_idx]
next_idx += 1
x0 += W + padding
x1 += W + padding
y0 += H + padding
y1 += H + padding
# grid_max = np.max(grid)
# grid_min = np.min(grid)
# grid = ubound * (grid - grid_min) / (grid_max - grid_min)
return grid
def vis_grid(Xs):
""" visualize a grid of images """
(N, H, W, C) = Xs.shape
A = int(ceil(sqrt(N)))
G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
G *= np.min(Xs)
n = 0
for y in range(A):
for x in range(A):
if n < N:
G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:]
n += 1
# normalize to [0,1]
maxg = G.max()
ming = G.min()
G = (G - ming)/(maxg-ming)
return G
def vis_nn(rows):
""" visualize array of arrays of images """
N = len(rows)
D = len(rows[0])
H,W,C = rows[0][0].shape
Xs = rows[0][0]
G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
for y in range(N):
for x in range(D):
G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
# normalize to [0,1]
maxg = G.max()
ming = G.min()
G = (G - ming)/(maxg-ming)
return G
