Ver código fonte

上传文件至 'assignment1/daseCV'

master
孙秋实 3 anos atrás
pai
commit
eed91e706f
5 arquivos alterados com 614 adições e 0 exclusões
  1. BIN
      assignment1/daseCV/__init__.py
  2. +262
    -0
      assignment1/daseCV/data_utils.py
  3. +150
    -0
      assignment1/daseCV/features.py
  4. +129
    -0
      assignment1/daseCV/gradient_check.py
  5. +73
    -0
      assignment1/daseCV/vis_utils.py

BIN
assignment1/daseCV/__init__.py Ver arquivo


+ 262
- 0
assignment1/daseCV/data_utils.py Ver arquivo

@ -0,0 +1,262 @@
from __future__ import print_function
from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform
def load_pickle(f):
version = platform.python_version_tuple()
if version[0] == '2':
return pickle.load(f)
elif version[0] == '3':
return pickle.load(f, encoding='latin1')
raise ValueError("invalid python version: {}".format(version))
def load_CIFAR_batch(filename):
""" load single batch of cifar """
with open(filename, 'rb') as f:
datadict = load_pickle(f)
X = datadict['data']
Y = datadict['labels']
X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
Y = np.array(Y)
return X, Y
def load_CIFAR10(ROOT):
""" load all of cifar """
xs = []
ys = []
for b in range(1,6):
f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
X, Y = load_CIFAR_batch(f)
xs.append(X)
ys.append(Y)
Xtr = np.concatenate(xs)
Ytr = np.concatenate(ys)
del X, Y
Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
return Xtr, Ytr, Xte, Yte
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
subtract_mean=True):
"""
Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
it for classifiers. These are the same steps as we used for the SVM, but
condensed to a single function.
"""
# Load the raw CIFAR-10 data
cifar10_dir = 'daseCV/datasets/cifar-10-batches-py'
X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)
# Subsample the data
mask = list(range(num_training, num_training + num_validation))
X_val = X_train[mask]
y_val = y_train[mask]
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
# Normalize the data: subtract the mean image
if subtract_mean:
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
# Transpose so that channels come first
X_train = X_train.transpose(0, 3, 1, 2).copy()
X_val = X_val.transpose(0, 3, 1, 2).copy()
X_test = X_test.transpose(0, 3, 1, 2).copy()
# Package data into a dictionary
return {
'X_train': X_train, 'y_train': y_train,
'X_val': X_val, 'y_val': y_val,
'X_test': X_test, 'y_test': y_test,
}
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
"""
Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
TinyImageNet-200 have the same directory structure, so this can be used
to load any of them.
Inputs:
- path: String giving path to the directory to load.
- dtype: numpy datatype used to load the data.
- subtract_mean: Whether to subtract the mean training image.
Returns: A dictionary with the following entries:
- class_names: A list where class_names[i] is a list of strings giving the
WordNet names for class i in the loaded dataset.
- X_train: (N_tr, 3, 64, 64) array of training images
- y_train: (N_tr,) array of training labels
- X_val: (N_val, 3, 64, 64) array of validation images
- y_val: (N_val,) array of validation labels
- X_test: (N_test, 3, 64, 64) array of testing images.
- y_test: (N_test,) array of test labels; if test labels are not available
(such as in student code) then y_test will be None.
- mean_image: (3, 64, 64) array giving mean training image
"""
# First load wnids
with open(os.path.join(path, 'wnids.txt'), 'r') as f:
wnids = [x.strip() for x in f]
# Map wnids to integer labels
wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}
# Use words.txt to get names for each class
with open(os.path.join(path, 'words.txt'), 'r') as f:
wnid_to_words = dict(line.split('\t') for line in f)
for wnid, words in wnid_to_words.items():
wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
class_names = [wnid_to_words[wnid] for wnid in wnids]
# Next load training data.
X_train = []
y_train = []
for i, wnid in enumerate(wnids):
if (i + 1) % 20 == 0:
print('loading training data for synset %d / %d'
% (i + 1, len(wnids)))
# To figure out the filenames we need to open the boxes file
boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
with open(boxes_file, 'r') as f:
filenames = [x.split('\t')[0] for x in f]
num_images = len(filenames)
X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
y_train_block = wnid_to_label[wnid] * \
np.ones(num_images, dtype=np.int64)
for j, img_file in enumerate(filenames):
img_file = os.path.join(path, 'train', wnid, 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
## grayscale file
img.shape = (64, 64, 1)
X_train_block[j] = img.transpose(2, 0, 1)
X_train.append(X_train_block)
y_train.append(y_train_block)
# We need to concatenate all training data
X_train = np.concatenate(X_train, axis=0)
y_train = np.concatenate(y_train, axis=0)
# Next load validation data
with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
img_files = []
val_wnids = []
for line in f:
img_file, wnid = line.split('\t')[:2]
img_files.append(img_file)
val_wnids.append(wnid)
num_val = len(img_files)
y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'val', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_val[i] = img.transpose(2, 0, 1)
# Next load test images
# Students won't have test labels, so we need to iterate over files in the
# images directory.
img_files = os.listdir(os.path.join(path, 'test', 'images'))
X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
for i, img_file in enumerate(img_files):
img_file = os.path.join(path, 'test', 'images', img_file)
img = imread(img_file)
if img.ndim == 2:
img.shape = (64, 64, 1)
X_test[i] = img.transpose(2, 0, 1)
y_test = None
y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
if os.path.isfile(y_test_file):
with open(y_test_file, 'r') as f:
img_file_to_wnid = {}
for line in f:
line = line.split('\t')
img_file_to_wnid[line[0]] = line[1]
y_test = [wnid_to_label[img_file_to_wnid[img_file]]
for img_file in img_files]
y_test = np.array(y_test)
mean_image = X_train.mean(axis=0)
if subtract_mean:
X_train -= mean_image[None]
X_val -= mean_image[None]
X_test -= mean_image[None]
return {
'class_names': class_names,
'X_train': X_train,
'y_train': y_train,
'X_val': X_val,
'y_val': y_val,
'X_test': X_test,
'y_test': y_test,
'class_names': class_names,
'mean_image': mean_image,
}
def load_models(models_dir):
"""
Load saved models from disk. This will attempt to unpickle all files in a
directory; any files that give errors on unpickling (such as README.txt)
will be skipped.
Inputs:
- models_dir: String giving the path to a directory containing model files.
Each model file is a pickled dictionary with a 'model' field.
Returns:
A dictionary mapping model file names to models.
"""
models = {}
for model_file in os.listdir(models_dir):
with open(os.path.join(models_dir, model_file), 'rb') as f:
try:
models[model_file] = load_pickle(f)['model']
except pickle.UnpicklingError:
continue
return models
def load_imagenet_val(num=None):
"""Load a handful of validation images from ImageNet.
Inputs:
- num: Number of images to load (max of 25)
Returns:
- X: numpy array with shape [num, 224, 224, 3]
- y: numpy array of integer image labels, shape [num]
- class_names: dict mapping integer label to class name
"""
imagenet_fn = 'daseCV/datasets/imagenet_val_25.npz'
if not os.path.isfile(imagenet_fn):
print('file %s not found' % imagenet_fn)
print('Run the following:')
print('cd daseCV/datasets')
print('bash get_imagenet_val.sh')
assert False, 'Need to download imagenet_val_25.npz'
f = np.load(imagenet_fn)
X = f['X']
y = f['y']
class_names = f['label_map'].item()
if num is not None:
X = X[:num]
y = y[:num]
return X, y, class_names

+ 150
- 0
assignment1/daseCV/features.py Ver arquivo

@ -0,0 +1,150 @@
from __future__ import print_function
from builtins import zip
from builtins import range
from past.builtins import xrange
import matplotlib
import numpy as np
from scipy.ndimage import uniform_filter
def extract_features(imgs, feature_fns, verbose=False):
"""
Given pixel data for images and several feature functions that can operate on
single images, apply all feature functions to all images, concatenating the
feature vectors for each image and storing the features for all images in
a single matrix.
Inputs:
- imgs: N x H X W X C array of pixel data for N images.
- feature_fns: List of k feature functions. The ith feature function should
take as input an H x W x D array and return a (one-dimensional) array of
length F_i.
- verbose: Boolean; if true, print progress.
Returns:
An array of shape (N, F_1 + ... + F_k) where each column is the concatenation
of all features for a single image.
"""
num_images = imgs.shape[0]
if num_images == 0:
return np.array([])
# Use the first image to determine feature dimensions
feature_dims = []
first_image_features = []
for feature_fn in feature_fns:
feats = feature_fn(imgs[0].squeeze())
assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
feature_dims.append(feats.size)
first_image_features.append(feats)
# Now that we know the dimensions of the features, we can allocate a single
# big array to store all features as columns.
total_feature_dim = sum(feature_dims)
imgs_features = np.zeros((num_images, total_feature_dim))
imgs_features[0] = np.hstack(first_image_features).T
# Extract features for the rest of the images.
for i in range(1, num_images):
idx = 0
for feature_fn, feature_dim in zip(feature_fns, feature_dims):
next_idx = idx + feature_dim
imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
idx = next_idx
if verbose and i % 1000 == 999:
print('Done extracting features for %d / %d images' % (i+1, num_images))
return imgs_features
def rgb2gray(rgb):
"""Convert RGB image to grayscale
Parameters:
rgb : RGB image
Returns:
gray : grayscale image
"""
return np.dot(rgb[...,:3], [0.299, 0.587, 0.144])
def hog_feature(im):
"""Compute Histogram of Gradient (HOG) feature for an image
Modified from skimage.feature.hog
http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog
Reference:
Histograms of Oriented Gradients for Human Detection
Navneet Dalal and Bill Triggs, CVPR 2005
Parameters:
im : an input grayscale or rgb image
Returns:
feat: Histogram of Gradient (HOG) feature
"""
# convert rgb to grayscale if needed
if im.ndim == 3:
image = rgb2gray(im)
else:
image = np.at_least_2d(im)
sx, sy = image.shape # image size
orientations = 9 # number of gradient bins
cx, cy = (8, 8) # pixels per cell
gx = np.zeros(image.shape)
gy = np.zeros(image.shape)
gx[:, :-1] = np.diff(image, n=1, axis=1) # compute gradient on x-direction
gy[:-1, :] = np.diff(image, n=1, axis=0) # compute gradient on y-direction
grad_mag = np.sqrt(gx ** 2 + gy ** 2) # gradient magnitude
grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90 # gradient orientation
n_cellsx = int(np.floor(sx / cx)) # number of cells in x
n_cellsy = int(np.floor(sy / cy)) # number of cells in y
# compute orientations integral images
orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
for i in range(orientations):
# create new integral image for this orientation
# isolate orientations in this range
temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
grad_ori, 0)
temp_ori = np.where(grad_ori >= 180 / orientations * i,
temp_ori, 0)
# select magnitudes for those orientations
cond2 = temp_ori > 0
temp_mag = np.where(cond2, grad_mag, 0)
orientation_histogram[:,:,i] = uniform_filter(temp_mag, size=(cx, cy))[round(cx/2)::cx, round(cy/2)::cy].T
return orientation_histogram.ravel()
def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
"""
Compute color histogram for an image using hue.
Inputs:
- im: H x W x C array of pixel data for an RGB image.
- nbin: Number of histogram bins. (default: 10)
- xmin: Minimum pixel value (default: 0)
- xmax: Maximum pixel value (default: 255)
- normalized: Whether to normalize the histogram (default: True)
Returns:
1D vector of length nbin giving the color histogram over the hue of the
input image.
"""
ndim = im.ndim
bins = np.linspace(xmin, xmax, nbin+1)
hsv = matplotlib.colors.rgb_to_hsv(im/xmax) * xmax
imhist, bin_edges = np.histogram(hsv[:,:,0], bins=bins, density=normalized)
imhist = imhist * np.diff(bin_edges)
# return histogram
return imhist

+ 129
- 0
assignment1/daseCV/gradient_check.py Ver arquivo

@ -0,0 +1,129 @@
from __future__ import print_function
from builtins import range
from past.builtins import xrange
import numpy as np
from random import randrange
def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
"""
a naive implementation of numerical gradient of f at x
- f should be a function that takes a single argument
- x is the point (numpy array) to evaluate the gradient at
"""
fx = f(x) # evaluate function value at original point
grad = np.zeros_like(x)
# iterate over all indexes in x
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
# evaluate function at x+h
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evalute f(x + h)
x[ix] = oldval - h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # restore
# compute the partial derivative with centered formula
grad[ix] = (fxph - fxmh) / (2 * h) # the slope
if verbose:
print(ix, grad[ix])
it.iternext() # step to next dimension
return grad
def eval_numerical_gradient_array(f, x, df, h=1e-5):
"""
Evaluate a numeric gradient for a function that accepts a numpy
array and returns a numpy array.
"""
grad = np.zeros_like(x)
it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
while not it.finished:
ix = it.multi_index
oldval = x[ix]
x[ix] = oldval + h
pos = f(x).copy()
x[ix] = oldval - h
neg = f(x).copy()
x[ix] = oldval
grad[ix] = np.sum((pos - neg) * df) / (2 * h)
it.iternext()
return grad
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
"""
Compute numeric gradients for a function that operates on input
and output blobs.
We assume that f accepts several input blobs as arguments, followed by a
blob where outputs will be written. For example, f might be called like:
f(x, w, out)
where x and w are input Blobs, and the result of f will be written to out.
Inputs:
- f: function
- inputs: tuple of input blobs
- output: output blob
- h: step size
"""
numeric_diffs = []
for input_blob in inputs:
diff = np.zeros_like(input_blob.diffs)
it = np.nditer(input_blob.vals, flags=['multi_index'],
op_flags=['readwrite'])
while not it.finished:
idx = it.multi_index
orig = input_blob.vals[idx]
input_blob.vals[idx] = orig + h
f(*(inputs + (output,)))
pos = np.copy(output.vals)
input_blob.vals[idx] = orig - h
f(*(inputs + (output,)))
neg = np.copy(output.vals)
input_blob.vals[idx] = orig
diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)
it.iternext()
numeric_diffs.append(diff)
return numeric_diffs
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
return eval_numerical_gradient_blobs(lambda *args: net.forward(),
inputs, output, h=h)
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
"""
sample a few random elements and only return numerical
in this dimensions.
"""
for i in range(num_checks):
ix = tuple([randrange(m) for m in x.shape])
oldval = x[ix]
x[ix] = oldval + h # increment by h
fxph = f(x) # evaluate f(x + h)
x[ix] = oldval - h # increment by h
fxmh = f(x) # evaluate f(x - h)
x[ix] = oldval # reset
grad_numerical = (fxph - fxmh) / (2 * h)
grad_analytic = analytic_grad[ix]
rel_error = (abs(grad_numerical - grad_analytic) /
(abs(grad_numerical) + abs(grad_analytic)))
print('numerical: %f analytic: %f, relative error: %e'
%(grad_numerical, grad_analytic, rel_error))

+ 73
- 0
assignment1/daseCV/vis_utils.py Ver arquivo

@ -0,0 +1,73 @@
from builtins import range
from past.builtins import xrange
from math import sqrt, ceil
import numpy as np
def visualize_grid(Xs, ubound=255.0, padding=1):
"""
Reshape a 4D tensor of image data to a grid for easy visualization.
Inputs:
- Xs: Data of shape (N, H, W, C)
- ubound: Output grid will have values scaled to the range [0, ubound]
- padding: The number of blank pixels between elements of the grid
"""
(N, H, W, C) = Xs.shape
grid_size = int(ceil(sqrt(N)))
grid_height = H * grid_size + padding * (grid_size - 1)
grid_width = W * grid_size + padding * (grid_size - 1)
grid = np.zeros((grid_height, grid_width, C))
next_idx = 0
y0, y1 = 0, H
for y in range(grid_size):
x0, x1 = 0, W
for x in range(grid_size):
if next_idx < N:
img = Xs[next_idx]
low, high = np.min(img), np.max(img)
grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
# grid[y0:y1, x0:x1] = Xs[next_idx]
next_idx += 1
x0 += W + padding
x1 += W + padding
y0 += H + padding
y1 += H + padding
# grid_max = np.max(grid)
# grid_min = np.min(grid)
# grid = ubound * (grid - grid_min) / (grid_max - grid_min)
return grid
def vis_grid(Xs):
""" visualize a grid of images """
(N, H, W, C) = Xs.shape
A = int(ceil(sqrt(N)))
G = np.ones((A*H+A, A*W+A, C), Xs.dtype)
G *= np.min(Xs)
n = 0
for y in range(A):
for x in range(A):
if n < N:
G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n,:,:,:]
n += 1
# normalize to [0,1]
maxg = G.max()
ming = G.min()
G = (G - ming)/(maxg-ming)
return G
def vis_nn(rows):
""" visualize array of arrays of images """
N = len(rows)
D = len(rows[0])
H,W,C = rows[0][0].shape
Xs = rows[0][0]
G = np.ones((N*H+N, D*W+D, C), Xs.dtype)
for y in range(N):
for x in range(D):
G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
# normalize to [0,1]
maxg = G.max()
ming = G.min()
G = (G - ming)/(maxg-ming)
return G

Carregando…
Cancelar
Salvar