@@ -0,0 +1,262 @@
from __future__ import print_function
from builtins import range
from six.moves import cPickle as pickle
import numpy as np
import os
from imageio import imread
import platform


def load_pickle(f):
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """ load single batch of cifar """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
        X = datadict['data']
        Y = datadict['labels']
        X = X.reshape(10000, 3, 32, 32).transpose(0, 2, 3, 1).astype("float")
        Y = np.array(Y)
        return X, Y
def load_CIFAR10(ROOT):
    """ load all of cifar """
    xs = []
    ys = []
    for b in range(1, 6):
        f = os.path.join(ROOT, 'data_batch_%d' % (b, ))
        X, Y = load_CIFAR_batch(f)
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    del X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte
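
# Example (a minimal sketch; assumes the standard CIFAR-10 python batches have
# been downloaded to the directory passed as ROOT):
#
#   Xtr, Ytr, Xte, Yte = load_CIFAR10('daseCV/datasets/cifar-10-batches-py')
#   # Xtr.shape == (50000, 32, 32, 3), Ytr.shape == (50000,)
#   # Xte.shape == (10000, 32, 32, 3), Yte.shape == (10000,)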
def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True):
    """
    Load the CIFAR-10 dataset from disk and perform preprocessing to prepare
    it for classifiers. These are the same steps as we used for the SVM, but
    condensed to a single function.
    """
    # Load the raw CIFAR-10 data
    cifar10_dir = 'daseCV/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data
    mask = list(range(num_training, num_training + num_validation))
    X_val = X_train[mask]
    y_val = y_train[mask]
    mask = list(range(num_training))
    X_train = X_train[mask]
    y_train = y_train[mask]
    mask = list(range(num_test))
    X_test = X_test[mask]
    y_test = y_test[mask]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }
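
# Example usage (a minimal sketch; assumes the dataset lives under
# daseCV/datasets/cifar-10-batches-py as hard-coded above):
#
#   data = get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000)
#   for k, v in data.items():
#       print(k, v.shape)
#   # X_train: (49000, 3, 32, 32)   y_train: (49000,)
#   # X_val:   (1000, 3, 32, 32)    y_val:   (1000,)
#   # X_test:  (1000, 3, 32, 32)    y_test:  (1000,)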
def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        wnid_to_words = dict(line.split('\t') for line in f)
        for wnid, words in wnid_to_words.items():
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = wnid_to_label[wnid] * \
            np.ones(num_images, dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                # grayscale file; add a singleton channel dimension
                img.shape = (64, 64, 1)
            X_train_block[j] = img.transpose(2, 0, 1)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        img_files = []
        val_wnids = []
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid)
        num_val = len(img_files)
        y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
        X_val = np.zeros((num_val, 3, 64, 64), dtype=dtype)
        for i, img_file in enumerate(img_files):
            img_file = os.path.join(path, 'val', 'images', img_file)
            img = imread(img_file)
            if img.ndim == 2:
                img.shape = (64, 64, 1)
            X_val[i] = img.transpose(2, 0, 1)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.
    img_files = os.listdir(os.path.join(path, 'test', 'images'))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        img = imread(img_file)
        if img.ndim == 2:
            img.shape = (64, 64, 1)
        X_test[i] = img.transpose(2, 0, 1)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        with open(y_test_file, 'r') as f:
            img_file_to_wnid = {}
            for line in f:
                line = line.split('\t')
                img_file_to_wnid[line[0]] = line[1]
        y_test = [wnid_to_label[img_file_to_wnid[img_file]]
                  for img_file in img_files]
        y_test = np.array(y_test)

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }
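
# Example usage (a sketch; the path below is an assumption -- point it at
# whichever TinyImageNet split you have downloaded):
#
#   data = load_tiny_imagenet('daseCV/datasets/tiny-imagenet-100-A', subtract_mean=True)
#   print(data['X_train'].shape)       # (N_tr, 3, 64, 64)
#   print(len(data['class_names']))    # one list of WordNet names per class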
def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    models = {}
    for model_file in os.listdir(models_dir):
        with open(os.path.join(models_dir, model_file), 'rb') as f:
            try:
                models[model_file] = load_pickle(f)['model']
            except pickle.UnpicklingError:
                continue
    return models
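
# Example usage (a sketch; 'path/to/saved_models' is a hypothetical directory
# containing pickled dictionaries with a 'model' field):
#
#   models = load_models('path/to/saved_models')
#   for name, model in models.items():
#       print(name, type(model))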
def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (max of 25)

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name
    """
    imagenet_fn = 'daseCV/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd daseCV/datasets')
        print('bash get_imagenet_val.sh')
        assert False, 'Need to download imagenet_val_25.npz'
    # label_map is stored as a pickled object inside the npz archive, so
    # allow_pickle must be enabled for recent versions of numpy.
    f = np.load(imagenet_fn, allow_pickle=True)
    X = f['X']
    y = f['y']
    class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names
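
# Example usage (a sketch; assumes imagenet_val_25.npz has been downloaded as
# described above):
#
#   X, y, class_names = load_imagenet_val(num=5)
#   # X.shape == (5, 224, 224, 3), y.shape == (5,)
#   print([class_names[int(label)] for label in y])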
@@ -0,0 +1,150 @@
from __future__ import print_function
from builtins import zip
from builtins import range
from past.builtins import xrange

import matplotlib
import numpy as np
from scipy.ndimage import uniform_filter


def extract_features(imgs, feature_fns, verbose=False):
    """
    Given pixel data for images and several feature functions that can operate
    on single images, apply all feature functions to all images, concatenating
    the feature vectors for each image and storing the features for all images
    in a single matrix.

    Inputs:
    - imgs: N x H x W x C array of pixel data for N images.
    - feature_fns: List of k feature functions. The ith feature function should
      take as input an H x W x C array and return a (one-dimensional) array of
      length F_i.
    - verbose: Boolean; if true, print progress.

    Returns:
    An array of shape (N, F_1 + ... + F_k) where each row is the concatenation
    of all features for a single image.
    """
    num_images = imgs.shape[0]
    if num_images == 0:
        return np.array([])

    # Use the first image to determine feature dimensions
    feature_dims = []
    first_image_features = []
    for feature_fn in feature_fns:
        feats = feature_fn(imgs[0].squeeze())
        assert len(feats.shape) == 1, 'Feature functions must be one-dimensional'
        feature_dims.append(feats.size)
        first_image_features.append(feats)

    # Now that we know the dimensions of the features, we can allocate a single
    # big array to store the features for all images.
    total_feature_dim = sum(feature_dims)
    imgs_features = np.zeros((num_images, total_feature_dim))
    imgs_features[0] = np.hstack(first_image_features).T

    # Extract features for the rest of the images.
    for i in range(1, num_images):
        idx = 0
        for feature_fn, feature_dim in zip(feature_fns, feature_dims):
            next_idx = idx + feature_dim
            imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
            idx = next_idx
        if verbose and i % 1000 == 999:
            print('Done extracting features for %d / %d images' % (i + 1, num_images))

    return imgs_features
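
# Example usage (a sketch; hog_feature and color_histogram_hsv are defined
# later in this file, and X_train is assumed to be an (N, 32, 32, 3) array of
# RGB images):
#
#   feature_fns = [hog_feature, lambda img: color_histogram_hsv(img, nbin=10)]
#   X_train_feats = extract_features(X_train, feature_fns, verbose=True)
#   # X_train_feats.shape == (N, F_hog + 10)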
def rgb2gray(rgb):
    """Convert RGB image to grayscale

    Parameters:
      rgb : RGB image

    Returns:
      gray : grayscale image
    """
    # Standard ITU-R BT.601 luma weights (the blue coefficient is 0.114).
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])
def hog_feature(im):
    """Compute Histogram of Oriented Gradients (HOG) feature for an image

    Modified from skimage.feature.hog
    http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

    Reference:
      Histograms of Oriented Gradients for Human Detection
      Navneet Dalal and Bill Triggs, CVPR 2005

    Parameters:
      im : an input grayscale or rgb image

    Returns:
      feat: Histogram of Oriented Gradients (HOG) feature
    """
    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9  # number of gradient bins
    cx, cy = (8, 8)  # pixels per cell

    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient on x-direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient on y-direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90  # gradient orientation

    n_cellsx = int(np.floor(sx / cx))  # number of cells in x
    n_cellsy = int(np.floor(sy / cy))  # number of cells in y
    # compute orientations integral images
    orientation_histogram = np.zeros((n_cellsx, n_cellsy, orientations))
    for i in range(orientations):
        # create new integral image for this orientation
        # isolate orientations in this range
        temp_ori = np.where(grad_ori < 180 / orientations * (i + 1),
                            grad_ori, 0)
        temp_ori = np.where(grad_ori >= 180 / orientations * i,
                            temp_ori, 0)
        # select magnitudes for those orientations
        cond2 = temp_ori > 0
        temp_mag = np.where(cond2, grad_mag, 0)
        orientation_histogram[:, :, i] = uniform_filter(
            temp_mag, size=(cx, cy))[round(cx / 2)::cx, round(cy / 2)::cy].T

    return orientation_histogram.ravel()
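
# Worked example (a sketch): for a 32 x 32 input with 8 x 8 pixel cells and 9
# orientation bins there are 4 x 4 cells, so the returned feature vector has
# 4 * 4 * 9 = 144 entries:
#
#   img = np.random.rand(32, 32, 3)
#   feat = hog_feature(img)
#   # feat.shape == (144,)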
def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image.
    """
    ndim = im.ndim
    bins = np.linspace(xmin, xmax, nbin + 1)
    hsv = matplotlib.colors.rgb_to_hsv(im / xmax) * xmax
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins, density=normalized)
    imhist = imhist * np.diff(bin_edges)

    # return histogram
    return imhist
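
# Example usage (a sketch; assumes an RGB image with pixel values in [0, 255]):
#
#   img = np.random.rand(32, 32, 3) * 255.0
#   hist = color_histogram_hsv(img, nbin=10)
#   # hist.shape == (10,); with normalized=True the entries sum to (approximately) 1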
@@ -0,0 +1,129 @@
from __future__ import print_function
from builtins import range
from past.builtins import xrange

import numpy as np
from random import randrange


def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x
    - f should be a function that takes a single argument
    - x is the point (numpy array) to evaluate the gradient at
    """

    fx = f(x)  # evaluate function value at original point
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            print(ix, grad[ix])
        it.iternext()  # step to next dimension

    return grad
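
# Example usage (a minimal sketch): numerically differentiate a scalar-valued
# function of an array, e.g. f(x) = sum(x ** 2), whose true gradient is 2 * x:
#
#   x = np.random.randn(3, 4)
#   grad = eval_numerical_gradient(lambda x: np.sum(x ** 2), x, verbose=False)
#   # grad should be close to 2 * x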
def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad
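
# Example usage (a sketch): check the gradient of an array -> array function by
# contracting with an upstream gradient df, e.g. for f(x) = x ** 2:
#
#   x = np.random.randn(4, 5)
#   df = np.random.randn(4, 5)          # upstream gradient flowing into f
#   dx_num = eval_numerical_gradient_array(lambda x: x ** 2, x, df)
#   # dx_num should be close to 2 * x * df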
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Sample a few random elements and only check the numerical gradient
    in those dimensions.
    """

    for i in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic)))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))
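
# Example usage (a sketch): spot-check an analytic gradient at a few random
# coordinates, e.g. for f(x) = sum(x ** 2) with analytic gradient 2 * x:
#
#   x = np.random.randn(10, 5)
#   grad_check_sparse(lambda x: np.sum(x ** 2), x, 2 * x, num_checks=5)
#   # the printed relative errors should be tiny (roughly < 1e-7)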
@@ -0,0 +1,73 @@
from builtins import range
from past.builtins import xrange
from math import sqrt, ceil
import numpy as np


def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound]
    - padding: The number of blank pixels between elements of the grid
    """
    (N, H, W, C) = Xs.shape
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                # grid[y0:y1, x0:x1] = Xs[next_idx]
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    # grid_max = np.max(grid)
    # grid_min = np.min(grid)
    # grid = ubound * (grid - grid_min) / (grid_max - grid_min)
    return grid
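
# Example usage (a sketch; assumes a batch of images in (N, H, W, C) layout,
# e.g. first-layer conv filters transposed to channels-last):
#
#   import matplotlib.pyplot as plt
#   imgs = np.random.rand(16, 32, 32, 3) * 255.0
#   grid = visualize_grid(imgs, ubound=255.0, padding=3)
#   plt.imshow(grid.astype('uint8'))
#   plt.axis('off')
#   plt.show()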
def vis_grid(Xs):
    """ visualize a grid of images """
    (N, H, W, C) = Xs.shape
    A = int(ceil(sqrt(N)))
    G = np.ones((A * H + A, A * W + A, C), Xs.dtype)
    G *= np.min(Xs)
    n = 0
    for y in range(A):
        for x in range(A):
            if n < N:
                G[y * H + y:(y + 1) * H + y, x * W + x:(x + 1) * W + x, :] = Xs[n, :, :, :]
                n += 1
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    G = (G - ming) / (maxg - ming)
    return G


def vis_nn(rows):
    """ visualize array of arrays of images """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    Xs = rows[0][0]
    G = np.ones((N * H + N, D * W + D, C), Xs.dtype)
    for y in range(N):
        for x in range(D):
            G[y * H + y:(y + 1) * H + y, x * W + x:(x + 1) * W + x, :] = rows[y][x]
    # normalize to [0,1]
    maxg = G.max()
    ming = G.min()
    G = (G - ming) / (maxg - ming)
    return G