# Helper code for the daseCV assignment package, recovered from a
# whitespace-collapsed patch that added four modules.  Each section below is
# labelled with the module path it came from.

import os
import platform
from math import ceil, sqrt
from random import randrange

import numpy as np
from scipy.ndimage import uniform_filter

try:  # Python 2 with the ``future`` package: use the lazy Python 3 builtins.
    from builtins import range, zip
except ImportError:  # plain Python 2: the native builtins are good enough
    pass

try:  # Unused here; kept so ``from <module> import xrange`` keeps working.
    from past.builtins import xrange  # noqa: F401
except ImportError:
    xrange = range

try:
    from six.moves import cPickle as pickle
except ImportError:  # ``six`` is only required on Python 2
    import pickle


# ---------------------------------------------------------------------------
# assignment1/daseCV/data_utils.py
# ---------------------------------------------------------------------------

def load_pickle(f):
    """Unpickle one object from the binary file object ``f``.

    On Python 3 the stream is decoded with ``encoding='latin1'``; on
    Python 2 ``pickle.load`` is called without an encoding.

    NOTE(security): unpickling can execute arbitrary code.  Only use this on
    files you trust.

    Raises:
      ValueError: if the interpreter's major version is neither 2 nor 3.
    """
    version = platform.python_version_tuple()
    if version[0] == '2':
        return pickle.load(f)
    elif version[0] == '3':
        return pickle.load(f, encoding='latin1')
    raise ValueError("invalid python version: {}".format(version))


def load_CIFAR_batch(filename):
    """Load a single CIFAR batch file.

    Inputs:
    - filename: Path of a pickled dictionary with 'data' and 'labels' entries.

    Returns:
      A tuple (X, Y): X is a float array of shape (N, 32, 32, 3) and Y is an
      array built from the 'labels' entry.
    """
    with open(filename, 'rb') as f:
        datadict = load_pickle(f)
    # -1 lets NumPy infer the number of images instead of assuming 10000.
    X = np.asarray(datadict['data']).reshape(-1, 3, 32, 32)
    X = X.transpose(0, 2, 3, 1).astype("float")
    Y = np.array(datadict['labels'])
    return X, Y


def load_CIFAR10(ROOT):
    """Load the five training batches and the test batch found under ROOT.

    Inputs:
    - ROOT: Directory containing 'data_batch_1' .. 'data_batch_5' and
      'test_batch'.

    Returns:
      A tuple (Xtr, Ytr, Xte, Yte).
    """
    xs = []
    ys = []
    for b in range(1, 6):
        X, Y = load_CIFAR_batch(os.path.join(ROOT, 'data_batch_%d' % (b, )))
        xs.append(X)
        ys.append(Y)
    Xtr = np.concatenate(xs)
    Ytr = np.concatenate(ys)
    # Drop every reference to the per-batch arrays before loading more data.
    del xs, ys, X, Y
    Xte, Yte = load_CIFAR_batch(os.path.join(ROOT, 'test_batch'))
    return Xtr, Ytr, Xte, Yte


def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000,
                     subtract_mean=True,
                     cifar10_dir='daseCV/datasets/cifar-10-batches-py'):
    """Load CIFAR-10 from disk and split/preprocess it for classifiers.

    Inputs:
    - num_training: Number of leading training images used for training.
    - num_validation: Number of images, taken right after the training
      slice, used for validation.
    - num_test: Number of leading test images to keep.
    - subtract_mean: Whether to subtract the mean training image from all
      three splits.
    - cifar10_dir: Directory holding the CIFAR-10 batch files.

    Returns:
      A dictionary with keys 'X_train', 'y_train', 'X_val', 'y_val',
      'X_test', 'y_test'.  Image arrays are channel-first: (N, 3, 32, 32).
    """
    # Load the raw CIFAR-10 data
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    # Subsample the data.  Integer-array indexing copies the data and raises
    # IndexError if more images are requested than are available.
    val_idx = np.arange(num_training, num_training + num_validation)
    train_idx = np.arange(num_training)
    test_idx = np.arange(num_test)
    X_val, y_val = X_train[val_idx], y_train[val_idx]
    X_train, y_train = X_train[train_idx], y_train[train_idx]
    X_test, y_test = X_test[test_idx], y_test[test_idx]

    # Normalize the data: subtract the mean image
    if subtract_mean:
        mean_image = np.mean(X_train, axis=0)
        X_train -= mean_image
        X_val -= mean_image
        X_test -= mean_image

    # Transpose so that channels come first
    X_train = X_train.transpose(0, 3, 1, 2).copy()
    X_val = X_val.transpose(0, 3, 1, 2).copy()
    X_test = X_test.transpose(0, 3, 1, 2).copy()

    # Package data into a dictionary
    return {
        'X_train': X_train, 'y_train': y_train,
        'X_val': X_val, 'y_val': y_val,
        'X_test': X_test, 'y_test': y_test,
    }


def _read_image_chw(img_path, imread):
    """Read one image with ``imread`` and return it channel-first (C, H, W)."""
    img = imread(img_path)
    if img.ndim == 2:
        # Grayscale file: add a singleton channel axis.  Assigning the
        # (1, H, W) result into a 3-channel slot broadcasts it to all channels.
        img = img[:, :, np.newaxis]
    return img.transpose(2, 0, 1)


def load_tiny_imagenet(path, dtype=np.float32, subtract_mean=True):
    """
    Load TinyImageNet. Each of TinyImageNet-100-A, TinyImageNet-100-B, and
    TinyImageNet-200 have the same directory structure, so this can be used
    to load any of them.

    Inputs:
    - path: String giving path to the directory to load.
    - dtype: numpy datatype used to load the data.
    - subtract_mean: Whether to subtract the mean training image.

    Returns: A dictionary with the following entries:
    - class_names: A list where class_names[i] is a list of strings giving the
      WordNet names for class i in the loaded dataset.
    - X_train: (N_tr, 3, 64, 64) array of training images
    - y_train: (N_tr,) array of training labels
    - X_val: (N_val, 3, 64, 64) array of validation images
    - y_val: (N_val,) array of validation labels
    - X_test: (N_test, 3, 64, 64) array of testing images, in sorted
      file-name order.
    - y_test: (N_test,) array of test labels; if test labels are not available
      (such as in student code) then y_test will be None.
    - mean_image: (3, 64, 64) array giving mean training image
    """
    # Deferred so the rest of the package imports without imageio installed.
    from imageio import imread

    # First load wnids
    with open(os.path.join(path, 'wnids.txt'), 'r') as f:
        wnids = [x.strip() for x in f]

    # Map wnids to integer labels
    wnid_to_label = {wnid: i for i, wnid in enumerate(wnids)}

    # Use words.txt to get names for each class
    wnid_to_words = {}
    with open(os.path.join(path, 'words.txt'), 'r') as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            wnid, words = line.split('\t', 1)
            wnid_to_words[wnid] = [w.strip() for w in words.split(',')]
    class_names = [wnid_to_words[wnid] for wnid in wnids]

    # Next load training data.
    X_train = []
    y_train = []
    for i, wnid in enumerate(wnids):
        if (i + 1) % 20 == 0:
            print('loading training data for synset %d / %d'
                  % (i + 1, len(wnids)))
        # To figure out the filenames we need to open the boxes file
        boxes_file = os.path.join(path, 'train', wnid, '%s_boxes.txt' % wnid)
        with open(boxes_file, 'r') as f:
            filenames = [x.split('\t')[0] for x in f]
        num_images = len(filenames)

        X_train_block = np.zeros((num_images, 3, 64, 64), dtype=dtype)
        y_train_block = np.full(num_images, wnid_to_label[wnid],
                                dtype=np.int64)
        for j, img_file in enumerate(filenames):
            img_file = os.path.join(path, 'train', wnid, 'images', img_file)
            X_train_block[j] = _read_image_chw(img_file, imread)
        X_train.append(X_train_block)
        y_train.append(y_train_block)

    # We need to concatenate all training data
    X_train = np.concatenate(X_train, axis=0)
    y_train = np.concatenate(y_train, axis=0)

    # Next load validation data
    img_files = []
    val_wnids = []
    with open(os.path.join(path, 'val', 'val_annotations.txt'), 'r') as f:
        for line in f:
            img_file, wnid = line.split('\t')[:2]
            img_files.append(img_file)
            val_wnids.append(wnid.strip())
    y_val = np.array([wnid_to_label[wnid] for wnid in val_wnids])
    X_val = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'val', 'images', img_file)
        X_val[i] = _read_image_chw(img_file, imread)

    # Next load test images
    # Students won't have test labels, so we need to iterate over files in the
    # images directory.  Sorting makes the row order reproducible, since
    # os.listdir returns entries in arbitrary order.
    img_files = sorted(os.listdir(os.path.join(path, 'test', 'images')))
    X_test = np.zeros((len(img_files), 3, 64, 64), dtype=dtype)
    for i, img_file in enumerate(img_files):
        img_file = os.path.join(path, 'test', 'images', img_file)
        X_test[i] = _read_image_chw(img_file, imread)

    y_test = None
    y_test_file = os.path.join(path, 'test', 'test_annotations.txt')
    if os.path.isfile(y_test_file):
        img_file_to_wnid = {}
        with open(y_test_file, 'r') as f:
            for line in f:
                fields = line.split('\t')
                # strip(): the wnid may be the last field and carry the newline
                img_file_to_wnid[fields[0]] = fields[1].strip()
        y_test = np.array([wnid_to_label[img_file_to_wnid[img_file]]
                           for img_file in img_files])

    mean_image = X_train.mean(axis=0)
    if subtract_mean:
        X_train -= mean_image[None]
        X_val -= mean_image[None]
        X_test -= mean_image[None]

    return {
        'class_names': class_names,
        'X_train': X_train,
        'y_train': y_train,
        'X_val': X_val,
        'y_val': y_val,
        'X_test': X_test,
        'y_test': y_test,
        'mean_image': mean_image,
    }


def load_models(models_dir):
    """
    Load saved models from disk. This will attempt to unpickle all files in a
    directory; any files that give errors on unpickling (such as README.txt)
    will be skipped, as will anything that is not a regular file.

    Inputs:
    - models_dir: String giving the path to a directory containing model files.
      Each model file is a pickled dictionary with a 'model' field.

    Returns:
    A dictionary mapping model file names to models.
    """
    # Besides UnpicklingError, the pickle documentation lists these as
    # exceptions that malformed input may raise during unpickling.
    unpickle_errors = (pickle.UnpicklingError, AttributeError, EOFError,
                       ImportError, IndexError)
    models = {}
    for model_file in os.listdir(models_dir):
        model_path = os.path.join(models_dir, model_file)
        if not os.path.isfile(model_path):
            continue  # sub-directories cannot be opened as model files
        with open(model_path, 'rb') as f:
            try:
                models[model_file] = load_pickle(f)['model']
            except unpickle_errors:
                continue
    return models


def load_imagenet_val(num=None):
    """Load a handful of validation images from ImageNet.

    Inputs:
    - num: Number of images to load (max of 25)

    Returns:
    - X: numpy array with shape [num, 224, 224, 3]
    - y: numpy array of integer image labels, shape [num]
    - class_names: dict mapping integer label to class name

    Raises:
      AssertionError: if the dataset file has not been downloaded.
    """
    imagenet_fn = 'daseCV/datasets/imagenet_val_25.npz'
    if not os.path.isfile(imagenet_fn):
        print('file %s not found' % imagenet_fn)
        print('Run the following:')
        print('cd daseCV/datasets')
        print('bash get_imagenet_val.sh')
        # Raised explicitly (not via ``assert``) so the check survives
        # ``python -O`` while keeping the exception type unchanged.
        raise AssertionError('Need to download imagenet_val_25.npz')
    # 'label_map' is read back with .item(), i.e. it is stored as a pickled
    # object, which recent NumPy refuses to load unless allow_pickle=True.
    # NOTE(security): only safe because this is a trusted, local dataset file.
    with np.load(imagenet_fn, allow_pickle=True) as f:
        X = f['X']
        y = f['y']
        class_names = f['label_map'].item()
    if num is not None:
        X = X[:num]
        y = y[:num]
    return X, y, class_names


# ---------------------------------------------------------------------------
# assignment1/daseCV/features.py
# ---------------------------------------------------------------------------

def extract_features(imgs, feature_fns, verbose=False):
    """
    Given pixel data for images and several feature functions that can operate
    on single images, apply all feature functions to all images, concatenating
    the feature vectors for each image and storing the features for all images
    in a single matrix.

    Inputs:
    - imgs: N x H X W X C array of pixel data for N images.
    - feature_fns: List of k feature functions. The ith feature function should
      take as input an H x W x D array and return a (one-dimensional) array of
      length F_i.
    - verbose: Boolean; if true, print progress.

    Returns:
    An array of shape (N, F_1 + ... + F_k) where each row is the concatenation
    of all features for a single image.  An empty ``imgs`` yields
    ``np.array([])``.

    Raises:
      AssertionError: if a feature function returns a non-1-D array.
    """
    num_images = imgs.shape[0]
    if num_images == 0:
        return np.array([])
    if len(feature_fns) == 0:
        # Nothing to compute; np.hstack would reject an empty list.
        return np.zeros((num_images, 0))

    # Use the first image to determine feature dimensions
    first_image_features = []
    for feature_fn in feature_fns:
        feats = feature_fn(imgs[0].squeeze())
        if len(feats.shape) != 1:
            raise AssertionError('Feature functions must be one-dimensional')
        first_image_features.append(feats)
    feature_dims = [feats.size for feats in first_image_features]

    # Now that we know the dimensions of the features, we can allocate a single
    # big array to store the features of image i in row i.
    imgs_features = np.zeros((num_images, sum(feature_dims)))
    imgs_features[0] = np.hstack(first_image_features)

    # Extract features for the rest of the images.
    for i in range(1, num_images):
        idx = 0
        for feature_fn, feature_dim in zip(feature_fns, feature_dims):
            next_idx = idx + feature_dim
            imgs_features[i, idx:next_idx] = feature_fn(imgs[i].squeeze())
            idx = next_idx
        if verbose and i % 1000 == 999:
            print('Done extracting features for %d / %d images'
                  % (i + 1, num_images))

    return imgs_features


def rgb2gray(rgb):
    """Convert RGB image to grayscale

    Parameters:
      rgb : RGB image; only the first three entries of the last axis are used

    Returns:
      gray : grayscale image

    """
    # ITU-R BT.601 luma weights; they sum to 1 so white stays at full scale.
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])


def hog_feature(im):
    """Compute Histogram of Gradient (HOG) feature for an image

    Modified from skimage.feature.hog
    http://pydoc.net/Python/scikits-image/0.4.2/skimage.feature.hog

    Reference:
      Histograms of Oriented Gradients for Human Detection
      Navneet Dalal and Bill Triggs, CVPR 2005

    Parameters:
      im : an input grayscale or rgb image

    Returns:
      feat: Histogram of Gradient (HOG) feature, a 1-D array with
        9 * (rows // 8) * (cols // 8) entries

    """

    # convert rgb to grayscale if needed
    if im.ndim == 3:
        image = rgb2gray(im)
    else:
        image = np.atleast_2d(im)

    sx, sy = image.shape  # image size
    orientations = 9  # number of gradient bins
    cx, cy = (8, 8)  # pixels per cell

    gx = np.zeros(image.shape)
    gy = np.zeros(image.shape)
    gx[:, :-1] = np.diff(image, n=1, axis=1)  # compute gradient on x-direction
    gy[:-1, :] = np.diff(image, n=1, axis=0)  # compute gradient on y-direction
    grad_mag = np.sqrt(gx ** 2 + gy ** 2)  # gradient magnitude
    # gradient orientation in degrees; the epsilon keeps gx away from exact 0
    grad_ori = np.arctan2(gy, (gx + 1e-15)) * (180 / np.pi) + 90

    n_cellsx = sx // cx  # number of cells in x
    n_cellsy = sy // cy  # number of cells in y
    bin_width = 180.0 / orientations
    # The per-bin cell grid is stored transposed.  Allocating the transposed
    # shape gives the same values for square images and also accepts
    # non-square ones.
    orientation_histogram = np.zeros((n_cellsy, n_cellsx, orientations))
    for i in range(orientations):
        # keep magnitudes whose orientation falls inside this bin; an
        # orientation of exactly 0 is excluded
        in_bin = ((grad_ori >= bin_width * i) &
                  (grad_ori < bin_width * (i + 1)) &
                  (grad_ori > 0))
        temp_mag = np.where(in_bin, grad_mag, 0)
        # average over cx-by-cy windows, sampled at the cell centres
        cells = uniform_filter(temp_mag, size=(cx, cy))[cx // 2::cx,
                                                       cy // 2::cy]
        # trim the partial cell that appears when a side is not a multiple
        # of the cell size
        orientation_histogram[:, :, i] = cells[:n_cellsx, :n_cellsy].T

    return orientation_histogram.ravel()


def color_histogram_hsv(im, nbin=10, xmin=0, xmax=255, normalized=True):
    """
    Compute color histogram for an image using hue.

    Inputs:
    - im: H x W x C array of pixel data for an RGB image.
    - nbin: Number of histogram bins. (default: 10)
    - xmin: Minimum pixel value (default: 0)
    - xmax: Maximum pixel value (default: 255)
    - normalized: Whether to normalize the histogram (default: True)

    Returns:
      1D vector of length nbin giving the color histogram over the hue of the
      input image: bin probabilities if ``normalized``, raw counts otherwise.
    """
    # Deferred so the rest of the package imports without matplotlib, and
    # importing the submodule explicitly guarantees ``colors`` is loaded.
    from matplotlib.colors import rgb_to_hsv

    bins = np.linspace(xmin, xmax, nbin + 1)
    # float(): avoid integer division of integer images on Python 2
    hsv = rgb_to_hsv(im / float(xmax)) * xmax
    imhist, bin_edges = np.histogram(hsv[:, :, 0], bins=bins,
                                     density=normalized)
    if normalized:
        # density * bin width = probability mass per bin
        imhist = imhist * np.diff(bin_edges)

    # return histogram
    return imhist


# ---------------------------------------------------------------------------
# assignment1/daseCV/gradient_check.py
# ---------------------------------------------------------------------------

def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    a naive implementation of numerical gradient of f at x
    - f should be a function that takes a single argument
    - x is the point (numpy array) to evaluate the gradient at; it is
      perturbed in place and restored after each evaluation
    - verbose: if true, print every index together with its partial derivative
    - h: step size of the centered difference

    Returns an array shaped like x holding the numerical gradient.
    """
    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h and x-h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore

        # compute the partial derivative with centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            # single-argument print: same output on Python 2 and 3
            print('%s %s' % (ix, grad[ix]))
        it.iternext()  # step to next dimension

    return grad


def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array.

    - f: function of x returning an array
    - x: input array; perturbed in place and restored
    - df: weights multiplied elementwise with f(x + h) - f(x - h) before
      summing (the upstream gradient)
    - h: step size

    Returns an array shaped like x.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()  # copy: f may return a buffer it reuses
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad


def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: sequence of input blobs (each with ``vals`` and ``diffs``)
    - output: output blob (with ``vals`` and ``diffs``)
    - h: step size

    Returns a list with one numeric-gradient array per input blob.
    """
    # tuple(): accept a list of inputs as well as a tuple
    call_args = tuple(inputs) + (output,)
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            input_blob.vals[idx] = orig + h
            f(*call_args)
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*call_args)
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs


def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    """Numeric blob gradients of ``net.forward()``.

    The blobs are passed to ``eval_numerical_gradient_blobs`` only so it can
    perturb and read them; ``net.forward()`` itself is called with no
    arguments.
    """
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Sample a few random elements of x and print, for each, the numerical
    gradient, the analytic gradient and their relative error.

    - f: function of x returning a scalar
    - x: array to perturb (in place; every entry is restored)
    - analytic_grad: array shaped like x with the gradient to verify
    - num_checks: number of random positions to test
    - h: step size of the centered difference
    """

    for _ in range(num_checks):
        ix = tuple(randrange(m) for m in x.shape)

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        denom = abs(grad_numerical) + abs(grad_analytic)
        # Both gradients exactly zero means they agree; avoid 0/0.
        if denom == 0:
            rel_error = 0.0
        else:
            rel_error = abs(grad_numerical - grad_analytic) / denom
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))


# ---------------------------------------------------------------------------
# assignment1/daseCV/vis_utils.py
# ---------------------------------------------------------------------------

def _rescale_to_unit(G):
    """Linearly map G onto [0, 1]; a constant array maps to all zeros."""
    lo, hi = G.min(), G.max()
    if hi == lo:
        return np.zeros_like(G, dtype=float)
    return (G - lo) / float(hi - lo)


def visualize_grid(Xs, ubound=255.0, padding=1):
    """
    Reshape a 4D tensor of image data to a grid for easy visualization.

    Inputs:
    - Xs: Data of shape (N, H, W, C)
    - ubound: Output grid will have values scaled to the range [0, ubound];
      each image is rescaled independently
    - padding: The number of blank pixels between elements of the grid

    Returns the grid as a float array.
    """
    (N, H, W, C) = Xs.shape
    if N == 0:
        return np.zeros((0, 0, C))
    grid_size = int(ceil(sqrt(N)))
    grid_height = H * grid_size + padding * (grid_size - 1)
    grid_width = W * grid_size + padding * (grid_size - 1)
    grid = np.zeros((grid_height, grid_width, C))
    next_idx = 0
    y0, y1 = 0, H
    for y in range(grid_size):
        x0, x1 = 0, W
        for x in range(grid_size):
            if next_idx < N:
                img = Xs[next_idx]
                low, high = np.min(img), np.max(img)
                # A constant image has no range to stretch; leave it at 0
                # instead of dividing by zero.
                if high > low:
                    grid[y0:y1, x0:x1] = ubound * (img - low) / (high - low)
                next_idx += 1
            x0 += W + padding
            x1 += W + padding
        y0 += H + padding
        y1 += H + padding
    return grid


def vis_grid(Xs):
    """Tile images of shape (N, H, W, C) into one grid normalized to [0, 1].

    Tiles are separated by one pixel, and unused pixels start at the global
    minimum of Xs.
    """
    (N, H, W, C) = Xs.shape
    A = int(ceil(sqrt(N)))
    G = np.full((A * H + A, A * W + A, C), np.min(Xs), dtype=Xs.dtype)
    n = 0
    for y in range(A):
        for x in range(A):
            if n < N:
                G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = Xs[n, :, :, :]
                n += 1
    # normalize to [0,1]
    return _rescale_to_unit(G)


def vis_nn(rows):
    """Tile a list of rows of (H, W, C) images into one grid in [0, 1].

    The number of columns is taken from the first row, and unused pixels
    start at 1.
    """
    N = len(rows)
    D = len(rows[0])
    H, W, C = rows[0][0].shape
    G = np.ones((N * H + N, D * W + D, C), rows[0][0].dtype)
    for y in range(N):
        for x in range(D):
            G[y*H+y:(y+1)*H+y, x*W+x:(x+1)*W+x, :] = rows[y][x]
    # normalize to [0,1]
    return _rescale_to_unit(G)