from __future__ import print_function
from builtins import range

import numpy as np
from random import randrange

def eval_numerical_gradient(f, x, verbose=True, h=0.00001):
    """
    A naive implementation of the numerical gradient of f at x.
    - f should be a function that takes a single argument and returns a scalar
    - x is the point (numpy array) to evaluate the gradient at
    """

    grad = np.zeros_like(x)
    # iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:

        # evaluate function at x+h
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # restore the original value

        # compute the partial derivative with the centered formula
        grad[ix] = (fxph - fxmh) / (2 * h)  # the slope
        if verbose:
            print(ix, grad[ix])
        it.iternext()  # step to the next dimension

    return grad

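# Illustrative usage sketch (an addition, not part of the original module):
# check eval_numerical_gradient against the known analytic gradient of
# f(x) = sum(x ** 2), which is 2 * x.
def _demo_eval_numerical_gradient():
    x = np.random.randn(4, 3)
    num_grad = eval_numerical_gradient(lambda y: np.sum(y ** 2), x,
                                       verbose=False)
    # centered differences are O(h ** 2) accurate, so the error should be tiny
    assert np.allclose(num_grad, 2 * x, atol=1e-6)
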
def eval_numerical_gradient_array(f, x, df, h=1e-5):
    """
    Evaluate a numeric gradient for a function that accepts a numpy
    array and returns a numpy array. df is the upstream gradient of the
    loss with respect to f(x), so by the chain rule the result
    approximates the gradient of the loss with respect to x.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index

        oldval = x[ix]
        x[ix] = oldval + h
        pos = f(x).copy()
        x[ix] = oldval - h
        neg = f(x).copy()
        x[ix] = oldval

        grad[ix] = np.sum((pos - neg) * df) / (2 * h)
        it.iternext()
    return grad

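# Illustrative usage sketch (an addition): treat y = x ** 2 as a "layer" and
# check its backward pass, with dout playing the role of the upstream
# gradient df; the analytic answer is dout * 2 * x.
def _demo_eval_numerical_gradient_array():
    x = np.random.randn(5, 6)
    dout = np.random.randn(5, 6)
    num_dx = eval_numerical_gradient_array(lambda y: y ** 2, x, dout)
    assert np.allclose(num_dx, dout * 2 * x, atol=1e-6)
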
def eval_numerical_gradient_blobs(f, inputs, output, h=1e-5):
    """
    Compute numeric gradients for a function that operates on input
    and output blobs.

    We assume that f accepts several input blobs as arguments, followed by a
    blob where outputs will be written. For example, f might be called like:

    f(x, w, out)

    where x and w are input Blobs, and the result of f will be written to out.

    Inputs:
    - f: function
    - inputs: tuple of input blobs
    - output: output blob
    - h: step size
    """
    numeric_diffs = []
    for input_blob in inputs:
        diff = np.zeros_like(input_blob.diffs)
        it = np.nditer(input_blob.vals, flags=['multi_index'],
                       op_flags=['readwrite'])
        while not it.finished:
            idx = it.multi_index
            orig = input_blob.vals[idx]

            input_blob.vals[idx] = orig + h
            f(*(inputs + (output,)))
            pos = np.copy(output.vals)
            input_blob.vals[idx] = orig - h
            f(*(inputs + (output,)))
            neg = np.copy(output.vals)
            input_blob.vals[idx] = orig

            diff[idx] = np.sum((pos - neg) * output.diffs) / (2.0 * h)

            it.iternext()
        numeric_diffs.append(diff)
    return numeric_diffs

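# Illustrative stand-in (an assumption on our part): this module never defines
# the Blob class it expects, so this minimal version carries only the `vals`
# and `diffs` arrays that eval_numerical_gradient_blobs actually touches.
class _FakeBlob(object):
    def __init__(self, shape):
        self.vals = np.random.randn(*shape)
        self.diffs = np.random.randn(*shape)


# Illustrative usage sketch (an addition): f squares the input blob into the
# output blob, so by the chain rule the expected input gradient is
# output.diffs * 2 * x.vals.
def _demo_eval_numerical_gradient_blobs():
    x, out = _FakeBlob((3, 4)), _FakeBlob((3, 4))

    def f(x_blob, out_blob):
        out_blob.vals = x_blob.vals ** 2

    (dx,) = eval_numerical_gradient_blobs(f, (x,), out)
    assert np.allclose(dx, out.diffs * 2 * x.vals, atol=1e-6)
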
def eval_numerical_gradient_net(net, inputs, output, h=1e-5):
    # The lambda discards the blob arguments it receives: net.forward() is
    # assumed to read the (perturbed) input blobs and refresh the output
    # blob in place.
    return eval_numerical_gradient_blobs(lambda *args: net.forward(),
                                         inputs, output, h=h)

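# Illustrative usage sketch (an addition): a stand-in net whose forward()
# squares the input blob into the output blob in place, matching the
# assumption the wrapper above makes about net.forward(). Reuses the
# _FakeBlob helper defined earlier, which is also our own.
def _demo_eval_numerical_gradient_net():
    x, out = _FakeBlob((2, 3)), _FakeBlob((2, 3))

    class _FakeNet(object):
        def forward(self):
            out.vals = x.vals ** 2

    (dx,) = eval_numerical_gradient_net(_FakeNet(), (x,), out)
    assert np.allclose(dx, out.diffs * 2 * x.vals, atol=1e-6)
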
def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    """
    Sample a few random elements of x and compare the numerical gradient
    with the analytic gradient in those dimensions only.
    """

    for i in range(num_checks):
        ix = tuple([randrange(m) for m in x.shape])

        oldval = x[ix]
        x[ix] = oldval + h  # increment by h
        fxph = f(x)  # evaluate f(x + h)
        x[ix] = oldval - h  # decrement by h
        fxmh = f(x)  # evaluate f(x - h)
        x[ix] = oldval  # reset

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        # guard the denominator so the check does not divide by zero when
        # both gradients are exactly zero
        denom = abs(grad_numerical) + abs(grad_analytic)
        rel_error = abs(grad_numerical - grad_analytic) / max(denom, 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))
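

# Illustrative usage sketch (an addition): spot-check a few partial
# derivatives of f(x) = sum(x ** 2) against the analytic gradient 2 * x;
# the printed relative errors should be near machine precision for this
# quadratic.
def _demo_grad_check_sparse():
    x = np.random.randn(10, 5)
    grad_check_sparse(lambda y: np.sum(y ** 2), x, 2 * x, num_checks=5)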