|
|
- #!/usr/bin/python3
- # -*- coding: utf-8 -*-
-
- # library modules
- from math import ceil
- import json
- import time
- import os
- import threading
-
- # External library modules
- import tensorflow as tf
- import numpy as np
-
- # local modules
- from data import LSVRC2010
- import logs
-
- class AlexNet:
- """
- A tensorflow implementation of the paper:
- `AlexNet <https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf>`_
- """
-
- def __init__(self, path, batch_size, resume):
- """
- Build the AlexNet model
- """
- self.logger = logs.get_logger()
-
- self.resume = resume
- self.path = path
- self.batch_size = batch_size
- self.lsvrc2010 = LSVRC2010(self.path, batch_size)
- self.num_classes = len(self.lsvrc2010.wnid2label)
-
- self.lr = 0.001
- self.momentum = 0.9
- self.lambd = tf.constant(0.0005, name='lambda')
- self.input_shape = (None, 227, 227, 3)
- self.output_shape = (None, self.num_classes)
-
- self.logger.info("Creating placeholders for graph...")
- self.create_tf_placeholders()
-
- self.logger.info("Creating variables for graph...")
- self.create_tf_variables()
-
- self.logger.info("Initialize hyper parameters...")
- self.hyper_param = {}
- self.init_hyper_param()
-
- def create_tf_placeholders(self):
- """
- Create placeholders for the graph.
- The input for these will be given while training or testing.
- """
- self.input_image = tf.placeholder(tf.float32, shape=self.input_shape,
- name='input_image')
- self.labels = tf.placeholder(tf.float32, shape=self.output_shape,
- name='output')
- self.learning_rate = tf.placeholder(tf.float32, shape=(),
- name='learning_rate')
- self.dropout = tf.placeholder(tf.float32, shape=(),
- name='dropout')
-
- def create_tf_variables(self):
- """
- Create variables for epoch, batch and global step
- """
- self.global_step = tf.Variable(0, name='global_step', trainable=False)
- self.cur_epoch = tf.Variable(0, name='epoch', trainable=False)
- self.cur_batch = tf.Variable(0, name='batch', trainable=False)
-
- self.increment_epoch_op = tf.assign(self.cur_epoch, self.cur_epoch+1)
- self.increment_batch_op = tf.assign(self.cur_batch, self.cur_batch+1)
- self.init_batch_op = tf.assign(self.cur_batch, 0)
-
- def init_hyper_param(self):
- """
- Store the hyper parameters.
- For each layer store number of filters(kernels)
- and filter size.
- If it's a fully connected layer then store the number of neurons.
- """
- with open('hparam.json') as f:
- self.hyper_param = json.load(f)
-
- def get_filter(self, layer_num, layer_name):
- """
- :param layer_num: Indicates the layer number in the graph
- :type layer_num: int
- :param layer_name: Name of the filter
- """
- layer = 'L' + str(layer_num)
-
- filter_height, filter_width, in_channels = self.hyper_param[layer]['filter_size']
- out_channels = self.hyper_param[layer]['filters']
-
- return tf.Variable(tf.truncated_normal(
- [filter_height, filter_width, in_channels, out_channels],
- dtype = tf.float32, stddev = 1e-2), name = layer_name)
-
- def get_strides(self, layer_num):
- """
- :param layer_num: Indicates the layer number in the graph
- :type layer_num: int
- """
- layer = 'L' + str(layer_num)
-
- stride = self.hyper_param[layer]['stride']
- strides = [1, stride, stride, 1]
-
- return strides
-
- def get_bias(self, layer_num, value=0.0):
- """
- Get the bias variable for current layer
-
- :param layer_num: Indicates the layer number in the graph
- :type layer_num: int
- """
- layer = 'L' + str(layer_num)
- initial = tf.constant(value,
- shape=[self.hyper_param[layer]['filters']],
- name='C' + str(layer_num))
- return tf.Variable(initial, name='B' + str(layer_num))
-
- @property
- def l2_loss(self):
- """
- Compute the l2 loss for all the weights
- """
- conv_bias_names = ['B' + str(i) for i in range(1, 6)]
- weights = []
- for v in tf.trainable_variables():
- if 'biases' in v.name: continue
- if v.name.split(':')[0] in conv_bias_names: continue
- weights.append(v)
-
- return self.lambd * tf.add_n([tf.nn.l2_loss(weight) for weight in weights])
-
- def build_graph(self):
- """
- Build the tensorflow graph for AlexNet.
-
- First 5 layers are Convolutional layers. Out of which
- first 2 and last layer will be followed by *max pooling*
- layers.
-
- Next 2 layers are fully connected layers.
-
- L1_conv -> L1_MP -> L2_conv -> L2_MP -> L3_conv
- -> L4_conv -> L5_conv -> L5_MP -> L6_FC -> L7_FC
-
- Where L1_conv -> Convolutional layer 1
- L5_MP -> Max pooling layer 5
- L7_FC -> Fully Connected layer 7
-
- Use `tf.nn.conv2d` to initialize the filters so
- as to reduce training time and `tf.layers.max_pooling2d`
- as we don't need to initialize in the pooling layer.
- """
- # Layer 1 Convolutional layer
- filter1 = self.get_filter(1, 'L1_filter')
- l1_conv = tf.nn.conv2d(self.input_image, filter1,
- self.get_strides(1),
- padding = self.hyper_param['L1']['padding'],
- name='L1_conv')
- l1_conv = tf.add(l1_conv, self.get_bias(1))
- l1_conv = tf.nn.local_response_normalization(l1_conv,
- depth_radius=5,
- bias=2,
- alpha=1e-4,
- beta=.75)
- l1_conv = tf.nn.relu(l1_conv)
-
- # Layer 1 Max Pooling layer
- l1_MP = tf.layers.max_pooling2d(l1_conv,
- self.hyper_param['L1_MP']['filter_size'],
- self.hyper_param['L1_MP']['stride'],
- name='L1_MP')
-
- # Layer 2 Convolutional layer
- filter2 = self.get_filter(2, 'L2_filter')
- l2_conv = tf.nn.conv2d(l1_MP, filter2,
- self.get_strides(2),
- padding = self.hyper_param['L2']['padding'],
- name='L2_conv')
- l2_conv = tf.add(l2_conv, self.get_bias(2, 1.0))
- l2_conv = tf.nn.local_response_normalization(l2_conv,
- depth_radius=5,
- bias=2,
- alpha=1e-4,
- beta=.75)
- l2_conv = tf.nn.relu(l2_conv)
-
- # Layer 2 Max Pooling layer
- l2_MP = tf.layers.max_pooling2d(l2_conv,
- self.hyper_param['L2_MP']['filter_size'],
- self.hyper_param['L2_MP']['stride'],
- name='L2_MP')
-
- # Layer 3 Convolutional layer
- filter3 = self.get_filter(3, 'L3_filter')
- l3_conv = tf.nn.conv2d(l2_MP, filter3,
- self.get_strides(3),
- padding = self.hyper_param['L3']['padding'],
- name='L3_conv')
- l3_conv = tf.add(l3_conv, self.get_bias(3))
- l3_conv = tf.nn.relu(l3_conv)
-
- # Layer 4 Convolutional layer
- filter4 = self.get_filter(4, 'L4_filter')
- l4_conv = tf.nn.conv2d(l3_conv, filter4,
- self.get_strides(4),
- padding = self.hyper_param['L4']['padding'],
- name='L4_conv')
- l4_conv = tf.add(l4_conv, self.get_bias(4, 1.0))
- l4_conv = tf.nn.relu(l4_conv)
-
- # Layer 5 Convolutional layer
- filter5 = self.get_filter(5, 'L5_filter')
- l5_conv = tf.nn.conv2d(l4_conv, filter5,
- self.get_strides(5),
- padding = self.hyper_param['L5']['padding'],
- name='L5_conv')
- l5_conv = tf.add(l5_conv, self.get_bias(5, 1.0))
- l5_conv = tf.nn.relu(l5_conv)
-
- # Layer 5 Max Pooling layer
- l5_MP = tf.layers.max_pooling2d(l5_conv,
- self.hyper_param['L5_MP']['filter_size'],
- self.hyper_param['L5_MP']['stride'],
- name='L5_MP')
-
- flatten = tf.layers.flatten(l5_MP)
-
- # Layer 6 Fully connected layer
- l6_FC = tf.contrib.layers.fully_connected(flatten,
- self.hyper_param['FC6'])
-
- # Dropout layer
- l6_dropout = tf.nn.dropout(l6_FC, self.dropout,
- name='l6_dropout')
-
- # Layer 7 Fully connected layer
- self.l7_FC = tf.contrib.layers.fully_connected(l6_dropout,
- self.hyper_param['FC7'])
-
- # Dropout layer
- l7_dropout = tf.nn.dropout(self.l7_FC, self.dropout,
- name='l7_dropout')
-
- # final layer before softmax
- self.logits = tf.contrib.layers.fully_connected(l7_dropout,
- self.num_classes, None)
-
- # loss function
- loss_function = tf.nn.softmax_cross_entropy_with_logits(
- logits = self.logits,
- labels = self.labels
- )
-
- # total loss
- self.loss = tf.reduce_mean(loss_function) + self.l2_loss
-
- self.optimizer = tf.train.MomentumOptimizer(self.learning_rate, momentum=self.momentum)\
- .minimize(self.loss, global_step=self.global_step)
-
- correct = tf.equal(tf.argmax(self.logits, 1), tf.argmax(self.labels, 1))
- self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
-
- self.top5_correct = tf.nn.in_top_k(self.logits, tf.argmax(self.labels, 1), 5)
- self.top5_accuracy = tf.reduce_mean(tf.cast(self.top5_correct, tf.float32))
-
- self.add_summaries()
-
- def add_summaries(self):
- """
- Add summaries for loss, top1 and top5 accuracies
-
- Add loss, top1 and top5 accuracies to summary files
- in order to visualize in tensorboard
- """
- tf.summary.scalar('loss', self.loss)
- tf.summary.scalar('Top-1-Acc', self.accuracy)
- tf.summary.scalar('Top-5-Acc', self.top5_accuracy)
-
- self.merged = tf.summary.merge_all()
-
- def save_model(self, sess, saver):
- """
- Save the current model
-
- :param sess: Session object
- :param saver: Saver object responsible to store
- """
- model_base_path = os.path.join(os.getcwd(), 'model')
- if not os.path.exists(model_base_path):
- os.mkdir(model_base_path)
- model_save_path = os.path.join(os.getcwd(), 'model', 'model.ckpt')
- save_path = saver.save(sess, model_save_path)
- self.logger.info("Model saved in path: %s", save_path)
-
- def restore_model(self, sess, saver):
- """
- Restore previously saved model
-
- :param sess: Session object
- :param saver: Saver object responsible to store
- """
- model_base_path = os.path.join(os.getcwd(), 'model')
- model_restore_path = os.path.join(os.getcwd(), 'model', 'model.ckpt')
- saver.restore(sess, model_restore_path)
- self.logger.info("Model Restored from path: %s",
- model_restore_path)
-
- def get_summary_writer(self, sess):
- """
- Get summary writer for training and validation
-
- Responsible for creating summary writer so it can
- write summaries to a file so it can be read by
- tensorboard later.
- """
- if not os.path.exists(os.path.join('summary', 'train')):
- os.makedirs(os.path.join('summary', 'train'))
- if not os.path.exists(os.path.join('summary', 'val')):
- os.makedirs(os.path.join('summary', 'val'))
- return (tf.summary.FileWriter(os.path.join(os.getcwd(),
- 'summary', 'train'),
- sess.graph),
- tf.summary.FileWriter(os.path.join(os.getcwd(),
- 'summary', 'val'),
- sess.graph))
-
- def train(self, epochs, thread='false'):
- """
- Train AlexNet.
- """
- batch_step, val_step = 10, 500
-
- self.logger.info("Building the graph...")
- self.build_graph()
-
- init = tf.global_variables_initializer()
-
- saver = tf.train.Saver()
- with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
- (summary_writer_train,
- summary_writer_val) = self.get_summary_writer(sess)
- if self.resume and os.path.exists(os.path.join(os.getcwd(),
- 'model')):
- self.restore_model(sess, saver)
- else:
- sess.run(init)
-
- resume_batch = True
- best_loss = float('inf')
- while sess.run(self.cur_epoch) < epochs:
- losses = []
- accuracies = []
-
- epoch = sess.run(self.cur_epoch)
- if not self.resume or (
- self.resume and not resume_batch):
- sess.run(self.init_batch_op)
- resume_batch = False
- start = time.time()
- gen_batch = self.lsvrc2010.gen_batch
- for images, labels in gen_batch:
- batch_i = sess.run(self.cur_batch)
- # If it's resumed from stored model,
- # this will save from messing up the batch number
- # in subsequent epoch
- if batch_i >= ceil(len(self.lsvrc2010.image_names) / self.batch_size):
- break
- (_, global_step,
- _) = sess.run([self.optimizer,
- self.global_step, self.increment_batch_op],
- feed_dict = {
- self.input_image: images,
- self.labels: labels,
- self.learning_rate: self.lr,
- self.dropout: 0.5
- })
-
- if global_step == 150000:
- self.lr = 0.0001 # Halve the learning rate
-
- if batch_i % batch_step == 0:
- (summary, loss, acc, top5_acc, _top5,
- logits, l7_FC) = sess.run([self.merged, self.loss,
- self.accuracy, self.top5_accuracy,
- self.top5_correct,
- self.logits, self.l7_FC],
- feed_dict = {
- self.input_image: images,
- self.labels: labels,
- self.learning_rate: self.lr,
- self.dropout: 1.0
- })
- losses.append(loss)
- accuracies.append(acc)
- summary_writer_train.add_summary(summary, global_step)
- summary_writer_train.flush()
- end = time.time()
- try:
- self.logger.debug("l7 no of non zeros: %d", np.count_nonzero(l7_FC))
- true_idx = np.where(_top5[0]==True)[0][0]
- self.logger.debug("logit at %d: %s", true_idx,
- str(logits[true_idx]))
- except IndexError as ie:
- self.logger.debug(ie)
- self.logger.info("Time: %f Epoch: %d Batch: %d Loss: %f "
- "Avg loss: %f Accuracy: %f Avg Accuracy: %f "
- "Top 5 Accuracy: %f",
- end - start, epoch, batch_i,
- loss, sum(losses) / len(losses),
- acc, sum(accuracies) / len(accuracies),
- top5_acc)
- start = time.time()
-
- if batch_i % val_step == 0:
- images_val, labels_val = self.lsvrc2010.get_batch_val
- (summary, acc, top5_acc,
- loss) = sess.run([self.merged,
- self.accuracy,
- self.top5_accuracy, self.loss],
- feed_dict = {
- self.input_image: images_val,
- self.labels: labels_val,
- self.learning_rate: self.lr,
- self.dropout: 1.0
- })
- summary_writer_val.add_summary(summary, global_step)
- summary_writer_val.flush()
- self.logger.info("Validation - Accuracy: %f Top 5 Accuracy: %f Loss: %f",
- acc, top5_acc, loss)
-
- cur_loss = sum(losses) / len(losses)
- if cur_loss < best_loss:
- best_loss = cur_loss
- self.save_model(sess, saver)
-
- # Increase epoch number
- sess.run(self.increment_epoch_op)
-
- def test(self):
- step = 10
-
- self.logger_test = logs.get_logger('AlexNetTest', file_name='logs_test.log')
- self.logger_test.info("In Test: Building the graph...")
- self.build_graph()
-
- init = tf.global_variables_initializer()
-
- saver = tf.train.Saver()
- top1_count, top5_count, count = 0, 0, 0
- with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
- self.restore_model(sess, saver)
-
- start = time.time()
- batch = self.lsvrc2010.gen_batch_test
- for i, (patches, labels) in enumerate(batch):
- count += patches[0].shape[0]
- avg_logits = np.zeros((patches[0].shape[0], self.num_classes))
- for patch in patches:
- logits = sess.run(self.logits,
- feed_dict = {
- self.input_image: patch,
- self.dropout: 1.0
- })
- avg_logits += logits
- avg_logits /= len(patches)
- top1_count += np.sum(np.argmax(avg_logits, 1) == labels)
- top5_count += np.sum(avg_logits.argsort()[:, -5:] == \
- np.repeat(labels, 5).reshape(patches[0].shape[0], 5))
-
- if i % step == 0:
- end = time.time()
- self.logger_test.info("Time: %f Step: %d "
- "Avg Accuracy: %f "
- "Avg Top 5 Accuracy: %f",
- end - start, i,
- top1_count / count,
- top5_count / count)
- start = time.time()
-
- self.logger_test.info("Final - Avg Accuracy: %f "
- "Avg Top 5 Accuracy: %f",
- top1_count / count,
- top5_count / count)
-
- if __name__ == '__main__':
- import argparse
- parser = argparse.ArgumentParser()
- parser.add_argument('image_path', metavar = 'image-path',
- help = 'ImageNet dataset path')
- parser.add_argument('--resume', metavar='resume',
- type=lambda x: x != 'False', default=True,
- required=False,
- help='Resume training (True or False)')
- parser.add_argument('--train', help='Train AlexNet')
- parser.add_argument('--test', help='Test AlexNet')
- args = parser.parse_args()
-
- alexnet = AlexNet(args.image_path, batch_size=128, resume=args.resume)
-
- if args.train == 'true':
- alexnet.train(50)
- elif args.test == 'true':
- alexnet.test()
|