# coding=utf-8
"""End-to-end OCR pipeline: CTPN text-line detection + Img2Seq recognition.

Reads one input image (``--test_data_path``), detects text lines with a CTPN
model restored from ``--checkpoint_path``, then feeds each cropped line to an
im2latex-style Img2Seq model; the accumulated predictions are logged as JSON.
"""
import json
import locale
import os
import shutil
import sys
import time

import cv2
import numpy as np
import tensorflow as tf
from PIL import Image

from main import preprocess

# Pin the C locale so number parsing/formatting stays predictable.
locale.setlocale(locale.LC_ALL, 'C')

# Make the repo-local packages (nets, utils, model) importable when this file
# is run as a script from the project root.
sys.path.append(os.getcwd())

from nets import model_train as ctpnmodel
from utils.rpn_msr.proposal_layer import proposal_layer
from utils.text_connector.detectors import TextDetector
from model.img2seq import Img2SeqModel
from model.utils.general import Config, run
from model.utils.text import Vocab
from model.utils.image import greyscale, predictsize

tf.app.flags.DEFINE_string('test_data_path', '/app/image/1.png', '')
tf.app.flags.DEFINE_string('output_path', '/app/im2latex_master/results/predict/', '')
tf.app.flags.DEFINE_string('gpu', '0', '')
tf.app.flags.DEFINE_string('checkpoint_path', '/app/im2latex_master/checkpoints_mlt/', '')
FLAGS = tf.app.flags.FLAGS
# Was DEFINE_integer(..., '2', ...): an integer flag needs an int default.
tf.app.flags.DEFINE_integer('language', 2, '')


def _imread_rgb(path):
    """Read an image file and return it as an RGB ndarray.

    Replacement for ``scipy.misc.imread`` (removed in SciPy >= 1.2): uses
    cv2 and flips BGR -> RGB so callers see the same channel order as before.

    Raises:
        IOError: if the file cannot be read as an image.
    """
    img = cv2.imread(path)
    if img is None:
        raise IOError('cannot read image: {}'.format(path))
    return img[:, :, ::-1]


def get_images():
    """Collect image file paths under ``FLAGS.test_data_path``, recursively."""
    files = []
    exts = ('jpg', 'png', 'jpeg', 'JPG')
    for parent, _dirnames, filenames in os.walk(FLAGS.test_data_path):
        for filename in filenames:
            # str.endswith accepts a tuple: one call instead of an inner loop.
            if filename.endswith(exts):
                files.append(os.path.join(parent, filename))
    print('Find {} images'.format(len(files)))
    return files


def resize_image(img):
    """Scale ``img`` so its short side is ~600 px (long side capped at 1200).

    Each output dimension is then rounded up to a multiple of 16 as the CTPN
    network requires (dimensions below 16 are left unchanged).

    Args:
        img: HxWxC ndarray.

    Returns:
        (resized_image, (height_ratio, width_ratio)) where the ratios map
        original size -> new size (new = old * ratio).
    """
    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])

    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)
    new_h = int(img_size[0] * im_scale)
    new_w = int(img_size[1] * im_scale)

    # Round up to the next multiple of 16.
    new_h = new_h if new_h // 16 == 0 else (new_h // 16 + 1) * 16
    new_w = new_w if new_w // 16 == 0 else (new_w // 16 + 1) * 16

    re_im = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return re_im, (new_h / img_size[0], new_w / img_size[1])


def get_box():
    """Run CTPN detection on ``FLAGS.test_data_path`` and display the boxes.

    Debug/visualisation helper: builds the detection graph, restores the CTPN
    checkpoint and an Img2Seq model, draws every detected text box on the
    image and shows it in an OpenCV window. Always returns 0.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
    input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0),
                                  trainable=False)

    bbox_pred, cls_pred, cls_prob = ctpnmodel.model(input_image, 2)

    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
    model_path = os.path.join(FLAGS.checkpoint_path,
                              os.path.basename(ckpt_state.model_checkpoint_path))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    saver.restore(sess, model_path)

    dir_output = "/app/im2latex_master/results/full/"
    config_vocab = Config(dir_output + "vocab.json")
    config_model = Config(dir_output + "model.json")
    vocab = Vocab(config_vocab)
    model = Img2SeqModel(config_model, dir_output, vocab)
    model.build_pred()
    model.restore_session(dir_output + "model.weights4/test-model.ckpt")

    # cv2 reads BGR; flip to RGB.
    img = cv2.imread(FLAGS.test_data_path)[:, :, ::-1]
    h, w, c = img.shape
    if h > 121:
        # Large images get a perspective/deskew correction first.
        approx, image, (rh, rw) = preprocess.draw_rec(img)
        img = preprocess.Perspective(image, approx)
        img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh,
                         interpolation=cv2.INTER_LINEAR)
    img, (rh, rw) = resize_image(img)
    h, w, c = img.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                           feed_dict={input_image: [img],
                                                      input_im_info: im_info})

    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info, img)
    scores = textsegs[:, 0:2]
    textsegs = textsegs[:, 2:6]

    textdetector = TextDetector(DETECT_MODE='H')
    boxes = textdetector.detect(textsegs, scores, img.shape[:2], img)
    # np.int was removed in NumPy 1.20+; pixel coordinates fit in int32.
    boxes = np.array(boxes, dtype=np.int32)
    # Sort boxes roughly top-to-bottom, then left-to-right.
    image_box = sorted(boxes, key=lambda x: (x[1] + x[3], x[0] + x[6]))
    for i, box in enumerate(image_box):
        cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True,
                      color=(0, 255, 0), thickness=2)
    # Undo resize_image(): cv2.resize's fx scales width, fy scales height,
    # so fx must be 1/rw and fy 1/rh (the original had them swapped).
    img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh,
                     interpolation=cv2.INTER_LINEAR)
    cv2.imshow("ss", img)
    cv2.waitKey(0)
    return 0


def save_to_file():
    """Detect text lines in ``FLAGS.test_data_path`` and recognise each one.

    Images taller than 40 px are perspective-corrected, run through CTPN,
    and every detected line is cropped and fed to the English Img2Seq model;
    smaller images are recognised whole. The accumulated predictions are
    logged as a JSON string ``{"res": ...}``. Always returns 0.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
    input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0),
                                  trainable=False)

    # Same class count as get_box(); the original passed the float 2.0 here.
    bbox_pred, cls_pred, cls_prob = ctpnmodel.model(input_image, 2)

    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
    model_path = os.path.join(FLAGS.checkpoint_path,
                              os.path.basename(ckpt_state.model_checkpoint_path))
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    saver.restore(sess, model_path)

    dir_output = "/app/im2latex_master/results/full/"
    config_vocab = Config(dir_output + "vocab.json")
    config_model = Config(dir_output + "model.json")
    vocab = Vocab(config_vocab)

    # English recognition model. A Chinese/bilingual branch existed upstream
    # (keyed on box[8] == 2) but was disabled; only model_en is built.
    config_vocab_en = Config(dir_output + "vocabe.json")
    vocab_en = Vocab(config_vocab_en)
    model_en = Img2SeqModel(config_model, dir_output, vocab_en)
    model_en.build_pred()
    model_en.restore_session(dir_output + "model.weights_en/test-model.ckpt")

    img = _imread_rgb(FLAGS.test_data_path)
    h, w, c = img.shape
    res = ""
    if h > 40:
        approx, image, (rh, rw) = preprocess.draw_rec(img)
        img = preprocess.Perspective(image, approx)
        img = cv2.resize(img, None, None, fx=1.0 / rw, fy=1.0 / rh,
                         interpolation=cv2.INTER_LINEAR)
        img, (rh, rw) = resize_image(img)
        h, w, c = img.shape
        im_info = np.array([h, w, c]).reshape([1, 3])
        bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                               feed_dict={input_image: [img],
                                                          input_im_info: im_info})

        textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info, img)
        scores = textsegs[:, 0:2]
        textsegs = textsegs[:, 2:6]

        textdetector = TextDetector(DETECT_MODE='H')
        boxes = textdetector.detect(textsegs, scores, img.shape[:2], img)
        # np.int was removed in NumPy 1.20+; pixel coordinates fit in int32.
        boxes = np.array(boxes, dtype=np.int32)

        # Visualisation copy: green for class-1 boxes, blue otherwise.
        img2 = img.copy()
        for i, box in enumerate(boxes):
            color = (0, 255, 0) if box[8] == 1 else (255, 0, 0)
            cv2.polylines(img2, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True,
                          color=color, thickness=2)
        # fx scales width (1/rw), fy scales height (1/rh) — original swapped.
        img2 = cv2.resize(img2, None, None, fx=1.0 / rw, fy=1.0 / rh,
                          interpolation=cv2.INTER_LINEAR)

        for i, box in enumerate(boxes):
            lan = box[8]  # detector language/class flag; currently unused
            # Crop with a 1-px margin, clamped at 0 so boxes touching the
            # image edge don't produce negative slice indices (which would
            # silently yield a wrong, near-empty crop). ::-1 flips RGB->BGR.
            y0 = max(min(box[1], box[3]) - 1, 0)
            y1 = max(box[5], box[7]) + 1
            x0 = max(min(box[0], box[2]) - 1, 0)
            x1 = max(box[4], box[6]) + 1
            img0 = img[y0:y1, x0:x1, ::-1]

            img0 = predictsize(img0)
            img0 = greyscale(img0)
            hyp = model_en.predict(img0)
            res = res + hyp[0] + "\n"
            model_en.logger.info(hyp[0])

        res = json.dumps({"res": res})
        model_en.logger.info(res)
    else:
        # Small image: recognise the whole thing in one shot.
        img = predictsize(img)
        img0 = greyscale(img)
        hyps = model_en.predict(img0)
        res = res + hyps[0] + "\n"
        model_en.logger.info(hyps[0])
        res = json.dumps({"res": res})
        model_en.logger.info(res)
    return 0


def main(argv=None):
    """tf.app.run() entry point: run detection + recognition once."""
    res = save_to_file()
    # res = get_box()  # alternative: visualise detection boxes only
    return res


if __name__ == '__main__':
    tf.app.run()