+# from flask import Flask
+import pdf_to_pics,feature
+import os
+import uuid
+import platform
+from flask import Flask,request,redirect,url_for,Blueprint
+from werkzeug.utils import secure_filename
+from sim import compute_similarity
# Path separator used when upload paths are concatenated by hand: a backslash
# on Windows, a forward slash on every other platform. The original test for
# "Linux" had lost its `elif`, so it was a no-op expression and platforms such
# as macOS had no dedicated branch; the else-arm now covers all of them.
slash = '\\' if platform.system() == "Windows" else '/'
# Folder (relative to the working directory) where uploaded files are stored.
UPLOAD_FOLDER = 'upload'
# Lower-case file extensions accepted by allowed_file().
ALLOW_EXTENSIONS = {'html', 'htm', 'doc', 'docx', 'mht', 'pdf'}

app = Flask(__name__)
# upload_file() reads the destination from app.config['UPLOAD_FOLDER'];
# without this assignment that lookup raises KeyError.
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Create the upload folder up front; exist_ok avoids the check-then-create race.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Check whether the file extension is in the allow-list.
def allowed_file(filename):
    """Return True when *filename* ends in an extension from ALLOW_EXTENSIONS.

    The extension is the text after the last dot. It is lower-cased before
    the lookup so that names such as ``REPORT.PDF`` are accepted as well
    (the allow-list itself only contains lower-case entries).

    Args:
        filename: File name as supplied by the client.

    Returns:
        bool: False when the name has no dot or the extension is not listed.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOW_EXTENSIONS
# Upload a document and redirect to the file2pic view.
def upload_file():
    """Store an uploaded document under a UUID name and redirect to ``file2pic``.

    On POST the file is read from the ``file`` form field. If it passes
    allowed_file() it is saved in the upload folder as ``<uuid4>.<ext>`` and
    the client is redirected to the ``file2pic`` endpoint with that name.
    For any other request, or a rejected file, the upload form is returned.

    NOTE(review): no route decorator is visible for this view or for
    ``file2pic``; url_for('file2pic') only resolves once both are registered
    on the app -- confirm where that happens.

    Returns:
        A redirect response after a successful upload, otherwise the HTML
        upload form as a string.
    """
    if request.method == 'POST':
        # Read the uploaded file from the form field named "file".
        file = request.files['file']
        if file and allowed_file(file.filename):
            # secure_filename() drops non-ASCII characters, so a name such as
            # "报告.pdf" collapses to "pdf" and splitting that on '.' raised
            # IndexError. The stored name is a fresh UUID anyway, so only the
            # extension is needed, and allowed_file() has already validated
            # it on the original name.
            extension = file.filename.rsplit('.', 1)[1].lower()
            # Uploaded names may collide, hence the UUID-based file name.
            file_name = str(uuid.uuid4()) + '.' + extension
            # Fall back to the module constant if the config key was never set.
            folder = app.config.get('UPLOAD_FOLDER', UPLOAD_FOLDER)
            target = os.path.join(folder, file_name)
            file.save(target)
            print(os.path.join(os.getcwd(), target))
            return redirect(url_for('file2pic', file=file_name))
    # NOTE(review): the markup of this page was missing from the source under
    # review (only its two captions survived); the form below is a minimal
    # reconstruction that posts the "file" field back to this view -- confirm
    # against the intended page.
    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Select PDF to Upload</h1>
    <form method=post enctype=multipart/form-data>
      <input type=file name=file>
      <input type=submit value=Upload>
    </form>
    '''
# Use the to_pics class from pdf_to_pics.py to split the upload into pictures.
def file2pic(file):
    """Run ``to_pics.call_pdf2pic`` on *file* and return whatever it returns.

    Args:
        file: Name of the uploaded file, passed straight to call_pdf2pic().
    """
    result = pdf_to_pics.to_pics().call_pdf2pic(file)
    print('file2pic')
    return result
# Use sim/compute_similarity.py to compute the similarity.
def call_sim(path):
    """Run ``simi.similarity`` on *path* and return whatever it returns.

    Args:
        path: Value handed unchanged to ``compute_similarity.simi().similarity``.
    """
    result = compute_similarity.simi().similarity(path)
    print("call_sim")
    return result
+# @app.route('/tttt/')
+# def call_feature(alist):
+# print("call_feature")
+# the_feature = feature.feat()
+# message = the_feature.call_feature_extraction(alist)
+# # print("call_feature")
+# return message
# Script entry point: attach the blueprint exported by pdf_to_pics, then start
# the Flask server with app.run()'s default arguments.
if __name__ == "__main__":
    pics_blueprint = pdf_to_pics.bp_2pics
    app.register_blueprint(pics_blueprint)
    # app.run(host='', port=7000)  # explicit IP / port
    app.run()
+from __future__ import print_function
+import cv2
+import numpy as np
+import os
+from flask import render_template
+import zipfile
class feat:
    """ORB-based alignment of image pairs plus packaging of the match pictures."""

    # Upper bound on ORB features, and the fraction of best matches that is kept.
    # NOTE(review): neither name was defined anywhere in this module, so
    # alignImages() raised NameError. 500 / 0.15 are the values commonly used
    # in the OpenCV feature-based alignment example this code resembles --
    # confirm they suit this project.
    MAX_MATCHES = 500
    GOOD_MATCH_PERCENT = 0.15

    def call_feature_extraction_1(self, folder_path, list, index, tmp):
        """Render match pictures for the top-ranked image pairs and zip them.

        Args:
            folder_path: Working-folder path; the text after its first seven
                characters names the sub-folder created under ``output/``.
                NOTE(review): the meaning of the 7-character prefix is not
                visible here -- confirm against the caller.
            list: Rows indexable as ``row[0]`` (reference image path),
                ``row[1]`` (path of the image to align) and ``row[2]`` /
                ``row[5]`` (sort keys). The name shadows the builtin but is
                kept so keyword-argument callers keep working.
            index: Number whose tenth (truncated) is the count of rows processed.
            tmp: Unused.

        Returns:
            The value returned by zipDir() for the output folder.
        """
        # Sort by similarity, best first.
        pairs = sorted(list, key=lambda row: [row[2], row[5]], reverse=True)
        print(pairs)
        print('---------------')
        print(index)

        # Loop-invariant; computing it here also keeps the name bound when no
        # pair is processed (previously an UnboundLocalError at the end).
        outFilename = "output/" + folder_path[7:]

        # Slicing clamps the count, so a short list no longer raises IndexError.
        for row in pairs[:max(int(index / 10), 0)]:
            refFilename, imFilename = row[0], row[1]
            print("Reading reference image : ", refFilename)
            imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
            print("Reading image to align : ", imFilename)
            im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
            print(refFilename)
            print(imFilename)
            print(folder_path[7:])
            if imReference is None or im is None:
                # cv2.imread reports an unreadable file by returning None.
                print("Skipping unreadable image pair : ", refFilename, imFilename)
                continue

            # Reference name without directory/extension, and the full file
            # name of the aligned image; together they name the output file.
            # splitext/basename replace the fixed "[:-4]" slice so extensions
            # of any length and paths without a slash are handled.
            name1 = os.path.splitext(os.path.basename(refFilename))[0]
            print(name1)
            name2 = os.path.basename(imFilename)
            print(name2)

            print("Saving aligned image : ", outFilename)
            print("Aligning images ...")
            # Only the match visualisation (third item) is used here.
            _, _, img5 = self.alignImages(im, imReference)
            if isinstance(img5, str):
                # alignImages() returns the string 'white' when it has no result.
                print('white')
                continue
            print(outFilename)
            # cv2.imwrite needs the target directory to exist already.
            if not os.path.exists(outFilename):
                os.makedirs(outFilename)
            cv2.imwrite(os.path.join(outFilename, name1 + name2), img5)

        # NOTE(review): the archive is built once, after every pair has been
        # handled -- confirm this matches the intended flow.
        return self.zipDir(outFilename)

    def alignImages(self, im1, im2):
        """Match ORB features between two BGR images and warp *im1* onto *im2*.

        Side effects: writes ``matches.jpg`` to the working directory and
        draws key points onto both input arrays in place.

        Args:
            im1: Image to align.
            im2: Reference image.

        Returns:
            ``(im1Reg, h, img5)`` -- the warped image, the homography and a
            side-by-side match picture -- or ``('white', 'white', 'white')``
            when no key points, too few matches or no homography are found.
        """
        no_result = ('white', 'white', 'white')

        # Convert images to grayscale.
        im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

        # Detect ORB features and compute descriptors.
        orb = cv2.ORB_create(self.MAX_MATCHES)
        keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
        keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)
        if not keypoints1 or not keypoints2:
            return no_result

        # Match features and order them best (smallest distance) first.
        # sorted() is used because match() may return a tuple, which has no
        # .sort() method.
        matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
        matches = sorted(matcher.match(descriptors1, descriptors2, None),
                         key=lambda m: m.distance)

        # Keep only the best share of the matches.
        numGoodMatches = int(len(matches) * self.GOOD_MATCH_PERCENT)
        matches = matches[:numGoodMatches]

        # Draw the retained matches (debug artefact).
        imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
        cv2.imwrite("matches.jpg", imMatches)

        # A homography needs at least four point pairs; this also covers the
        # original "no points at all" case.
        if len(matches) < 4:
            return no_result

        # Extract the locations of the retained matches.
        points1 = np.array([keypoints1[m.queryIdx].pt for m in matches],
                           dtype=np.float32).reshape(-1, 2)
        points2 = np.array([keypoints2[m.trainIdx].pt for m in matches],
                           dtype=np.float32).reshape(-1, 2)

        # Find the homography; RANSAC may fail to produce one.
        h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
        if h is None:
            return no_result

        # Use the homography.
        height, width = im2.shape[:2]
        im1Reg = cv2.warpPerspective(im1, h, (width, height))
        img1 = cv2.drawKeypoints(im1, keypoints1, im1, color=(255, 0, 255))
        img2 = cv2.drawKeypoints(im2, keypoints2, im2, color=(255, 0, 255))
        img5 = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches, None, flags=2)
        return im1Reg, h, img5

    def return_img_stream(self, img_local_path):
        """Read a local image, Base64-encode it, and return the given path.

        The file is opened in binary mode; the former text mode could not
        read image bytes and could not feed them to ``b64encode``.

        NOTE(review): the encoded payload is discarded and the path is
        returned unchanged, exactly as before. The method name suggests the
        stream was meant to be returned -- confirm before changing it.

        Args:
            img_local_path: Absolute path of a single local image file.

        Returns:
            *img_local_path*, unchanged.
        """
        import base64
        with open(img_local_path, 'rb') as img_f:
            img_stream = base64.b64encode(img_f.read())  # currently unused
        return img_local_path

    @staticmethod
    def zipDir(dirpath, out_root="/home/wwwroot/default/"):
        """Compress the contents of a folder into ``<out_root><name>.zip``.

        ``name`` is *dirpath* with its first 21 characters removed.
        NOTE(review): the meaning of that fixed 21-character prefix is not
        visible here -- confirm against the callers.

        Declared as a static method so that both ``feat.zipDir(path)`` and
        ``instance.zipDir(path)`` work.

        Args:
            dirpath: Folder whose files (recursively) are archived; entries
                are stored relative to this folder.
            out_root: Text prepended verbatim to the archive name, so it
                should end with a path separator. Defaults to the previously
                hard-coded location.

        Returns:
            The archive name without *out_root* (``dirpath[21:] + ".zip"``).
        """
        outFullName1 = dirpath[21:] + ".zip"
        outFullName = out_root + outFullName1
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
            for path, dirnames, filenames in os.walk(dirpath):
                for filename in filenames:
                    full_path = os.path.join(path, filename)
                    # Drop the root folder so only its contents are stored.
                    archive.write(full_path, os.path.relpath(full_path, dirpath))
        return outFullName1
+# im1 = '/Users/wrl/Desktop/test0506/pic/2303.png'
+# im2 = '/Users/wrl/Desktop/test0506/pic/2304.png'
+# img1 = cv2.imread(im1, cv2.IMREAD_COLOR)
+# img2 = cv2.imread(im2, cv2.IMREAD_COLOR)
+# imReg, h, img5 =feat().alignImages(img1,img2)
+import fitz
+import time
+import re
+import os
+from sim import compute_similarity
+from flask import redirect,url_for,Blueprint
+from PIL import Image
+import math
+import operator
+from functools import reduce
# Blueprint named "2pics" whose routes live under the /2pics URL prefix.
# Original author's note: this blueprint does not seem to be of much use (?).
bp_2pics = Blueprint(
    "2pics",
    __name__,
    url_prefix="/2pics",
)
class to_pics:
    """Extract the embedded images of an uploaded PDF into a folder of PNGs."""

    def pdf2pic(self, path, pic_path):
        """Write every image object found in the PDF at *path* into *pic_path*.

        Each cross-reference object whose definition matches both the
        ``/Type /XObject`` and ``/Subtype /Image`` patterns is saved as
        ``img<N>.png``, numbered from 1 in discovery order.

        NOTE(review): ``_getXrefLength``, ``_getXrefString`` and ``writePNG``
        are kept as in the original; confirm they exist in the installed
        PyMuPDF version.

        Args:
            path: Path of the PDF file.
            pic_path: Existing directory that receives the PNG files.

        Returns:
            str: The number of images written, as a string.
        """
        # time.clock() was removed in Python 3.8; perf_counter() replaces it.
        t0 = time.perf_counter()
        # Regular expressions used to recognise image objects.
        checkXO = re.compile(r"/Type(?= */XObject)")
        checkIM = re.compile(r"/Subtype(?= */Image)")
        # Running count of extracted images.
        imgcount = 0
        # Open the PDF; the finally-clause makes sure it is closed again.
        doc = fitz.open(path)
        try:
            lenXREF = doc._getXrefLength()
            print(lenXREF)
            # Print basic information about the PDF.
            print("文件名:{}, 页数: {}, 对象: {}".format(path, len(doc), lenXREF - 1))
            # Visit every object.
            for i in range(1, lenXREF):
                # Textual definition of the object.
                text = doc._getXrefString(i)
                # Skip anything that is not both an XObject and an image.
                if not (checkXO.search(text) and checkIM.search(text)):
                    continue
                imgcount += 1
                # Build a pixmap from the object index.
                pix = fitz.Pixmap(doc, i)
                target = os.path.join(pic_path, "img{}.png".format(imgcount))
                if pix.n < 5:
                    # Fewer than five components: can be written as PNG directly.
                    pix.writePNG(target)
                else:
                    # Otherwise convert to RGB first (the CMYK case).
                    pix0 = fitz.Pixmap(fitz.csRGB, pix)
                    pix0.writePNG(target)
                    pix0 = None
                # Release the pixmap.
                pix = None
        finally:
            doc.close()
        t1 = time.perf_counter()
        print("运行时间:{}s".format(t1 - t0))
        print("提取了{}张图片".format(imgcount))
        return str(imgcount)

    def call_pdf2pic(self, file):
        """Extract the images of ``./upload/<file>`` and redirect to ``call_sim``.

        The pictures go into a new folder named after the file without its
        extension. os.path.splitext replaces the former ``[:-4]`` slice, which
        was only right for three-letter extensions.

        Args:
            file: File name inside the ``./upload/`` folder.

        Returns:
            The string ``"already exist"`` when the picture folder is already
            present; otherwise a redirect to the ``call_sim`` endpoint with
            ``path`` set to the file name without its extension.
        """
        # Path of the PDF.
        path = './upload/' + file
        pic_path = os.path.splitext(path)[0]
        # Refuse to reuse an existing picture folder.
        if os.path.exists(pic_path):
            print("文件夹已存在,请重新创建新文件夹!")
            return "already exist"
        # Create the folder that will hold the pictures.
        os.mkdir(pic_path)
        print(pic_path)
        self.pdf2pic(path, pic_path)
        return redirect(url_for('call_sim', path=os.path.splitext(file)[0]))
+# @bp_2pics.route('/tt//')
+# def call_sim(m, pic_path):
+# message = app.similarity(m, pic_path)
+# print("call_sim")
+# return message