@ -0,0 +1,95 @@ | |||||
# from flask import Flask | |||||
import pdf_to_pics,feature | |||||
import os | |||||
import uuid | |||||
import platform | |||||
from flask import Flask,request,redirect,url_for,Blueprint | |||||
from werkzeug.utils import secure_filename | |||||
from sim import compute_similarity | |||||
# Path separator of the running platform. os.sep is '\\' on Windows and '/'
# elsewhere, i.e. exactly what the previous hand-written platform check
# produced (its else-branch also contained a comparison with no effect).
slash = os.sep
# Directory, relative to the working directory, that receives uploads.
UPLOAD_FOLDER = 'upload'
# Lower-case file extensions accepted by allowed_file().
ALLOW_EXTENSIONS = {'html', 'htm', 'doc', 'docx', 'mht', 'pdf'}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Make sure the upload directory exists. exist_ok=True replaces the separate
# exists()/makedirs() pair, so there is no window in which another process
# can create the directory between the check and the creation.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Check whether the file extension is in the allow-list.
def allowed_file(filename):
    """Return True if *filename* ends in an extension from ALLOW_EXTENSIONS.

    The comparison is case-insensitive, so ``REPORT.PDF`` is accepted just
    like ``report.pdf``. An empty or missing filename, or one without a dot,
    is rejected.

    :param filename: client-supplied file name (may be empty or None).
    :return: bool
    """
    if not filename or '.' not in filename:
        return False
    # Only the text after the last dot counts as the extension.
    return filename.rsplit('.', 1)[1].lower() in ALLOW_EXTENSIONS
# Upload a document and hand it over to the file2pic view.
@app.route('/',methods=['GET','POST'])
def upload_file():
    """Show the upload form (GET) or store an uploaded file (POST).

    On a POST whose ``file`` part passes allowed_file(), the upload is saved
    in the configured upload folder under a fresh UUID name (uploads may share
    the same original name) and the client is redirected to the ``file2pic``
    view with that stored name. In every other case the HTML form is returned.
    """
    if request.method == 'POST':
        # The uploaded file is read from the form field named "file".
        file = request.files['file']
        if file and allowed_file(file.filename):
            # Take the extension from the original name: secure_filename()
            # drops non-ASCII characters, so a name such as "中文.pdf" comes
            # back as "pdf" with no dot and cannot be split. The extension is
            # safe to reuse because allowed_file() just validated it.
            extension = file.filename.rsplit('.', 1)[1].lower()
            file_name = str(uuid.uuid4()) + '.' + extension
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file_name))
            file_path = os.path.join(os.getcwd(), app.config['UPLOAD_FOLDER'], file_name)
            print(file_path)
            return redirect(url_for('file2pic', file=file_name))
    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Select PDF to Upload</h1>
    <form action="" method=post enctype=multipart/form-data>
    <p><input type=file name=file>
    <input type=submit value=Upload>
    </form>
    '''
# Use the to_pics class from pdf_to_pics.py to split the upload into images.
@app.route('/tt/<file>')
def file2pic(file):
    """Run pdf_to_pics.to_pics.call_pdf2pic on *file* and return its result.

    :param file: stored file name taken from the URL.
    :return: whatever call_pdf2pic returns, used as the view response.
    """
    result = pdf_to_pics.to_pics().call_pdf2pic(file)
    print('file2pic')
    return result
# Use sim/compute_similarity.py to compute the similarity.
@app.route('/ttt/<path>')
def call_sim(path):
    """Run compute_similarity.simi.similarity on *path* and return its result.

    :param path: value taken from the URL and passed through unchanged.
    :return: whatever similarity() returns, used as the view response.
    """
    result = compute_similarity.simi().similarity(path)
    print("call_sim")
    return result
# @app.route('/tttt/<alist>') | |||||
# def call_feature(alist): | |||||
# print("call_feature") | |||||
# the_feature = feature.feat() | |||||
# message = the_feature.call_feature_extraction(alist) | |||||
# # print("call_feature") | |||||
# return message | |||||
def _serve():
    """Register the pdf_to_pics blueprint and start Flask's built-in server."""
    app.register_blueprint(pdf_to_pics.bp_2pics)
    # app.run(host='0.0.0.0', port=7000)  # IP Port
    app.run()


if __name__ == "__main__":
    _serve()
@ -0,0 +1,169 @@ | |||||
from __future__ import print_function | |||||
import cv2 | |||||
import numpy as np | |||||
import os | |||||
from flask import render_template | |||||
import zipfile | |||||
# Passed as the first argument to cv2.ORB_create() in feat.alignImages.
MAX_MATCHES = 500
# Fraction of the distance-sorted matches that feat.alignImages keeps.
GOOD_MATCH_PERCENT = 0.15
class feat:
    """ORB feature matching between image pairs, plus result packaging."""

    def call_feature_extraction_1(self, folder_path, list, index, tmp):
        """Match the top-ranked image pairs and zip the visualisations.

        The entries of *list* are sorted in descending order by
        ``[entry[2], entry[5]]`` (the original comment calls this "sort by
        similarity"). For the first ``int(index / 10)`` entries the images at
        ``entry[0]`` (reference) and ``entry[1]`` are read and passed to
        alignImages(). Every successful match drawing is written below
        ``"output/" + folder_path[7:]``, and that directory is finally zipped
        with zipDir().

        NOTE(review): the source lost its indentation; the zip step is assumed
        to run once after the loop, not inside it - confirm.

        :param folder_path: path whose first 7 characters are dropped to build
            the output directory name (presumably a fixed prefix - confirm
            against the caller).
        :param list: sequence of indexable records as described above. The
            name shadows the builtin but is kept for keyword callers.
        :param index: number; ``int(index / 10)`` pairs are processed.
        :param tmp: unused.
        :return: the URL string returned by zipDir().
        """
        pairs = sorted(list, key=lambda x: [x[2], x[5]], reverse=True)
        print(pairs)
        print('---------------')
        print(index)

        # Computed before the loop so it is defined even when no pair is
        # processed.
        outFilename = "output/" + folder_path[7:]

        # Clamp at 0: a negative slice bound would count from the end. Slicing
        # also avoids running past the end of a short list.
        wanted = max(int(index / 10), 0)
        for pair in pairs[:wanted]:
            refFilename = pair[0]
            print("Reading reference image : ", refFilename)
            imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
            imFilename = pair[1]
            print("Reading image to align : ", imFilename)
            im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
            print(refFilename)
            print(imFilename)
            print(folder_path[7:])
            if imReference is None or im is None:
                # cv2.imread signals an unreadable file by returning None.
                print("Skipping unreadable image pair")
                continue

            # Output name: reference stem followed by the full second name.
            name1 = os.path.splitext(os.path.basename(refFilename))[0]
            print(name1)
            name2 = os.path.basename(imFilename)
            print(name2)

            print("Saving aligned image : ", outFilename)
            print("Aligning images ...")
            # Only the match drawing (third value) is used here.
            imReg, h, img5 = self.alignImages(im, imReference)
            if isinstance(img5, str):
                # alignImages returns the 'white' sentinel when it cannot
                # match the two images.
                print('white')
                continue

            print(outFilename)
            # cv2.imwrite does not create missing directories.
            os.makedirs(outFilename, exist_ok=True)
            cv2.imwrite(os.path.join(outFilename, name1 + name2), img5)

        return feat.zipDir(outFilename)

    def alignImages(self, im1, im2):
        """Register *im1* onto *im2* with ORB features and a RANSAC homography.

        :param im1: BGR image to align.
        :param im2: BGR reference image.
        :return: ``(im1Reg, h, img5)`` - the warped image, the homography and
            a side-by-side drawing of the kept matches. When no usable set of
            matches exists the tuple ``('white', 'white', 'white')`` is
            returned instead.
        """
        failed = ('white', 'white', 'white')

        # Convert images to grayscale
        im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

        # Detect ORB features and compute descriptors.
        orb = cv2.ORB_create(MAX_MATCHES)
        keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
        keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)
        if not keypoints1 or not keypoints2:
            return failed

        # Match features and sort by distance (best first). sorted() is used
        # because match() may return a tuple, which has no sort().
        matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
        matches = sorted(matcher.match(descriptors1, descriptors2, None),
                         key=lambda m: m.distance)

        # Remove not so good matches
        matches = matches[:int(len(matches) * GOOD_MATCH_PERCENT)]

        # Draw top matches (debug artefact written to the working directory).
        imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2, matches, None)
        cv2.imwrite("matches.jpg", imMatches)

        # A homography needs at least four point correspondences.
        if len(matches) < 4:
            return failed

        # Extract location of good matches
        points1 = np.float32([keypoints1[m.queryIdx].pt for m in matches])
        points2 = np.float32([keypoints2[m.trainIdx].pt for m in matches])

        # Find homography
        h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
        if h is None:
            # RANSAC found no consistent model.
            return failed

        # Use homography
        height, width = im2.shape[:2]
        im1Reg = cv2.warpPerspective(im1, h, (width, height))

        # Draw into fresh images so the caller's arrays are not modified.
        img1 = cv2.drawKeypoints(im1, keypoints1, None, color=(255, 0, 255))
        img2 = cv2.drawKeypoints(im2, keypoints2, None, color=(255, 0, 255))
        img5 = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches, None, flags=2)
        return im1Reg, h, img5

    def return_img_stream(self, img_local_path):
        """Return the content of a local image file as base64 text.

        :param img_local_path: path of a single image file.
        :return: base64 encoding of the file's bytes, as an ASCII ``str``.
        """
        import base64
        # Binary mode: image bytes are not text, and b64encode needs bytes.
        with open(img_local_path, 'rb') as img_f:
            raw = img_f.read()
        return base64.b64encode(raw).decode('ascii')

    @staticmethod
    def zipDir(dirpath, out_root="/home/wwwroot/default/", url_root="http://106.75.226.23/"):
        """Zip the contents of *dirpath* and return the archive's URL.

        The archive name is ``dirpath[21:] + ".zip"``; the first 21 characters
        of *dirpath* are dropped (presumably a fixed prefix - confirm against
        the caller). Entries are stored relative to *dirpath*.

        :param dirpath: directory to compress.
        :param out_root: directory prefix the archive is written under.
        :param url_root: URL prefix used for the returned link.
        :return: ``url_root + dirpath[21:] + ".zip"``.
        """
        archive_name = dirpath[21:] + ".zip"
        outFullName = out_root + archive_name
        # The context manager closes the archive even if a write fails.
        with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
            for path, dirnames, filenames in os.walk(dirpath):
                for filename in filenames:
                    full_path = os.path.join(path, filename)
                    # Store only the path below dirpath inside the archive.
                    archive.write(full_path, os.path.relpath(full_path, dirpath))
        return url_root + archive_name
# im1 = '/Users/wrl/Desktop/test0506/pic/2303.png' | |||||
# im2 = '/Users/wrl/Desktop/test0506/pic/2304.png' | |||||
# img1 = cv2.imread(im1, cv2.IMREAD_COLOR) | |||||
# img2 = cv2.imread(im2, cv2.IMREAD_COLOR) | |||||
# imReg, h, img5 =feat().alignImages(img1,img2) | |||||
@ -0,0 +1,91 @@ | |||||
import fitz | |||||
import time | |||||
import re | |||||
import os | |||||
from sim import compute_similarity | |||||
from flask import redirect,url_for,Blueprint | |||||
from PIL import Image | |||||
import math | |||||
import operator | |||||
from functools import reduce | |||||
# Blueprint named "2pics", mounted under the /2pics URL prefix.
bp_2pics = Blueprint("2pics", __name__,url_prefix="/2pics")
# This blueprint does not seem to be of much use..?
class to_pics:
    """Extract the images embedded in an uploaded PDF file."""

    @staticmethod
    def _first_attr(obj, *names):
        """Return the first attribute of *obj* found among *names*."""
        for name in names:
            found = getattr(obj, name, None)
            if found is not None:
                return found
        raise AttributeError("{!r} has none of {}".format(obj, names))

    def pdf2pic(self, path, pic_path):
        """Write every image XObject of the PDF at *path* into *pic_path*.

        Images are saved as ``img1.png``, ``img2.png``, ... in the order their
        objects appear in the cross-reference table.

        :param path: path of the PDF file.
        :param pic_path: existing directory that receives the PNG files.
        :return: number of extracted images, as a string.
        """
        # time.clock() was removed in Python 3.8.
        t0 = time.perf_counter()
        # Regular expressions used to recognise image objects.
        checkXO = re.compile(r"/Type(?= */XObject)")
        checkIM = re.compile(r"/Subtype(?= */Image)")
        # Open the PDF.
        doc = fitz.open(path)
        try:
            # Prefer the public PyMuPDF names and fall back to the legacy
            # private ones this code was written against.
            xref_length = self._first_attr(doc, "xref_length", "_getXrefLength")
            xref_object = self._first_attr(doc, "xref_object", "_getXrefString")
            # Image counter.
            imgcount = 0
            lenXREF = xref_length()
            print(lenXREF)
            # Print information about the PDF.
            print("文件名:{}, 页数: {}, 对象: {}".format(path, len(doc), lenXREF - 1))
            # Visit every object.
            for i in range(1, lenXREF):
                # Source text of the object definition.
                text = xref_object(i)
                # Skip anything that is not both an XObject and an image.
                if not checkXO.search(text) or not checkIM.search(text):
                    continue
                imgcount += 1
                # Build a pixmap from the object number.
                pix = fitz.Pixmap(doc, i)
                new_name = "img{}.png".format(imgcount)
                new_name = new_name.replace(':', '')
                target = os.path.join(pic_path, new_name)
                # With pix.n < 5 the pixmap can be stored as PNG directly;
                # otherwise it is converted from CMYK to RGB first.
                if pix.n >= 5:
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                self._first_attr(pix, "save", "writePNG")(target)
                # Release the pixmap.
                pix = None
        finally:
            doc.close()
        t1 = time.perf_counter()
        print("运行时间:{}s".format(t1 - t0))
        print("提取了{}张图片".format(imgcount))
        return str(imgcount)

    def call_pdf2pic(self, file):
        """Extract the images of ``./upload/<file>`` and redirect to call_sim.

        The images go into a new directory named after the file without its
        extension. If that directory already exists nothing is extracted.

        :param file: file name inside the ``./upload`` directory.
        :return: the string ``"already exist"`` when the target directory is
            already present, otherwise a redirect to the ``call_sim`` view.
        """
        # Path of the uploaded file.
        path = './upload/' + file
        # splitext handles extensions of any length; a fixed [:-4] slice only
        # fits three-letter ones.
        stem = os.path.splitext(file)[0]
        pic_path = './upload/' + stem
        # Create the directory that receives the images.
        if os.path.exists(pic_path):
            print("文件夹已存在,请重新创建新文件夹!")
            return "already exist"
        os.mkdir(pic_path)
        print(pic_path)
        self.pdf2pic(path, pic_path)
        return redirect(url_for('call_sim', path=stem))
# @bp_2pics.route('/tt/<m>/<pic_path>') | |||||
# def call_sim(m, pic_path): | |||||
# message = app.similarity(m, pic_path) | |||||
# print("call_sim") | |||||
# return message | |||||