@ -0,0 +1,95 @@ | |||
# from flask import Flask | |||
import pdf_to_pics,feature | |||
import os | |||
import uuid | |||
import platform | |||
from flask import Flask,request,redirect,url_for,Blueprint | |||
from werkzeug.utils import secure_filename | |||
from sim import compute_similarity | |||
# Path separator used when upload paths are assembled by hand: a backslash
# on Windows, a forward slash on every other platform. (The previous else
# branch also evaluated `platform.system()=="Linux"` and discarded the
# result; that statement had no effect and is gone.)
slash = '\\' if platform.system() == "Windows" else '/'
# Folder, relative to the working directory, that receives uploaded files.
UPLOAD_FOLDER = 'upload'
# Extensions (without the dot) that the upload view accepts.
ALLOW_EXTENSIONS = {'html', 'htm', 'doc', 'docx', 'mht', 'pdf'}
app = Flask(__name__)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Make sure the upload folder exists. exist_ok=True replaces the separate
# os.path.exists() test, so a folder created between the check and the
# makedirs() call can no longer make start-up fail.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# Check whether the file extension is in the allow-list.
def allowed_file(filename):
    """Return True if *filename* ends in an extension from ALLOW_EXTENSIONS.

    The extension is the text after the last dot. It is compared
    case-insensitively, so ``REPORT.PDF`` is accepted just like
    ``report.pdf``. Names that contain no dot are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return bool(dot) and extension.lower() in ALLOW_EXTENSIONS
# Upload a document and hand it over to the file2pic view.
@app.route('/', methods=['GET', 'POST'])
def upload_file():
    """Serve the upload form and store an uploaded document.

    On POST the file sent in the ``file`` form field is saved into the
    configured upload folder under a fresh UUID name, and the client is
    redirected to the ``file2pic`` view for that stored name. On GET, or
    when the posted file is missing or has a disallowed extension, the
    HTML upload form is returned.
    """
    if request.method == 'POST':
        # The uploaded file comes from the form field named "file".
        file = request.files['file']
        if file and allowed_file(file.filename):
            # Take the extension from the original name, which allowed_file()
            # has just validated. secure_filename() drops non-ASCII
            # characters and can return a name without any dot (for example
            # when the base name is entirely Chinese); taking the extension
            # from that result raised IndexError.
            extension = file.filename.rsplit('.', 1)[1].lower()
            # Uploaded names may collide, so the file is stored under a UUID.
            file_name = str(uuid.uuid4()) + '.' + extension
            file.save(os.path.join(app.config['UPLOAD_FOLDER'], file_name))
            file_path = os.path.join(
                os.getcwd(), app.config['UPLOAD_FOLDER'], file_name)
            print(file_path)
            return redirect(url_for('file2pic', file=file_name))
    return '''
    <!doctype html>
    <title>Upload new File</title>
    <h1>Select PDF to Upload</h1>
    <form action="" method=post enctype=multipart/form-data>
    <p><input type=file name=file>
    <input type=submit value=Upload>
    </form>
    '''
# Split the uploaded file into pictures via the to_pics class in pdf_to_pics.py.
@app.route('/tt/<file>')
def file2pic(file):
    """Run ``to_pics.call_pdf2pic`` on the stored *file* and return its result."""
    response = pdf_to_pics.to_pics().call_pdf2pic(file)
    print('file2pic')
    return response
# Compute the similarity via the simi class in sim/compute_similarity.py.
@app.route('/ttt/<path>')
def call_sim(path):
    """Run ``simi.similarity`` on *path* and return its result."""
    response = compute_similarity.simi().similarity(path)
    print("call_sim")
    return response
# @app.route('/tttt/<alist>') | |||
# def call_feature(alist): | |||
# print("call_feature") | |||
# the_feature = feature.feat() | |||
# message = the_feature.call_feature_extraction(alist) | |||
# # print("call_feature") | |||
# return message | |||
if __name__ == '__main__':
    # The blueprint from pdf_to_pics is attached only when this module is
    # executed directly, right before the development server starts.
    app.register_blueprint(pdf_to_pics.bp_2pics)
    # app.run(host='0.0.0.0', port=7000)  # IP Port
    app.run()
@ -0,0 +1,169 @@ | |||
from __future__ import print_function | |||
import cv2 | |||
import numpy as np | |||
import os | |||
from flask import render_template | |||
import zipfile | |||
# Value handed to cv2.ORB_create() when the ORB detector is built.
MAX_MATCHES = 500
# Fraction of the distance-sorted matches that is kept as "good".
GOOD_MATCH_PERCENT = 0.15
class feat:
    """ORB feature matching between image pairs and packaging of the results."""

    def call_feature_extraction_1(self, folder_path, list, index, tmp):
        """Draw ORB match previews for the top-ranked pairs and zip them.

        Args:
            folder_path: Source folder path. Everything after its first
                seven characters names the sub-folder under ``output/``.
            list: Sequence of records. Item 0 is the reference image path
                and item 1 the path of the image to align. Items 2 and 5
                are the descending sort keys (the similarity, per the
                original comment).
            index: ``int(index / 10)`` of the ranked records are processed.
            tmp: Unused.

        Returns:
            The URL string returned by :meth:`zipDir` for the output folder.
        """
        # Sort by similarity, best first. The parameter name shadows the
        # builtin but belongs to the public signature, so it is kept.
        ranked = sorted(list, key=lambda x: [x[2], x[5]], reverse=True)
        print(ranked)
        print('---------------')
        print(index)
        # The output folder is the same for every pair. Computing it up
        # front also keeps it defined when the loop runs zero times.
        outFilename = "output/" + folder_path[7:]
        for i in range(int(index / 10)):
            refFilename = ranked[i][0]
            print("Reading reference image : ", refFilename)
            imReference = cv2.imread(refFilename, cv2.IMREAD_COLOR)
            imFilename = ranked[i][1]
            print("Reading image to align : ", imFilename)
            im = cv2.imread(imFilename, cv2.IMREAD_COLOR)
            print(refFilename)
            print(imFilename)
            print(folder_path[7:])
            # name1 keeps its leading '/' (it doubles as the separator in
            # the output path) and drops the last four characters.
            name1 = refFilename[refFilename.rfind('/'):-4]
            print(name1)
            name2 = imFilename[imFilename.rfind('/') + 1:]
            print(name2)
            print("Saving aligned image : ", outFilename)
            print("Aligning images ...")
            # Only the match drawing is needed; the warped image and the
            # homography are discarded.
            _, _, img5 = self.alignImages(im, imReference)
            if isinstance(img5, str) and img5 == 'white':
                print('white')
            else:
                print(outFilename)
                # cv2.imwrite() does not create missing directories.
                if not os.path.exists(outFilename):
                    os.makedirs(outFilename)
                cv2.imwrite(outFilename + name1 + name2, img5)
        # NOTE(review): the archive is built once, after all pairs have been
        # processed - confirm this matches the intended flow.
        return self.zipDir(outFilename)

    def alignImages(self, im1, im2):
        """Match ORB features between two BGR images and estimate a homography.

        Side effects: writes ``matches.jpg`` into the working directory and
        draws the detected keypoints onto *im1* and *im2* in place.

        Returns:
            ``(im1Reg, h, img5)``: *im1* warped with the homography to
            *im2*'s size, the homography itself, and a drawing of the kept
            matches. ``('white', 'white', 'white')`` is returned instead
            when either image has no keypoints, fewer than four matches
            survive the filtering, or no homography can be estimated.
        """
        failure = ('white', 'white', 'white')
        # Convert images to grayscale.
        im1Gray = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
        im2Gray = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)
        # Detect ORB features and compute descriptors.
        orb = cv2.ORB_create(MAX_MATCHES)
        keypoints1, descriptors1 = orb.detectAndCompute(im1Gray, None)
        keypoints2, descriptors2 = orb.detectAndCompute(im2Gray, None)
        if not keypoints1 or not keypoints2:
            return failure
        # Match features.
        matcher = cv2.DescriptorMatcher_create(
            cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
        # sorted() instead of .sort(): some OpenCV builds return the matches
        # as a tuple, which has no in-place sort.
        matches = sorted(matcher.match(descriptors1, descriptors2, None),
                         key=lambda x: x.distance)
        # Keep only the best-scoring share of the matches.
        numGoodMatches = int(len(matches) * GOOD_MATCH_PERCENT)
        matches = matches[:numGoodMatches]
        # Draw top matches.
        imMatches = cv2.drawMatches(im1, keypoints1, im2, keypoints2,
                                    matches, None)
        cv2.imwrite("matches.jpg", imMatches)
        # A homography needs at least four point correspondences.
        if len(matches) < 4:
            return failure
        # Extract location of good matches.
        points1 = np.zeros((len(matches), 2), dtype=np.float32)
        points2 = np.zeros((len(matches), 2), dtype=np.float32)
        for i, match in enumerate(matches):
            points1[i, :] = keypoints1[match.queryIdx].pt
            points2[i, :] = keypoints2[match.trainIdx].pt
        # Find homography.
        h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
        if h is None:
            return failure
        # Use homography.
        height, width, channels = im2.shape
        im1Reg = cv2.warpPerspective(im1, h, (width, height))
        img1 = cv2.drawKeypoints(im1, keypoints1, im1, color=(255, 0, 255))
        img2 = cv2.drawKeypoints(im2, keypoints2, im2, color=(255, 0, 255))
        img5 = cv2.drawMatches(img1, keypoints1, img2, keypoints2, matches,
                               None, flags=2)
        return im1Reg, h, img5

    def return_img_stream(self, img_local_path):
        """Return the file at *img_local_path* as a base64-encoded string.

        Args:
            img_local_path: Path of a single image file on local disk.

        Returns:
            The file's bytes, base64-encoded and decoded to ASCII text.
        """
        import base64
        # Images are binary data: read bytes, not text, before encoding.
        with open(img_local_path, 'rb') as img_f:
            return base64.b64encode(img_f.read()).decode('ascii')

    @staticmethod
    def zipDir(dirpath):
        """Compress the folder *dirpath* into a zip archive.

        The archive is written to ``/home/wwwroot/default/`` and named after
        ``dirpath[21:]``.

        Args:
            dirpath: Path of the folder to compress.

        Returns:
            The ``http://106.75.226.23/`` URL string built from the same
            archive name.
        """
        # NOTE(review): the [21:] slice presumably strips a fixed-length
        # prefix from dirpath - confirm against the callers.
        archive_name = dirpath[21:] + ".zip"
        outFullName = "/home/wwwroot/default/" + archive_name
        outFullName1 = "http://106.75.226.23/" + archive_name
        # The with-block closes the archive even if a write fails.
        with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
            for path, dirnames, filenames in os.walk(dirpath):
                # Drop the root folder so entries are stored relative to it.
                fpath = path.replace(dirpath, '')
                for filename in filenames:
                    archive.write(os.path.join(path, filename),
                                  os.path.join(fpath, filename))
        return outFullName1
# im1 = '/Users/wrl/Desktop/test0506/pic/2303.png' | |||
# im2 = '/Users/wrl/Desktop/test0506/pic/2304.png' | |||
# img1 = cv2.imread(im1, cv2.IMREAD_COLOR) | |||
# img2 = cv2.imread(im2, cv2.IMREAD_COLOR) | |||
# imReg, h, img5 =feat().alignImages(img1,img2) | |||
@ -0,0 +1,91 @@ | |||
import fitz | |||
import time | |||
import re | |||
import os | |||
from sim import compute_similarity | |||
from flask import redirect,url_for,Blueprint | |||
from PIL import Image | |||
import math | |||
import operator | |||
from functools import reduce | |||
bp_2pics = Blueprint('2pics', __name__, url_prefix='/2pics')
# NOTE: this blueprint does not seem to serve any purpose (?)
class to_pics:
    """Extract the images embedded in an uploaded PDF into PNG files."""

    def pdf2pic(self, path, pic_path):
        """Save every image object of the PDF at *path* into *pic_path*.

        Each cross-reference entry whose definition matches both the
        XObject and the Image pattern is written as ``img<N>.png``, with
        N counting from 1.

        Args:
            path: Path of the PDF to read.
            pic_path: Existing folder that receives the PNG files.

        Returns:
            The number of extracted images, as a string.
        """
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # replacement for measuring elapsed time.
        t0 = time.perf_counter()
        # Regular expressions that identify image objects.
        checkXO = re.compile(r"/Type(?= */XObject)")
        checkIM = re.compile(r"/Subtype(?= */Image)")
        # Open the PDF.
        doc = fitz.open(path)
        try:
            # Number of images written so far.
            imgcount = 0
            lenXREF = doc._getXrefLength()
            print(lenXREF)
            # Print basic information about the PDF.
            print("文件名:{}, 页数: {}, 对象: {}".format(path, len(doc), lenXREF - 1))
            # Visit every cross-reference entry.
            for i in range(1, lenXREF):
                # Textual definition of the object.
                text = doc._getXrefString(i)
                # Skip anything that is not both an XObject and an Image.
                if not checkXO.search(text) or not checkIM.search(text):
                    continue
                imgcount += 1
                # Build the pixmap for this cross-reference number.
                pix = fitz.Pixmap(doc, i)
                new_name = "img{}.png".format(imgcount)
                target = os.path.join(pic_path, new_name)
                if pix.n < 5:
                    # Fewer than five components: can be saved as PNG directly.
                    pix.writePNG(target)
                else:
                    # Otherwise convert to RGB first (CMYK case).
                    pix0 = fitz.Pixmap(fitz.csRGB, pix)
                    pix0.writePNG(target)
                    pix0 = None
                # Release the pixmap.
                pix = None
        finally:
            # Always release the document, even if extraction fails.
            doc.close()
        t1 = time.perf_counter()
        print("运行时间:{}s".format(t1 - t0))
        print("提取了{}张图片".format(imgcount))
        return str(imgcount)

    def call_pdf2pic(self, file):
        """Extract the images of ``./upload/<file>`` and redirect to ``call_sim``.

        The images go into a new folder named like the upload without its
        extension.

        Args:
            file: Name of the uploaded file inside ``./upload/``.

        Returns:
            ``"already exist"`` if the target folder is already present,
            otherwise a redirect to the ``call_sim`` view for the
            extension-less name.
        """
        # Path of the uploaded PDF.
        path = './upload/' + file
        # splitext() instead of a fixed [:-4] slice, so extensions that are
        # not exactly three characters long (e.g. "docx") are removed
        # correctly as well.
        pic_path = os.path.splitext(path)[0]
        # Refuse to reuse an existing picture folder.
        if os.path.exists(pic_path):
            print("文件夹已存在,请重新创建新文件夹!")
            return "already exist"
        os.mkdir(pic_path)
        print(pic_path)
        self.pdf2pic(path, pic_path)
        return redirect(url_for('call_sim', path=os.path.splitext(file)[0]))
# @bp_2pics.route('/tt/<m>/<pic_path>') | |||
# def call_sim(m, pic_path): | |||
# message = app.similarity(m, pic_path) | |||
# print("call_sim") | |||
# return message | |||