云计算期末大作业 论文图像复用的机器自动检查 魏如蓝 10172100262
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

91 lines
2.8 KiB

3 years ago
  1. import fitz
  2. import time
  3. import re
  4. import os
  5. from sim import compute_similarity
  6. from flask import redirect,url_for,Blueprint
  7. from PIL import Image
  8. import math
  9. import operator
  10. from functools import reduce
  11. bp_2pics = Blueprint("2pics", __name__,url_prefix="/2pics")
  12. # 这个bp似乎没啥用,,?
  13. class to_pics:
  14. def pdf2pic(self, path, pic_path):
  15. t0 = time.clock()
  16. # 使用正则表达式来查找图片
  17. checkXO = r"/Type(?= */XObject)"
  18. checkIM = r"/Subtype(?= */Image)"
  19. # 打开pdf
  20. doc = fitz.open(path)
  21. # 图片计数
  22. imgcount = 0
  23. lenXREF = doc._getXrefLength()
  24. print(lenXREF)
  25. # 打印PDF的信息
  26. print("文件名:{}, 页数: {}, 对象: {}".format(path, len(doc), lenXREF - 1))
  27. # 遍历每一个对象
  28. for i in range(1, lenXREF):
  29. # 定义对象字符串
  30. text = doc._getXrefString(i)
  31. isXObject = re.search(checkXO, text)
  32. # 使用正则表达式查看是否是图片
  33. isImage = re.search(checkIM, text)
  34. # 如果不是对象也不是图片,则continue
  35. if not isXObject or not isImage:
  36. continue
  37. imgcount += 1
  38. # 根据索引生成图像
  39. pix = fitz.Pixmap(doc, i)
  40. # 根据pdf的路径生成图片的名称
  41. # new_name = path.replace('\\', '_') + "_img{}.png".format(imgcount)
  42. new_name = "img{}.png".format(imgcount)
  43. new_name = new_name.replace(':', '')
  44. # 如果pix.n<5,可以直接存为PNG
  45. if pix.n < 5:
  46. pix.writePNG(os.path.join(pic_path, new_name))
  47. # 否则先转换CMYK
  48. else:
  49. pix0 = fitz.Pixmap(fitz.csRGB, pix)
  50. pix0.writePNG(os.path.join(pic_path, new_name))
  51. pix0 = None
  52. # 释放资源
  53. pix = None
  54. t1 = time.clock()
  55. print("运行时间:{}s".format(t1 - t0))
  56. print("提取了{}张图片".format(imgcount))
  57. return str(imgcount)
  58. def call_pdf2pic(self, file):
  59. # pdf路径
  60. path = './upload/' + file
  61. pic_path = path[:-4]
  62. # 创建保存图片的文件夹
  63. if os.path.exists(pic_path):
  64. print("文件夹已存在,请重新创建新文件夹!")
  65. return "already exist"
  66. # raise SystemExit
  67. else:
  68. os.mkdir(pic_path)
  69. print(pic_path)
  70. m = to_pics().pdf2pic(path, pic_path)
  71. # return m
  72. return redirect(url_for('call_sim',path = file[:-4]))
  73. # return redirect(url_for('call_sim', m=m, pic_path=pic_path))
  74. # @bp_2pics.route('/tt/<m>/<pic_path>')
  75. # def call_sim(m, pic_path):
  76. # message = app.similarity(m, pic_path)
  77. # print("call_sim")
  78. # return message