魏如蓝 4 years ago
parent
commit
574e0cfa52
6 changed files with 331 additions and 0 deletions
  1. BIN
      codes/sim/__init__.py
  2. +40
    -0
      codes/sim/aHash.py
  3. +89
    -0
      codes/sim/compute_similarity.py
  4. +36
    -0
      codes/sim/dHash.py
  5. +44
    -0
      codes/sim/histogram.py
  6. +122
    -0
      codes/sim/pHash.py

BIN
codes/sim/__init__.py View File


+ 40
- 0
codes/sim/aHash.py View File

@ -0,0 +1,40 @@
# Normalise the image
def regularizeImage(img, size=(8, 8)):
    """Return a resized, mode-'L' version of ``img``.

    Args:
        img: Image object exposing ``resize`` and ``convert`` (the caller in
            compute_similarity passes images opened with PIL).
        size: Target size handed to ``img.resize``; defaults to ``(8, 8)``.

    Returns:
        Whatever ``convert('L')`` returns for the resized image ('L' is
        Pillow's single-channel grayscale mode).
    """
    resized = img.resize(size)
    grayscale = resized.convert('L')
    return grayscale
# Compute the hash code
def getHashCode(img, size=(8, 8)):
    """Build the average-hash bit list of ``img``.

    Every pixel in the ``size[0]`` x ``size[1]`` grid is read with
    ``img.getpixel`` (first coordinate varies slowest) and compared with the
    mean of all sampled pixels.

    Args:
        img: Object exposing ``getpixel((x, y))`` returning a number.
        size: Extent of the grid to sample; defaults to ``(8, 8)``.

    Returns:
        A list of ``size[0] * size[1]`` ints: 1 where the pixel is strictly
        greater than the mean, 0 otherwise.

    Raises:
        ZeroDivisionError: If the grid is empty.
    """
    pixels = [
        img.getpixel((x, y))
        for x in range(size[0])
        for y in range(size[1])
    ]
    mean = sum(pixels) / len(pixels)
    return [1 if value > mean else 0 for value in pixels]
# Compare two hash codes
def compHashCode(hc1, hc2):
    """Count the positions at which two hash codes agree.

    Args:
        hc1: First sequence of hash bits.
        hc2: Second sequence of hash bits.

    Returns:
        Number of index positions where the two sequences hold equal values.
        The sequences are paired with ``zip``, so any surplus tail of the
        longer one is ignored.
    """
    return sum(1 for left, right in zip(hc1, hc2) if left == right)
# Average-hash (aHash) similarity
def calaHashSimilarity(img1, img2):
    """Return the average-hash similarity score of two images.

    Each image is normalised with ``regularizeImage`` and hashed with
    ``getHashCode`` (both using their default sizes); the two codes are then
    compared with ``compHashCode``.

    Args:
        img1: First image.
        img2: Second image.

    Returns:
        The number of matching hash positions as reported by
        ``compHashCode``.
    """
    first_code, second_code = (
        getHashCode(regularizeImage(image)) for image in (img1, img2)
    )
    return compHashCode(first_code, second_code)
# Public API: only the top-level similarity function is exported by `import *`.
__all__ = ['calaHashSimilarity']

+ 89
- 0
codes/sim/compute_similarity.py View File

@ -0,0 +1,89 @@
from PIL import Image
from multiprocessing import Process
import sim.histogram as htg
import sim.aHash as ah
import sim.pHash as ph
import sim.dHash as dh
import os
import feature
from flask import redirect,url_for
# if __name__ == '__main__':
class simi:
    """Compute pairwise similarity scores for the images of one upload folder."""

    def similarity(self, path):
        """Score every image pair in ``upload/<path>`` and pass the results on.

        The number of regular files in the folder is counted as ``tmp`` and
        the images are then addressed as ``img1.png`` .. ``img<tmp>.png``.
        For every unordered pair a record
        ``[path1, path2, histogram, aHash, pHash, dHash]`` is built, and all
        records are handed to ``feature.feat().call_feature_extraction_1``.

        Args:
            path: Folder name appended to ``"upload/"``.

        Returns:
            Whatever ``call_feature_extraction_1`` returns.
        """
        print("begin")
        # NOTE(review): `path` is concatenated unchecked -- confirm the caller
        # sanitises it before it reaches this method.
        folder_path = "upload/" + path

        # Number of regular files in the folder. The loops below assume these
        # are exactly img1.png .. img<tmp>.png.
        tmp = sum(
            1
            for item in os.listdir(folder_path)
            if os.path.isfile(os.path.join(folder_path, item))
        )

        # One record per unordered pair (i < j).
        pairs = []
        for i in range(1, tmp + 1):
            for j in range(i + 1, tmp + 1):
                str1 = folder_path + '/img' + str(i) + '.png'
                str2 = folder_path + '/img' + str(j) + '.png'
                pairs.append([str1, str2] + self._compare_pair(str1, str2))

        index = len(pairs)
        the_feature = feature.feat()
        message = the_feature.call_feature_extraction_1(folder_path, pairs, index, tmp)
        return message

    def _compare_pair(self, str1, str2):
        """Return ``[histogram, aHash, pHash, dHash]`` scores for two image files."""
        # Open both files in a `with` block so the handles are released as
        # soon as this pair has been scored instead of leaking per pair.
        with Image.open(str1) as img1, Image.open(str2) as img2:
            img1_htg = htg.regularizeImage(img1)
            img2_htg = htg.regularizeImage(img2)
            hg1 = img1_htg.histogram()
            hg2 = img2_htg.histogram()

            # Draw the histogram in a separate process so plotting does not
            # block the scoring.
            # NOTE(review): one process is started per pair and never joined.
            sub_thread = Process(target=htg.drawHistogram, args=(hg1, hg2,))
            sub_thread.start()

            # Histogram-distance similarity
            htg_result = htg.calMultipleHistogramSimilarity(img1_htg, img2_htg)
            print('依据图片直方图距离计算相似度:{}'.format(htg_result))

            # aHash calculation
            ah_result = ah.calaHashSimilarity(img1, img2)
            print('依据平均哈希算法计算相似度:{}/{}'.format(ah_result, 64))

            # pHash calculation
            ph_result = ph.calpHashSimilarity(img1, img2)
            print('依据感知哈希算法计算相似度:{}/{}'.format(ph_result, 64))

            # dHash calculation
            dh_result = dh.caldHashSimilarity(img1, img2)
            print('依据差异哈希算法计算相似度:{}/{}'.format(dh_result, 64))

            return [htg_result, ah_result, ph_result, dh_result]

+ 36
- 0
codes/sim/dHash.py View File

@ -0,0 +1,36 @@
# Normalise the image
def regularizeImage(img, size=(9, 8)):
    """Resize ``img`` to ``size`` and convert the result to mode 'L'.

    Args:
        img: Image object exposing ``resize`` and ``convert``.
        size: Target size handed to ``img.resize``; defaults to ``(9, 8)``,
            one more column than row so that ``getHashCode`` can compare
            each pixel with its right-hand neighbour.

    Returns:
        The result of ``convert('L')`` on the resized image.
    """
    shrunk = img.resize(size)
    return shrunk.convert('L')
# Compute the hash code
def getHashCode(img, size=(9, 8)):
    """Build the difference-hash bit list of ``img``.

    For every ``x`` in ``range(size[0] - 1)`` and ``y`` in ``range(size[1])``
    (``x`` varies slowest) the pixel at ``(x, y)`` is compared with the pixel
    at ``(x + 1, y)``.

    Args:
        img: Object exposing ``getpixel((x, y))``.
        size: ``(columns, rows)`` extent to scan; defaults to ``(9, 8)``.

    Returns:
        A list of ``(size[0] - 1) * size[1]`` ints: 1 where the pixel is
        strictly greater than its ``x + 1`` neighbour, 0 otherwise.
    """
    return [
        1 if img.getpixel((x, y)) > img.getpixel((x + 1, y)) else 0
        for x in range(size[0] - 1)
        for y in range(size[1])
    ]
# Compare two hash codes
def compHashCode(hc1, hc2):
    """Return how many paired entries of ``hc1`` and ``hc2`` are equal.

    Args:
        hc1: First sequence of hash bits.
        hc2: Second sequence of hash bits.

    Returns:
        The count of positions holding equal values. Pairing uses ``zip``,
        so entries beyond the shorter sequence are not considered.
    """
    matching = [None for first, second in zip(hc1, hc2) if first == second]
    return len(matching)
# Difference-hash (dHash) similarity
def caldHashSimilarity(img1, img2):
    """Return the difference-hash similarity score of two images.

    Both images are normalised with ``regularizeImage`` and hashed with
    ``getHashCode`` using their default sizes, then the codes are compared
    with ``compHashCode``.

    Args:
        img1: First image.
        img2: Second image.

    Returns:
        The number of matching hash positions as reported by
        ``compHashCode``.
    """
    gray1 = regularizeImage(img1)
    gray2 = regularizeImage(img2)
    return compHashCode(getHashCode(gray1), getHashCode(gray2))
# Public API: only the top-level similarity function is exported by `import *`.
__all__ = ['caldHashSimilarity']

+ 44
- 0
codes/sim/histogram.py View File

@ -0,0 +1,44 @@
import matplotlib.pyplot as plt
# Normalise the image
def regularizeImage(img, size=(256, 256)):
    """Resize ``img`` to ``size`` and convert the result to mode 'RGB'.

    Args:
        img: Image object exposing ``resize`` and ``convert``.
        size: Target size handed to ``img.resize``; defaults to
            ``(256, 256)``.

    Returns:
        The result of ``convert('RGB')`` on the resized image.
    """
    scaled = img.resize(size)
    rgb = scaled.convert('RGB')
    return rgb
# Plot the two histograms
def drawHistogram(hg1, hg2):
    """Plot two histograms on one figure and show it.

    Args:
        hg1: Sequence of counts drawn in blue and labelled ``img1``.
        hg2: Sequence of counts drawn in red and labelled ``img2``.

    Returns:
        None. ``plt.show()`` is called as the last step.
    """
    curves = (
        (hg1, 'blue', 'img1'),
        (hg2, 'red', 'img2'),
    )
    for counts, colour, name in curves:
        plt.plot(range(len(counts)), counts, color=colour, linewidth=1.5, label=name)
    plt.legend(loc='upper left')
    plt.title('Histogram Similarity')
    plt.show()
# Split the image into blocks (4x4 blocks for a 256x256 image with the default part size)
def splitImage(img, part_size=(64, 64)):
    """Cut ``img`` into blocks of ``part_size``.

    Args:
        img: Image object exposing ``size`` (width, height) and ``crop``.
        part_size: ``(width, height)`` of each block; defaults to
            ``(64, 64)``.

    Returns:
        A list of copies of the cropped blocks, ordered by left edge first
        and top edge second. Crop boxes are not clamped to the image bounds.
    """
    width, height = img.size
    block_w, block_h = part_size
    return [
        img.crop((left, top, left + block_w, top + block_h)).copy()
        for left in range(0, width, block_w)
        for top in range(0, height, block_h)
    ]
# Similarity of a single block, from the distance between its histograms
def calSingleHistogramSimilarity(hg1, hg2):
    """Return the mean per-bin similarity of two histograms.

    Each pair of bins contributes 1 when the counts are equal and
    ``1 - |x1 - x2| / max(x1, x2)`` otherwise; the contributions are
    averaged over all bins.

    Args:
        hg1: First sequence of bin counts.
        hg2: Second sequence of bin counts, same length as ``hg1``.

    Returns:
        The average per-bin similarity (1 means identical histograms).

    Raises:
        ValueError: If the histograms differ in length or are empty.
            ``ValueError`` is a subclass of ``Exception``, so callers that
            catch ``Exception`` for the length mismatch keep working.
    """
    if len(hg1) != len(hg2):
        raise ValueError('样本点个数不一样')
    if len(hg1) == 0:
        # Fail with a clear message instead of an opaque ZeroDivisionError.
        raise ValueError('histograms must not be empty')

    # Named `total` so the builtin `sum` is not shadowed.
    total = 0
    for x1, x2 in zip(hg1, hg2):
        if x1 != x2:
            # NOTE(review): assumes non-negative counts, so max(x1, x2) > 0
            # whenever the two bins differ.
            total += 1 - float(abs(x1 - x2) / max(x1, x2))
        else:
            total += 1
    return total / len(hg1)
# Similarity of two images, from the histogram distance of their blocks
def calMultipleHistogramSimilarity(img1, img2):
    """Return the mean block-wise histogram similarity of two images.

    Both images are cut into blocks with ``splitImage``; matching blocks are
    compared with ``calSingleHistogramSimilarity`` and the scores averaged.

    The average is taken over the number of block pairs actually compared
    rather than a fixed 16, so the result stays a true mean for images that
    do not split into exactly 4x4 blocks. For 16 blocks the value is
    unchanged.

    Args:
        img1: First image (exposing ``size``, ``crop`` and blocks with
            ``histogram()``).
        img2: Second image, same requirements.

    Returns:
        The mean similarity as a float, or ``0.0`` when there are no blocks
        to compare.
    """
    answer = 0
    block_count = 0
    for sub_img1, sub_img2 in zip(splitImage(img1), splitImage(img2)):
        answer += calSingleHistogramSimilarity(sub_img1.histogram(), sub_img2.histogram())
        block_count += 1
    if block_count == 0:
        # Matches the previous result (0 / 16.0) for images with no blocks.
        return 0.0
    return float(answer / block_count)
# Public API: the names exported by `import *`; the remaining helpers stay internal.
__all__ = ['regularizeImage', 'drawHistogram', 'calMultipleHistogramSimilarity']

+ 122
- 0
codes/sim/pHash.py View File

@ -0,0 +1,122 @@
import math
import unittest
# Normalise the image
def regularizeImage(img, size=(32, 32)):
    """Resize ``img`` to ``size`` and convert the result to mode 'L'.

    Args:
        img: Image object exposing ``resize`` and ``convert``.
        size: Target size handed to ``img.resize``; defaults to ``(32, 32)``.

    Returns:
        The result of ``convert('L')`` on the resized image.
    """
    small = img.resize(size)
    gray = small.convert('L')
    return gray
# Read the image pixels into a matrix
def getMatrix(img):
    """Return the pixels of ``img`` as a list of rows.

    Args:
        img: Object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))``.

    Returns:
        A list with ``size[1]`` rows, each holding ``size[0]`` pixel values,
        so ``result[y][x]`` is the pixel at ``(x, y)``.
    """
    dims = img.size
    n_cols, n_rows = dims[0], dims[1]
    return [
        [img.getpixel((x, y)) for x in range(n_cols)]
        for y in range(n_rows)
    ]
# Build the coefficient matrix
def getCoefficient(length):
    """Return the ``length`` x ``length`` cosine coefficient matrix.

    Row 0 holds ``1 / sqrt(length)`` in every column; row ``i >= 1`` holds
    ``sqrt(2 / length) * cos(i * pi * (j + 0.5) / length)`` in column ``j``.

    Args:
        length: Number of rows and columns.

    Returns:
        The matrix as a list of ``length`` rows of floats.

    Raises:
        ZeroDivisionError: If ``length`` is 0.
    """
    flat = 1.0 / math.sqrt(length)
    rows = [[flat for _ in range(length)]]
    scale = math.sqrt(2.0 / length)
    for i in range(1, length):
        rows.append([
            scale * math.cos(i * math.pi * (j + 0.5) / length)
            for j in range(0, length)
        ])
    return rows
# Transpose a matrix
def getTranspose(matrix):
    """Return the transpose of ``matrix``.

    The previous index-based loops used the row count for the outer bound
    and the column count for the inner one, which only works for square
    input. Pairing the rows with ``zip`` handles any rectangular matrix and
    gives the same result for square ones.

    Args:
        matrix: List of equally long rows.

    Returns:
        A new list of lists where ``result[i][j] == matrix[j][i]``. An empty
        matrix yields ``[]``. Rows of unequal length are truncated to the
        shortest row by ``zip``.
    """
    return [list(column) for column in zip(*matrix)]
# Multiply two matrices
def getMultiply(matrix1, matrix2):
    """Return the matrix product ``matrix1 x matrix2``.

    The number of result columns is taken from the first row of
    ``matrix2``. It used to be read from ``matrix2[i]`` with ``i`` being a
    row index of ``matrix1``, which raised ``IndexError`` whenever
    ``matrix1`` had more rows than ``matrix2``.

    Args:
        matrix1: Left operand, a list of rows.
        matrix2: Right operand, a list of rows; it needs as many rows as
            ``matrix1`` has columns.

    Returns:
        A new list of rows of floats (the accumulator starts at ``0.0``).
        An empty ``matrix1`` yields ``[]``.

    Raises:
        IndexError: If ``matrix2`` is empty while ``matrix1`` is not, or if
            ``matrix2`` has fewer rows than ``matrix1`` has columns.
    """
    if not matrix1:
        return []
    n_cols = len(matrix2[0])
    product = []
    for row in matrix1:
        out_row = []
        for j in range(n_cols):
            acc = 0.0
            for h in range(len(row)):
                acc += row[h] * matrix2[h][j]
            out_row.append(acc)
        product.append(out_row)
    return product
# Compute the DCT
def DCT(matrix):
    """Return the 2-D discrete cosine transform of a square matrix.

    The transform is ``A x matrix x A^T`` with ``A`` from ``getCoefficient``.
    Previously the intermediate product ``A x matrix`` was computed and then
    discarded, so the function returned ``matrix x A^T`` (a transform along
    one axis only). The intermediate is now fed into the second
    multiplication. This changes the coefficients, and therefore the hash
    codes derived from them.

    Args:
        matrix: Square list of rows (``len(matrix)`` is used as the size of
            the coefficient matrix).

    Returns:
        The transformed matrix as a list of rows of floats.
    """
    length = len(matrix)
    A = getCoefficient(length)
    AT = getTranspose(A)
    # Transform along the first axis, then along the second.
    temp = getMultiply(A, matrix)
    DCT_matrix = getMultiply(temp, AT)
    return DCT_matrix
# Take the top-left 8x8 corner and flatten it into a list
def submatrix_list(matrix, size=(8, 8)):
    """Flatten the top-left corner of ``matrix`` into a list, row by row.

    Args:
        matrix: List of rows, at least ``size[0]`` rows of ``size[1]``
            entries each.
        size: ``(rows, columns)`` of the corner to take; defaults to
            ``(8, 8)``.

    Returns:
        A flat list of ``size[0] * size[1]`` values.

    Raises:
        IndexError: If ``matrix`` is smaller than the requested corner.
    """
    n_rows, n_cols = size[0], size[1]
    return [matrix[r][c] for r in range(n_rows) for c in range(n_cols)]
# Compute the hash code
def getHashCode(sub_list):
    """Turn a list of numbers into hash bits by thresholding at the mean.

    Args:
        sub_list: Non-empty sequence of numbers.

    Returns:
        A list of ints, 1 for every value strictly greater than the mean of
        ``sub_list`` and 0 for the rest.

    Raises:
        ZeroDivisionError: If ``sub_list`` is empty.
    """
    threshold = sum(sub_list) / len(sub_list)
    return [1 if value > threshold else 0 for value in sub_list]
# Compare two hash codes
def compHashCode(hc1, hc2):
    """Count the equal entries of two hash codes, position by position.

    Args:
        hc1: First sequence of hash bits.
        hc2: Second sequence of hash bits.

    Returns:
        The number of positions where both sequences hold the same value;
        ``zip`` pairing means the tail of a longer sequence is ignored.
    """
    matches = 0
    for pair in zip(hc1, hc2):
        matches += 1 if pair[0] == pair[1] else 0
    return matches
# Perceptual-hash (pHash) similarity
def calpHashSimilarity(img1, img2):
    """Return the perceptual-hash similarity score of two images.

    Each image goes through ``regularizeImage``, ``getMatrix``, ``DCT``,
    ``submatrix_list`` and ``getHashCode`` (all with their defaults); the
    two resulting codes are compared with ``compHashCode``.

    Args:
        img1: First image.
        img2: Second image.

    Returns:
        The number of matching hash positions as reported by
        ``compHashCode``.
    """
    codes = []
    for image in (img1, img2):
        pixels = getMatrix(regularizeImage(image))
        corner = submatrix_list(DCT(pixels))
        codes.append(getHashCode(corner))
    return compHashCode(codes[0], codes[1])
# Unit tests
class TestpHash(unittest.TestCase):
    """Unit tests for the perceptual-hash helpers."""

    def test_getHashCode(self):
        """Only the value above the mean (2) maps to 1."""
        expected = [0, 0, 1]
        actual = getHashCode([1, 2, 3])
        self.assertEqual(actual, expected)
# Run the unit tests when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
# Public API: only the top-level similarity function is exported by `import *`.
__all__ = ['calpHashSimilarity']

Loading…
Cancel
Save