In [111]:
import torch as tch

# Integer sequence 0..3 as a 1-D tensor.
vec_seq = tch.arange(4)

# Insert a new axis in front of the existing one, in place (trailing
# underscore), then display the resulting shape as the cell output.
vec_seq.unsqueeze_(-2)
vec_seq.shape

torch.Size([1, 4])

In [112]:
class KLAttention(tch.nn.Module):
    """Pairwise KL-divergence scores between the rows of the input.

    For an input ``x`` whose last two axes are ``(n, d)``, ``forward``
    returns a tensor whose last two axes are ``(n, n)`` with

        out[..., i, k] = sum_j x[i, j] * (log(x[i, j] + eps) - log(x[k, j] + eps))

    When every row ``x[i]`` (a slice along the last axis) is a probability
    distribution, this is ``KL[x[i] || x[k]]``.

    Args:
        eps: Non-negative constant added to ``x`` before taking the log, so
            that zero probabilities do not yield ``log(0) = -inf`` (and then
            ``0 * -inf = nan``). The default ``1e-40`` is the value that was
            previously hard-coded. Note that it is a subnormal number in
            float32 and rounds to zero in float16, so pass a larger value
            for low-precision inputs.

    Raises:
        ValueError: If ``eps`` is negative.
    """

    def __init__(self, eps=1e-40):
        super().__init__()
        if eps < 0:
            raise ValueError(f"eps must be non-negative, got {eps}")
        self.eps = eps

    def forward(self, x):
        """Return the matrix of pairwise KL divergences between rows of ``x``.

        Args:
            x: Tensor with at least two axes; the last axis is summed over,
                so each row along it is expected to be a probability
                distribution.

        Returns:
            Tensor with the last two axes ``(n, n)``; entry ``[..., i, k]``
            is ``KL[x[i] || x[k]]`` (computed with the ``eps`` offset).
        """
        # KL[p || q] = sum_j p(j) * (log p(j) - log q(j))
        # so KL[x[i] || x[k]] = sum_j x[i, j] * (log x[i, j] - log x[k, j]).
        xlog = (x + self.eps).log()
        # crs_entropy[..., i, k] = sum_j x[i, j] * log x[k, j]
        crs_entropy = tch.einsum('...ij, ...kj -> ...ik', x, xlog)
        # uni_entropy[..., i, 0] = sum_j x[i, j] * log x[i, j]; the trailing
        # singleton axis lets it broadcast against every column k.
        uni_entropy = (tch.einsum('...kj, ...kj -> ...k', x, xlog)
                       .unsqueeze(-1))
        return uni_entropy - crs_entropy


attention_layer = KLAttention()

# Row j (0..3) holds the logits 10 * (j + 1) * (i + 1) for i in 0..127,
# built by broadcasting a (4, 1) column against a (128,) row. Softmax over
# the last axis turns every row into a probability distribution.
logits = tch.arange(1, 5).unsqueeze(-1) * tch.arange(1, 129) * 10
x = logits.to(tch.float).softmax(-1)

# Clamp negative scores to zero (presumably to clip small negative values
# caused by rounding), then sum over the second-to-last axis.
kl_scores = attention_layer(x)
print(kl_scores.relu().sum(-2))


tensor([0.0001, 0.0004, 0.0009, 0.0013])


In [113]:
import torch

# Two 4-way probability distributions obtained from softmax.
x = torch.softmax(torch.tensor([1, 2, 3, 4], dtype=torch.float), dim=-1)
y = torch.softmax(torch.tensor([2, 4, 6, 8], dtype=torch.float), dim=-1)

log_x = x.log()
log_y = y.log()

# Each einsum is an inner product over the last axis.
# 'crs'   : cross-entropy  -sum_j x[j] * log y[j]
# 'entro' : sum_j x[j] * log x[j], i.e. the *negative* Shannon entropy of x
# 'kl'    : sum_j x[j] * (log x[j] - log y[j]) = crs + entro
print('crs:', torch.einsum('...j, ...j', x, -log_y))
print('entro:', torch.einsum('...j, ...j', x, log_x))
print('kl:', torch.einsum('...j, ...j', x, log_x - log_y))

crs: tensor(1.1598)
entro: tensor(-0.9475)
kl: tensor(0.2122)


In [114]:
# The logit 1000 in the second row dominates so strongly that softmax
# underflows the other three probabilities to exactly 0.
x = torch.softmax(
    torch.tensor([[1, 2, 3, 4], [2, 4, 6, 1000]], dtype=torch.float),
    dim=-1,
)

# log(0) = -inf for the underflowed entries.
xlog = torch.log(x)

# crs_entropy[i, k] = -sum_j x[i, j] * log x[k, j]
# A positive probability times +inf gives inf; 0 * inf gives nan.
crs_entropy = torch.einsum('...ij, ...kj -> ...ik', x, xlog.neg())
print('crs:',crs_entropy)

# entropy[i, 0] = sum_j x[i, j] * log x[i, j] (negative Shannon entropy),
# kept as a column so it broadcasts across k below.
entropy = torch.einsum('...ij, ...ij -> ...i', x, xlog)[..., None]
print('entro:', entropy)

# Without an epsilon inside the log, the inf/nan values propagate into KL.
print('kl:', crs_entropy + entropy)

crs: tensor([[0.9475,    inf],
        [0.4402,    nan]])
entro: tensor([[-0.9475],
        [    nan]])
kl: tensor([[0., inf],
        [nan, nan]])


In [1]:
import numpy as np
from sympy.matrices import Matrix,GramSchmidt

# Iterating over a 2-D NumPy array yields its rows, so each *row* of the
# literal below becomes one sympy column vector.
a = [Matrix(row) for row in np.array([[1, 2, -1], [-1, 3, 1], [4, -1, 0]])]

# Orthogonalize the three vectors in order; the recorded output shows the
# results are orthogonal but not normalized.
GramSchmidt(a)

[Matrix([
 [ 1],
 [ 2],
 [-1]]),
 Matrix([
 [-5/3],
 [ 5/3],
 [ 5/3]]),
 Matrix([
 [2],
 [0],
 [2]])]

In [3]:
import torch

# Boolean mask applied to the last axis: keep positions 0 and 2, drop 1.
keep = torch.tensor([True, False, True])
torch.tensor([[0, 1, 2], [0, 1, 2]])[..., keep]

tensor([[0, 2],
        [0, 2]])