
Upload files to 'ner'

master
杨天骥 2 years ago
parent
commit
a19f7528b0
9 changed files with 233500 additions and 0 deletions
  1. ner/buildLabel.py          +57      -0
  2. ner/explore.ipynb          +31281   -0
  3. ner/model.py               +369     -0
  4. ner/test.csv               +82523   -0
  5. ner/test.py                +43      -0
  6. ner/test.txt               +3       -0
  7. ner/test_spacy.ipynb       +143     -0
  8. ner/test_tensor_op.ipynb   +212     -0
  9. ner/train.csv              +118869  -0

ner/buildLabel.py  +57 -0

@@ -0,0 +1,57 @@
import nltk
import os
import json


def splitSubSection(sent):
    # Drop sub-section headings (==== title ====) by keeping only the
    # even-indexed pieces after splitting on the heading markers.
    def takeEven(lst):
        for i, elem in enumerate(lst):
            if i % 2 == 0:
                yield elem
    for tok in ['======', '=====', '====', '===']:
        sent = '. '.join(takeEven(sent.split(tok)))
    return sent


def parseUniSection(concept, uniSection):
    # Wrap every mention of the page's concept in [@@) ... (@@] and every
    # linked span in [@|) ... (|@], then split the section into sentences.
    text = f'[@@){concept}(@@]'.join(uniSection['text'].split(concept))
    for link in uniSection['links']:
        text = text.replace(
            uniSection['text'][link['pos_start']:link['pos_end']],
            f'[@|){link["text"]}(|@]', 1
        )
    text = text.replace(f'=={uniSection["title"]}==', '').replace('\n', '. ')
    text = splitSubSection(text)
    text = text.replace('|', '')
    yield from nltk.sent_tokenize(text)


def parseUniJSON(uniJSON):
    for uniConcept in uniJSON:
        concept = uniConcept['title']
        for section in uniConcept['sections']:
            for sent in parseUniSection(concept, section):
                # Keep only sentences with at least 32 whitespace tokens.
                if len(sent.split()) < 32:
                    continue
                yield sent


if __name__ == '__main__':
    dirList = os.listdir('Dataset')
    out = open('train.csv', 'w', encoding='utf-8')
    out.write('0\n')
    for dirName in dirList:
        with open(f'Dataset/{dirName}', 'r', encoding='utf-8') as f:
            uniJSON = json.loads(f.read().lower())
        for sent in parseUniJSON(uniJSON):
            out.write(
                '\"' +
                sent.replace('\n', ' ')
                    .replace('\"', '\\\"')
                    .replace('*', '')
                + '\"\n\n'
            )
    out.close()
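
Note on the labeling scheme: buildLabel.py wraps every mention of the page's concept in [@@) ... (@@] and every wiki-link span in [@|) ... (|@] before sentence-splitting. A minimal sketch of the concept-marking step on a made-up sentence (the text and concept below are hypothetical, not taken from the dataset):

# Sketch of the marker insertion done in parseUniSection above.
concept = 'martingale'
text = 'a martingale is a sequence of random variables.'
marked = f'[@@){concept}(@@]'.join(text.split(concept))
print(marked)
# a [@@)martingale(@@] is a sequence of random variables.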

ner/explore.ipynb  +31281 -0  (file diff suppressed because it is too large)


ner/model.py  +369 -0

@@ -0,0 +1,369 @@
from random import random, randint
import torch as tch
from transformers import BertTokenizer, BertModel


class SentTokenizer(tch.nn.Module):
    def __init__(self) -> None:
        import spacy
        super().__init__()
        BERT_NAME = 'bert-base-uncased'
        self.tok = BertTokenizer.from_pretrained(BERT_NAME)
        # Dummy parameter, used only to track which device the module is on.
        self.dummy = tch.nn.parameter.Parameter(tch.tensor(0.0))
        self.en_grammar = spacy.load('en_core_web_sm')

    @staticmethod
    def unifySent(sentence):
        # Strip the concept/link markers inserted by buildLabel.py.
        for tok in ['[@@)', '(@@]', '[@|)', '(|@]']:
            sentence = ''.join(sentence.split(tok))
        return sentence

    def maskNoun(self, sentence):
        # Randomly replace noun chunks with a '[||]' placeholder.
        doc = self.en_grammar(sentence)
        for chunk in doc.noun_chunks:
            text = chunk.text
            if random() > 0.8:
                sentence = (sentence.replace(text, '[||]', 1)
                            .replace('[||][||]', '[||]'))
        return sentence

    def getLabel(self, tokList, tokLabeledList):
        # tokList is the tokenized plain sentence, tokLabeledList the tokenized
        # marked sentence; each marker tokenizes into four word pieces.
        label = tch.zeros(len(tokList), dtype=tch.int8,
                          device=self.dummy.device)
        idx, offset = 0, 0
        flagMaskIsDefinition = False
        flagMaskIsApplication = False
        while idx < len(tokList):
            idx_ = idx + offset
            if idx_ + 4 > len(tokLabeledList):
                idx += 1
                continue
            flag = ''.join(tokLabeledList[idx_:idx_ + 4])
            offset += 4
            if flag == '[@@)':
                flagMaskIsDefinition = True
            elif flag == '(@@]':
                flagMaskIsDefinition = False
            elif flag == '[@|)':
                flagMaskIsApplication = True
            elif flag == '(|@]':
                flagMaskIsApplication = False
            else:
                offset -= 4
                if flagMaskIsApplication:
                    label[idx] = 2
                elif flagMaskIsDefinition:
                    label[idx] = 1
                idx += 1
        return label

    @staticmethod
    def maskTokList(tokList, tokMasked):
        off = 0
        for idx, elem in enumerate(tokList):
            idx_ = idx + off
            flag = ''.join(tokMasked[idx_: idx_ + 4])
            if flag != '[||]':
                continue
            if tokMasked[idx_ + 4] == elem:
                off += 4
                continue
            tokList[idx] = '[MASK]'
            off -= 1
        return tokList

    @staticmethod
    def randMaskConcept(tokList, label):
        # Randomly mask 10% of the labeled (concept / reliance) tokens.
        for idx, elem in enumerate(tokList):
            if label[idx] != 0 and random() < 0.1:
                tokList[idx] = '[MASK]'
        return tokList

    def forward(self, sentence):
        # label[i] = concept | rely
        uniSent = self.unifySent(sentence)
        tokLabeledList = self.tok.tokenize(sentence)
        tokList = self.tok.tokenize(uniSent)
        label = self.getLabel(tokList, tokLabeledList)
        tokList = self.randMaskConcept(tokList, label)
        # tokList = ['[CLS]'] + tokList + ['[SEP]']
        tokIdList = self.tok.convert_tokens_to_ids(tokList)
        tokIdList = tch.tensor([tokIdList], device=self.dummy.device)
        # return tokIdList, tokList[1:-1], label
        return tokIdList, tokList, label


class Trapezoid(tch.nn.Module):
    def __init__(self, in_features, out_features, layers):
        super().__init__()
        self.dummy = tch.nn.parameter.Parameter(tch.tensor(0.0))
        dim_diff = out_features - in_features
        self.layer = tch.nn.Sequential(*[
            tch.nn.Sequential(
                tch.nn.Linear(
                    in_features + i * dim_diff // layers,
                    in_features + (i + 1) * dim_diff // layers
                ),
                tch.nn.LeakyReLU())
            for i in range(layers)
        ])

    def forward(self, x):
        return self.layer(x)


class LinAttention(tch.nn.Module):
    def __init__(self, in_features, out_features, attention_features):
        super().__init__()
        self.dimDown = tch.nn.Linear(in_features, attention_features)
        self.matKQ = tch.nn.Linear(attention_features, attention_features,
                                   bias=False)
        self.matV = tch.nn.Linear(in_features, out_features)
        self.leakyRELU = tch.nn.LeakyReLU()

    def forward(self, x, y):
        xp = self.dimDown(x)
        attention = tch.einsum(
            '...ij, ...kj -> ...ik',
            self.matKQ(xp), xp
        )
        attention = self.leakyRELU(attention)
        return tch.einsum(
            '...ik, ...kj -> ...ij',
            attention, self.matV(y)
        )


class KLAttention(tch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, p, x):
        # attention[i0, i1] = \sum_j p[i0, j] (\log p[i0, j] - \log p[i1, j])
        # i.e. how much of the information in p[i0] is lost by p[i1].
        EPS = 1e-40
        plog = -(p + EPS).log()
        crs_entropy = tch.einsum('...ij, ...kj -> ...ik', p, plog)
        uni_entropy = (tch.einsum('...kj, ...kj -> ...k', p, plog)
                       .unsqueeze(-1))
        # Clamp the small negatives caused by floating-point error to 0.
        attention = (crs_entropy - uni_entropy).relu()
        return tch.einsum('...ik, ...kj -> ...ij', attention, x)


class KLTransformer(tch.nn.Module):
    def __init__(self):
        super().__init__()
        self.attention_layer = KLAttention()
        self.mlp_layer0 = Trapezoid(768, 768, 5)
        self.mlp_layer1 = Trapezoid(768, 768, 5)

    def forward(self, x):
        p = self.mlp_layer0(x).softmax(-1)
        attended_x = self.attention_layer(p, x)
        transformed_x = self.mlp_layer1(attended_x)
        return x + transformed_x


class DefDiscriminator(tch.nn.Module):
    def __init__(self):
        super().__init__()
        self.transform = KLTransformer()
        self.mlp_layer = Trapezoid(768, 3, 5)

    def forward(self, x):
        y = self.transform(x)
        y = self.mlp_layer(y).softmax(-1)
        return y


class NERModel(tch.nn.Module):
    def __init__(self):
        super().__init__()
        self.dummy = tch.nn.Parameter(tch.tensor(0.0))
        BERT_NAME = 'bert-base-uncased'
        self.bert = BertModel.from_pretrained(BERT_NAME)
        self.head = DefDiscriminator()
        # Running count of tokens seen per class, used to re-weight the loss;
        # requires_grad goes on the Parameter itself, otherwise it defaults to True.
        self.type_cnt = tch.nn.Parameter(
            tch.tensor([1.0, 1.0, 1.0], dtype=tch.double),
            requires_grad=False
        )

    def criterion(self, y, label, giveRate):
        ys = [y[..., label == i, i] for i in range(3)]

        def uni_criterion(t):
            # Randomly ignore most of the near-zero predictions so they do not
            # dominate the loss.
            eps = 1e-2 / (t.shape[-2] + 1)
            randMask = tch.rand(size=t.shape, device=self.dummy.device) < 0.8
            clip = (t < eps) * randMask
            return ~clip * (t < 1 - eps) * t.log()

        loss = [uni_criterion(ys[i]) for i in range(3)]
        with tch.no_grad():
            self.type_cnt += \
                tch.tensor([(label == i).sum() for i in range(3)],
                           device=self.dummy.device)
        tok_cnt = self.type_cnt.sum()
        # Weight each class inversely to how often it has been seen.
        tot_loss = (
            - loss[0].sum() * (tok_cnt / self.type_cnt[0]).to(float)
            - loss[1].sum() * (tok_cnt / self.type_cnt[1]).to(float)
            - loss[2].sum() * (tok_cnt / self.type_cnt[2]).to(float)
        )
        if giveRate:
            cnt = (ys[1] > 1/3).sum().item() + (ys[2] > 1/3).sum().item()
            label_cnt = (label > 0).sum().item()
            tot_rate = (cnt + (ys[0] > 1/3).sum().item()) / label.shape[-1]
            if label_cnt > 0:
                rate = cnt / label_cnt
            else:
                rate = -1
            return tot_loss, rate, tot_rate
        else:
            return tot_loss

    def forward(self, x, label=None, giveRate=True):
        # BERT is frozen; only the discriminator head receives gradients.
        with tch.no_grad():
            y = self.bert(x)[0]
        y = self.head(y)
        if label is None:
            return y
        else:
            return self.criterion(y, label, giveRate)


if __name__ == '__main__':
    import pandas as pd
    from tqdm import tqdm
    import matplotlib.pyplot as plt

    df = pd.read_csv('train.csv', sep='\n')['0']
    device = tch.device('cuda:0')
    # Model
    tokenizer = SentTokenizer().to(device)
    model = NERModel().to(device)
    try:
        with open('NER.model', 'rb') as f:
            print('find model, load state dict')
            model.load_state_dict(tch.load(f))
            print('load model state dict success')
    except:
        pass
    # Training configuration
    optimizer = tch.optim.RMSprop(model.head.parameters(), lr=1e-5)
    BATCH_SIZE = 15
    SAVE_ONCE = 5000
    try:
        with open('NER.optimizer', 'rb') as f:
            print('find optimizer, load state dict')
            optimizer.load_state_dict(tch.load(f))
            print('load optimizer state dict success')
    except:
        pass
    # Visualization
    running_loss = 0.0
    running_rate = 0.0
    running_tot_rate = 0.0
    history_loss = []
    history_rate = []
    history_tot_rate = []
    skippedIter = 0
    plt.ion()
    for epoch in range(1, 5):
        dataset_with_progress_bar = tqdm(
            enumerate(df.sample(frac=1)), total=len(df))
        skippedIter = 0
        for i, sentence in dataset_with_progress_bar:
            tokIdList, _, label = tokenizer(sentence)
            if tokIdList.shape[-1] > 512 or \
                    (int((label == 1).sum()) == 0):
                skippedIter += 1
            else:
                loss, rate, tot_rate = model(tokIdList, label)
                running_loss = \
                    9e-1*running_loss + 1e-1*loss.item() if running_loss > 0.0 \
                    else loss.item()
                running_rate = \
                    99e-2*running_rate + 1e-2*rate if rate > 0.0 \
                    else running_rate
                running_tot_rate = \
                    99e-2*running_tot_rate + 1e-2*tot_rate
                dataset_with_progress_bar\
                    .set_description(
                        'loss[%-1.5f] rate[%-2.2f%%] '
                        'tot_rate[%-2.2f%%] sent_len[%-3d] '
                        'skipped[%d] epoch[%d] '
                        % (running_loss, running_rate * 100,
                           running_tot_rate * 100, tokIdList.shape[-1],
                           skippedIter, epoch)
                    )
                loss.backward()
                if (i - skippedIter) % BATCH_SIZE == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    # Record training history
                    history_loss.append(running_loss)
                    history_rate.append(running_rate)
                    history_tot_rate.append(running_tot_rate)
                    if len(history_loss) > 20:
                        history_loss = history_loss[-20:]
                    if len(history_rate) > 20:
                        history_rate = history_rate[-20:]
                    if len(history_tot_rate) > 20:
                        history_tot_rate = history_tot_rate[-20:]
                    # Plot
                    plt.clf()
                    plt.subplot(1, 2, 1)
                    plt.plot(range(len(history_loss)),
                             history_loss, c='red',
                             label='loss (cross entropy loss)')
                    plt.legend()
                    plt.subplot(1, 2, 2)
                    plt.plot(range(len(history_rate)),
                             history_rate, c='blue', label='rate (only concepts)')
                    plt.plot(range(len(history_tot_rate)),
                             history_tot_rate, c='green', label='rate (all)')
                    plt.legend()
                    plt.draw()
                    plt.pause(0.01)
                    BATCH_SIZE = randint(15, 25)
            if i % SAVE_ONCE == 0:
                # Save the model
                dataset_with_progress_bar\
                    .set_description('saving ! ')
                tch.save(optimizer.state_dict(), 'NER.optimizer')
                tch.save(model.state_dict(), 'NER.model')
                dataset_with_progress_bar\
                    .set_description('done ! ')
        # Save again at the end of each epoch.
        dataset_with_progress_bar\
            .set_description('saving ! ')
        tch.save(optimizer.state_dict(), 'NER.optimizer')
        tch.save(model.state_dict(), 'NER.model')
        dataset_with_progress_bar\
            .set_description('done ! ')
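
For reference, KLAttention above scores attention[i0, i1] as the KL divergence KL(p[i0] || p[i1]), assembled from a cross-entropy term and an entropy term. A minimal self-contained check (with made-up random inputs) that the einsum formulation matches a direct row-by-row KL computation:

# Sketch: verify the crs_entropy / uni_entropy einsums used in KLAttention.
import torch as tch

p = tch.randn(3, 8).softmax(-1)                                    # three made-up distributions
plog = -(p + 1e-40).log()
crs = tch.einsum('...ij, ...kj -> ...ik', p, plog)                 # cross entropy H(p[i0], p[i1])
uni = tch.einsum('...kj, ...kj -> ...k', p, plog).unsqueeze(-1)    # entropy H(p[i0])
attention = (crs - uni).relu()                                     # KL(p[i0] || p[i1]) >= 0

manual = tch.tensor([[(p[i] * (p[i].log() - p[j].log())).sum().item()
                      for j in range(3)]
                     for i in range(3)])
print(tch.allclose(attention, manual, atol=1e-5))  # True, up to floating-point error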

ner/test.csv  +82523 -0  (file diff suppressed because it is too large)


ner/test.py  +43 -0

@@ -0,0 +1,43 @@
from model import NERModel, SentTokenizer
import torch as tch

device = tch.device('cuda:0')
tokenizer = SentTokenizer().to(device)
model = NERModel().to(device)
print('loading model')
model.load_state_dict(tch.load('Saved/NER.model'))
print('ok')

if __name__ == '__main__':
    print('='*20)
    while True:
        text = input('>>>')
        if len(text) >= 512 or len(text) <= 0:
            print('Sorry bro. I cannot do this. ')
            continue
        tokIdList, tokList, label = tokenizer(text)
        print('\n')
        print(tokIdList)
        print(tokList)
        prediction = model(tokIdList)
        _prediction = (
            + 2 * (prediction[..., 2] > 1/3)
            + 1 * (prediction[..., 1] > 1/3)
            + 0 * (prediction[..., 0] > 1/3)
        )
        for i, pred in enumerate(_prediction[0].tolist()):
            if pred == 2:
                print('%-25s' % tokList[i], 'reliance',
                      prediction[..., i, 2].tolist())
            if pred == 1:
                print('%-25s' % tokList[i], 'concept',
                      prediction[..., i, 1].tolist())
            if pred == 0:
                print('%-25s' % tokList[i], 'nothing',
                      prediction[..., i, 0].tolist())
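
The thresholding above turns the three-way softmax output into a per-token tag: 2 (reliance) or 1 (concept) whenever that class's probability exceeds 1/3, otherwise 0. A quick sketch on a hypothetical prediction tensor of shape (1, num_tokens, 3); the 0-weighted term in the original is a no-op and is omitted here:

import torch as tch

prediction = tch.tensor([[[0.8, 0.1, 0.1],    # -> 0, nothing
                          [0.2, 0.6, 0.2],    # -> 1, concept
                          [0.1, 0.2, 0.7]]])  # -> 2, reliance
_prediction = (
    2 * (prediction[..., 2] > 1/3)
    + 1 * (prediction[..., 1] > 1/3)
)
print(_prediction[0].tolist())  # [0, 1, 2]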

ner/test.txt  +3 -0

@@ -0,0 +1,3 @@
a martingale can be thought of as the fortune at time n of a player who is betting on a fair game.
f is usually denoted dν/du and called the Radon-Nikodym derivative.

ner/test_spacy.ipynb  +143 -0

@@ -0,0 +1,143 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"\n",
"en_grammar = spacy.load('en_core_web_sm')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\75872\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torchaudio\\backend\\utils.py:67: UserWarning: No audio backend is available.\n",
" warnings.warn('No audio backend is available.')\n"
]
}
],
"source": [
"from transformers import *\n",
"\n",
"BERT_NAME = 'bert-base-uncased'\n",
"tok = BertTokenizer.from_pretrained(BERT_NAME)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"when [[]] ceases and [[]] makes [[]] toward [[]], [[]] says to drop [[]] after [[]] starting at [[]].\n",
"\n"
]
}
],
"source": [
"sentence = \"\"\"\n",
"when the drop ceases and the curve makes an elbow toward less steep decline, cattell's scree test says to drop all further components after the one starting at the elbow.\n",
"\"\"\"\n",
"def maskNoun(sentence: str):\n",
" doc = en_grammar(sentence)\n",
" for noun in doc.noun_chunks:\n",
" sentence = sentence.replace(noun.text, '[[]]', 1)\n",
" return sentence\n",
"print(maskNoun(sentence))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['when', 'the', 'drop', 'cease', '##s', 'and', 'the', 'curve', 'makes', 'an', 'elbow', 'toward', 'less', 'steep', 'decline', ',', 'cat', '##tell', \"'\", 's', 'sc', '##ree', 'test', 'says', 'to', 'drop', 'all', 'further', 'components', 'after', 'the', 'one', 'starting', 'at', 'the', 'elbow', '.']\n",
"['when', '[', '[', ']', ']', 'cease', '##s', 'and', '[', '[', ']', ']', 'makes', '[', '[', ']', ']', 'toward', '[', '[', ']', ']', ',', '[', '[', ']', ']', 'says', 'to', 'drop', '[', '[', ']', ']', 'after', '[', '[', ']', ']', 'starting', 'at', '[', '[', ']', ']', '.']\n"
]
}
],
"source": [
"tokList = tok.tokenize(sentence)\n",
"tokMask = tok.tokenize(maskNoun(sentence))\n",
"\n",
"print(tokList)\n",
"print(tokMask)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['when', '[MASK]', '[MASK]', 'cease', '##s', 'and', '[MASK]', '[MASK]', 'makes', '[MASK]', '[MASK]', 'toward', '[MASK]', '[MASK]', '[MASK]', ',', '[MASK]', '[MASK]', '[MASK]', '[MASK]', '[MASK]', '[MASK]', '[MASK]', 'says', 'to', 'drop', '[MASK]', '[MASK]', '[MASK]', 'after', '[MASK]', '[MASK]', 'starting', 'at', '[MASK]', '[MASK]', '.']\n",
"['when', '[', '[', ']', ']', 'cease', '##s', 'and', '[', '[', ']', ']', 'makes', '[', '[', ']', ']', 'toward', '[', '[', ']', ']', ',', '[', '[', ']', ']', 'says', 'to', 'drop', '[', '[', ']', ']', 'after', '[', '[', ']', ']', 'starting', 'at', '[', '[', ']', ']', '.']\n"
]
}
],
"source": [
"def maskTokList(self, tokList, tokMask):\n",
" off = 0\n",
" for idx, elem in enumerate(tokList):\n",
" idx_ = idx + off\n",
" flag = ''.join(tokMask[idx_: idx_+4])\n",
" if flag == '[[]]':\n",
" if tokMask[idx_+4] == elem:\n",
" off += 4\n",
" continue\n",
" tokList[idx] = '[MASK]'\n",
" off -= 1\n",
" return tokList\n",
"\n",
"\n",
"print(tokList)\n",
"print(tokMask)\n"
]
}
],
"metadata": {
"interpreter": {
"hash": "f29e8b3fa2d991a6f8847b235850bc2cfc73e5042ba8efb84ff0f4dcd41902ea"
},
"kernelspec": {
"display_name": "Python 3.9.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
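
The maskTokList cell above aligns the plain tokenization with the noun-masked one: each '[[]]' placeholder tokenizes into the four word pieces '[', '[', ']', ']', and the off counter keeps the two lists in step. A self-contained sketch of the same alignment on hand-made token lists (no tokenizer needed):

def maskTokList(tokList, tokMask):
    off = 0
    for idx, elem in enumerate(tokList):
        idx_ = idx + off
        flag = ''.join(tokMask[idx_: idx_ + 4])
        if flag == '[[]]':
            if tokMask[idx_ + 4] == elem:
                # Placeholder immediately followed by the same token: skip it.
                off += 4
                continue
            tokList[idx] = '[MASK]'
            off -= 1
    return tokList

tokList = ['when', 'the', 'drop', 'ceases']
tokMask = ['when', '[', '[', ']', ']', 'ceases']
print(maskTokList(tokList, tokMask))  # ['when', '[MASK]', '[MASK]', 'ceases']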

ner/test_tensor_op.ipynb  +212 -0

@@ -0,0 +1,212 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([1, 4])"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch as tch\n",
"\n",
"vec_seq = tch.tensor([i for i in range(4)])\n",
"\n",
"vec_seq.unsqueeze_(-2).shape"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tensor([0.0001, 0.0004, 0.0009, 0.0013])\n"
]
}
],
"source": [
"class KLAttention(tch.nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
"\n",
" def forward(self, x):\n",
" # p包含了多少q中的信息? KL[p||q] = \\sum_j q(j) (\\log q(j) - \\log p(j))\n",
" # 现在 x 的每一列都表示一个概率分布, 也就是说 KL[x[i0] || x[i1]]\n",
" # 表示 x[i0] 含有 多少 x[i1] 当中的信息\n",
" # KL[x[i0] || x[i1]] = \\sum_j x[i0, j] (\\log x[i0, j] - \\log x[i1, j])\n",
" EPS = 1e-40\n",
" xlog = (x + EPS).log()\n",
" crs_entropy = tch.einsum('...ij, ...kj -> ...ik', x, xlog)\n",
" uni_entropy = (tch.einsum('...kj, ...kj -> ...k', x, xlog)\n",
" .unsqueeze(-1))\n",
" return uni_entropy - crs_entropy\n",
"\n",
"\n",
"attention_layer = KLAttention()\n",
"\n",
"x = tch.tensor(\n",
" [[(i + 1) * (j + 1) * 10 for i in range(128)]\n",
" for j in range(4)],\n",
" dtype=tch.float\n",
").softmax(-1)\n",
"\n",
"print(attention_layer(x).relu().sum(-2))\n"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"crs: tensor(1.1598)\n",
"entro: tensor(-0.9475)\n",
"kl: tensor(0.2122)\n"
]
}
],
"source": [
"import torch\n",
"\n",
"x = torch.tensor([1, 2, 3, 4], dtype=torch.float).softmax(-1)\n",
"y = torch.tensor([2, 4, 6, 8], dtype=torch.float).softmax(-1)\n",
"\n",
"print('crs:', torch.einsum('...j, ...j', x, -y.log()))\n",
"print('entro:', torch.einsum('...j, ...j', x, x.log()))\n",
"print('kl:', torch.einsum('...j, ...j', x, x.log()-y.log()))"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"crs: tensor([[0.9475, inf],\n",
" [0.4402, nan]])\n",
"entro: tensor([[-0.9475],\n",
" [ nan]])\n",
"kl: tensor([[0., inf],\n",
" [nan, nan]])\n"
]
}
],
"source": [
"x = torch.tensor([[1, 2, 3, 4], [2, 4, 6, 1000]], \n",
" dtype=torch.float).softmax(-1)\n",
"\n",
"xlog = x.log()\n",
"crs_entropy = tch.einsum('...ij, ...kj -> ...ik', x, -xlog)\n",
"print('crs:',crs_entropy)\n",
"\n",
"entropy = tch.einsum('...ij, ...ij -> ...i', x, xlog).unsqueeze(-1)\n",
"print('entro:', entropy)\n",
"\n",
"print('kl:', crs_entropy + entropy)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Matrix([\n",
" [ 1],\n",
" [ 2],\n",
" [-1]]),\n",
" Matrix([\n",
" [-5/3],\n",
" [ 5/3],\n",
" [ 5/3]]),\n",
" Matrix([\n",
" [2],\n",
" [0],\n",
" [2]])]"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sympy.matrices import Matrix,GramSchmidt\n",
"\n",
"a = np.array([[1,2,-1], [-1,3,1], [4,-1,0]])\n",
"a = [Matrix(col) for col in a]\n",
"GramSchmidt(a)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor([[0, 2],\n",
" [0, 2]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch\n",
"\n",
"torch.tensor([[0, 1, 2], [0, 1, 2]])[..., torch.tensor([True, False, True])]"
]
}
],
"metadata": {
"interpreter": {
"hash": "f29e8b3fa2d991a6f8847b235850bc2cfc73e5042ba8efb84ff0f4dcd41902ea"
},
"kernelspec": {
"display_name": "Python 3.9.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

ner/train.csv  +118869 -0  (file diff suppressed because it is too large)

