|
|
|
|
|
|
% https://zhuanlan.zhihu.com/p/165140693 |
|
|
|
|
|
% https://zhuanlan.zhihu.com/p/36868831 |
|
|
|
|
|
|
|
|
|
|
|
% Document class and aspect ratio
|
|
|
|
|
\documentclass[aspectratio=169, 10pt, utf8, mathserif]{ctexbeamer} |
|
|
|
|
|
% Load the required packages
|
|
|
|
|
% \usepackage{beamerfoils} |
|
|
|
|
|
|
|
|
|
|
|
\usepackage[outputdir=./latex-output]{minted} |
|
|
|
|
|
|
|
|
|
|
|
\usepackage{multicol} |
|
|
|
|
|
\setminted{breaklines=true, fontsize=\zihao{-6}} |
|
|
|
|
|
% \PassOptionsToPackage{fontsize=\zihao{-6}}{minted} |
|
|
|
|
|
|
|
|
|
|
|
\definecolor{shadecolor}{RGB}{204,232,207} |
|
|
|
|
|
|
|
|
|
|
|
\usetheme{Berlin} % one of the built-in themes; just swap the name to change it
|
|
|
|
|
\setbeamertemplate{page number in head/foot}[totalframenumber] |
|
|
|
|
|
|
|
|
|
|
|
\usecolortheme{beaver} % one of the color themes; just swap the name to change it
|
|
|
|
|
\usefonttheme{professionalfonts} |
|
|
|
|
|
|
|
|
|
|
|
% Start in full-screen mode when opened in Acrobat
|
|
|
|
|
\hypersetup{pdfpagemode=FullScreen} |
|
|
|
|
|
|
|
|
|
|
|
% Logo setup
|
|
|
|
|
% \pgfdeclareimage[height=2cm, width=2cm]{university-logo}{120701101} |
|
|
|
|
|
% \logo{\pgfuseimage{university-logo}} |
|
|
|
|
|
|
|
|
|
|
|
\parskip=1.2em |
|
|
|
|
|
|
|
|
|
|
|
%-------------- Document body ---------------
|
|
|
|
|
\begin{document} |
|
|
|
|
|
|
|
|
|
|
|
% Show a mini table of contents at the beginning of each subsection
|
|
|
|
|
\AtBeginSubsection[] |
|
|
|
|
|
{ |
|
|
|
|
|
\begin{frame}<beamer> |
|
|
|
|
|
\tableofcontents[currentsection,currentsubsection] |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
\title{Deep Learning Lab 4 Walkthrough}
|
|
|
|
|
\subtitle{Multilayer Perceptron / Fully Connected Layers}
|
|
|
|
|
\author[岳锦鹏]{岳锦鹏 \\ \small 10213903403} |
|
|
|
|
|
|
|
|
|
|
|
\date{\today} |
|
|
|
|
|
\begin{frame} |
|
|
|
|
|
%\maketitle |
|
|
|
|
|
\titlepage |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame} |
|
|
|
|
|
\frametitle{Outline}
|
|
|
|
|
\tableofcontents[hideallsubsections] |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\section{Overview}
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
First, let us go through each fill-in-the-blank section and see what needs to be completed.
|
|
|
|
|
|
|
|
|
|
|
As shown below, the backward pass of ReLU needs to be completed.
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
class Relu: |
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
self.mem = {} |
|
|
|
|
|
|
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
self.mem['x'] = x |
|
|
|
|
|
return np.where(x > 0, x, np.zeros_like(x)) |
|
|
|
|
|
|
|
|
|
|
|
def backward(self, grad_y): |
|
|
|
|
|
''' |
|
|
|
|
|
grad_y: same shape as x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward pass of the activation function'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
For the main model, the loss computation needs to be completed.
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def compute_loss(self, log_prob, labels): |
|
|
|
|
|
''' |
|
|
|
|
|
log_prob is the predicted probabilities |
|
|
|
|
|
labels is the ground truth |
|
|
|
|
|
Please return the loss |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Compute the loss for this multi-class problem: cross-entropy loss + L2 regularization term'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
Complete the forward pass according to the given network architecture.
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
''' |
|
|
|
|
|
x is the input features |
|
|
|
|
|
Please return the predicted probabilities of x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Build an MLP feed-forward network and complete its forward pass; the MLP structure is FFN --> RELU --> FFN --> Softmax'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
Complete the backward pass of the main model; note that the backward functions of the individual layers can be reused here.
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def backward(self, label): |
|
|
|
|
|
''' |
|
|
|
|
|
label is the ground truth |
|
|
|
|
|
Please compute the gradients of self.W1 and self.W2 |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward-propagation algorithm of this feed-forward network'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
Update the parameters; be careful not to forget the contribution of the regularization term.
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def update(self): |
|
|
|
|
|
''' |
|
|
|
|
|
Please update self.W1 and self.W2 |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Update the parameters of this feed-forward network'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\section{Step-by-Step Implementation}
|
|
|
|
|
\subsection{The Backward Pass of ReLU}
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Let us start with the backward pass of ReLU. ReLU is defined as ($a$ and $x$ are used to stay consistent with the lecture slides)
|
|
|
|
|
$$ |
|
|
|
|
|
a = \begin{cases} |
|
|
|
|
|
x,\quad & x>0 \\ |
|
|
|
|
|
0,\quad & x\leqslant 0 \\ |
|
|
|
|
|
\end{cases} |
|
|
|
|
|
$$ |
|
|
|
|
|
so its derivative is clearly
|
|
|
|
|
$$ |
|
|
|
|
|
\frac{\mathrm{d}a}{\mathrm{d}x} = \begin{cases} |
|
|
|
|
|
1,\quad & x>0 \\ |
|
|
|
|
|
0,\quad & x\leqslant 0 \\ |
|
|
|
|
|
\end{cases} |
|
|
|
|
|
$$ |
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
class Relu: |
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
self.mem = {} |
|
|
|
|
|
|
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
self.mem['x'] = x |
|
|
|
|
|
return np.where(x > 0, x, np.zeros_like(x)) |
|
|
|
|
|
|
|
|
|
|
|
def backward(self, grad_y): |
|
|
|
|
|
''' |
|
|
|
|
|
grad_y: same shape as x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward pass of the activation function'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
|
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Since the gradient depends on whether the input $x$ was greater than 0, \mintinline{python}{self.mem} is used to remember the $x$ from the last forward call; during the backward pass the remembered $x$ selects the branch, which numpy's vectorized operations handle directly. \mintinline{python}{grad_y} is the incoming gradient, and the return value should be the product of this layer's local gradient with the incoming gradient:
|
|
|
|
|
$$ |
|
|
|
|
|
\text{return} = \frac{\mathrm{d}a}{\mathrm{d}x} \times grad\_y=\begin{cases}
|
|
|
|
|
grad\_y,\quad & x>0 \\ |
|
|
|
|
|
0,\quad & x\leqslant 0 \\ |
|
|
|
|
|
\end{cases} |
|
|
|
|
|
$$ |
|
|
|
|
|
This leads to the following code:
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
class Relu: |
|
|
|
|
|
def __init__(self): |
|
|
|
|
|
self.mem = {} |
|
|
|
|
|
|
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
self.mem['x'] = x |
|
|
|
|
|
return np.where(x > 0, x, np.zeros_like(x)) |
|
|
|
|
|
|
|
|
|
|
|
def backward(self, grad_y): |
|
|
|
|
|
''' |
|
|
|
|
|
grad_y: same shape as x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward pass of the activation function'''
|
|
|
|
|
return np.where(self.mem['x'] > 0, grad_y, np.zeros_like(grad_y)) |
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\mint{python}|return np.where(self.mem['x'] > 0, grad_y, np.zeros_like(grad_y))| |
|
|
|
|
|
\end{frame} |
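
\begin{frame}[fragile]
As a quick sanity check (not part of the lab code), the rule above can be verified numerically: for a scalar loss $L=\sum_i w_i\,\mathrm{relu}(x_i)$ the analytic gradient is $w_i$ where $x_i>0$ and $0$ elsewhere, which should match a finite-difference estimate. The snippet below is a minimal standalone sketch using only numpy; all values are made up for illustration.

\begin{minted}{python}
import numpy as np

x = np.array([-1.5, -0.1, 0.2, 3.0])
w = np.array([ 0.7,  2.0, -1.3, 0.5])   # arbitrary upstream weights

relu = lambda v: np.where(v > 0, v, 0.0)
loss = lambda v: np.sum(w * relu(v))

# analytic gradient: pass w through where x > 0, zero elsewhere
grad_analytic = np.where(x > 0, w, 0.0)

# central finite-difference estimate
eps = 1e-6
grad_numeric = np.array([
    (loss(x + eps * np.eye(len(x))[i]) - loss(x - eps * np.eye(len(x))[i])) / (2 * eps)
    for i in range(len(x))
])

print(np.allclose(grad_analytic, grad_numeric))  # True
\end{minted}
\end{frame}
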
|
|
|
|
|
|
|
|
|
|
|
\subsection{Cross-Entropy Loss + L2 Regularization}
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
The cross-entropy loss is
|
|
|
|
|
$$ |
|
|
|
|
|
loss=\sum_{\text{class } i} -y_i \log(\hat{y}_i)
|
|
|
|
|
$$ |
|
|
|
|
|
The L2 regularization term is
|
|
|
|
|
$ |
|
|
|
|
|
\lambda \left\Vert W \right\Vert |
|
|
|
|
|
$, where $\lambda$ is a coefficient and $W$ is a weight matrix; the norm is the Euclidean norm, i.e.
|
|
|
|
|
$$\sqrt{\sum_{\text{each parameter } x \text{ of } W} x^{2} }$$
|
|
|
|
|
|
|
|
|
|
|
There are two layers in this network, i.e. two weight matrices, so
|
|
|
|
|
$$ |
|
|
|
|
|
L2 = \lambda_1 \left\Vert W_1 \right\Vert +\lambda_2 \left\Vert W_2 \right\Vert |
|
|
|
|
|
$$ |
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def compute_loss(self, log_prob, labels): |
|
|
|
|
|
''' |
|
|
|
|
|
log_prob is the predicted probabilities |
|
|
|
|
|
labels is the ground truth |
|
|
|
|
|
Please return the loss |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Compute the loss for this multi-class problem: cross-entropy loss + L2 regularization term'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
\mintinline{python}{log_prob} was presumably meant to be $\hat{y}$ with the $\log$ already applied, but in lab4.ipynb what is actually passed in is \mintinline{python}{pred_y}, which has not gone through $\log$, so $\log(\hat{y})$ still has to be computed here. Since $\log(\hat{y}_i)$ was already computed during the forward pass, it was cached ahead of time in \mintinline{python}{self.log_value}.
|
|
|
|
|
|
|
|
|
|
|
\mintinline{python}{labels} ($y$, one-hot encoded) and \mintinline{python}{self.log_value} ($\log(\hat{y})$) both have shape [batch size, number of classes]. According to the formula, the sum runs over the class dimension, hence \mintinline{python}{axis=1}; note that we also average over the batch dimension, i.e. \mintinline{python}{.mean(0)}.
|
|
|
|
|
|
|
|
|
|
|
The norm is computed directly with \mintinline{python}{np.linalg.norm}.
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def compute_loss(self, log_prob, labels): |
|
|
|
|
|
''' |
|
|
|
|
|
log_prob is the predicted probabilities |
|
|
|
|
|
labels is the ground truth |
|
|
|
|
|
Please return the loss |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Compute the loss for this multi-class problem: cross-entropy loss + L2 regularization term'''
|
|
|
|
|
return - np.sum(labels * self.log_value, axis=1).mean(0) + self.lambda1 * np.linalg.norm(self.W1) + self.lambda1 * np.linalg.norm(self.W2) |
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
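
\begin{frame}[fragile]
To make the formula concrete, here is a small standalone example (the coefficient \mintinline{python}{lam} and the toy shapes are made up for illustration; they are not the lab's actual values): cross-entropy is the negative log-probability assigned to the true class, averaged over the batch, and the L2 term is the Euclidean norm of each weight matrix times its coefficient.

\begin{minted}{python}
import numpy as np

# toy batch: 2 samples, 3 classes
pred_y = np.array([[0.7, 0.2, 0.1],
                   [0.1, 0.1, 0.8]])          # predicted probabilities
labels = np.array([[1, 0, 0],
                   [0, 0, 1]])                # one-hot ground truth

W1 = np.random.randn(4, 5)
W2 = np.random.randn(3, 4)
lam = 1e-3                                    # illustrative regularization coefficient

cross_entropy = -np.sum(labels * np.log(pred_y), axis=1).mean(0)
l2_term = lam * np.linalg.norm(W1) + lam * np.linalg.norm(W2)
loss = cross_entropy + l2_term

print(cross_entropy)   # = (-log 0.7 - log 0.8) / 2, approx. 0.290
print(loss)
\end{minted}
\end{frame}
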
|
|
|
|
|
|
|
|
|
|
|
\subsection{Forward Pass of the Main Model}
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Here $x$ has shape [batch size, 28, 28], where the two 28s are the image height and width. Observe that \mintinline{python}{self.W1} has shape [100, 785] while $28\times 28=784$, which means that after flattening height and width we also have to concatenate a column of \mintinline{python}{np.ones} that takes over the role of the bias term, i.e.
|
|
|
|
|
\mint{python}|np.concatenate((x.reshape(x.shape[0], -1), np.ones((x.shape[0], 1))), axis=1)| |
|
|
|
|
|
|
|
|
|
|
|
The docstring of \mintinline{python}{Matmul.backward} states \mintinline{python}{x: shape(d, N)}, so after concatenation the result also needs to be transposed.
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
''' |
|
|
|
|
|
x is the input features |
|
|
|
|
|
Please return the predicted probabilities of x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Build an MLP feed-forward network and complete its forward pass; the MLP structure is FFN --> RELU --> FFN --> Softmax'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
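
\begin{frame}[fragile]
The column of ones is the usual bias trick: if the last column of $W$ holds the bias $b$, then multiplying the augmented input $[x;\,1]$ by $W$ is the same as computing $Wx+b$ with an explicit bias. A minimal numpy sketch (all shapes and values here are illustrative, not taken from the lab):

\begin{minted}{python}
import numpy as np

batch, d_in, d_out = 4, 784, 100
x = np.random.randn(batch, d_in)

W = np.random.randn(d_out, d_in + 1)        # last column acts as the bias
x_aug = np.concatenate((x, np.ones((batch, 1))), axis=1)   # [batch, 785]

out_trick = W @ x_aug.T                      # [d_out, batch]
out_plain = W[:, :-1] @ x.T + W[:, -1:]      # explicit weights @ x + bias

print(np.allclose(out_trick, out_plain))     # True
\end{minted}
\end{frame}
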
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
The docstring of \mintinline{python}{Softmax.forward} says \mintinline{python}{x: shape(N, c)}, so the tensor has to be transposed back before the Softmax step.
|
|
|
|
|
|
|
|
|
|
|
In principle we could return right here without ever using \mintinline{python}{self.log}, since the $\log$ is only needed when computing the cross-entropy. However, lab4.ipynb insists on running the backward pass before computing the loss, and the backward pass needs \mintinline{python}{self.log.backward}, which in turn requires \mintinline{python}{self.log.forward} to have been called first so that the input is stored in \mintinline{python}{self.mem} and the gradient can be returned correctly.
|
|
|
|
|
|
|
|
|
|
|
There is no way around it, so we call \mintinline{python}{self.log.forward} once here and cache its result.
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def forward(self, x): |
|
|
|
|
|
''' |
|
|
|
|
|
x is the input features |
|
|
|
|
|
Please return the predicted probabilities of x |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Build an MLP feed-forward network and complete its forward pass; the MLP structure is FFN --> RELU --> FFN --> Softmax'''
|
|
|
|
|
y = np.concatenate((x.reshape(x.shape[0], -1), np.ones((x.shape[0], 1))), axis=1).T # getting these shapes right is fiddly
|
|
|
|
|
y = self.mul_h1.forward(self.W1, y) |
|
|
|
|
|
y = self.relu.forward(y) |
|
|
|
|
|
y = self.mul_h2.forward(self.W2, y).T |
|
|
|
|
|
y = self.softmax.forward(y) |
|
|
|
|
|
# print(y) |
|
|
|
|
|
# no way around it: the notebook insists on running backward before computing the loss, so cache the log output here
|
|
|
|
|
self.log_value = self.log.forward(y) |
|
|
|
|
|
return y |
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
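
\begin{frame}[fragile]
For reference, here is how the shapes flow through the forward pass sketched above, written as a standalone numpy trace. \mintinline{python}{W1} is [100, 785] as in the lab; the shape of \mintinline{python}{W2} is assumed to be [10, 100] here purely for illustration.

\begin{minted}{python}
import numpy as np

batch = 32
x  = np.random.randn(batch, 28, 28)                     # input images
W1 = np.random.randn(100, 785)
W2 = np.random.randn(10, 100)

h = np.concatenate((x.reshape(batch, -1),
                    np.ones((batch, 1))), axis=1).T     # [785, batch]
h = W1 @ h                                              # [100, batch]
h = np.where(h > 0, h, 0.0)                             # ReLU, same shape
h = (W2 @ h).T                                          # [batch, 10]
# row-wise softmax -> predicted probabilities
h = np.exp(h - h.max(axis=1, keepdims=True))
prob = h / h.sum(axis=1, keepdims=True)                 # [batch, 10]

print(prob.shape, np.allclose(prob.sum(axis=1), 1.0))   # (32, 10) True
\end{minted}
\end{frame}
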
|
|
|
|
|
|
|
|
|
|
|
\subsection{Backward Pass of the Main Model}
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Once all the preparatory pieces are in place, this part is simple: just back-propagate through the layers one by one.
|
|
|
|
|
|
|
|
|
|
|
Recall that the cross-entropy loss is
|
|
|
|
|
$$ |
|
|
|
|
|
loss=\sum_{\text{class } i} -y_i \log(\hat{y}_i)
|
|
|
|
|
$$ |
|
|
|
|
|
so
|
|
|
|
|
$$ |
|
|
|
|
|
\frac{\mathrm{d}loss}{\mathrm{d}\log(\hat{y}_i)}= -y_i |
|
|
|
|
|
$$ |
|
|
|
|
|
Hence the initial gradient is \mintinline{python}{-label}; the rest of the backward pass is handed off to the \mintinline{python}{backward} functions of the individual layers.
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def backward(self, label): |
|
|
|
|
|
''' |
|
|
|
|
|
label is the ground truth |
|
|
|
|
|
Please compute the gradients of self.W1 and self.W2 |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward-propagation algorithm of this feed-forward network'''
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
|
|
|
|
|
|
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Again note that the result has to be transposed after the Softmax backward step.
|
|
|
|
|
|
|
|
|
|
|
\mintinline{python}{Matmul.backward} ends with \mintinline{python}{return grad_x, grad_W}, which is a hint that a fully connected layer must keep both the gradient with respect to its input and the gradient with respect to its parameters: the former is used to continue back-propagation, the latter to update the parameters.
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def backward(self, label): |
|
|
|
|
|
''' |
|
|
|
|
|
label is the ground truth |
|
|
|
|
|
Please compute the gradients of self.W1 and self.W2 |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Complete the backward-propagation algorithm of this feed-forward network'''
|
|
|
|
|
temp = self.log.backward(-label) |
|
|
|
|
|
temp = self.softmax.backward(temp).T |
|
|
|
|
|
temp, self.gradient2 = self.mul_h2.backward(temp) |
|
|
|
|
|
temp = self.relu.backward(temp) |
|
|
|
|
|
temp, self.gradient1 = self.mul_h1.backward(temp) |
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
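
\begin{frame}[fragile]
A useful cross-check for this chain (independent of the lab's layer classes): for softmax followed by cross-entropy, the combined gradient with respect to the pre-softmax logits is simply $\hat{y}-y$. The standalone sketch below verifies this well-known shortcut against finite differences; the logits and label are made up for illustration.

\begin{minted}{python}
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

z = np.random.randn(5)                 # logits for one sample
y = np.zeros(5); y[2] = 1.0            # one-hot label

loss = lambda logits: -np.sum(y * np.log(softmax(logits)))

grad_analytic = softmax(z) - y         # y_hat - y

eps = 1e-6
grad_numeric = np.array([
    (loss(z + eps * np.eye(5)[i]) - loss(z - eps * np.eye(5)[i])) / (2 * eps)
    for i in range(5)
])

print(np.allclose(grad_analytic, grad_numeric, atol=1e-6))   # True
\end{minted}
\end{frame}
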
|
|
|
|
|
|
|
|
|
|
|
\subsection{Updating the Parameters}
|
|
|
|
|
\begin{frame}[fragile] |
|
|
|
|
|
\begin{multicols}{2} |
|
|
|
|
|
Updating the parameters just follows the formula; do not forget the gradient of the L2 regularization term. Below, $W_1$ is used as the example; $W_2$ is handled in the same way.
|
|
|
|
|
|
|
|
|
|
|
$W_1^{(i,j)}$ denotes the element in row $i$ and column $j$ of $W_1$, and lr denotes the learning rate. Since $\left\Vert W_1 \right\Vert = \sqrt{\sum_{i,j} \bigl(W_1^{(i,j)}\bigr)^{2}}$, differentiating the regularization term gives
|
|
|
|
|
$$ |
|
|
|
|
|
\frac{\mathrm{d}L2}{\mathrm{d}W_1^{(i,j)}}= \frac{\lambda_1 W_1^{(i,j)}}{\left\Vert W_1 \right\Vert }
|
|
|
|
|
$$ |
|
|
|
|
|
|
|
|
|
|
|
$$ |
|
|
|
|
|
W_1 = W_1 - \left( \frac{\mathrm{d}loss}{\mathrm{d}W_1}+\frac{\mathrm{d}L2}{\mathrm{d}W_1} \right) \times lr |
|
|
|
|
|
$$ |
|
|
|
|
|
\columnbreak |
|
|
|
|
|
\begin{minted}{python} |
|
|
|
|
|
def update(self): |
|
|
|
|
|
''' |
|
|
|
|
|
Please update self.W1 and self.W2 |
|
|
|
|
|
''' |
|
|
|
|
|
|
|
|
|
|
|
# ========== |
|
|
|
|
|
# todo '''Update the parameters of this feed-forward network'''
|
|
|
|
|
self.W1 -= (self.gradient1 + self.lambda1 * self.W1 / np.linalg.norm(self.W1)) * self.lr
|
|
|
|
|
self.W2 -= (self.gradient2 + self.lambda1 * self.W2 / np.linalg.norm(self.W2)) * self.lr
|
|
|
|
|
# ========== |
|
|
|
|
|
|
|
|
|
|
|
\end{minted} |
|
|
|
|
|
\end{multicols} |
|
|
|
|
|
\end{frame} |
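
\begin{frame}[fragile]
The derivative of the norm itself can also be checked numerically: since $\left\Vert W \right\Vert = \sqrt{\sum_{i,j} W_{ij}^{2}}$, we expect $\partial \left\Vert W \right\Vert / \partial W_{ij} = W_{ij} / \left\Vert W \right\Vert$, which is exactly the regularization gradient used in the update (up to the coefficient $\lambda$). A minimal standalone check with an arbitrary small matrix:

\begin{minted}{python}
import numpy as np

W = np.random.randn(3, 4)
norm = lambda M: np.linalg.norm(M)

grad_analytic = W / norm(W)

eps = 1e-6
grad_numeric = np.zeros_like(W)
for i in range(W.shape[0]):
    for j in range(W.shape[1]):
        E = np.zeros_like(W); E[i, j] = eps
        grad_numeric[i, j] = (norm(W + E) - norm(W - E)) / (2 * eps)

print(np.allclose(grad_analytic, grad_numeric, atol=1e-6))   # True
\end{minted}
\end{frame}
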
|
|
|
|
|
|
|
|
|
|
|
\begin{frame} |
|
|
|
|
|
\zihao{-4}\centering Thank you for watching!
|
|
|
|
|
\end{frame} |
|
|
|
|
|
\end{document} |