【paddle | Implementing LSTM from scratch】This post implements a single-layer LSTM, a multi-layer LSTM, and a multi-layer bidirectional LSTM from scratch.
Multi-layer LSTM: each layer takes the hidden states produced by the layer below it as its input.
Bidirectional LSTM: the input sequence is reversed in time; the hidden states computed on the reversed sequence are then flipped back to forward time order and concatenated with the forward hidden states.
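For reference, the per-timestep update that the code below implements is the standard LSTM cell (x_t is the input at step t, h_{t-1} and c_{t-1} the previous hidden and cell states):

\begin{aligned}
i_t &= \sigma(x_t W_i + h_{t-1} U_i + b_i) \\
f_t &= \sigma(x_t W_f + h_{t-1} U_f + b_f) \\
g_t &= \tanh(x_t W_g + h_{t-1} U_g + b_g) \\
o_t &= \sigma(x_t W_o + h_{t-1} U_o + b_o) \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t \\
h_t &= o_t \odot \tanh(c_t)
\end{aligned}

In the code, the four W blocks (and likewise U and b) are stored stacked side by side as one hidden_size * 4 matrix, so all four gate pre-activations come out of a single matrix multiplication and are then sliced apart.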
The implementation is as follows:
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np

# Custom LSTM implementation
class CustomLSTM(nn.Layer):
    def __init__(self, input_sz, hidden_sz, num_layers=2, bias=True, bidirectional=False):
        super().__init__()
        self.input_size = input_sz
        self.hidden_size = hidden_sz
        self.bias = bias
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if bidirectional else 1
        self.param_names = []
        for layer in range(self.num_layers):
            self.param_names.append([])
            for direction in range(self.num_directions):
                # layer 0 consumes the raw input; deeper layers consume the
                # (possibly bidirectional) hidden states of the layer below
                layer_input_size = input_sz if layer == 0 else self.hidden_size * self.num_directions
                # W: input-to-hidden weights for the four gates (i, f, g, o), stacked side by side
                W = self.create_parameter([layer_input_size, self.hidden_size * 4])
                # U: hidden-to-hidden (recurrent) weights for the four gates
                U = self.create_parameter([self.hidden_size, self.hidden_size * 4])
                # b: bias for the four gates
                b = self.create_parameter([self.hidden_size * 4])
                layer_params = (W, U, b)
                suffix = '_reverse' if direction == 1 else ''
                param_name = ['weight_W{}{}', 'weight_U{}{}']
                if bias:
                    param_name += ['bias_{}{}']
                param_name = [x.format(layer, suffix) for x in param_name]
                # register each parameter on the module under its formatted name
                for name, param in zip(param_name, layer_params):
                    setattr(self, name, param)
                self.param_names[layer].append(param_name)
        # all_weights[layer][direction] = [W, U, (b)]
        self.all_weights = [[[getattr(self, weight) for weight in weights]
                             for weights in weights_layer] for weights_layer in self.param_names]

    def forward(self, x, init_states=None):
"""Assumes x is of shape (batch, sequence, feature)"""
batch_size, seq_sz, _ = x.shape
if init_states is None:
num_directions = 2 if self.bidirectional else 1
h_t, c_t = (paddle.zeros((self.num_layers * num_directions,batch_size,self.hidden_size)),
paddle.zeros((self.num_layers * num_directions,batch_size,self.hidden_size)))
else:
h_t, c_t = init_statesfor layer in range(self.num_layers):
            hidden_seq = []
            hidden_seq_reverse = []
            weight_layer = self.all_weights[layer]
            for direction in range(self.num_directions):
                weight = weight_layer[direction]
                HS = self.hidden_size
                # reset to the first slice of the state so each pass starts
                # from a (1, batch, hidden) state tensor
                h_t, c_t = h_t[0].unsqueeze(0), c_t[0].unsqueeze(0)
                for t in range(seq_sz):
                    x_t = x[:, t, :]
                    # batch the four gate computations into a single matmul
                    if self.bias:
                        gates = x_t @ weight[0] + h_t @ weight[1] + weight[2]
                    else:
                        gates = x_t @ weight[0] + h_t @ weight[1]
                    # drop the leading direction axis: (1, batch, 4*HS) -> (batch, 4*HS)
                    gates = gates[0]
                    i_t, f_t, g_t, o_t = (
                        F.sigmoid(gates[:, :HS]),              # input gate
                        F.sigmoid(gates[:, HS:HS * 2]),        # forget gate
                        paddle.tanh(gates[:, HS * 2:HS * 3]),  # candidate cell state
                        F.sigmoid(gates[:, HS * 3:]),          # output gate
                    )
                    c_t = f_t * c_t + i_t * g_t
                    h_t = o_t * paddle.tanh(c_t)
                    if direction == 0:
                        if isinstance(hidden_seq, list):
                            hidden_seq = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq = paddle.concat((hidden_seq, h_t[0].unsqueeze(1)), axis=1)
                    if direction == 1:
                        if isinstance(hidden_seq_reverse, list):
                            hidden_seq_reverse = h_t[0].unsqueeze(1)
                        else:
                            hidden_seq_reverse = paddle.concat(
                                (hidden_seq_reverse, h_t[0].unsqueeze(1)), axis=1)
                # flip the time axis: after direction 0 this feeds the reverse
                # pass; after direction 1 it restores the original order
                x = paddle.to_tensor(x.detach().cpu().numpy()[:, ::-1, :].copy())
                if direction == 1:
                    # flip the reverse-pass states back to forward time order,
                    # then concatenate with the forward states along the feature axis
                    hidden_seq_reverse = paddle.to_tensor(
                        hidden_seq_reverse.detach().cpu().numpy()[:, ::-1, :].copy())
                    hidden_seq = paddle.concat((hidden_seq, hidden_seq_reverse), axis=2)
            # the next layer consumes this layer's hidden sequence as input
            x = hidden_seq
        return hidden_seq, (h_t, c_t)
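A minimal usage sketch follows. The sizes and the comparison against paddle.nn.LSTM are illustrative assumptions (Paddle 2.x API), not part of the original post:

# hypothetical smoke test: random input through a 2-layer bidirectional CustomLSTM
paddle.seed(0)
lstm = CustomLSTM(input_sz=16, hidden_sz=32, num_layers=2, bidirectional=True)
x = paddle.randn([4, 10, 16])   # (batch, sequence, feature)
out, (h_t, c_t) = lstm(x)
print(out.shape)                # [4, 10, 64]: forward and reverse states concatenated
print(h_t.shape, c_t.shape)     # [1, 4, 32] each: state from the final pass

# shape-level sanity check against the built-in layer
ref = paddle.nn.LSTM(16, 32, num_layers=2, direction='bidirect')
ref_out, _ = ref(x)
print(ref_out.shape)            # [4, 10, 64], matching CustomLSTM's output shape

The numerical outputs will differ because the two modules initialize their parameters independently; only the shapes are expected to match.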
Resource download: the complete code for implementing LSTM from scratch, from development through usage and comparison.