import numpy as np
# 定义sigmoid函数
def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

    The naive formula overflows in ``exp(-x)`` for large negative ``x``.
    This version only ever exponentiates a non-positive number, so it never
    overflows and emits no floating-point warnings.

    Args:
        x: A scalar, a NumPy array, or anything ``np.asarray`` accepts.

    Returns:
        The sigmoid of ``x`` with the same shape as ``x`` (a NumPy scalar for
        scalar input).
    """
    x = np.asarray(x)
    # z = exp(-|x|) always lies in (0, 1], so it cannot overflow.
    z = np.exp(-np.abs(x))
    # x >= 0:  1 / (1 + exp(-x))        -> numerator 1
    # x <  0:  exp(x) / (1 + exp(x))    -> numerator z (algebraically equal)
    return np.where(x >= 0, 1.0, z) / (1.0 + z)
class RNN:
    """Minimal single-layer recurrent network trained with BPTT.

    For every time step ``t`` the network computes::

        h_t = tanh(Wxh @ x_t + Whh @ h_{t-1} + bh)
        o_t = sigmoid(Why @ h_t + by)

    Inputs are expected to be column vectors of shape ``(input_size, 1)``.

    Attributes:
        Wxh, Whh, Why: Input-to-hidden, hidden-to-hidden and hidden-to-output
            weight matrices.
        bh, by: Hidden and output bias column vectors.
        h: Hidden state after the most recent ``forward`` call.
        hs: Hidden states cached by ``forward``; ``hs[0]`` is the initial
            zero state and ``hs[t + 1]`` is the state after input ``t``.
        outputs: Per-step outputs cached by ``forward``.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create a network with small random weights and zero biases.

        Args:
            input_size: Number of rows of each input column vector.
            hidden_size: Number of hidden units.
            output_size: Number of rows of each output column vector.
        """
        # Hyper-parameters
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Weights and biases (draw order kept: Wxh, Whh, Why)
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01   # input -> hidden
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden -> hidden
        self.Why = np.random.randn(output_size, hidden_size) * 0.01  # hidden -> output
        self.bh = np.zeros((hidden_size, 1))  # hidden bias
        self.by = np.zeros((output_size, 1))  # output bias

        # Forward-pass cache, populated by forward() and consumed by backward().
        self.h = np.zeros((hidden_size, 1))
        self.hs = [self.h]
        self.outputs = []

    def forward(self, inputs):
        """Run the network over a sequence, starting from a zero hidden state.

        Args:
            inputs: Iterable of column vectors, one per time step.

        Returns:
            List with one sigmoid-activated output array per time step. The
            same list is stored in ``self.outputs``.
        """
        self.h = np.zeros((self.hidden_size, 1))
        # Keep every hidden state: BPTT needs h_t and h_{t-1} for each step,
        # not just the final state.
        self.hs = [self.h]
        self.outputs = []
        for x in inputs:
            # Update the hidden state
            self.h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, self.h) + self.bh)
            self.hs.append(self.h)
            # Output pre-activation followed by the sigmoid
            y = np.dot(self.Why, self.h) + self.by
            self.outputs.append(sigmoid(y))
        return self.outputs

    def backward(self, inputs, targets, learning_rate=0.1, clip_value=5.0):
        """Back-propagate through time and apply one gradient-descent step.

        Must be called after ``forward(inputs)`` on the same sequence, because
        it relies on the hidden states and outputs cached by that call.

        Args:
            inputs: The sequence that was passed to ``forward``.
            targets: One target array per time step, shaped like the outputs.
            learning_rate: Step size of the parameter update.
            clip_value: Each gradient entry is clipped to
                ``[-clip_value, clip_value]`` before the update.

        Raises:
            ValueError: If the cached forward pass does not match ``inputs``.
        """
        steps = len(inputs)
        if len(self.outputs) != steps or len(self.hs) != steps + 1:
            raise ValueError(
                "backward() requires a preceding forward() call on the same inputs"
            )

        # Gradient accumulators
        dWxh = np.zeros_like(self.Wxh)
        dWhh = np.zeros_like(self.Whh)
        dWhy = np.zeros_like(self.Why)
        dbh = np.zeros_like(self.bh)
        dby = np.zeros_like(self.by)
        dh_next = np.zeros((self.hidden_size, 1))

        for t in reversed(range(steps)):
            h_t = self.hs[t + 1]   # hidden state produced at step t
            h_prev = self.hs[t]    # hidden state fed into step t

            # output - target is the gradient of the binary cross-entropy
            # with respect to the pre-sigmoid output.
            dy = self.outputs[t] - targets[t]

            # Output layer gradients
            dWhy += np.dot(dy, h_t.T)
            dby += dy

            # Error reaching h_t: from this step's output and from step t + 1
            dh = np.dot(self.Why.T, dy) + dh_next
            # Back through tanh, using the state of *this* step
            dh_raw = (1 - h_t ** 2) * dh

            # Hidden layer gradients
            dWxh += np.dot(dh_raw, inputs[t].T)
            dWhh += np.dot(dh_raw, h_prev.T)
            dbh += dh_raw

            # Error handed to the previous time step
            dh_next = np.dot(self.Whh.T, dh_raw)

        # Clip in place to guard against exploding gradients
        for dparam in (dWxh, dWhh, dWhy, dbh, dby):
            np.clip(dparam, -clip_value, clip_value, out=dparam)

        # Gradient-descent update
        self.Wxh -= learning_rate * dWxh
        self.Whh -= learning_rate * dWhh
        self.Why -= learning_rate * dWhy
        self.bh -= learning_rate * dbh
        self.by -= learning_rate * dby
# Demo: train the network on a toy sequence, then query it.
# Two time steps of 3-dimensional column vectors, one scalar label per step.
inputs = [np.array(values).reshape(-1, 1) for values in ([1, 0, 1], [0, 1, 0])]
targets = [np.array([[label]]) for label in (1, 0)]
input_size = len(inputs[0])
hidden_size = 25
output_size = len(targets[0])

# Build the model and train it.
rnn = RNN(input_size, hidden_size, output_size)
for epoch in range(1000):
    outputs = rnn.forward(inputs)
    # Mean squared error, used only for progress reporting.
    squared_error = np.square(np.array(outputs) - np.array(targets))
    loss = squared_error.mean()
    rnn.backward(inputs, targets)
    finished = epoch + 1
    if finished % 100:
        continue
    # Report once every 100 completed epochs.
    print("次数:", finished, "误差:", loss)

# Predict on an input that was not part of the training data.
new_input = np.array([1, 1, 1]).reshape(-1, 1)
output = rnn.forward([new_input])
print("输入:", new_input.ravel())
print("输出:", output)
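# Stray web-page text left over from the original post, kept as comments so
# that the file remains valid Python:
#   简单的GRU实例代码  (heading: "simple GRU example code")
#   点赞  (page button: "like")
#   收藏  (page button: "bookmark")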