我在自己学习写人工神经网络的结构,代码放在最后面。
但是运行出来的结果报错,显示:
ValueError: cannot reshape array of size 784 into shape (32,1,newaxis)
另外就是,我试着在感觉有问题的地方,控制台输出检查了一下,发现测试部分的循环到了最后一次,会把batch数量减半,我找不到原因。x_train和 x_test使用的卷积处理函数都是相同的,输入的数据格式也是相同的。
x测试 (32, 28, 28)
x测试 (32, 28, 28)
x测试 (32, 28, 28)
x测试 (16, 28, 28)
最后还有两个小问题,一个是目前前面训练的过程,我感觉误差极大,可能是目前模型太简单只有一层的缘故,我想问一下有什么方法可以提高训练的效率吗,是不是增多神经元层数就会好些?
另一个问题是,我为了符合tf.nn.conv2d()函数的输入格式,采用了tf.squeeze()来处理张量维度的方法是否正确,会不会对输入的数据造成影响?
def output(input, get1, get2, batch):
x = tf.expand_dims(input, 3)
output = tf.nn.conv2d(x, get1, strides=[1, 2, 2, 1], padding='SAME')
output = tf.nn.conv2d(output, get2, strides=[1, 2, 2, 1], padding='SAME')
output = tf.squeeze(output, 3)
output = np.reshape(output, (batch, 1, -1)) # 处理和输出的数据,(组数,1,-1)表示一行与n列
output = tf.cast(output, tf.float64)
# print(output)
return output
程序的代码
import os
from sklearn import datasets
from matplotlib import pyplot as plt
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from keras import models
import numpy as np
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = tf.cast(x_train, tf.float64)
y_train = tf.cast(y_train, tf.int32)
x_test = tf.cast(x_test, tf.float64)
y_test = tf.cast(y_test, tf.int32)
train_db = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
test_db = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
# 特征类型
get_1 = tf.constant(value=np.ones((3, 3), dtype=np.float64), shape=(3, 3, 1, 1))
get_2 = tf.constant(value=np.eye(3, dtype=np.float64), shape=(3, 3, 1, 1))
# 迭代次数
epoch = 50
# 学习率
lr = 0.1
# 每轮分成4个step,loss_all由于存放每轮4个loss的和
loss_all = 0
# 记录每一轮的损失函数于列表
train_loss_results = []
# 记录测试时候的准确率于列表
test_acc = []
# 第一层权重、偏置,并且设置可以更新
w1 = tf.Variable(tf.random.truncated_normal([49, 10], stddev=0.1, seed=1, dtype=np.float64))
b1 = tf.Variable(tf.random.truncated_normal([10], stddev=0.1, seed=1, dtype=np.float64))
# 特征输出函数
def output(input, get1, get2, batch):
x = tf.expand_dims(input, 3)
output = tf.nn.conv2d(x, get1, strides=[1, 2, 2, 1], padding='SAME')
output = tf.nn.conv2d(output, get2, strides=[1, 2, 2, 1], padding='SAME')
output = tf.squeeze(output, 3)
output = np.reshape(output, (batch, 1, -1)) # 处理和输出的数据,(组数,1,-1)表示一行与n列
output = tf.cast(output, tf.float64)
# print(output)
return output
print(x_train.shape)
print(x_test.shape)
# 训练阶段
print('训练开始')
for epoch in range(epoch):
for step1, (x_train, y_train) in enumerate(train_db):
print('x训练', x_train.shape)
# print(step, x_train.shape, y_train.shape)
# print('.......分割线........')
# print(output(x_train, get_1, get_2, 32).shape)
# print('.......分割线........')
# print((tf.matmul(output(x_train, get_1, get_2, 32), w1) + b1).shape)
with tf.GradientTape() as tape:
x = output(x_train, get_1, get_2, 32)
y = tf.matmul(x, w1) + b1
y = tf.nn.softmax(y)
y_ = tf.one_hot(y_train, depth=10)
y_ = tf.cast(y_, tf.float64)
# 计算神经网络误差
loss = tf.reduce_mean(tf.square(y_ - y))
loss_all += loss.numpy()
grads = tape.gradient(loss, [w1, b1]) # 求loss关于【w1,b1】的导数,也就是梯度
# 实现w1、b1的自更新,更新公式为w1 = w1 - lr * w1_gard
w1.assign_sub(lr * grads[0])
b1.assign_sub(lr * grads[1])
print("Epoch {},lodd: {}".format(epoch, loss_all / 4)) # 打印每个epoch的平均误差
train_loss_results.append(loss_all / 4) # 添加入list中,方便绘制曲线
loss_all = 0
total_correct, total_number = 0, 0
for x_test, y_test in test_db: # 测试
print('x测试',x_test.shape)
x = output(x_test, get_1, get_2, 32)
y = tf.matmul(x, w1) + b1
y = tf.nn.softmax(y) # 转化为预测概率矩阵
# print(y.shape)
y = tf.squeeze(y, 1)
# print(y.shape)
pred = tf.argmax(y, axis=1)
# print(pred.shape)
pred = tf.cast(pred, dtype=y_test.dtype)
correct = tf.cast(tf.equal(pred, y_test), dtype=tf.int32)
correct = tf.reduce_sum(correct)
total_correct += int(correct)
total_number += x_test.shape[0]
acc = total_correct / total_number
test_acc.append(acc)
print("Test_acc", acc)
print("............................")
# 绘制损失曲线
plt.title('Loss Curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.plot(train_loss_results, label='$Loss$')
plt.legend()
plt.show()
# 绘制准确率曲线
plt.title('Acc Curve')
plt.xlabel('Epoch')
plt.ylabel('Acc')
plt.plot(train_loss_results, label='$Accuracy$')
plt.legend()
plt.show()