撒哈拉牧码人 2020-02-25 17:38 采纳率: 0%
浏览 1109

求助,Tensorflow搭建AlexNet模型时训练集、验证集的LOSS不收敛

如题,代码如下,请大佬赐教

# coding:utf-8
import tensorflow as tf
import numpy as np
import time
import os
import cv2
import matplotlib.pyplot as plt

def get_file(file_dir):
    """Recursively walk *file_dir* and return shuffled image paths and labels.

    A file whose name starts with ``cat`` (text before the first ``.``,
    e.g. ``cat.123.jpg``) gets label 0; every other file gets label 1.

    Args:
        file_dir: root directory of the image dataset.

    Returns:
        (image_list, label_list): two parallel lists shuffled in unison;
        labels are plain ints.
    """
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        for name in files:
            images.append(os.path.join(root, name))
            # Label derived from the filename prefix: 'cat' -> 0, else 1.
            labels.append(0 if name.split('.')[0] == 'cat' else 1)

    # Shuffle paths and labels in unison with one index permutation.
    # (The previous version stacked both lists into a single np.array,
    # which coerced the int labels to strings and then needed
    # int(float(i)) to recover them; it also grew labels with np.append,
    # an O(n^2) pattern, and printed the full 25k-entry lists.)
    perm = np.random.permutation(len(images))
    image_list = [images[i] for i in perm]
    label_list = [labels[i] for i in perm]
    return image_list, label_list


def _parse_function(image_list, labels_list):
    """tf.data map fn: load one JPEG path, fix size to 227x227, standardize.

    Args:
        image_list: scalar string tensor, path of one image file.
        labels_list: the label tensor, passed through unchanged.

    Returns:
        (image, label) where image is a float32 227x227x3 tensor,
        per-image standardized (zero mean, unit variance).
    """
    raw = tf.read_file(image_list)
    img = tf.image.decode_jpeg(raw, channels=3)
    img = tf.cast(img, tf.float32)
    # Center-crop or zero-pad to the fixed AlexNet input resolution.
    img = tf.image.resize_image_with_crop_or_pad(img, 227, 227)
    # Normalize each image independently.
    img = tf.image.per_image_standardization(img)
    return img, labels_list


# Turn the (path, label) lists into a batched tf.data input pipeline.
def get_batch(image_list, labels_list, batch_size):
    """Build an infinitely-repeating, batched one-shot iterator.

    Args:
        image_list: list of image file paths.
        labels_list: list of integer labels, parallel to image_list.
        batch_size: number of samples per batch.

    Returns:
        A one-shot iterator over (image_batch, label_batch) pairs.
    """
    paths = tf.cast(image_list, tf.string)
    labels = tf.cast(labels_list, tf.int32)
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    ds = ds.repeat()                 # loop over the data forever
    ds = ds.map(_parse_function)     # decode + preprocess each sample
    ds = ds.batch(batch_size)
    return ds.make_one_shot_iterator()


# Hand-rolled batch normalization with manually tracked population stats.
def batch_norm(inputs, is_training, is_conv_out=True, decay=0.999):
    """Batch-normalize `inputs`, switching stats on the `is_training` flag.

    Args:
        inputs: tensor to normalize (conv feature map or FC activations).
        is_training: scalar bool tensor; True selects per-batch statistics
            (and updates the moving averages), False selects the stored
            population statistics.
        is_conv_out: True reduces over (batch, height, width) axes as for
            conv outputs; False reduces over the batch axis only (FC layers).
        decay: moving-average decay for the population statistics.
            NOTE(review): with decay=0.999 and only ~100-200 training steps
            the population stats barely move from their init (mean 0,
            var 1), so inference-mode BN will be poorly calibrated — confirm
            whether a smaller decay or more steps is intended.

    Returns:
        The normalized tensor (epsilon = 0.001).
    """
    # Learned per-channel scale/offset; population stats are not trained.
    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    def batch_norm_train():
        if is_conv_out:
            batch_mean, batch_var = tf.nn.moments(inputs, [0, 1, 2])  # mean/var over N,H,W
        else:
            batch_mean, batch_var = tf.nn.moments(inputs, [0])

        # Exponential-moving-average updates of the population statistics.
        train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))

        with tf.control_dependencies([train_mean, train_var]):  # run the EMA updates before normalizing
            return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, scale, 0.001)

    def batch_norm_test():
        # Inference path: normalize with the accumulated population stats.
        return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, scale, 0.001)

    batch_normalization = tf.cond(is_training, batch_norm_train, batch_norm_test)
    return batch_normalization


# Model hyperparameters.
learning_rate = 1e-4
training_iters = 200  # NOTE(review): only referenced in the plot title below
batch_size = 50  # NOTE(review): only used in the plot title; get_batch is actually called with 100
display_step = 5  # NOTE(review): appears unused in this script
n_classes = 2  # binary: cat vs. dog
n_fc1 = 4096  # width of the first fully-connected layer
n_fc2 = 2048  # width of the second fully-connected layer

# Graph inputs.
x = tf.placeholder(tf.float32, [None, 227, 227, 3])  # image batch, NHWC
y = tf.placeholder(tf.int32, [None])  # integer class labels
is_training = tf.placeholder(tf.bool)  # batch-norm train/inference switch

# Weights and biases kept in dicts keyed by layer name.
# NOTE(review): conv1 uses stddev=0.0001 while every other conv layer uses
# 0.01 — confirm the two-orders-of-magnitude difference is intentional;
# near-zero first-layer weights can slow or stall early training.
W_conv = {
    'conv1': tf.Variable(tf.truncated_normal([11, 11, 3, 96], stddev=0.0001)),
    'conv2': tf.Variable(tf.truncated_normal([5, 5, 96, 256], stddev=0.01)),
    'conv3': tf.Variable(tf.truncated_normal([3, 3, 256, 384], stddev=0.01)),
    'conv4': tf.Variable(tf.truncated_normal([3, 3, 384, 384], stddev=0.01)),
    'conv5': tf.Variable(tf.truncated_normal([3, 3, 384, 256], stddev=0.01)),
    'fc1': tf.Variable(tf.truncated_normal([6 * 6 * 256, n_fc1], stddev=0.1)),
    'fc2': tf.Variable(tf.truncated_normal([n_fc1, n_fc2], stddev=0.1)),
    'fc3': tf.Variable(tf.truncated_normal([n_fc2, n_classes], stddev=0.1)),
}

b_conv = {
    'conv1': tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[96])),
    'conv2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[256])),
    'conv3': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[384])),
    'conv4': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[384])),
    'conv5': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[256])),
    'fc1': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_fc1])),
    'fc2': tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[n_fc2])),
    'fc3': tf.Variable(tf.constant(0.0, dtype=tf.float32, shape=[n_classes])),
}

x_image = tf.reshape(x, [-1, 227, 227, 3])
# AlexNet-style stack: conv -> BN -> ReLU, with pooling / LRN layers.
# NOTE(review): all pooling below is avg_pool, whereas the AlexNet paper
# uses max pooling — confirm this substitution is intentional.
# Layer 1: conv + BN + ReLU
conv1 = tf.nn.conv2d(x_image, W_conv['conv1'], strides=[1, 4, 4, 1], padding='VALID')
conv1 = tf.nn.bias_add(conv1, b_conv['conv1'])
conv1 = batch_norm(conv1, is_training)
#conv1 = tf.layers.batch_normalization(conv1, training=is_training)
conv1 = tf.nn.relu(conv1)
# Pooling 1
pool1 = tf.nn.avg_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
# Local response normalization (only applied after layer 1)
norm1 = tf.nn.lrn(pool1, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

# Layer 2: conv + BN + ReLU
conv2 = tf.nn.conv2d(norm1, W_conv['conv2'], strides=[1, 1, 1, 1], padding='SAME')
conv2 = tf.nn.bias_add(conv2, b_conv['conv2'])
#conv2 = tf.layers.batch_normalization(conv2, training=is_training)
conv2 = batch_norm(conv2,  is_training)
conv2 = tf.nn.relu(conv2)
# Pooling 2
pool2 = tf.nn.avg_pool(conv2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')
# LRN layer (disabled)
#norm2 = tf.nn.lrn(pool2, 5, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

# Layer 3: conv + BN + ReLU (no pooling)
conv3 = tf.nn.conv2d(pool2, W_conv['conv3'], strides=[1, 1, 1, 1], padding='SAME')
conv3 = tf.nn.bias_add(conv3, b_conv['conv3'])
#conv3 = tf.layers.batch_normalization(conv3, training=is_training)
conv3 = batch_norm(conv3, is_training)
conv3 = tf.nn.relu(conv3)

# Layer 4: conv + BN + ReLU (no pooling)
conv4 = tf.nn.conv2d(conv3, W_conv['conv4'], strides=[1, 1, 1, 1], padding='SAME')
conv4 = tf.nn.bias_add(conv4, b_conv['conv4'])
#conv4 = tf.layers.batch_normalization(conv4, training=is_training)
conv4 = batch_norm(conv4,  is_training)
conv4 = tf.nn.relu(conv4)

# Layer 5: conv + BN + ReLU
conv5 = tf.nn.conv2d(conv4, W_conv['conv5'], strides=[1, 1, 1, 1], padding='SAME')
conv5 = tf.nn.bias_add(conv5, b_conv['conv5'])
#conv5 = tf.layers.batch_normalization(conv5, training=is_training)
conv5 = batch_norm(conv5,  is_training)
conv5 = tf.nn.relu(conv5)
# Pooling 5
pool5 = tf.nn.avg_pool(conv5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='VALID')

# Layer 6: fully connected (flatten to 6*6*256 assumes the exact
# 227x227 input and the stride/padding choices above)
reshape = tf.reshape(pool5, [-1, 6 * 6 * 256])
#fc1 = tf.matmul(reshape, W_conv['fc1'])
fc1 = tf.add(tf.matmul(reshape, W_conv['fc1']), b_conv['fc1'])
#fc1 = tf.layers.batch_normalization(fc1, training=is_training)
fc1 = batch_norm(fc1, is_training, False)
fc1 = tf.nn.relu(fc1)
#fc1 = tf.nn.dropout(fc1, 0.5)

# Layer 7: fully connected
#fc2 = tf.matmul(fc1, W_conv['fc2'])
fc2 = tf.add(tf.matmul(fc1, W_conv['fc2']), b_conv['fc2'])
#fc2 = tf.layers.batch_normalization(fc2, training=is_training)
fc2 = batch_norm(fc2, is_training, False)
fc2 = tf.nn.relu(fc2)
#fc2 = tf.nn.dropout(fc2, 0.5)

# Layer 8: fully connected classifier (logits, no softmax here)
yop = tf.add(tf.matmul(fc2, W_conv['fc3']), b_conv['fc3'])

# Loss: sparse softmax cross-entropy over the n_classes logits.
#y = tf.stop_gradient(y)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=yop, labels=y))

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss)

# NOTE(review): if the tf.layers.batch_normalization alternatives above are
# enabled, their moving-average updates live in GraphKeys.UPDATE_OPS and the
# train op MUST be wrapped as below; leaving this commented out freezes the
# BN statistics and is a classic cause of a non-converging loss.
#update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#with tf.control_dependencies(update_ops):  # make train_op run only after update_ops
    #train_op = optimizer.minimize(loss)

# Evaluation: top-1 accuracy over the batch.
correct_predict = tf.nn.in_top_k(yop, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
init = tf.global_variables_initializer()


def onehot(labels):
    """One-hot encode a sequence of integer labels into an (N, C) array.

    Args:
        labels: non-empty sequence of non-negative ints; the number of
            columns is max(labels) + 1.

    Returns:
        numpy array of shape (len(labels), max(labels) + 1) with a single
        1 per row.
    """
    count = len(labels)
    classes = max(labels) + 1
    encoded = np.zeros((count, classes))
    for row, lab in enumerate(labels):
        encoded[row, lab] = 1  # mark this sample's class column
    return encoded

save_model = './/model//my-model.ckpt'


# Model training loop.
def train(epoch):
    """Run `epoch` training steps, printing loss/accuracy and plotting curves.

    Relies on the module-level graph (optimizer, loss, accuracy, the x / y /
    is_training placeholders, init) and the module-level `dataset` iterator
    built by get_batch().

    Args:
        epoch: number of training steps (one batch each) to run.
    """
    with tf.Session() as sess:
        sess.run(init)

        saver = tf.train.Saver(var_list=tf.global_variables())

        loss_history = []
        acc_history = []
        start_time = time.time()

        # Build the fetch op for the next batch ONCE, without rebinding the
        # global `dataset` (the previous version did `global dataset;
        # dataset = dataset.get_next()`, which clobbered the iterator and
        # made any second call to train() fail).
        next_batch = dataset.get_next()

        for step in range(epoch):
            image, labels = sess.run(next_batch)

            # One sess.run fetches the train op together with loss and
            # accuracy, instead of three separate forward passes per step.
            # NOTE(review): accuracy is evaluated with is_training=True,
            # i.e. with batch statistics — confirm that is intended.
            _, loss_record, acc = sess.run(
                [optimizer, loss, accuracy],
                feed_dict={x: image, y: labels, is_training: True})

            print("step:%d, now the loss is %f" % (step, loss_record))
            print("acc : %f" % acc)

            loss_history.append(loss_record)
            acc_history.append(acc)

            end_time = time.time()
            print('time:', (end_time - start_time))
            start_time = end_time
            # (was: '%d opench is finished' % (i / 5) — garbled word and
            # wrong counter; report the actual step index instead)
            print('-----------step %d is finished ------------' % step)
            #if acc > max_acc:
            #    max_acc = acc
            #    saver.save(sess, save_model, global_step=i + 1)
        print('Optimization Finished!')

        #saver.save(sess, save_model)
        print('Model Save Finished!')

        # Plot loss and accuracy over the training steps.
        plt.plot(loss_history)
        plt.plot(acc_history)
        plt.xlabel('iter')
        plt.ylabel('loss')
        plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, training_iters, batch_size))
        plt.tight_layout()
        plt.show()

# Driver: build the input pipeline from disk and run training.
X_train, y_train = get_file("D://cat_and_dog//cat_dog_train//cat_dog")  # shuffled file paths + labels
# NOTE(review): batch size 100 here, not the `batch_size = 50` constant
# defined above — confirm which is intended.
dataset = get_batch(X_train, y_train, 100)
train(100)

数据文件夹为“猫狗大战”那个包含25000张图片的数据集。不加入批标准化(batch norm)层的时候,训练集loss会下降,但是acc维持不变;加入自己实现的 `batch_norm` 或者 `tf.layers.batch_normalization` 之后,训练集和验证集的loss都不收敛了。

  • 写回答

1条回答 默认 最新

  • CSDN-Ada助手 CSDN-AI 官方账号 2022-10-25 19:24
    关注
    不知道你这个问题是否已经解决, 如果还没有解决的话:

    如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^
    评论

报告相同问题?

悬赏问题

  • ¥15 如何用stata画出文献中常见的安慰剂检验图
  • ¥15 c语言链表结构体数据插入
  • ¥40 使用MATLAB解答线性代数问题
  • ¥15 COCOS的问题COCOS的问题
  • ¥15 FPGA-SRIO初始化失败
  • ¥15 MapReduce实现倒排索引失败
  • ¥15 ZABBIX6.0L连接数据库报错,如何解决?(操作系统-centos)
  • ¥15 找一位技术过硬的游戏pj程序员
  • ¥15 matlab生成电测深三层曲线模型代码
  • ¥50 随机森林与房贷信用风险模型