# 类型一(通过自动求导 GradientTape 手动优化模型参数)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers, datasets
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
#提取mnist数据集
def mnist_dataset():
    """Load MNIST and return (train, valid, test) splits ready for training.

    Pixel values are scaled to [0, 1] float32, a trailing channel axis is
    appended ((N, 28, 28) -> (N, 28, 28, 1)), and integer labels are one-hot
    encoded to depth-10 float32 vectors.
    """
    (x, y), (x_test, y_test) = datasets.mnist.load_data()
    # Hold out 20% of the official training set as a validation split.
    x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2)

    def prep_images(images):
        # Normalize to [0, 1] and add the channel dimension expected by Conv2D.
        scaled = tf.cast(images / 255.0, dtype=tf.float32)
        return tf.expand_dims(scaled, axis=3)

    def prep_labels(labels):
        # One-hot encode class indices 0-9.
        return tf.one_hot(labels, depth=10, dtype=tf.float32)

    return (
        (prep_images(x_train), prep_labels(y_train)),
        (prep_images(x_valid), prep_labels(y_valid)),
        (prep_images(x_test), prep_labels(y_test)),
    )
#定义模型
class Convolution_NN(keras.Model):
    """Small CNN for 10-class image classification.

    Architecture: conv(10, 5x5, relu) -> maxpool -> conv(10, 5x5, relu)
    -> maxpool -> flatten -> dense(100, tanh) -> dense(10, softmax).
    Because the final layer uses a softmax activation, `call` returns class
    probabilities rather than raw logits.
    """

    def __init__(self):
        super().__init__()
        self.L1_conv = Conv2D(filters=10, kernel_size=(5, 5), activation='relu', padding='same')
        self.L2_conv = Conv2D(filters=10, kernel_size=(5, 5), activation='relu', padding='same')
        self.pool = MaxPooling2D(pool_size=(2, 2), strides=2)
        self.flat = Flatten()
        self.dense1 = Dense(100, activation='tanh')
        self.dense2 = Dense(10, activation='softmax')

    def call(self, inputs):
        # Two conv+pool stages, then a two-layer classifier head.
        x = self.pool(self.L1_conv(inputs))
        x = self.pool(self.L2_conv(x))
        x = self.flat(x)
        x = self.dense1(x)
        return self.dense2(x)
#定义交叉熵损失函数
def compute_loss(logits, labels):
    """Mean softmax cross-entropy between raw logits and one-hot labels.

    NOTE(review): Convolution_NN.call ends in a softmax activation, so what
    this script actually passes in as `logits` are already probabilities;
    applying a from-logits loss on top squashes them through softmax a second
    time — confirm this is intended.
    """
    per_example = tf.keras.losses.categorical_crossentropy(labels, logits, from_logits=True)
    return tf.reduce_mean(per_example)
#定义预测准确率函数
def compute_accuracy(logits, labels):
    """Fraction of samples whose argmax prediction matches the one-hot label."""
    predicted_class = tf.argmax(logits, axis=1)
    true_class = tf.argmax(labels, axis=1)
    hits = tf.cast(tf.equal(predicted_class, true_class), tf.float32)
    return tf.reduce_mean(hits)
#参数优化
def train_one_step(model, optimizer, x, y):
    """Apply one optimizer step of `model` on the mini-batch (x, y)."""
    # Record the forward pass so gradients can be taken w.r.t. the weights.
    with tf.GradientTape() as tape:
        loss = compute_loss(model(x), y)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
#------------------------------
if __name__ == '__main__':
    (x_train, y_train), (x_valid, y_valid), (x_test, y_test) = mnist_dataset()

    # Training hyper-parameters.
    training_epochs = 20   # number of passes over the training set
    batch_size = 50        # mini-batch size
    learning_rate = 0.001  # Adam step size

    model = Convolution_NN()
    optimizer = optimizers.Adam(learning_rate=learning_rate)

    # Number of full mini-batches per epoch (any trailing remainder is dropped).
    steps = x_train.shape[0] // batch_size
    for epoch in range(training_epochs):
        for step in range(steps):
            start = step * batch_size
            X = x_train[start:start + batch_size]
            Y = y_train[start:start + batch_size]
            train_one_step(model, optimizer, X, Y)
# 类型二(通过tf的高阶API——Keras来训练模型参数)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import optimizers, datasets
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
#提取mnist数据集
def mnist_dataset():
    """Load MNIST and return (train, test) pairs ready for `model.fit`.

    Pixels are scaled to [0, 1] float32 with a trailing channel axis added,
    and labels are one-hot encoded to depth 10.
    """
    (x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()

    def prep(images, labels):
        # Normalize, add the channel dimension, and one-hot encode the labels.
        images = tf.expand_dims(tf.cast(images / 255.0, dtype=tf.float32), axis=3)
        labels = tf.one_hot(labels, depth=10, dtype=tf.float32)
        return images, labels

    return prep(x_train, y_train), prep(x_test, y_test)
#定义模型
class Convolution_NN(keras.Model):
    """Small CNN for 10-class image classification, trained via Keras `fit`.

    Architecture: conv(10, 5x5, relu) -> maxpool -> conv(10, 5x5, relu)
    -> maxpool -> flatten -> dense(100, tanh) -> dense(10, softmax).
    The softmax output pairs with the 'categorical_crossentropy' loss used
    at compile time.
    """

    def __init__(self):
        super().__init__()
        self.L1_conv = Conv2D(filters=10, kernel_size=(5, 5), activation='relu', padding='same')
        self.L2_conv = Conv2D(filters=10, kernel_size=(5, 5), activation='relu', padding='same')
        self.pool = MaxPooling2D(pool_size=(2, 2), strides=2)
        self.flat = Flatten()
        self.dense1 = Dense(100, activation='tanh')
        self.dense2 = Dense(10, activation='softmax')

    def call(self, inputs):
        # Two conv+pool stages followed by the dense classifier head.
        x = self.pool(self.L1_conv(inputs))
        x = self.pool(self.L2_conv(x))
        x = self.flat(x)
        x = self.dense1(x)
        return self.dense2(x)
#------------------------------
if __name__ == '__main__':
    # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # force CPU if GPU memory is insufficient
    (x_train, y_train), (x_test, y_test) = mnist_dataset()

    model = Convolution_NN()
    # 'categorical_crossentropy' matches the model's softmax output and the
    # one-hot labels produced by mnist_dataset().
    model.compile(optimizer=optimizers.Adam(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Training hyper-parameters.
    training_epochs = 20  # number of passes over the training set
    batch_size = 50       # mini-batch size

    # Keras streams the data in mini-batches and holds out 20% for validation.
    train_history = model.fit(x_train, y_train,
                              validation_split=0.2,
                              epochs=training_epochs,
                              batch_size=batch_size,
                              verbose=2)
# 一句话总结:类型一,我自己写循环优化参数,在GPU上跑时报错:OOM when allocating tensor with shape[48000,28,28,10] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2D],即显存不足。类型二,直接调用Keras API训练,在同一块GPU上很流畅。为什么?讲道理Keras内部的参数优化流程应该和类型一一致,只是封装好了而已。提示(待确认):报错张量的第一维是48000,恰好等于整个训练集(60000×0.8)的样本数,说明某处把全部训练数据一次性送进了Conv2D;而Keras的fit始终按batch_size分批送入数据,所以不会出现这种整批前向传播。