问题遇到的现象和发生背景
我在tensorflow官网看到的一个代码,原版照抄下来之后,训练正确率只有0.1677
用代码块功能插入代码,请勿粘贴截图。 不用代码块回答率下降 50%
import keras
import tensorflow as tf
import numpy as np
from keras.layers import TextVectorization
import string
import re
batch_size = 32
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
"aclImdb/train",
batch_size=batch_size,
validation_split=0.2,
subset="training",
seed=1337,
)
raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
"aclImdb/train",
batch_size=batch_size,
validation_split=0.2,
subset="validation",
seed=1337,
)
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
"aclImdb/test", batch_size=batch_size
)
print(f"Number of batches in raw_train_ds: {raw_train_ds.cardinality()}")
print(f"Number of batches in raw_val_ds: {raw_val_ds.cardinality()}")
print(f"Number of batches in raw_test_ds: {raw_test_ds.cardinality()}")
def custom_standardization(input_data):
lowercase = tf.strings.lower(input_data)
stripped_html = tf.strings.regex_replace(lowercase,'<bar />',' ')
'''
string.punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
regex_replace的第二个位置可以填入字符串或标量字符串‘tensor’,要使用的正则表达式
'''
return tf.strings.regex_replace(stripped_html,f'[{re.escape(string.punctuation)}]','')
max_features = 20000
embedding_dim = 128
sequence_length = 500
vectorize_layer = TextVectorization(
standardize=custom_standardization,
max_tokens=max_features,
output_mode='int',
output_sequence_length=sequence_length,
)
text_ds = raw_train_ds.map(lambda x, y:x)
vectorize_layer.adapt(text_ds)
def vectorize_text(text,label):
text = tf.expand_dims(text,-1)
return vectorize_layer(text),label
train_ds = raw_train_ds.map(vectorize_text)
val_ds = raw_val_ds.map(vectorize_text)
test_ds = raw_test_ds.map(vectorize_text)
#Do async prefetching / buffering of the data for best performance on GPU
train_ds = train_ds.cache().prefetch(buffer_size = 10)
val_ds = val_ds.cache().prefetch(buffer_size = 10)
test_ds = test_ds.cache().prefetch(buffer_size = 10)
"""
build a model
"""
from keras import layers
inputs = keras.Input(shape=(None,),dtype='int64')
x = layers.Embedding(max_features,embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)
x = layers.Conv1D(128,7,padding='valid',activation='relu',strides=3)(x)
x = layers.Conv1D(128,7,padding='valid',activation='relu',strides=3)(x)
x = layers.GlobalMaxPool1D()(x)
x = layers.Dense(128,activation='relu')(x)
x = layers.Dropout(0.5)(x)
predictions = layers.Dense(1,activation='sigmoid',name='predictions')(x)
model = keras.Model(inputs,predictions)
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
epochs = 3
model.fit(train_ds,validation_data=val_ds,epochs=epochs)
运行结果及详细报错内容
1875/1875 - 30s 13ms/step - loss: -287010357248.0000 - accuracy: 0.1665 - val_loss: -1532206383104.0000 - val_accuracy: 0.1677
Epoch 2/3
1875/1875 - 14s 8ms/step - loss: -10998242082816.0000 - accuracy: 0.1664 - val_loss: -27677966729216.0000 - val_accuracy: 0.1677
Epoch 3/3
1875/1875 - 14s 7ms/step - loss: -73722828423168.0000 - accuracy: 0.1664 - val_loss: -134023263289344.0000 - val_accuracy: 0.1677
我的解答思路和尝试过的方法,不写自己思路的,回答率下降 60%
原版照抄官网,之前一段时间我也遇到过这样的问题,当时的正确率也是这个数字。附一下源代码链接:[https://keras.io/examples/nlp/text_classification_from_scratch/](Text classification from scratch)