def create_model(num_filters, kernel_size, vocab_size, embedding_dim, maxlen):
model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, embedding_dim, input_length=maxlen))
model.add(keras.layers.Conv1D(num_filters, kernel_size, activation='relu'))
model.add(keras.layers.GlobalMaxPooling1D())
model.add(keras.layers.Dense(10, activation='relu'))
model.add(keras.layers.Dense (1,activation='softmax'))
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy'])
return model
param_grid = {"num_filters":[32, 64, 128],
"kernel_size":[3, 5, 7],
"embedding_dim":[50],
"maxlen":[500]}
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV
# 超参数
epochs = 20
embedding_dim = 50
maxlen = 500
output_file = 'data/output.txt'
# 参数选择
for source, frame in df.groupby('source'):
print('Running grid search for data set :', source)
sentences = df['sentence'].values
y = df['label'].values
sentences_train, sentences_test, y_train, y_test = train_test_split(
sentences, y, test_size=0.25, random_state=1000)
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(sentences_train)
X_train = tokenizer.texts_to_sequences(sentences_train)
X_test = tokenizer.texts_to_sequences(sentences_test)
vocab_size = len(tokenizer.word_index) + 1
X_train = pad_sequences(X_train, padding='post', maxlen=maxlen)
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)
# 参数空间
param_grid = {"num_filters" : [32, 64, 128],
"kernel_size" : [3, 5, 7],
"vocab_size" : [vocab_size],
"embedding_dim" : [embedding_dim],
"maxlen" : [maxlen]}
model = KerasClassifier(model=create_model,num_filters= 32 , kernel_size= 5 ,vocab_size=vocab_size, embedding_dim=embedding_dim, maxlen=maxlen,
epochs=epochs, batch_size=64,
verbose=1)
grid = GridSearchCV(estimator=model, param_grid=param_grid,
cv=3, verbose=1, scoring='accuracy',error_score='raise')
print("y_train shape:", y_train.shape)
print("X_train shape:", X_train.shape)
grid_result = grid.fit(X_train, y_train)
# 测试结果
test_accuracy = grid.score(X_test, y_test)
with open(output_file, 'a') as f:
s = ('Running {} data set\nBest Accuracy : '
'{:.4f}\n{}\nTest Accuracy : {:.4f}\n\n')
output_string = s.format(
source,
grid_result.best_score_,
grid_result.best_params_,
test_accuracy)
print(output_string)
f.write(output_string)
这是我的代码片段,明明定义了输出层,且我的y_train的形状也是对的,为何还是出错?
raise AttributeError(
AttributeError: Sequential model 'sequential_7' has no defined outputs yet.. Did you mean: 'output'?