执行模型预测时出现：expected embedding_1_input to have shape (100,)

在执行模型预测时发生代码错误：ValueError: Error when checking input: expected embedding_1_input to have shape (100,) but got array with shape (3,)

测试代码：

from keras.models import load_model
import pandas as pd
import numpy as np
import jieba
import jieba.posseg as pseg
import re
#import csv
import string
#from keras import models
from keras import layers
from keras.utils.np_utils import to_categorical
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
#from sklearn.feature_extraction.text import CountVectorizer
from gensim.models import word2vec
from keras.preprocessing.sequence import pad_sequences
from keras.layers import *
#from keras.models import Model
#from sklearn import metrics
from keras.models import load_model
from keras.models import Sequential
import matplotlib.pyplot as plt
# Saving the model (no save statement is present in this script)


# Load the previously saved Keras model from the path 'TextCNN'.
model = load_model('TextCNN')
# Both frames are read from the same GB18030-encoded CSV file.
train_data = pd.read_csv('酒店分类.csv', encoding='GB18030',lineterminator='\n')
test_data=pd.read_csv('酒店分类.csv',encoding='GB18030', lineterminator='\n')
# NOTE(review): the raw DataFrame is passed straight to predict() without any
# segmentation, tokenizing or padding. This is presumably the call that raises
# the reported ValueError (expected shape (100,), got (3,)); the training code
# feeds the network sequences built with Tokenizer + pad_sequences(maxlen=100),
# so the same preprocessing is presumably required here -- confirm.
predictions = model.predict(test_data)
print(predictions)

模型代码：

import pandas as pd
import numpy as np
import jieba
import jieba.posseg as pseg
import re
#import csv
import string
#from keras import models
from keras import layers
from keras.utils.np_utils import to_categorical
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
#from sklearn.feature_extraction.text import CountVectorizer
from gensim.models import word2vec
from keras.preprocessing.sequence import pad_sequences
from keras.layers import *
#from keras.models import Model
#from sklearn import metrics
from keras.models import load_model
from keras.models import Sequential
import matplotlib.pyplot as plt
# Load the dataset; lineterminator sets the row separator. By default the
# notebook file is saved under the user folder on drive C.
# (Author's note: newTrain.csv and newTest.csv live in the same directory as
# Untitled5.ipynb.)
# NOTE(review): both statements below read the same file '酒店分类.csv', so the
# "test" frame is loaded from the same source as the training frame.
train_data = pd.read_csv('酒店分类.csv', encoding='GB18030',lineterminator='\n')
test_data=pd.read_csv('酒店分类.csv',encoding='GB18030', lineterminator='\n')

# Data preprocessing:
# normalise the class labels with LabelEncoder
def encodeLabel(data):
    """Encode the labels found under the '分类' key of *data*.

    Args:
        data: Object indexable by '分类' that yields an iterable of labels
            (the script passes a pandas DataFrame).

    Returns:
        Whatever ``LabelEncoder().fit_transform`` returns for those labels,
        in their original order. A new encoder is fitted on every call.
    """
    labels = list(data['分类'])
    return LabelEncoder().fit_transform(labels)

# Encode the labels of both frames. encodeLabel builds and fits a new
# LabelEncoder on every call, so the two encodings are fitted independently.
trainLable=encodeLabel(train_data)
testLable=encodeLabel(test_data)
# Print testLable for inspection
print(testLable)
# Collect every review text into one list:
def getReview(data):
    """Return the entries stored under the '评论内容' key of *data* as a list.

    Args:
        data: Object indexable by '评论内容' that yields an iterable of
            reviews (the script passes a pandas DataFrame).

    Returns:
        A new list holding the reviews in iteration order.
    """
    return [text for text in data['评论内容']]

# Extract the review texts of both frames as plain lists.
trainReview=getReview(train_data)
testReview=getReview(test_data)
# Print testReview for inspection
print(testReview)
# Word segmentation:
def stopwordslist():
    """Load the stop-word list from '中文停用词表.txt'.

    The path is relative, so the file must sit in the current working
    directory. It is read as UTF-8.

    Returns:
        A list with one entry per line of the file, each stripped of
        surrounding whitespace (blank lines yield empty strings).

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the handle; the original left it open.
    with open('中文停用词表.txt',encoding='UTF-8') as stopword_file:
        return [line.strip() for line in stopword_file]

def deleteStop(sentence, stopwords=None):
    """Remove stop words from *sentence* and concatenate what is left.

    Args:
        sentence: Iterable of string tokens (the script passes the list
            returned by ``jieba.lcut``).
        stopwords: Optional iterable of stop words. When omitted, the list is
            loaded with ``stopwordslist()``, exactly as before; passing it in
            lets a caller load the file once and reuse it across calls.

    Returns:
        A single string made of every token that is neither a stop word nor
        a bare newline, joined without any separator.
    """
    if stopwords is None:
        stopwords = stopwordslist()
    # A set gives O(1) membership tests instead of scanning the list per token.
    blocked = set(stopwords)
    return "".join(
        token for token in sentence if token not in blocked and token != "\n"
    )
def wordCut(Review):
    """Segment every review into a list of filtered words.

    For each review the ASCII punctuation listed in ``string.punctuation`` is
    stripped, the text is segmented with ``jieba.lcut``, stop words are
    removed by ``deleteStop`` (which returns the remaining tokens as one
    string), and that string is segmented again with part-of-speech tags so
    that words carrying an excluded tag can be dropped.

    Args:
        Review: Iterable of review strings.

    Returns:
        A list with one list of words per review, in input order.
    """
    punctuation_pattern = re.compile('[%s]' % re.escape(string.punctuation))
    # Part-of-speech tags to discard (person names, place names, etc.)
    excluded_flags = ("nr","ns","nt","nz","m","f","ul","l","r","t")
    segmented = []
    for text in Review:
        cleaned = punctuation_pattern.sub('', text)
        without_stopwords = deleteStop(jieba.lcut(cleaned))
        kept = [
            token
            for token, pos in pseg.cut(without_stopwords)
            if pos not in excluded_flags
        ]
        segmented.append(kept)
    return segmented
trainCut=wordCut(trainReview)
testCut=wordCut(testReview)
# Show what the segmented test reviews look like
print(testCut)
# NOTE: this rebinds the module-level name `wordCut` from the segmentation
# function to the combined token lists, so the function can no longer be
# called after this line. The name is kept because later statements read
# `wordCut` as the training corpus.
wordCut=trainCut+testCut
# Persist the segmented corpus to wordCut.txt (one review per line, words
# separated by spaces) so that the same preprocessing can be applied to
# incoming data when the model is deployed with Flask.
# `with` guarantees the handle is closed even if a write fails.
with open('wordCut.txt','w',encoding='UTF-8') as fileDic:
    for i in wordCut:
        fileDic.write(" ".join(i))
        fileDic.write('\n')
# Read the corpus back; the original left this handle open.
# The printed output is very large.
with open('wordCut.txt',encoding='UTF-8') as savedCorpus:
    words = [line.strip().split(" ") for line in savedCorpus.readlines()]
print(words)

maxLen=100
# word2vec training:
# Dimensionality of the word vectors
num_featrues = 100
# Minimum frequency a word needs in order to be considered
min_word_count = 3
# Number of CPU worker threads used for parallel training
num_workers =4
# Size of the context window
context = 4
# Train the word2vec model once on the segmented corpus.
# The previous version trained a first model, called init_sims(replace=True)
# on it, and then immediately rebound `model` to a second model trained with
# the same arguments. The first training run and its unit normalisation were
# therefore discarded; only the un-normalised second model was ever used.
# That effective behaviour is kept here without the redundant training pass.
# NOTE(review): if unit-normalised vectors were actually intended, the
# normalisation has to be applied to this model -- confirm.
model = word2vec.Word2Vec(wordCut, workers=num_workers,size=num_featrues, min_count=min_word_count,window=context)
print(model)
# (The word2vec model is neither saved to nor loaded from disk here.)

# fit_on_texts assigns an integer id to every word of the input texts; ids are
# ordered by frequency, so more frequent words get smaller ids
tokenizer=Tokenizer()
tokenizer.fit_on_texts(words)
vocab = tokenizer.word_index  # word -> id mapping; stop words were already removed from the corpus
# Convert the words to id sequences; sequences shorter than maxLen are
# zero-padded at the front
trainID = tokenizer.texts_to_sequences(trainCut)
testID = tokenizer.texts_to_sequences(testCut)
trainSeq=pad_sequences(trainID,maxlen=maxLen)
testSeq=pad_sequences(testID,maxlen=maxLen)

# One-hot encode the labels
trainCate = to_categorical(trainLable, num_classes=5)
testCate= to_categorical(testLable, num_classes=5)
# Build the Embedding weight matrix from the trained word2vec model: row i
# holds the vector of the word whose tokenizer id is i (row 0 stays zero for
# the padding id). The width follows num_featrues, the word2vec vector size,
# instead of a separate hard-coded 100.
embedding_matrix = np.zeros((len(vocab) + 1, num_featrues))
for word, i in vocab.items():
    try:
        # Look the vector up through the KeyedVectors (`wv`) attribute;
        # indexing the Word2Vec object directly is deprecated in gensim.
        embedding_vector = model.wv[str(word)]
    except KeyError:
        # The word is not in the word2vec vocabulary; keep its all-zero row.
        continue
    embedding_matrix[i] = embedding_vector


# Train the model
# NOTE(review): main_input is not referenced by any later statement visible in
# this script; the Sequential model below takes its input from the Embedding
# layer instead.
main_input = Input(shape=(maxLen,), dtype='float64')
# Word embedding layer: uses the pre-trained word2vec vectors as a fixed
# (non-trainable) weight matrix; 100 is the output word-vector dimension
embedder = Embedding(len(vocab) + 1, 100, input_length=maxLen, weights=[embedding_matrix], trainable=False)
# NOTE: `model` is rebound here from the word2vec model to the Keras network.
model=Sequential()
model.add(embedder)
model.add(Conv1D(256,3,padding='same',activation='relu'))
# NOTE(review): the positional arguments are presumably pool_size=maxLen-5 and
# strides=3 -- confirm against the Keras MaxPool1D signature.
model.add(MaxPool1D(maxLen-5,3,padding='same'))
model.add(Conv1D(32,3,padding='same',activation='relu'))
model.add(Flatten())
model.add(Dropout(0.3))
model.add(Dense(256,activation='relu'))
model.add(Dropout(0.2))
# Five output units, matching num_classes=5 used for the one-hot labels.
model.add(Dense(units=5,activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# 20% of the training sequences are held out for validation.
history=model.fit(trainSeq, trainCate, batch_size=256, epochs=66,validation_split=0.2)
# Save the trained network under the path "TextCNN2".
model.save("TextCNN2")
# Prediction and evaluation
mainModel = load_model('TextCNN2')
# Per-sample probability of belonging to each class
result = mainModel.predict(testSeq)
print(result)
# Index of the highest-probability class for every sample
predictedClasses = np.argmax(result, axis=1)
print(predictedClasses)
score = mainModel.evaluate(testSeq, testCate, batch_size=64)
print(score)
# Draw one figure per metric, each with the training and validation curves.
for trainKey, validKey, figureTitle, yLabel in (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
):
    plt.plot(history.history[trainKey])
    plt.plot(history.history[validKey])
    plt.title(figureTitle)
    plt.ylabel(yLabel)
    plt.xlabel('Epoch')
    plt.legend(['Train','Valid'],loc='upper left')
    plt.show()
from tensorflow.keras.utils import plot_model
import pydotplus
# Arguments: the model, the path the structure diagram is written to, and
# whether layer shapes are shown.
# NOTE(review): plot_model is imported from tensorflow.keras while the layers
# and Sequential above come from the standalone keras package -- confirm the
# two are compatible in the installed versions.
plot_model(model,to_file='textCNN_model1.png',show_shapes=True)

写回答
好问题 0 提建议
追加酬金
关注问题
分享
邀请回答
编辑收藏删除结题
收藏举报

1条回答默认最新

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
Lotay_天天嵌入式领域优质创作者 2023-05-04 15:17
关注
根据你的描述，这个 bug 的原因是模型的输入形状不正确：预期的输入形状是 (100,)，但实际得到的输入形状是 (3,)。

这个问题可能是由于在执行 model.predict() 方法时传入了不正确的输入数据。确保你已经正确地对测试数据进行了预处理并将其转化为模型所需的形状。你可以使用 pad_sequences() 方法来将输入序列填充或截断至指定长度。

如果你还是不能解决问题，可以检查下模型中的输入层是否正确定义了输入形状。

解决无用
评论打赏
分享
举报

评论

按下Enter换行，Ctrl+Enter发表内容

报告相同问题？

关注问题

paddle 卷积神经网络训练时报错InvalidArgumentError: The input tensor X of SumOp must have same shape.. paddle 深度学习神经网络
2022-03-24 03:18

回答 1 已采纳 你传入的张量是252，2，但需要的是21，2，考虑使用reshape把维度变化一下。飞桨的框架我没用过，pytorch是这么操作的，你可以搜一搜类似的改变张量shape的函数
Pytorch调用bertEncoderbaTypeError: forward() missing 1 required positional argument: 'attention_mask' bert pytorch 深度学习
2022-07-07 15:35

回答 2 已采纳已解决，根本原因是数据格式的问题，在使用bert_encoder之前，需要将数据格式转换为BertData()格式
Tensorflow 2.0 : When using data tensors as input to a model, you should specify the `steps_per_epoch` argument. tensorflow 人工智能机器学习深度学习神经网络
2019-11-24 14:39

回答 2 已采纳 There is not only steps_per_epoch but also validation_steps parameter, which you also have to spec
Error when checking : expected main_input to have shape (5,) but got array with shape (1,)
2021-12-03 10:25

kunkun_1230的博客我们在python中训练模型时，通常的数据格式为dataframe。在进行模型训练的时候，由于数据量较大所以进行拟合没有问题 model.fit(X_train,y_train) 但是对于训练好的模型，我们...expected main_input to have shape
pytorch加载训练好的模型进行预测时，为什么又开始训练了 python 自然语言处理
2021-06-01 17:16

回答 2 已采纳 因为你在run里的训练代码没有封装成函数，直接全局执行，import run.py 就开始训练了
embedding的原理 matlab python 深度学习
2022-08-14 21:34

回答 2 已采纳 embedding的原理你可以去搜cbow和skip-gram，说白了就是用两边的字或词预测中间的，或者用中间的字或词预测两边的。第一，每个文本，我们可以取字也可以取词，以字为例，先生成字典{"以":
Flask接口处理post请求时出现问题 flask python 有问必答
2022-12-29 11:35

回答 3 已采纳 send_post方法没有return ，导致打印的结果为None
ValueError: Error when checking input: expected input_image_meta to have shape (None, 14) but got ar
2018-11-08 14:02

LoveWeeknd的博客在跑maskrcnn的时候出现以下错误： Traceback (most recent call last): File "/home/ubuntu/DNN_Projection/Mask_RCNN--Training/samples/nucleus/nucleus.py", line 483, in <module> ...
Flask接口处理post时出现问题 flask python
2022-12-29 11:32

回答 6 已采纳你的函数没有返回值，没有return ,res 就是None
使用DWY100k数据集对UEA进行测试，出现报错：IndexError: index 125000 is out of bounds for axis 0 with size 95500 python tensorflow 深度学习
2023-01-31 00:52

回答 4 已采纳 这个错误是由于使用的数据集大小不匹配导致的。报错提示的 index 125000 超出了索引范围，数组大小为 95500。解决方案是检查数据集的大小是否与代码预期的大小匹配，并修正代码中的数据集引用，
facenet中的train_tripletloss.py报错 python 人工智能
2022-06-07 10:28

回答 1 已采纳 embeddings为4096，后面的为3Xargs.embedding_size=384？4096无法整除以384，所以是无法reshape的，所以你应该是改了什么参数了，改回去看看
tensorflow2.0 Error when checking input: expected encode_input.......
2021-06-05 23:55

curiousChen的博客 ValueError: Error when checking input: expected encode_input to have shape (20,) but got array with shape (1,) tf2.0与tf2.0以上不同，直接传入python原数据格式,eg:list会出错，改成np.array(data) _, ...
gan网络/python/深度学习/有偿 bert python 生成对抗网络
2023-01-01 19:31

回答 5 已采纳 TypeError: init() got an unexpected keyword argument 'nhidden'TypeError:init（）获得了意外的关键字参数“nhidden” 根
已解决: ValueError: Error when checking input: expected input_1 to have 4 dimensions, but got array wit
2023-11-10 23:20

猫头虎-人工智能的博客喵，AI研发的小伙伴们，猫头虎博主今天要和大家探讨一个深度学习模型中常见的维度错误。我们会钻研这个错误背后的原因，并展示一些炫酷的代码，以确保你的神经网络不会在这种小错误上摔跤。准备好你的笔记本，让...
tensorflow2.0学习笔记：embedding_padding_pooling
2020-05-19 07:49

凿井而饮的博客对于序列数据的处理，首先要进行编码(Embedding),然后还需处理其不等长的问题(变长输入问题)。 1.Embedding a. One-hot编码：word -> index \[0,0,...,1,0,...](稀疏) b. (Dense) Embedding：Word -> \[1.2,...
没有解决我的问题, 去提问

悬赏问题

¥15 用matlab 设计一个不动点迭代法求解非线性方程组的代码
¥15 牛顿斯科特系数表表示
¥15 arduino 步进电机
¥20 程序进入HardFault_Handler
¥15 oracle集群安装出bug
¥15 关于#python#的问题：自动化测试
¥20 问题请教！vue项目关于Nginx配置nonce安全策略的问题
¥15 教务系统账号被盗号如何追溯设备
¥20 delta降尺度方法，未来数据怎么降尺度
¥15 c# 使用NPOI快速将datatable数据导入excel中指定sheet，要求快速高效

执行模型预测时出现：expected embedding_1_input to have shape (100,)

1条回答 默认 最新

悬赏问题

1条回答默认最新