版本:python3.7 numpy 1.18.2
利用word2vec进行词向量训练时候遇到的问题
load_data方法是可以运行的
控制台打印出 训练中 然后就报错了
下面是错误信息:
Traceback (most recent call last):
File "F:/Word2Vec-sentiment-master/Word2Vec-sentiment-master/wvtrain.py", line 65, in
Train(y_test, window=3).train()
File "F:/Word2Vec-sentiment-master/Word2Vec-sentiment-master/wvtrain.py", line 41, in train
sentence, size=self.size, window=self.window, workers=2, sg=0
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32\lib\site-packages\gensim\models\word2vec.py", line 783, in init
fast_version=FAST_VERSION)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32\lib\site-packages\gensim\models\base_any2vec.py", line 759, in init
self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32\lib\site-packages\gensim\models\base_any2vec.py", line 936, in build_vocab
sentences=sentences, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32\lib\site-packages\gensim\models\word2vec.py", line 1592, in scan_vocab
total_words, corpus_count = self._scan_vocab(sentences, progress_per, trim_rule)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python37-32\lib\site-packages\gensim\models\word2vec.py", line 1575, in _scan_vocab
for word in sentence:
TypeError: 'numpy.float64' object is not iterable
class Train(object):
def __init__(self, data_path, size=50, window=5):
self.size = size
self.window = window
self.data_path = data_path
#self.min_count=min_count
def load_data(self):
print('loading data...')
sentence = np.load(self.data_path)
print('loading data done')
return sentence
def train(self):
# Load file
sentence = self.load_data()
print("训练中...")
model = word2vec.Word2Vec(
sentence, size=self.size, window=self.window, workers=2, sg=0
)
# model = word2vec.Word2Vec(sentence, sg=0, hs=1, min_count=1, window=5, size=300)
# Save model
train_model=model(x_train,'train.pkl')
test_model=model(x_test,'test.pkl')
#model_name = 'model_gensim.bin'
model.wv.save_word2vec_format(train_model)
model.wv.save_word2vec_format(test_model)
print("训练完成,模型已存储到{}".format(train_model))
print("训练完成,模型已存储到{}".format(test_model))
if name == "__main__":
# file_path = 'Hupu.txt'
# Train(file_path, window=3).train()
x_train = 'x_train_data.npy'
x_test = 'x_test_data.npy'
y_train = 'y_train_data.npy'
y_test = 'y_test_data.npy'
Train(y_test, window=3).train()
Train(x_train, window=3).train()
Train(y_train, window=3).train()
Train(x_test,window=3).train()