我在做一个字符识别的神经网络,数据集是用序号命名的图片,标签取自图片的文件名。我想用 ImageDataGenerator
和它的 flow 方法做数据增强,以提高模型的泛化能力,然后把生成的数据传入网络训练;可是这样训练出来 acc ≈ 1/类别数,基本为零。请问哪里出了问题?
# Augmentation generator: randomly shifts each image horizontally and
# vertically. Keras interprets float ranges below 1 as a fraction of the
# image width/height, so 0.1 means shifts of up to 10%.
datagen = ImageDataGenerator(width_shift_range=0.1, height_shift_range=0.1)
def read_train_image(self, name):
    """Load the image file at path ``name`` and return it as an RGB array.

    The image is converted to 'RGB' mode before being turned into a NumPy
    array, so the result always carries three colour channels.
    """
    rgb_image = Image.open(name).convert('RGB')
    pixels = np.array(rgb_image)
    return pixels
def train(self):
    """Load the 'train' and 'test' image folders into model-ready arrays.

    Each file's label is the integer that precedes the first '_' in its
    file name. Images are read with ``read_train_image``, stacked into
    float32 arrays and divided by 255.0; labels are one-hot encoded over
    5788 classes.

    NOTE(review): stacking with ``np.array`` presumably requires every
    image to have the same size -- confirm against the dataset.
    """
    # Training set
    train_img_list = []
    train_label_list = []
    # Test set
    test_img_list = []
    test_label_list = []

    # Both folders are read the same way; only the destination lists differ.
    for folder, images, labels in (
        ('train', train_img_list, train_label_list),
        ('test', test_img_list, test_label_list),
    ):
        for file in os.listdir(folder):
            images.append(self.read_train_image(name=folder + '/' + file))
            # The label is the numeric prefix of the file name.
            labels.append(int(file.split('_')[0]))

    train_img_list = np.array(train_img_list)
    train_label_list = np.array(train_label_list)
    # Fix: the test arrays used to be built from the *train* lists, which
    # threw the loaded test data away and validated on the training data.
    test_img_list = np.array(test_img_list)
    test_label_list = np.array(test_label_list)

    # One-hot encode the integer labels over the 5788 classes.
    train_label_list = np_utils.to_categorical(train_label_list, 5788)
    test_label_list = np_utils.to_categorical(test_label_list, 5788)

    # Convert pixel values to float32 and scale them by 1/255.
    train_img_list = train_img_list.astype('float32')
    test_img_list = test_img_list.astype('float32')
    test_img_list /= 255.0
    train_img_list /= 255.0
以上是图片数据的处理,图片和标签都存到 list 里。下面是用 fit_generator 进行训练:
# Keras 2 measures an epoch in batches (`steps_per_epoch`), not in samples
# (the Keras 1 `samples_per_epoch`). Passing the sample count here made every
# epoch run `batch_size` times more batches than one pass over the data, so
# derive the step count from the batch size instead. Rounding up keeps the
# final, possibly smaller, batch.
batch_size = 2
steps_per_epoch = (len(train_img_list) + batch_size - 1) // batch_size
model.fit_generator(
    self.datagen.flow(x=train_img_list, y=train_label_list, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=(test_img_list, test_label_list),
)