A bug occurs while writing the Northeastern University (NEU) surface-defect dataset to the HDF5 file format
from config import gray_config as config
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from gaoimage.io import HDF5DatasetWriter
from imutils import paths
import numpy as np
import progressbar
# import json
import cv2
import os
imagePaths = list(paths.list_images(config.IMAGE_PATH))
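# NOTE: paths.list_images only yields files whose extensions it recognizes as
# images; if config.IMAGE_PATH does not exist relative to the current working
# directory (it is a relative path in gray_config), imagePaths ends up empty
# and the first train_test_split below fails with exactly the n_samples=0
# error reported at the bottom of this post.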
imageLabels = [p.split(os.path.sep)[-2] for p in imagePaths]
le = LabelEncoder()
imageLabels = le.fit_transform(imageLabels)
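# NOTE: using p.split(os.path.sep)[-2] as the label assumes one sub-folder per
# class (e.g. .../images/<class_name>/xxx.bmp); if all NEU-CLS images sit
# directly in a single flat folder, every image receives the same label.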
# split the original image paths into "res" and test sets:
# 240 images per class (1440 total) for res, 60 per class (360 total) for test
(resPaths, testPaths, resLabels, testLabels) = train_test_split(
    imagePaths, imageLabels, test_size=0.2, random_state=42)
# split the res paths into train and validation sets:
# 180 per class (1080 total) for train, 60 per class (360 total) for validation
(trainPaths, valPaths, trainLabels, valLabels) = train_test_split(
    resPaths, resLabels, test_size=0.25, random_state=42)
# construct a list pairing the training, validation, and testing
# image paths along with their corresponding labels and output HDF5 files
datasets = [
    ("train", trainPaths, trainLabels, config.TRAIN_HDF5),
    ("val", valPaths, valLabels, config.VAL_HDF5),
    ("test", testPaths, testLabels, config.TEST_HDF5)
]
# (optional) per-channel mean lists, left commented out here because the
# images are loaded as single-channel grayscale
# (R, G, B) = ([], [], [])
# loop over the dataset tuples
for (dType, paths, labels, outputPath) in datasets:
    # create the HDF5 writer
    print("[INFO] building {}...".format(outputPath))
    writer = HDF5DatasetWriter((len(paths), 200, 200, 1), outputPath)

    # initialize the progress bar
    widgets = ["Building Dataset: ", progressbar.Percentage(), " ",
               progressbar.Bar(), " ", progressbar.ETA()]
    pbar = progressbar.ProgressBar(maxval=len(paths),
                                   widgets=widgets).start()

    # loop over the image paths
    for (i, (path, label)) in enumerate(zip(paths, labels)):
        # load the image as grayscale and add an explicit channel axis
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        image = np.expand_dims(image, axis=2)

        # if we are building the training dataset, compute the mean of each
        # channel in the image and update the respective lists
        # if dType == "train":
        #     (b, g, r) = cv2.mean(image)[:3]
        #     R.append(r)
        #     G.append(g)
        #     B.append(b)

        # add the image and label to the HDF5 dataset
        writer.add([image], [label])
        pbar.update(i)

    # close the HDF5 writer
    pbar.finish()
    writer.close()
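HDF5DatasetWriter above is imported from the custom gaoimage.io module, whose source is not shown here. For context, below is a minimal sketch of a writer exposing the same interface (a constructor taking the dataset dimensions and an output path, plus add() and close()), assuming the usual h5py buffering pattern; the class body, dataKey and bufSize defaults are illustrative assumptions, not the actual gaoimage code.

# hypothetical sketch of an HDF5DatasetWriter with the interface used above;
# the real gaoimage.io implementation may differ
import h5py


class HDF5DatasetWriter:
    def __init__(self, dims, outputPath, dataKey="images", bufSize=1000):
        # dims = (num_samples, height, width, channels); one dataset holds the
        # raw images and a second one holds the integer class labels
        self.db = h5py.File(outputPath, "w")
        self.data = self.db.create_dataset(dataKey, dims, dtype="uint8")
        self.labels = self.db.create_dataset("labels", (dims[0],), dtype="int")
        self.bufSize = bufSize
        self.buffer = {"data": [], "labels": []}
        self.idx = 0

    def add(self, rows, labels):
        # buffer the rows in memory and flush to disk once the buffer is full
        self.buffer["data"].extend(rows)
        self.buffer["labels"].extend(labels)
        if len(self.buffer["data"]) >= self.bufSize:
            self.flush()

    def flush(self):
        # write the buffered rows to the HDF5 datasets and reset the buffer
        i = self.idx + len(self.buffer["data"])
        self.data[self.idx:i] = self.buffer["data"]
        self.labels[self.idx:i] = self.buffer["labels"]
        self.idx = i
        self.buffer = {"data": [], "labels": []}

    def close(self):
        # flush any remaining rows and close the underlying HDF5 file
        if len(self.buffer["data"]) > 0:
            self.flush()
        self.db.close()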
The gray_config module referenced above (config/gray_config.py):

from os import path
IMAGE_PATH = "../zhai/dataset1/NEU-CLS/images"
TRAIN_HDF5 = "../zhai/dataset1/NEU-CLS/hdf5/train.hdf5"
VAL_HDF5 = "../zhai/dataset1/NEU-CLS/hdf5/val.hdf5"
TEST_HDF5 = "../zhai/dataset1/NEU-CLS/hdf5/test.hdf5"
OUTPUT_PATH = "gray_output"
figPath = path.sep.join([OUTPUT_PATH, "ms_test1.png"])
jsonPath = path.sep.join([OUTPUT_PATH, "ms_test1.json"])
DATASET_MEAN = "gray_output/NEU_DET_1_mean.json"
Error message: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.
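The error is raised by the first train_test_split call (test_size=0.2): n_samples=0 means imagePaths is an empty list, i.e. paths.list_images(config.IMAGE_PATH) found no image files. IMAGE_PATH is a relative path, so it is resolved against the current working directory; running the script from a different directory, a typo in the path, or files whose extensions imutils does not recognize would all produce an empty list. A quick sanity check, assuming the gray_config module shown above:

# diagnostic snippet: verify that IMAGE_PATH resolves and actually contains images
import os
from imutils import paths
from config import gray_config as config

print("working directory:", os.getcwd())
print("IMAGE_PATH exists:", os.path.isdir(config.IMAGE_PATH))

imagePaths = list(paths.list_images(config.IMAGE_PATH))
print("number of images found:", len(imagePaths))
if len(imagePaths) == 0:
    # either the directory does not exist relative to the working directory,
    # or it contains no files with extensions imutils recognizes
    raise SystemExit("no images found, check IMAGE_PATH and the working directory")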