Pytorch Dataloader

這個問題 RuntimeError: stack expects each tensor to be equal size, but got [10, 5] at entry 0 and [23, 5] at entry 1



```python
# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img

# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms


def convert_label(lab):
    result = []
    for str in lab:
        if str == 'leaf':
            result.append(0)
        elif str == 'stem':
            result.append(1)
        elif str == 'soil':
            result.append(2)
    return result


def normalize(img_numpy_array):  # gray scale
    return img_numpy_array / 255.0


def parse_xml(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    bounding_boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)
        bounding_boxes.append([xmin, ymin, xmax, ymax])
        labels.append(name)
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    return bounding_boxes, labels, width, height


""" def write_folder_names_to_text(folder_path, output_file):   #list all name of jpg into txt 
    # Get the list of file names in the folder
    file_names = os.listdir(folder_path)

    # Create or overwrite the output file
    with open(output_file, 'w') as f:
        # Write each file name to a new line in the text file
        for file_name in file_names:
            if file_name.endswith('.jpg'):
                f.write(file_name + '\n')

    print(f"File names written to {output_file} successfully.")


folder_path = "C:/Users/willi/OneDrive/桌面/Dataset/test"
output_file = "C:/Users/willi/OneDrive/桌面/Dataset/test/file_names.txt"
write_folder_names_to_text(folder_path, output_file) """

""" def xml_to_csv(root_dir):                      #XML to CSV format 
    bbox = []
    labels = []
    for xml_file in glob.glob(root_dir + '/*.xml'):
        if os.path.exists(xml_file):
            bboxes, lbls, width, height = parse_xml(xml_file)
            bbox.append(bboxes)
            labels.append(lbls)
            label_in_num = convert_label(lbls)

    print(len(bbox))
    print(len(labels))
    csv_file = os.path.join(root_dir, 'annotations.csv')
    with open(csv_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['filename', 'width', 'height', 'label', 'xmin', 'ymin', 'xmax', 'ymax'])

        for i in range(len(bbox)):
            for j in range(len(bbox[i])):
                writer.writerow([os.path.basename(xml_file), width, height, labels[i][j], bbox[i][j][0], bbox[i][j][1],
                                 bbox[i][j][2], bbox[i][j][3]])

    print(f"CSV file saved at: {csv_file}")  """


def display_image_with_boxes(image, boxes):
    img_with_boxes = image.clone().permute(1, 2, 0).numpy()
    plt.imshow(img_with_boxes)
    ax = plt.gca()
    for bbox in boxes:
        xmin, ymin, xmax, ymax, lbls = bbox
        color = 'blue' if lbls == 0 else 'red'  # blus as leaf , red as stem
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor=color, linewidth=2)
        ax.add_patch(rect)
    plt.show()


def pad_boxes(boxes, max_num_boxes):
    padded_boxes = np.zeros((max_num_boxes, 4), dtype=np.float32)
    for i, box in enumerate(boxes):
        if i >= max_num_boxes:
            break
            padded_boxes[i] = box
    return padded_boxes


voc_label = {'leaf,stem,soil'}
dict_labels = dict(zip(voc_label, range(len(voc_label))))


class Read_voc(Dataset):
    def __init__(self, root_path):
        super(Read_voc, self).__init__()
        self.root_path = root_path
        self.img_idx = []
        self.anno_idx = []
        self.bbox = []
        self.obj_name = []
        train_txt_path = self.root_path + "/file_names.txt"
        self.img_path = self.root_path
        self.anno_path = self.root_path

        train_txt = open(train_txt_path)
        lines = train_txt.readlines()
        for line in lines:
            name = line.strip().split()[0]
            name = name.rstrip('.jpg')
            self.img_idx.append(self.img_path + name + '.jpg')
            self.anno_idx.append(self.anno_path + name + '.xml')

    def __getitem__(self, item):
        img = Image.open(self.img_idx[item])
        img = transforms.ToTensor()(img)
        normalize(img)
        targets = ET.parse(self.anno_idx[item])
        res = []  # Store annotation information, i.e., coordinates of the bounding box's top left and bottom right
        # points and the target's class label
        result = []
        if os.path.exists(self.root_path):
            bboxes, labels, width, height = parse_xml(self.anno_idx[item])
            lbls = convert_label(labels)  # Convert label to number using convert_label() function
            res.append(bboxes)
            res.append(lbls)
            max_num_boxes = 100  # Determine the maximum number of boxes

            for bbox, label in zip(*res):
                num_boxes = len(bbox)
                bbox_padded = pad_boxes(bbox, max_num_boxes)  # Pad the bounding boxes with zeros
                result.append(bbox_padded + [label])

        else:
            raise Exception('Path does not Exist!')

        # Separate coordinates and labels
        boxes = [box[:-1] for box in result]  # Exclude the label from each bounding box
        labels = [box[-1] for box in result]

        # Convert coordinates and labels to NumPy arrays, then to PyTorch tensors
        boxes_array = np.array(boxes, dtype=np.float32)
        labels_array = np.array(labels, dtype=np.float32)
        boxes_tensor = torch.from_numpy(boxes_array)
        labels_tensor = torch.from_numpy(labels_array)

        return img, boxes_tensor

    def __len__(self):
        return len(self.img_idx)


def main():
    root_dir = "C:/Users/willi/OneDrive/桌面/Dataset/test/"
    image_size = (256, 256)
    train_data = Read_voc(root_path=root_dir)  # DataSet Preprocessing
    img, res = train_data[1]
    print(img.size())
    print(len(res))  # row
    print(len(res[0]))  # cols
    print(train_data.__len__())

    # display_image_with_boxes(img, res)
    # Display image and label.
    train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True, num_workers=0, drop_last=False)
    train_features, train_labels = train_dataloader
    print(train_features.size())


main()

```

写回答
好问题 0 提建议
关注问题
分享
邀请回答
编辑收藏删除结题
收藏举报

1条回答默认最新

CSDN-Ada助手 CSDN-AI 官方账号 2023-08-31 14:33

关注

【以下回答由 GPT 生成】

问题分析：该错误是由于使用Dataloader加载数据时，输入的样本尺寸大小不一致导致的。具体来说，在这个例子中，有两个输入的tensor尺寸不匹配，一个尺寸为[10, 5]，另一个尺寸为[23, 5]。为了解决这个问题，我们需要对数据进行预处理，确保所有的输入tensor具有相同的尺寸。

解决方案：以下是解决该问题的步骤： 1. 首先，我们需要确定所有输入tensor的目标尺寸。在这个例子中，所有输入tensor的目标尺寸为[10, 5]。 2. 在parse_xml函数中，获取bounding box后，我们需要根据目标尺寸将其调整为相同的尺寸。可以使用cv2.resize函数将bounding box调整为目标尺寸。 3. 在parse_xml函数中，获取bounding box后，我们需要在labels列表中添加标签。同时，我们需要将标签字符串转换为相应的数字。可以使用convert_label函数将标签字符串转换为数字。 4. 修改Dataset类的实现，确保样本的尺寸都与目标尺寸一致。具体来说，修改__getitem__方法中的代码，将加载的图像和labels调整为目标尺寸。可以使用cv2.resize函数将图像调整为目标尺寸。同时，可以使用torch.Tensor将labels转换为tensor格式。 5. 创建并使用修改后的Dataset类来创建DataLoader实例。 6. 运行修改后的代码，确保加载器能够正常工作。

下面是具体的代码优化：

# Preprocess
import os
import cv2
import xml.etree.ElementTree as ET
import csv
from pathlib import Path
import glob
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from keras.preprocessing.image import img_to_array, array_to_img

# Machine Learning
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms


def convert_label(lab):
    result = []
    for str in lab:
        if str == 'leaf':
            result.append(0)
        elif str == 'stem':
            result.append(1)
        elif str == 'soil':
            result.append(2)
    return result


def normalize(img_numpy_array):  # gray scale
    return img_numpy_array / 255.0


def parse_xml(xml_path, target_size):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    bounding_boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        # Resize bounding box to target size
        xmin = int(xmin * target_size[1] / orig_img_size[1])
        ymin = int(ymin * target_size[0] / orig_img_size[0])
        xmax = int(xmax * target_size[1] / orig_img_size[1])
        ymax = int(ymax * target_size[0] / orig_img_size[0])
        bounding_boxes.append([xmin, ymin, xmax, ymax])

        labels.append(name)

    labels = convert_label(labels)

    return bounding_boxes, labels


class CustomDataset(Dataset):
    def __init__(self, data_folder, target_size):
        self.images = glob.glob(os.path.join(data_folder, '*.jpg'))
        self.annotations = glob.glob(os.path.join(data_folder, '*.xml'))
        self.target_size = target_size

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        image_path = self.images[index]
        annotation_path = self.annotations[index]

        # Load and preprocess image
        img = Image.open(image_path)
        img = img.resize(self.target_size)  # Resize image to target size
        img_numpy_array = img_to_array(img)
        img_tensor = normalize(img_numpy_array)
        img_tensor = torch.from_numpy(img_tensor)

        # Parse XML and preprocess annotations
        bounding_boxes, labels = parse_xml(annotation_path, self.target_size)
        labels = torch.tensor(labels)

        return img_tensor, labels


# Set target size
target_size = (10, 5)

# Set original image size
orig_img_size = (100, 50)

# Set data folder
data_folder = 'path/to/your/data/folder'

# Create dataset instance
dataset = CustomDataset(data_folder, target_size)

# Create dataloader
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Iterate over the dataloader
for images, labels in dataloader:
    # Perform training or evaluation
    pass

请注意，此代码中的parse_xml函数中的图像和bounding box尺寸调整是根据原始图像的大小和目标尺寸进行的。在实际情况中，请根据你的数据和需求进行相应调整。

希望这个解决方案能够解决你遇到的问题。如果还有其他问题，请随时提问。

【相关推荐】

请看👉 ：DataLoader问题解决：RuntimeError: stack expects each tensor to be equal size, but got [3, 200, 200]entry1
除此之外, 这篇博客: pytorch训练模型遇到的问题中的 9、RuntimeError: stack expects each tensor to be equal size, but got [1] at entry 0 and [2] at entry 2 部分也许能够解决你的问题。

如果你已经解决了该问题, 非常希望你能够分享一下解决方案, 写成博客, 将相关链接放在评论区, 以帮助更多的人 ^-^

报告相同问题？

关注问题

PyTorch Dataloader读取时，如何在进程之间传输数据？
2021-10-08 19:39

机器学习与AI生成创作的博客来源｜知乎作者｜Envy 链接｜https://zhuanlan.zhihu.com/p/409629586 编辑｜人工智能前沿讲习最近我在做PyTorch的Dataloader相关的开发，有一个问题让我比较在意：PyTorch的Dataloader在启动多个进程读取样本的...
pytorch 数据预加载
2023-12-04 23:37

SATAN 先生的博客 """** 包装 torch.utils.data.DataLoader, 接收该类的一个实例 loader, 启动一个线程 t, 创建一个队列 qt 将 loader 中的数据预加载到队列 q 中, 以在模型计算时也能启动启动数据加载程序, 节省数据加载时间。
【PyTorch与深度学习】4、PyTorch的Dataset与DataLoader详细使用教程
2024-04-30 15:37

魔理沙偷走了BUG的博客最近做实验发现自己还是基础框架上掌握得不好，于是开始重学一遍PyTorch框架，这个是课程笔记，这个课还是讲的简略，我半小时的课听了一个半小时。
细说PyTorch深度学习：理论、算法、模型与编程实现 03
2024-05-27 21:56

在本节中，我们将深入探讨PyTorch深度学习框架，涵盖其理论基础、核心算法、常见模型以及实际编程实现。PyTorch是一个广泛使用的开源库，由Facebook开发，旨在为机器学习研究提供便利，同时也适合于工业级应用。它以...
PyTorch技术和深度学习——二、PyTorch基础编程
2023-11-11 02:59

千里之行起于足下的博客创建Python列表 a = [1.0, 2.0, 3.0] # 是否为PyTorch张量 print("a = [1.0, 2.0, 3.0]\na是否为PyTorch张量:", torch.is_tensor(a)) print("a[0]: ", a[0]) # 从Python列表中创建PyTorch张量 b = torch.tensor([1.0...
10个PyTorch CUDA编程小技巧，实现高效GPU计算
2025-03-20 17:49

AI科技论谈的博客分享10个PyTorch CUDA编程小技巧，实现高效GPU计算。
Pytorch-pytorch资源
2025-03-12 08:15

PyTorch是一个开源的机器学习库，主要用于深度学习领域，它的编程语言主要是Python。PyTorch的一个重要特点是它对GPU运算的支持，这使得在训练复杂的神经网络模型时可以大大缩短运行时间。PyTorch还支持动态计算图，...
PyTorch学习笔记(6)——DataLoader源代码剖析
2018-08-08 14:40

sooner高的博客当然，因为内容比较多，没有全部展开，这里的主要内容是DataLoader关于数据加载以及分析PyTorch是如何通过Python本身的multiprocessing和Threading等库来保证batch是顺序取出的。额外的内容都会给出链接，在这里不会...
python pytorch 基础操作
2025-06-17 20:38

PyTorch是一种开源的机器学习库，它是基于Python编程语言实现的，其主要特点就是能够提供一个动态的计算图，能够更加直观地进行深度学习模型的构建和训练。本文将对PyTorch的基础操作进行详解。首先，我们需要了解...
深度学习框架pytorch入门与实践源代码.rar
2021-11-21 11:44

1. 数据加载与预处理：PyTorch提供了DataLoader和Dataset类来处理数据。Dataset用于封装数据集，DataLoader则负责批量加载和预处理，如归一化、随机翻转等。 2. 训练流程：典型的训练流程包括初始化模型、定义损失...
没有解决我的问题, 去提问

问题事件

关注

码龄粉丝数原力等级 --

被采纳

被点赞

采纳率
创建了问题 8月31日

Pytorch Dataloader

1条回答 默认 最新

问题事件

1条回答默认最新