璠宝今天写代码了吗 2024-04-23 14:00 采纳率: 0%
浏览 20

图文检索模型内存报错!用的ResNet50训练模型

处理完所有向量等待的时间太痛苦了!!因为有五万张照片
结果最后报错了 我应该怎么优化一下呀
这个保存向量是我刚刚加的 如果有错希望可以帮我一起改正
麻烦了!


import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model

# 训练权重
num_classes = 1000

# 构建ResNet50模型
base_model = ResNet50(weights=None, include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# 加载预训练的ResNet-50模型
base_model = ResNet50(weights=None, include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

# 读取word_test.csv文件
file_path = "C:/Users/wyf/Desktop/泰迪杯/B题-全部数据/B题-数据/附件2/word_test.csv"
word_test = pd.read_csv(file_path, encoding='utf-8')

# 图像路径
image_path = "C:/Users/wyf/Desktop/泰迪杯/B题-全部数据/B题-数据/附件2/ImageData"

# 图像特征提取
model = ResNet50(weights='imagenet', include_top=False)

def image_feature_extraction(image_path):
    img = image.load_img(image_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    features = model.predict(img_array)
    return features.flatten()

# 文本特征提取
tfidf = TfidfVectorizer()
word_embeddings = tfidf.fit_transform(word_test['caption'])

# 保存图像特征向量
image_features = np.array([image_feature_extraction(os.path.join(image_path, img)) for img in os.listdir(image_path)])
np.save('image_features.npy', image_features)

# 保存文本特征向量
pd.DataFrame(word_embeddings.toarray()).to_csv('word_embeddings.csv', index=False)

# 多模态特征融合
image_features = np.array([image_feature_extraction(os.path.join(image_path, img)) for img in os.listdir(image_path)])
combined_features = np.hstack((image_features, word_embeddings.toarray()))

# 图像检索
result1 = []

for text in word_test['caption']:
    text_embedding = tfidf.transform([text])
    similarities = []

    for i, feature in enumerate(combined_features):
        similarity = cosine_similarity([feature], text_embedding)
        similarities.append((i, similarity))

    similarities.sort(key=lambda x: x[1], reverse=True)
    top_five_images = [os.listdir(image_path)[i] for i, _ in similarities[:5]]
    result1.append(top_five_images)

result1_df = pd.DataFrame(result1, columns=['image1', 'image2', 'image3', 'image4', 'image5'])
result1_df.to_csv('result1.csv', index=False)

下面是我的报错

...

1/1 [==============================] - 0s 60ms/step
1/1 [==============================] - 0s 56ms/step
Traceback (most recent call last):
  File "c:/Users/wyf/Desktop/泰迪杯/B题-示例数据/2.py", 
line 53, in <module>
    image_features = np.array([image_feature_extraction(os.path.join(image_path, img)) for img in os.listdir(image_path)])
numpy.core._exceptions.MemoryError: Unable to allocate 18.7 GiB for an array with shape (50000, 100352) and data type float32

  • 写回答

8条回答 默认 最新

  • 专家-赤兔[在线] 优质创作者: 编程框架技术领域 2024-04-23 14:00
    关注

    引自免费微信小程序:皆我百晓生

    在你的代码中,内存错误是因为尝试一次性加载所有的图像特征到内存中。为了解决这个问题,你可以采用分批处理的方式,每次处理一部分图像,而不是一次性处理所有图像。

    这里是一个修改后的版本,使用了生成器函数来逐批处理图像特征的计算,这样就可以避免一次性加载所有特征到内存中:

    import concurrent.futures
    import glob
    import os
    import pandas as pd
    import numpy as np
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity
    import tensorflow as tf
    from tensorflow.keras.applications import ResNet50
    from tensorflow.keras.applications.resnet50 import preprocess_input
    from tensorflow.keras.preprocessing import image
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
    from tensorflow.keras.models import Model
    
    # 训练权重
    num_classes = 1000
    
    # 构建ResNet50模型
    base_model = ResNet50(weights=None, include_top=False)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    
    # 读取word_test.csv文件
    file_path = "C:/Users/wyf/Desktop/泰迪杯/B题-全部数据/B题-数据/附件2/word_test.csv"
    word_test = pd.read_csv(file_path, encoding='utf-8')
    
    # 图像路径
    image_path = "C:/Users/wyf/Desktop/泰迪杯/B题-全部数据/B题-数据/附件2/ImageData"
    
    # 文本特征提取
    tfidf = TfidfVectorizer()
    word_embeddings = tfidf.fit_transform(word_test['caption'])
    
    # 图像特征提取生成器
    def batch_image_feature_extraction(image_dir, batch_size=1000):
        images = glob.glob(os.path.join(image_dir, '*'))
        for start in range(0, len(images), batch_size):
            end = min(start + batch_size, len(images))
            batch_images = images[start:end]
            batch_features = np.array([image_feature_extraction(img) for img in batch_images])
            yield batch_features
    
    # 保存图像特征向量(分批处理)
    batch_size = 1000
    image_features_list = []
    for batch_features in batch_image_feature_extraction(image_path, batch_size=batch_size):
        image_features_list.append(batch_features)
    image_features = np.concatenate(image_features_list)
    np.save('image_features.npy', image_features)
    
    # 保存文本特征向量
    pd.DataFrame(word_embeddings.toarray()).to_csv('word_embeddings.csv', index=False)
    
    # 多模态特征融合
    combined_features = np.hstack((image_features, word_embeddings.toarray()))
    
    # 图像检索
    result1 = []
    
    for text in word_test['caption']:
        text_embedding = tfidf.transform([text])
        similarities = []
    
        for i, feature in enumerate(combined_features):
            similarity = cosine_similarity([feature], text_embedding)
            similarities.append((i, similarity))
    
        similarities.sort(key=lambda x: x[1], reverse=True)
        top_five_images = [os.listdir(image_path)[i] for i, _ in similarities[:5]]
        result1.append(top_five_images)
    
    result1_df = pd.DataFrame(result1, columns=['image1', 'image2', 'image3', 'image4', 'image5'])
    result1_df.to_csv('result1.csv', index=False)
    
    def image_feature_extraction(image_path):
        img = image.load_img(image_path, target_size=(224, 224))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)
        features = model.predict(img_array)
        return features.flatten()
    

    这段代码将图像特征的提取改为了一个生成器函数batch_image_feature_extraction,它会按批次处理图像,并返回特征数组。通过设置合适的batch_size(例如,这里是1000),可以减少对内存的需求。同时,我也调整了保存图像特征向量的部分,使其能适应新的生成器函数。这样应该可以解决你的内存问题。

    评论 编辑记录

报告相同问题?

问题事件

  • 创建了问题 4月23日