在dataset文件夹中,有36个tif文件,命名为tile_01.tif到tile_36.tif,都是包含六个波段数据的卫星图像,来源于Kaggle Sentinel-2野火数据集。我想使用MPI读入这些文件,将这些文件分配给各个进程,读取每个文件第一个波段并计算其均值,而后使用MPI_Reduce再对这些均值取平均。但是在分配文件给进程时,我发现文件路径没有被正确地传输。这种情况应该怎样处理?
我的代码如下:
#include <algorithm>
#include <chrono>
#include <cstring>
#include <filesystem>
#include <iostream>
#include <string>
#include <system_error>
#include <vector>

#include <cpl_conv.h>
#include <gdal.h>
#include <gdal_priv.h>
#include <mpi.h>
double calculate_band_mean(const std::string& filename) {
GDALDatasetH dataset = GDALOpen(filename.c_str(), GA_ReadOnly);
if (dataset == nullptr) {
std::cerr << "Failed to open file: " << filename << std::endl;
return 0.0; // 返回 0 表示读取失败
}
GDALRasterBandH band = GDALGetRasterBand(dataset, 1);
int x_size = GDALGetRasterBandXSize(band);
int y_size = GDALGetRasterBandYSize(band);
std::vector<float> data(x_size * y_size);
double sum = 0;
int count = 0;
for (float value : data) {
if (value != 0 && value <= 1 && value >= -1) {
sum += value;
count++;
}
}
GDALClose(dataset);
return (count > 0) ? (sum / count) : 0.0;
}
int main(int argc, char **argv) {
MPI_Init(&argc, &argv);
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
GDALAllRegister();
auto start_time = std::chrono::high_resolution_clock::now();
// 文件路径
std::string input_directory = "dataset"; // 数据集路径为 "dataset"
std::vector<std::string> image_files;
// 仅在 rank 0 时获取文件列表
if (rank == 0) {
for (const auto &entry : std::filesystem::directory_iterator(input_directory)) {
if (entry.path().extension() == ".tif") {
image_files.push_back(entry.path().string());
}
}
}
// 广播文件数量
int image_count = image_files.size();
MPI_Bcast(&image_count, 1, MPI_INT, 0, MPI_COMM_WORLD);
// 计算每个进程要处理的文件数量
std::vector<int> sendcounts(size, 0);
std::vector<int> displs(size, 0);
int files_per_process = image_count / size;
int remaining_files = image_count % size;
for (int i = 0; i < size; ++i) {
sendcounts[i] = files_per_process + (i < remaining_files ? 1 : 0);
displs[i] = (i > 0) ? displs[i - 1] + sendcounts[i - 1] : 0;
}
// 创建用于发送的字符数组
std::vector<char> all_image_files;
if (rank == 0) {
int total_length = 0;
for (const auto& filename : image_files) {
total_length += filename.length() + 1; // +1 for null terminator
}
all_image_files.resize(total_length);
char* ptr = all_image_files.data();
for (const auto& filename : image_files) {
strcpy(ptr, filename.c_str());
ptr += filename.length() + 1; // Move pointer to next string position
}
}
// 使用 MPI_Scatterv 分发文件路径
std::vector<char> local_image_files(sendcounts[rank] * 256); // 假设每个路径最长256个字符
MPI_Scatterv(all_image_files.data(), sendcounts.data(), displs.data(), MPI_CHAR,
local_image_files.data(), sendcounts[rank] * 256, MPI_CHAR, 0, MPI_COMM_WORLD);
// 将接收到的字符数组转换为字符串
std::vector<std::string> local_files;
for (int i = 0; i < sendcounts[rank]; ++i) {
local_files.push_back(std::string(local_image_files.data() + i * 256));
}
// 处理接收到的文件
double local_mean = 0.0;
for (const auto& file : local_files) {
local_mean += calculate_band_mean(file);
}
// 使用 MPI_Reduce 收集均值
double global_mean = 0.0;
MPI_Reduce(&local_mean, &global_mean, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
// 在 rank 0 时计算平均值
if (rank == 0) {
global_mean /= image_count; // 计算整体平均值
std::cout << "Mean of the first band across all images: " << global_mean << std::endl;
}
auto end_time = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end_time - start_time;
if (rank == 0) {
std::cout << "Total execution time: " << duration.count() << " seconds" << std::endl;
}
MPI_Finalize();
return 0;
}
编译运行后,终端报错如下图: