import pandas as pd
import numpy as np
import os
from concurrent.futures import ProcessPoolExecutor, as_completed
# Folder that is scanned for the input .csv files.
csv_folder_path = r'D:\文档\work_document\xianyu_orders\240701\archive (11)\london_clean'
# Excel workbook that the combined result table is written to.
excel_file_path = r'D:\文档\work_document\xianyu_orders\240701\archive (11)\新建 Microsoft Excel 工作表(1).xlsx'
def process_file(file_path):
    """Build one output row from a single CSV file.

    The row starts with an identifier taken from the file name (the third
    ``_``-separated field of the name without its extension, e.g.
    ``cleaned_household_MAC005567.csv`` -> ``MAC005567``) followed by the
    first 48 values of the CSV's fifth column.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list ``[identifier, v0, ..., v47]``, or ``None`` when the file
        cannot be used (unexpected file name, fewer than 48 data rows,
        fewer than five columns, or any read/parse error). The reason is
        printed; no exception escapes, so one bad file never aborts the run.
    """
    try:
        stem = os.path.splitext(os.path.basename(file_path))[0]
        name_parts = stem.split('_')
        if len(name_parts) < 3:
            print(
                f"Error processing {file_path}: "
                "file name has fewer than three '_'-separated fields"
            )
            return None
        new_StdorToU = name_parts[2]

        df_csv = pd.read_csv(file_path)
        # Fifth column (position 4), first 48 rows; shorter files yield fewer.
        values = df_csv.iloc[0:48, 4].to_numpy()
        if values.size != 48:
            # Reject explicitly instead of relying on an IndexError raised
            # by indexing into an empty DataFrame.
            print(
                f"Error processing {file_path}: "
                f"expected 48 values in column 5, found {values.size}"
            )
            return None

        # tolist() yields plain Python scalars, which are cheap to send back
        # from a worker process.
        return [new_StdorToU] + values.tolist()
    except Exception as e:
        # Worker boundary: report and signal failure with None so the caller
        # can skip this file.
        print(f"Error processing {file_path}: {e}")
        return None
def main():
    """Process every CSV in ``csv_folder_path`` in parallel and write one Excel file.

    Each CSV is handed to ``process_file`` in a worker process. Rows that come
    back as ``None`` (the worker already reported the problem) are skipped,
    and the remaining rows are written to ``excel_file_path``.
    """
    # List the inputs before starting the pool so a bad folder path fails
    # early, without any worker processes having been spawned.
    file_paths = [
        os.path.join(csv_folder_path, f)
        for f in os.listdir(csv_folder_path)
        if f.endswith('.csv')
    ]

    results = []
    with ProcessPoolExecutor() as executor:
        # Map each future back to its file so errors can name the file.
        future_to_file = {
            executor.submit(process_file, file_path): file_path
            for file_path in file_paths
        }
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                data = future.result()
            except Exception as e:
                # Raised when the worker itself failed (e.g. a broken pool).
                print(f"Error processing {file_path}: {e}")
            else:
                if data is not None:
                    results.append(data)

    # Convert the collected rows to a DataFrame and write them to Excel.
    df = pd.DataFrame(results)
    if not df.empty:
        df.to_excel(excel_file_path, index=False)
        print('所有CSV文件的数据提取和写入操作完成。')
    else:
        print('没有数据写入Excel。')


# The guard is required for process pools: with the "spawn" start method
# (the default on Windows) every worker re-imports this module. Without the
# guard each worker would try to create its own pool during import and die,
# which surfaces in the parent as "A process in the process pool was
# terminated abruptly while the future was running or pending."
if __name__ == '__main__':
    main()
# 求道友指教:以上代码为什么会报下面这个错误?求解决方法。
# Error processing D:\文档\work_document\xianyu_orders\240701\archive (11)\london_clean\cleaned_household_MAC005567.csv: A process in the process pool was terminated abruptly while the future was running or pending.