import pandas as pd
import random
import string
import os
from openpyxl import Workbook
# 询问要生成的文件大小(以MB为单位)
while True:
try:
file_size = int(input("请输入要生成的Excel文件大小(单位:MB):"))
if file_size > 0:
break
else:
print("请输入一个正整数作为文件大小!")
except ValueError:
print("请输入一个有效的整数作为文件大小!")
# 计算要生成的行数
num_rows = int(file_size * 1024 * 1024 / 100) # 假设每行占用100字节
print(f"将生成 {num_rows} 行数据")
# 生成随机数据
data = {
'列1': [''.join(random.choices(string.ascii_uppercase + string.digits, k=10)) for _ in range(num_rows)],
'列2': [random.randint(1, 100) for _ in range(num_rows)],
'列3': [random.uniform(0, 1) for _ in range(num_rows)]
}
# 创建DataFrame
df = pd.DataFrame(data)
# 指定要保存的文件路径和文件名前缀
output_folder = os.getcwd() # 当前文件夹路径
output_file_prefix = f'random_data_{file_size}M.xlsx'
# 确保文件名唯一
output_file_path = os.path.join(output_folder, output_file_prefix)
file_count = 1
while os.path.exists(output_file_path):
output_file_prefix = f'random_data_{file_size}M_{file_count}.xlsx'
output_file_path = os.path.join(output_folder, output_file_prefix)
file_count += 1
# 判断文件路径是否正确并可写入
try:
with open(output_file_path, 'w') as test_file:
pass
except IOError:
print("指定的文件路径无法写入,请检查文件路径是否正确或具有写入权限!")
exit()
# 拆分数据表
chunk_size = 1048576 # 每个表的最大行数
num_chunks = num_rows // chunk_size + 1
wb = Workbook()
sheet_num = 1
for i in range(num_chunks):
start = i * chunk_size
end = min((i + 1) * chunk_size, num_rows)
df_chunk = df[start:end]
if i > 0 and i % 100 == 0:
wb.save(output_file_path)
print(f"已导出 {i} 个Sheet")
sheet_name = f'Sheet{sheet_num}'
sheet_num += 1
with pd.ExcelWriter(output_file_path, engine='openpyxl', mode='a') as writer:
writer.book = wb
df_chunk.to_excel(writer, sheet_name=sheet_name, index=False)
writer.save()
print(f"已导出 Sheet: {sheet_name}")
wb.save(output_file_path)
print("数据导出完成")
然后产生报错为
Traceback (most recent call last):
File "C:\Users\xx\Desktop\新建文件夹 (2)\1.py", line 72, in <module>
with pd.ExcelWriter(output_file_path, engine='openpyxl', mode='a') as writer:
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\site-packages\pandas\io\excel\_openpyxl.py", line 75, in __init__
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\site-packages\openpyxl\reader\excel.py", line 344, in load_workbook
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\site-packages\openpyxl\reader\excel.py", line 123, in __init__
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\site-packages\openpyxl\reader\excel.py", line 95, in _validate_archive
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\zipfile.py", line 1302, in __init__
self._RealGetContents()
File "C:\Users\xx\AppData\Local\Programs\Python\Python311\Lib\zipfile.py", line 1369, in _RealGetContents
raise BadZipFile("File is not a zip file")
zipfile.BadZipFile: File is not a zip file