# 为什么执行完这个json文件合并,就显示个没完没了json的内容?
import os
import json
from datetime import datetime
def merge_filtered_JSON_files(directory_path, output_path):
merged_data = []
processed_files = 0
# Traverse through files in the directory
for filename in os.listdir(directory_path):
file_path = os.path.join(directory_path, filename)
# Filter out JSON files larger than 700MB
if os.path.isfile(file_path) and filename.endswith('.json') and \
os.path.getsize(file_path) <= 700 * 1024 * 1024:
# Check for duplicate filenames
name, extension = os.path.splitext(filename)
new_filename = filename
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
counter = 1
while new_filename in [item.get('filename', '') for item in merged_data]:
new_filename = f"{name}_{counter}_{timestamp}{extension}"
counter += 1
# Open and read the JSON file
with open(file_path, 'r') as file:
data = json.load(file)
# Add data to the merged list (deduplicate)
if data not in merged_data:
data['filename'] = new_filename
merged_data.append(data)
processed_files += 1
print(f"Processed {processed_files} files")
# Save the merged data as a JSON file
with open(output_path, 'w') as output_file:
json.dump(merged_data, output_file, indent=4)
def recursive_print_data(data):
if isinstance(data, dict):
for key, value in data.items():
print(f"{key}: {value}")
recursive_print_data(value)
elif isinstance(data, list):
for item in data:
recursive_print_data(item)
def save_text_file(data, output_path):
with open(output_path, 'w') as file:
if isinstance(data, dict):
for key, value in data.items():
file.write(f"Question: {key}\n")
file.write(f"Answer: {value}\n\n")
elif isinstance(data, list):
for item in data:
question = item.get('question', '')
answer = item.get('answer', '')
file.write(f"Question: {question}\n")
file.write(f"Answer: {answer}\n\n")
else:
file.write(f"{data}\n")
# 目录路径和输出路径
directory_path = '/path/to/your/directory'
output_json_path = '/path/to/your/output.json'
output_txt_path = '/path/to/your/output.txt'
# 合并过滤后的 JSON 文件
merge_filtered_JSON_files(directory_path, output_json_path)
# 打开保存的 JSON 文件并列出基本信息
with open(output_json_path, 'r') as file:
merged_data = json.load(file)
recursive_print_data(merged_data)
# 将对话或问答形式保存为文本文件
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
output_txt_path_with_timestamp = f"{output_txt_path}_{timestamp}"
save_text_file(merged_data, output_txt_path_with_timestamp)
为什么执行完这个json文件合并,就显示个没完没了json的内容?
- 写回答
- 好问题 0 提建议
- 关注问题
- 邀请回答
-
3条回答 默认 最新
Marst·Zhang 2023-07-13 09:14关注问题点: 数据合并后,一直打印信息.
分析: recursive_print_data 这个函数执行输出打印功能.
recursive_print_data 本身还自我嵌套
解决办法: 注释掉第76行的代码,让recursive_print_data函数不要执行.本回答被题主选为最佳回答 , 对您是否有帮助呢?解决 无用评论 打赏 举报