Balesin 2025-07-03 15:12 采纳率: 0%
浏览 13

pyinstaller 打包langchain应用

问题:

使用pyinstaller打包PyQT + langchain + chroma 的项目在invoke时莫名退出

摘要:

使用PyQT、 langchain 和 chroma 构建了一个本地的个人知识库应用,并最终使用pyinstaller把应用打包为exe。当前的问题是,在开发环境中一切正常可以进行知识库的创建以及对某个知识库进行问答,但是当我打包成exe时,对知识库的创建和查询时程序就自动退出了,try catch无异常输出。同时,当我单独打包langchain相关的内容发现功能也是正常的。

描述结果
只使用langchain(开发环境和打包后)正常
langchain + PyQT (开发环境)正常
langchain + PyQT (打包后)失败

详细信息:

初始化 client_manager

            # Embedding model name is read from the user settings ("embedding_model").
            embeddings = OllamaEmbeddings(
                model=self.setting_data.get("embedding_model")
            )

            # Vector store persisted under self.vector_store_path; Chroma's
            # anonymized telemetry is switched off through client_settings.
            chroma_client = Chroma(
                persist_directory=self.vector_store_path,
                embedding_function=embeddings,
                client_settings=chromadb.config.Settings(
                    anonymized_telemetry=False
                )
            )

            # ChromaCollectionManager is project code that is not shown here; it is
            # given the store, the embedding function and the same
            # persistence/telemetry options as a plain dict.
            self.client_manager = ChromaCollectionManager(
                chroma_client,
                embeddings,
                {
                    "persist_directory": self.vector_store_path,
                    "anonymized_telemetry": False
                }
            )

PyQT中调用

# Chat model served by an Ollama instance on localhost:11434; the model name
# falls back to "qwen3:0.6b" when the "model" setting is absent.
# NOTE(review): confirm that ChatOllama honours `max_tokens`; its documented
# output-length option appears to be `num_predict` - TODO verify.
self.llm = ChatOllama(
            base_url="http://localhost:11434",
            model=self.setting_data.get("model", "qwen3:0.6b"),
            temperature=self.setting_data.get("temperature"),
            max_tokens=self.setting_data.get("max_length")
)
# ChatWithKnowledge is a QThread: the variable `context` is passed as its `store`
# argument, which run() sends to the chain as the query text.
self.langchain_thread = ChatWithKnowledge(self.llm,context, self.chroma_collection)
# Results and failures are delivered back through the thread's signals.
self.langchain_thread.response_signal.connect(self.on_ollama_response)
self.langchain_thread.error_signal.connect(self.on_ollama_error)
self.langchain_thread.start()

ChatWithKnowledge


class ChatWithKnowledge(QThread):
    """Worker thread that answers one question against a Chroma knowledge base.

    Signals:
        response_signal: emitted with the answer text followed by an HTML list
            of up to three referenced documents when the chain succeeds.
        error_signal: emitted with a human-readable message when the chain
            cannot be built or the query fails.
    """

    response_signal = pyqtSignal(str)
    error_signal = pyqtSignal(str)

    def __init__(self, llm, store, chroma):
        """Keep the collaborators used by run().

        Args:
            llm: chat model handed to RetrievalQA.
            store: the question text; run() sends it as the chain's "query".
            chroma: vector store object exposing as_retriever().
        """
        super().__init__()
        self.llm = llm
        self.store = store
        self.chroma = chroma

    def extract_real_answer(self, content):
        """Strip the model's "think" section and return only the real answer.

        When the content contains "</think>", only the text after the last
        occurrence is kept (whitespace-trimmed); otherwise the content is
        returned unchanged.
        """
        if "</think>" in content:
            return content.split("</think>")[-1].strip()
        return content

    def run(self):
        """Build the RetrievalQA chain, run the query and emit exactly one signal.

        Emits response_signal on success and error_signal on any failure, so
        the UI is always notified. Exceptions never escape this method.
        """
        # Local import: the file's import block is not visible from here.
        import traceback

        try:
            print("初始化 retriever chain", flush=True)
            qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.chroma.as_retriever(search_kwargs={"k": 5}),
                return_source_documents=True
            )

            print("初始化知识库", flush=True)
            if not qa_chain:
                print("知识库未初始化", flush=True)
                self.error_signal.emit("知识库未初始化")
                return

            # Flush before the call so this line is visible even if the
            # process dies inside invoke().
            print(f"我的问题是:{self.store}", flush=True)
            result = qa_chain.invoke({"query": self.store})
            print("回答:", result)

            answer = result["result"]
            sources = result.get("source_documents") or []
            # Only the first three source documents are listed.
            reference_list = [
                f"<li>{os.path.basename(doc.metadata.get('source', '未知来源'))}</li>"
                for doc in sources[:3]
            ]
            reference_html = f"<h4>相关文档:</h4><ol>{''.join(reference_list)}</ol>"

            print("===response result===>", answer)
            self.response_signal.emit(self.extract_real_answer(answer) + reference_html)
        except Exception as e:
            # Report inside the handler: `e` is unbound once the except clause
            # ends, so emitting after it would raise UnboundLocalError.
            print("==发生错误==>", e, flush=True)
            traceback.print_exc()
            self.error_signal.emit(f"出现错误,请重试 {e}")

打包配置

# -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_submodules,collect_dynamic_libs
# Every submodule of these packages is added to the hidden imports.
_submodules = {
    package: collect_submodules(package)
    for package in (
        'chromadb',
        'numpy',
        'langchain',
        'langchain.chains',
        'langchain_ollama',
        'langchain_community',
        'langchain_core',
        'llama_cpp',
    )
}

# Dynamic libraries shipped inside these packages are bundled as binaries.
_native_libs = [
    lib
    for package in ('chromadb', 'llama_cpp', 'hnswlib')
    for lib in collect_dynamic_libs(package)
]

# Hand-written hidden imports ('sqlite3' appears twice, as in the original list).
_hidden_imports = [
    'PyQt5',
    'PyQt5.QtCore',
    'PyQt5.QtGui',
    'PyQt5.QtWidgets',
    'docx',
    'PyPDF2',
    'speech_recognition',
    'json',
    'pypdf',
    'sqlite3',
    'chromadb.db.duckdb',
    'hnswlib',
    'faiss',
    "chromadb",
    "chromadb.db",
    "chromadb.utils",
    "chromadb.api",
    "chromadb.ingest",
    "chromadb.segment",
    "chromadb.telemetry",
    "chromadb.config",
    "chromadb.utils.embedding_functions",
    "duckdb",
    "sqlite3",
    "sentence_transformers",
    "tiktoken",
    "langchain",
    "langchain_community",
    "ollama_chroma",
]
# Collected submodules are appended after the explicit names, in this order.
for package in (
    'llama_cpp',
    'numpy',
    'chromadb',
    'langchain',
    'langchain.chains',
    'langchain_ollama',
    'langchain_community',
    'langchain_core',
):
    _hidden_imports.extend(_submodules[package])

a = Analysis(
    ['main.py'],
    pathex=['D:\\workspace\\app'],
    binaries=_native_libs,
    datas=[],
    hiddenimports=_hidden_imports,
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)

pyz = PYZ(a.pure)

# Binaries and data files are passed to EXE directly (no COLLECT step).
# debug=True and console=True keep the bootloader log and stdout visible.
_exe_options = dict(
    name='Client',
    debug=True,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    upx_exclude=[],
    runtime_tmpdir=None,
    console=True,
    icon='favicon.ico',
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
    windows_api=True,
)

exe = EXE(pyz, a.scripts, a.binaries, a.datas, [], **_exe_options)

运行日志

[PYI-43896:DEBUG] LOADER: running main.py
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given
['dna-extract6', 'test4', 'test7', 'dna-extract', 'dna-extract3', 'test5', 'test-word', 'test2', 'how2install', 'dna-extract10', 'install', 'dna-extract8', 'install2', 'test1', 'dna-extract11', 'dna-extract2', 'dna-extract12', 'dna-extract9', 'test8', 'test6', 'langchain']
======开始使用知识库进行问答====
====我的问题是=== 使用知识库了
[PYI-47092:DEBUG] LOADER: child process has finished - exiting the wait loop!
[PYI-47092:DEBUG] LOADER: made it out of the waiting loop!
[PYI-47092:DEBUG] LOADER: waiting 250 ms in case we receive WM_QUERYENDSESSION...
[PYI-47092:DEBUG] LOADER: waited 49 ms / 250 ms...
[PYI-47092:DEBUG] LOADER: waited 100 ms / 250 ms...
[PYI-47092:DEBUG] LOADER: waited 160 ms / 250 ms...
[PYI-47092:DEBUG] LOADER: waited 213 ms / 250 ms...
[PYI-47092:DEBUG] LOADER: waited 264 ms / 250 ms...
[PYI-47092:DEBUG] LOADER: done waiting for WM_QUERYENDSESSION - timed-out!
[PYI-47092:DEBUG] LOADER: retrieving process exit code and performing cleanup...
[PYI-47092:DEBUG] LOADER: child process exited (return code: 0)
[PYI-47092:DEBUG] LOADER: performing cleanup...
[PYI-47092:DEBUG] LOADER: removing temporary directory: C:\Users\Yunlong\AppData\Local\Temp\_MEI470922
[PYI-47092:DEBUG] LOADER: temporary directory C:\Users\Yunlong\AppData\Local\Temp\_MEI470922 was successfully removed.
[PYI-47092:DEBUG] LOADER: end of process reached!
  • 写回答

6条回答 默认 最新

  • 檀越@新空间 2025-07-03 15:13
    关注

    下午好🌅🌅🌅
    本答案参考通义千问

    在使用 PyInstaller 打包包含 PyQt + LangChain + Chroma 的项目时,出现 程序在调用 invoke 时自动退出 的问题,但开发环境运行正常。这通常与 PyInstaller 打包过程中对某些依赖项的处理不正确有关。


    一、问题分析

    1. 可能的原因

    • 动态库或模块未被正确打包:LangChain 和 Chroma 使用了大量依赖项(如 chromadb, langchain, ollama 等),如果这些模块没有被 PyInstaller 正确收集,可能导致运行时崩溃。
    • 线程/多进程问题:PyQt 的 QThread 在打包后可能会因为某些资源未正确加载而崩溃。
    • 路径问题:某些文件路径(如 persist_directory)在打包后可能发生变化,导致 Chroma 无法正确读取或写入数据。
    • 异常捕获不完整:虽然代码中有 try-except 捕获异常,但某些错误可能未被捕获,或者在子线程中未被正确输出。

    二、解决方案

    1. 确保所有依赖项都被正确打包

    重点:确保 chromadb, langchain, ollama, pydantic 等关键库的所有子模块和依赖项都被 PyInstaller 正确收集。

    修改后的 spec 文件配置:

    # -*- mode: python ; coding: utf-8 -*-
    # PyInstaller spec for a PyQt5 + LangChain + Chroma application.
    # Binaries and data files are embedded in the executable itself (no COLLECT
    # step), so the EXE call below must receive a.binaries and a.datas.
    import sys

    from PyInstaller.utils.hooks import collect_data_files
    from PyInstaller.utils.hooks import collect_dynamic_libs
    from PyInstaller.utils.hooks import collect_submodules

    # PyInstaller's analysis may hit Python's default recursion limit on large
    # dependency graphs; raise it before Analysis runs.
    sys.setrecursionlimit(10000)

    # Packages whose submodules are all added as hidden imports.
    # collect_submodules() is recursive, so 'langchain' already covers
    # 'langchain.chains'. 'langchain_ollama' and 'langchain_community' are
    # included because the question's own spec collects them.
    hidden_imports = []
    for package in (
        'langchain',
        'langchain_core',
        'langchain_community',
        'langchain_ollama',
        'chromadb',
        'ollama',
        'numpy',
        'pydantic',
        'pydantic_settings',
        'requests',
        'tqdm',
    ):
        hidden_imports += collect_submodules(package)

    # Non-Python files shipped inside chromadb.
    chromadb_data_files = collect_data_files('chromadb')

    # Native libraries shipped inside these packages.
    # NOTE(review): confirm which packages in your environment actually carry
    # native libraries.
    native_libs = collect_dynamic_libs('chromadb') + collect_dynamic_libs('hnswlib')

    a = Analysis(
        ['your_main_file.py'],
        pathex=[],
        binaries=native_libs,
        datas=chromadb_data_files,
        hiddenimports=hidden_imports,
        hookspath=[],
        runtime_hooks=[],
        excludes=[],
        noarchive=False,
    )

    # To bundle an extra folder (for example local model files), append
    # (source_path, destination_folder_inside_bundle) tuples to `datas` above,
    # e.g. datas=chromadb_data_files + [('path_to_your_model_folder', 'model_folder')].
    # The source path must exist at build time.

    pyz = PYZ(a.pure)

    exe = EXE(
        pyz,
        a.scripts,
        a.binaries,
        a.datas,
        [],
        name='your_app_name',
        debug=False,
        strip=False,
        # UPX compression is disabled while troubleshooting: it can damage some
        # Windows DLLs. Re-enable it once the build is known to work.
        upx=False,
        console=True,
        icon='your_icon.ico'
    )
    

    注意:请将 'your_main_file.py' 替换为你的主程序文件名,'path_to_your_model_folder' 替换为实际的模型路径。


    2. 检查 persist_directory 路径是否正确

    重点:确保 Chroma 的 persist_directory 在打包后仍然可访问。

    建议:

    • 将 persist_directory 设置为一个绝对路径,例如:

      # os.getcwd() is the directory the process was started from, so the
      # resulting path changes with the launch location.
      self.vector_store_path = os.path.join(os.getcwd(), "vector_store")
      
    • 或者根据程序是否处于打包环境(sys.frozen)分别确定基准路径(sys._MEIPASS 是打包资源的解压目录):

      import os
      import sys

      if getattr(sys, 'frozen', False):
          # Frozen build: keep user data next to the executable.
          # sys._MEIPASS is the bundle's extraction directory; in a one-file
          # build it is a temporary folder removed on exit, so a vector store
          # written there would be lost between runs.
          base_path = os.path.dirname(os.path.abspath(sys.executable))
      else:
          # Development: next to this source file.
          base_path = os.path.dirname(os.path.abspath(__file__))

      self.vector_store_path = os.path.join(base_path, "vector_store")
      

    3. 检查 QThread 是否正常工作

    重点:确保 QThread 在打包后能够正常启动并执行任务。

    建议:

    • 在 run() 方法中添加更多的调试信息,例如:

      def run(self):
          """Thread entry point with diagnostic output around the real work."""
          try:
              # flush=True so the lines are visible even if the process dies
              # shortly afterwards.
              print("Starting thread...", flush=True)
              print(f"Current working directory: {os.getcwd()}", flush=True)
              # The thread object may not define vector_store_path; getattr keeps
              # the diagnostics from raising AttributeError themselves.
              print(f"Vector store path: {getattr(self, 'vector_store_path', None)}", flush=True)
              # ... rest of the work ...
          except Exception as e:
              print(f"Exception in thread: {e}", flush=True)
      
    • 如果仍然无法看到输出,可以尝试将日志输出到文件中:

      # Mode 'w' truncates log.txt every time this runs, so only the latest
      # message is kept.
      with open('log.txt', 'w') as f:
          f.write("Thread started\n")
      

    4. 使用 --noconfirm 和 --clean 参数重新打包

    重点:有时候旧的打包缓存会导致问题,清理后再重新打包。

    pyinstaller --noconfirm --clean your_main_file.spec
    

    5. 使用 --onedir 模式测试

    重点:尝试使用 --onedir 模式打包,而不是 --onefile,以排除文件路径问题。

    pyinstaller --onefile your_main_file.py
    

    pyinstaller --onedir your_main_file.py
    

    三、总结

    | 步骤 | 内容 |
    |------|------|
    | 1 | 确保所有依赖项被正确打包,包括 langchain, chromadb, ollama, pydantic 等 |
    | 2 | 检查 persist_directory 路径是否正确,确保打包后它指向一个可持久化、可读写的目录 |
    | 3 | 在 QThread 中添加更多调试信息,确保线程能正常执行 |
    | 4 | 使用 --noconfirm 和 --clean 清理旧打包内容 |
    | 5 | 尝试使用 --onedir 模式打包,排除文件路径问题 |


    四、参考代码修改建议

    修改后的 ChatWithKnowledge 类(增加调试)

    class ChatWithKnowledge(QThread):
        """Worker thread that answers one question against a Chroma knowledge base.

        Emits response_signal with the answer plus an HTML list of referenced
        documents on success, and error_signal with a message on failure.
        """

        response_signal = pyqtSignal(str)
        error_signal = pyqtSignal(str)

        def __init__(self, llm, store, chroma):
            """Keep the chat model, the question text (`store`) and the vector store."""
            super().__init__()
            self.llm = llm
            self.store = store
            self.chroma = chroma

        def extract_real_answer(self, content):
            """Return only the text after the last "</think>" marker, if present."""
            if "</think>" in content:
                return content.split("</think>")[-1].strip()
            return content

        def run(self):
            """Build the chain, run the query and emit exactly one signal."""
            try:
                # flush=True so the lines are visible even if the process dies
                # shortly afterwards.
                print("Starting thread...", flush=True)
                print(f"Current working directory: {os.getcwd()}", flush=True)

                qa_chain = RetrievalQA.from_chain_type(
                    llm=self.llm,
                    chain_type="stuff",
                    retriever=self.chroma.as_retriever(search_kwargs={"k": 5}),
                    return_source_documents=True
                )

                if not qa_chain:
                    self.error_signal.emit("知识库未初始化")
                    return

                result = qa_chain.invoke({"query": self.store})
                answer = result["result"]
                sources = result.get("source_documents") or []

                # Only the first three source documents are listed.
                reference_list = [
                    f"<li>{os.path.basename(doc.metadata.get('source', '未知来源'))}</li>"
                    for doc in sources[:3]
                ]

                reference_html = f"<h4>相关文档:</h4><ol>{''.join(reference_list)}</ol>"
                self.response_signal.emit(self.extract_real_answer(answer) + reference_html)

            except Exception as e:
                print(f"Error in thread: {e}", flush=True)
                self.error_signal.emit(f"出现错误,请重试 {e}")
    

    五、最后建议

    • 如果以上方法仍不能解决问题,可以尝试使用 pyinstaller --debug all 重新打包,再运行生成的程序,查看详细的日志信息。
    • 可以考虑使用 pyinstaller 的 --add-data 参数手动添加一些缺失的资源文件。

    希望以上解决方案能帮助你解决 PyInstaller 打包后的问题!如果还有其他疑问,欢迎继续提问。

    评论

报告相同问题?

问题事件

  • 创建了问题 7月3日