"""
Script to build and persist the documentation vector database.
"""
from __future__ import division
import os

from libtbx.langchain.core import get_llm_and_embeddings
from libtbx.langchain.rag import load_all_docs_from_folder, create_and_persist_db


def run(docs_folder_path_list=None, db_dir=None,
        excluded_dirs=None, provider=None, timeout=300):
    """
    Initializes models and builds the documentation vector database.
    Returns a list of the file paths that were processed.

    Arguments:
      docs_folder_path_list: folders to load documents from. Each entry is
        passed in turn to load_all_docs_from_folder. None or an empty
        value falls back to ["./data_docs/"]. A single string is treated
        as one folder rather than being iterated character by character.
      db_dir: forwarded unchanged to create_and_persist_db (presumably the
        directory the database is persisted to; confirm against that
        function).
      excluded_dirs: forwarded unchanged to load_all_docs_from_folder.
      provider: LLM provider name. When None, the LLM_PROVIDER environment
        variable is used, defaulting to "ollama".
      timeout: forwarded unchanged to get_llm_and_embeddings (units are
        defined by that function).

    Raises:
      ValueError: if provider is 'google' or 'openai' and the matching
        API key environment variable is unset or empty, or if
        get_llm_and_embeddings raises ValueError.
    """
    if provider is None:
        provider = os.getenv("LLM_PROVIDER", "ollama")

    # Check credentials up front so a missing key fails before any
    # documents are loaded.
    if provider == 'google':
        if not os.getenv("GOOGLE_API_KEY"):
            raise ValueError("GOOGLE_API_KEY environment variable not set.")
    elif provider == 'openai':
        if not os.getenv("OPENAI_API_KEY"):
            raise ValueError("OPENAI_API_KEY environment variable not set.")

    if not docs_folder_path_list:
        # The default lives here (not in the signature) to avoid a shared
        # mutable default argument.
        docs_folder_path_list = ["./data_docs/"]
    elif isinstance(docs_folder_path_list, str):
        # A bare path would otherwise be looped over one character at a
        # time, handing each character to the loader as a folder.
        docs_folder_path_list = [docs_folder_path_list]

    # Accumulate documents and processed file paths across all folders
    all_processed_files = []
    all_docs = []
    for docs_folder_path in docs_folder_path_list:
        # The loader returns a (documents, file paths) pair
        new_docs, new_files = load_all_docs_from_folder(
            docs_folder_path,
            excluded_dirs=excluded_dirs
        )
        all_docs.extend(new_docs)
        all_processed_files.extend(new_files)

    try:
        # Only the embeddings are needed to build the database; the llm
        # half of the pair is not used here.
        llm, embeddings = get_llm_and_embeddings(
            provider=provider, timeout=timeout)
    except ValueError as e:
        # Show the underlying cause before raising the summary error
        print(e)
        raise ValueError("Sorry, unable to set up LLM with %s" % (provider))

    create_and_persist_db(all_docs, embeddings, db_dir)

    # Return the list of processed files
    return all_processed_files


# Script entry point: when this file is executed directly (rather than
# imported), build the database using run()'s default arguments.
if __name__ == "__main__":
    run()
