Reference Guide

This guide provides the API reference for the core classes of the ExoModel AI framework.

ExoModel

The ExoModel class is the foundation of the framework, combining Pydantic's data validation with LLM-powered capabilities.

`exomodel.exomodel.ExoModel`

Bases: BaseModel

ExoModel: A base class that utilizes an LLM to interact with class structures via prompts, performing "CRUD" operations: create, update, and read.

Source code in exomodel/exomodel.py

class ExoModel(BaseModel):
    """
    ExoModel: A base class that utilizes an LLM to interact with class structures 
    via prompts, performing "CRUD" operations: create, update, and read.
    """
    # NOTE: the class docstring and the docstrings of the @llm_function tools
    # below are consumed at runtime (schema / tool descriptions), so their text
    # is deliberately left untouched.

    # Knowledge files/URLs handed to the agent for retrieval.
    _rag_sources: list[str] = PrivateAttr(default_factory=list)
    # Lazily created by run_llm() on first use.
    _exo_agent: Optional[ExoAgent] = PrivateAttr(default=None)

    @llm_function
    def call_update_object(self, prompt: str):
        """
        Use this tool STRICTLY when the user explicitly commands to change, modify, 
        or update the entity's fields based on an instruction.
        MANDATORY: Pass the user's update instruction as the 'prompt' argument.
        """
        print("[ExoModel] AI Tool Call: call_update_object")
        return self.update_object(prompt)

    @llm_function
    def call_run_object_prompt(self, prompt: str):
        """
        Use this tool when the user asks to brainstorm, improve, give options, rewrite content, 
        or answer complex questions based on the entity's data. 
        MANDATORY: You must pass the user's original request as the 'prompt' argument.
        """
        print("[ExoModel] AI Tool Call: call_run_object_prompt")
        return self.run_object_prompt(prompt)

    @llm_function
    def call_run_object_analysis(self):
        """
        Use this tool when the user asks for a critical analysis, evaluation, or deep review 
        of the entity's current state and strategy. 
        This tool requires NO arguments. Just invoke it.
        """
        print("[ExoModel] AI Tool Call: call_run_object_analysis")
        return self.run_analysis()

    @llm_function
    def call_run_filling_instructions(self):
        """
        Use this tool when the user asks for guidance, best practices, rules, 
        or instructions on how to fill out, complete, or improve the fields 
        of this entity based on the reference material.
        This tool requires NO arguments. Just invoke it.
        """
        print("[ExoModel] AI Tool Call: call_run_filling_instructions")
        return self.run_filling_instructions()

    @classmethod
    def get_rag_sources(cls) -> list[str]:
        """Override this in child classes to return knowledge files/URLs."""
        return []

    @property
    def llm_tools(self):
        """Return the methods marked with ``_is_llm_function`` as LangChain tools.

        Every public attribute name of the class is inspected; names starting
        with ``_`` or ``model_`` are ignored. Properties are never evaluated.

        Returns:
            list: One ``StructuredTool`` per marked method (``return_direct=True``).
        """
        import inspect
        from langchain_core.tools import StructuredTool

        owner = type(self)
        tools = []
        for attr_name in dir(owner):
            # Crucial filter: ignore private names and Pydantic's own metadata.
            if attr_name.startswith(('_', 'model_')):
                continue
            # Properties must not be evaluated here: `llm_tools` is itself a
            # public property, so reading it through getattr(self, ...) would
            # re-enter this getter until the recursion limit is hit.
            if isinstance(inspect.getattr_static(owner, attr_name, None), property):
                continue
            try:
                method = getattr(self, attr_name)
                if hasattr(method, "_is_llm_function"):
                    tools.append(StructuredTool.from_function(
                        func=method,
                        name=attr_name,
                        description=method.__doc__ or f"Executes {attr_name}",
                        return_direct=True
                    ))
            except Exception as exc:
                # Best effort: one broken attribute must not hide the other tools,
                # but the failure is reported instead of being silently dropped.
                print(f"[ExoModel] Warning: could not register tool '{attr_name}': {exc}")
                continue
        return tools

    def __init__(self, **data):
        """Build the model and optionally populate it from a prompt.

        Args:
            **data: Field values for the Pydantic model. The key ``prompt`` is
                removed before validation and, when truthy, is passed to
                ``update_object`` once the instance exists.
        """
        # Extract prompt before Pydantic field processing
        prompt = data.pop("prompt", None)
        super().__init__(**data)

        # Copy the class-provided sources so add_rag_source() never mutates a
        # list object that an overriding get_rag_sources() may share.
        self._rag_sources = list(self.get_rag_sources())
        if self._rag_sources:
            print(f"[ExoModel] Knowledge sources loaded: {self._rag_sources}")

        # If a prompt is provided during init, update the object immediately
        if prompt:
            self.update_object(prompt)

    def add_rag_source(self, rag_source: str):
        """Adds a source to the internal list (uniqueness enforced)."""
        if rag_source not in self._rag_sources:
            self._rag_sources.append(rag_source)

    def get_json_schema(self):
        """Returns the Pydantic JSON schema as an indented JSON string."""
        return json.dumps(self.model_json_schema(), indent=2)

    def get_instance_json(self):
        """Returns the current instance data as an indented JSON string."""
        return self.model_dump_json(indent=2)

    def update_object(self, prompt: str) -> dict:
        """
        Updates the entity's fields based on a natural language prompt using 
        LLM Structured Output.

        This method builds a dynamic extraction schema, runs the LLM, and 
        synchronizes the returned data with the current instance fields. It
        accepts a Pydantic object, a plain dict, or a raw JSON string
        (optionally wrapped in a ```json fence) as LLM output.

        Args:
            prompt (str): The user's instruction or data for updating the object.

        Returns:
            dict: A dictionary containing the fields that were successfully
            updated. Empty when the LLM returned nothing or the output could
            not be processed (the error is printed, not raised).
        """
        import json
        import re

        # 1. Prepare context and schema
        specialized_prompt = self.__get_prompt_update_object(prompt)
        extraction_schema = self.build_extraction_schema()

        # 2. Execute LLM call
        structured_output = self.run_llm(
            prompt=specialized_prompt, 
            response_schema=extraction_schema, 
            mode="hybrid"
        )

        print(f"[ExoModel] Structured output: {structured_output}")

        if not structured_output:
            print("[ExoModel] Warning: LLM returned no output for update.")
            return {}

        try:
            # 3. Normalise the output to a plain dict
            if isinstance(structured_output, str):
                # Attempt to extract JSON from markdown blocks if present
                clean_json = re.sub(r"```json\s?|\s?```", "", structured_output).strip()
                updates = json.loads(clean_json)
            elif isinstance(structured_output, dict):
                updates = structured_output
            else:
                # Standard Pydantic model handling
                updates = structured_output.model_dump()

            if not isinstance(updates, dict):
                # e.g. the model answered with a JSON array or scalar
                raise TypeError(f"expected a JSON object, got {type(updates).__name__}")

            # 4. Synchronize updates with instance fields
            # We use type(self).model_fields to avoid Pydantic V2.11+ deprecation warnings
            updated_data = {}
            cls_fields = type(self).model_fields

            for field_name, field_value in updates.items():
                if field_name in cls_fields:
                    setattr(self, field_name, field_value)
                    updated_data[field_name] = field_value

            return updated_data

        except Exception as e:
            print(f"[ExoModel] Error synchronizing update: {e}")
            # Log the raw output for debugging in development environments
            if getattr(self, '_debug', False):
                print(f"[DEBUG] Raw output: {structured_output}")
            return {}

    def update_object_old(self, prompt: str):
        """Legacy variant of ``update_object`` kept for backward compatibility.

        Only handles outputs exposing ``model_dump()`` and returns the whole
        dumped payload (including keys that are not model fields). Returns an
        empty dict when there is no output or processing fails.
        """
        specialized_prompt = self.__get_prompt_update_object(prompt)
        extraction_schema = self.build_extraction_schema()

        structured_output = self.run_llm(
            specialized_prompt, 
            response_schema=extraction_schema, 
            mode="hybrid"
        )

        if not structured_output:
            return {}

        try:
            updates = structured_output.model_dump()
            cls_fields = type(self).model_fields
            for field_name, field_value in updates.items():
                if field_name in cls_fields:
                    setattr(self, field_name, field_value)
            return updates
        except Exception as e:
            print(f"[ExoModel] Error processing update: {e}")
            return {}

    @classmethod
    def build_extraction_schema(cls):
        """Builds a dynamic Pydantic model for LLM data extraction, filtering complex relations.

        Excluded fields, nested ``ExoModel`` types, types whose name contains
        ``ListExoModel`` and lists of non-primitive items are left out. Every
        remaining field is declared as required with its original annotation.

        Returns:
            type: A model class named ``<ClassName>Extraction``.
        """
        fields_for_ai = {}
        for name, field in cls.model_fields.items():
            if field.exclude:
                continue

            origin = get_origin(field.annotation)
            args = get_args(field.annotation)

            # Determine base type for List/Union/Optional
            base_type = args[0] if origin in (Union, list, List) and args else field.annotation

            try:
                if isinstance(base_type, type):
                    # Skip nested ExoModels or custom List containers
                    if issubclass(base_type, ExoModel):
                        continue
                    if "ListExoModel" in base_type.__name__:
                        continue
            except Exception:
                # Type introspection is best effort; an un-inspectable type is
                # simply treated as a regular field.
                pass

            # Only allow simple lists of primitives for standard object updates
            if origin is list or origin is List:
                if base_type not in (str, int, float, bool):
                    continue

            fields_for_ai[name] = (field.annotation, ...)

        return create_model(f"{cls.__name__}Extraction", **fields_for_ai)

    def _render_prompt(self, filename: str, **values) -> str:
        """Load a prompt template and fill its placeholders.

        Args:
            filename: Template file name, resolved through ``_get_prompt_path``.
            **values: Values passed to ``str.format`` on the template text.

        Returns:
            str: The formatted prompt, or an ``"Error: ..."`` message when the
            template file does not exist.
        """
        file_path = self._get_prompt_path(filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                template = f.read()
            return template.format(**values)
        except FileNotFoundError:
            # NOTE(review): callers forward this string to the LLM as the prompt;
            # confirm whether raising would be preferable.
            return f"Error: Prompt template {file_path} not found."

    def __get_prompt_update_object(self, prompt: str):
        """Render ``update_object.md`` for this entity and the user prompt."""
        return self._render_prompt(
            "update_object.md",
            entity_name=self.__class__.__name__,
            prompt=prompt,
            obj_fields_info=self.get_fields_info(),
        )

    def _get_prompt_path(self, filename: str) -> str:
        """
        Resolves the absolute path for a given prompt template file.
        """
        # Get the directory where exomodel.py is located
        current_dir = os.path.dirname(os.path.abspath(__file__))

        # Build path: <module dir>/prompt/{filename}
        return os.path.join(current_dir, "prompt", filename)

    def update_field(self, field_name: str, prompt: str):
        """Updates a specific field based on a prompt.

        Args:
            field_name: Name of a declared model field.
            prompt: Instruction describing the new value.

        Returns:
            The LLM result, which is also assigned to the field as-is.

        Raises:
            ValueError: If ``field_name`` is not a field of the model.
        """
        if field_name not in type(self).model_fields:
            raise ValueError(f"Field '{field_name}' does not exist in the model.")

        prompt_llm = self.__get_prompt_update_field(field_name, prompt)
        result = self.run_llm(prompt_llm, mode="hybrid")
        setattr(self, field_name, result)
        return result

    def __get_prompt_update_field(self, field_name: str, prompt: str):
        """Render ``update_field.md`` with the field's current value."""
        return self._render_prompt(
            "update_field.md",
            field_name=field_name,
            entity_name=self.__class__.__name__,
            field_value=getattr(self, field_name),
            prompt=prompt,
        )

    def run_object_prompt(self, prompt: str):
        """Executes a general prompt regarding the object state."""
        prompt_llm = self.__get_prompt_run_object_prompt(prompt)
        return self.run_llm(prompt_llm, mode="hybrid")

    def __get_prompt_run_object_prompt(self, prompt: str):
        """Render ``run_object_prompt.md`` with the instance data as JSON."""
        return self._render_prompt(
            "run_object_prompt.md",
            prompt=prompt,
            entity_name=self.__class__.__name__,
            json_schema=self.get_instance_json(),
        )

    def run_analysis(self):
        """Performs a critical analysis of the object using RAG context."""
        prompt = self.__get_prompt_run_analysis()
        return self.run_llm(prompt, mode="specialist")

    def __get_prompt_run_analysis(self):
        """Render ``run_analysis.md`` with the instance data as JSON."""
        return self._render_prompt(
            "run_analysis.md",
            entity_name=self.__class__.__name__,
            json_schema=self.get_instance_json(),
        )

    def run_filling_instructions(self):
        """Retrieves filling guidelines and best practices."""
        prompt = self.__get_prompt_filling_instructions()
        return self.run_llm(prompt, mode="specialist")

    def __get_prompt_filling_instructions(self):
        """Render ``filling_instructions.md`` with the field metadata."""
        return self._render_prompt(
            "filling_instructions.md",
            entity_name=self.__class__.__name__,
            fields_info=self.get_fields_metadata(self.__class__),
        )

    def master_prompt(self, prompt: str):
        """The Orchestrator prompt capable of executing other object tools."""
        llm_prompt = self.__get_master_prompt(prompt)
        print("[ExoModel] Master Prompt Initialized\n")
        return self.run_llm(prompt=llm_prompt, mode="orchestrator", use_tools=True)

    def __get_master_prompt(self, prompt: str):
        """Render ``master_prompt.md`` with field values and the tool list."""
        return self._render_prompt(
            "master_prompt.md",
            entity_name=self.__class__.__name__,
            prompt=prompt,
            obj_fields_info=self.get_fields_info(),
            tools_info=self.llm_tools,
        )

    def run_llm(self, prompt: str, response_schema: Any = None, mode: str = "generalist", use_tools: bool = False):
        """Unified interface with the ExoAgent.

        Creates the agent on first use (registering the RAG sources, if any),
        then sets its external tools to ``llm_tools`` or to an empty list
        depending on ``use_tools`` before running the prompt.

        Args:
            prompt: Prompt text sent to the agent.
            response_schema: Optional schema forwarded to the agent.
            mode: Agent mode name forwarded to the agent.
            use_tools: Whether this instance's LLM tools are exposed for the call.

        Returns:
            Whatever ``ExoAgent.run`` returns.
        """
        if self._exo_agent is None:
            self._exo_agent = ExoAgent()
            if self._rag_sources:
                self._exo_agent.add_rag_sources(self._rag_sources)

        self._exo_agent.set_external_tools(self.llm_tools if use_tools else [])

        return self._exo_agent.run(prompt=prompt, response_schema=response_schema, mode=mode)

    def get_fields_info(self):
        """Simplified string representation of current fields for LLM context.

        Returns:
            str: One ``- name: value`` line per non-excluded field.
        """
        return "\n".join(
            f"- {name}: {getattr(self, name)}"
            for name, field_info in type(self).model_fields.items()
            if not field_info.exclude
        )

    @staticmethod
    def get_fields_metadata(model_class):
        """Returns string containing fields, types, and descriptions for a Pydantic class.

        The field named ``id`` and excluded fields are omitted. Each entry ends
        with ``"; "``.
        """
        entries = []
        for name, field_info in model_class.model_fields.items():
            if name == "id" or field_info.exclude:
                continue

            type_name = getattr(field_info.annotation, '__name__', str(field_info.annotation))
            description = field_info.description or ""

            if description:
                entries.append(f'{name} (type: {type_name}, info: {description}); ')
            else:
                entries.append(f'{name} (type: {type_name}); ')
        return "".join(entries)

    def to_csv(self, delimiter: str = ";", include_header: bool = True) -> str:
        """Converts the current instance to a CSV row.

        Args:
            delimiter: Field separator.
            include_header: Whether a header row with the field names is emitted.

        Returns:
            str: The CSV text without the final line terminator.
        """
        data = self.model_dump(exclude_unset=False)
        output = io.StringIO()
        writer = csv.DictWriter(output, fieldnames=list(data), delimiter=delimiter)
        if include_header:
            writer.writeheader()
        writer.writerow(data)
        # Drop only the trailing line terminator; a plain strip() would also
        # remove whitespace that belongs to the first or last field value.
        return output.getvalue().rstrip("\r\n")

    def to_ui(self) -> str:
        """Generates a formatted HTML string for UI (Telegram/CLI).

        ``None``, ``""`` and numeric zero are shown as "Not provided"; boolean
        ``False`` is shown as a value. Objects exposing a list attribute named
        ``items`` are rendered as a summary of up to five entries. All dynamic
        text is HTML-escaped.

        Returns:
            str: The rendered card, one line per non-excluded field.
        """
        import html

        lines = [
            f"<b>{self.__class__.__name__.upper()}</b>",
            "━━━━━━━━━━━━━━━━━━━━\n"
        ]

        for name, field in type(self).model_fields.items():
            if field.exclude:
                continue

            value = getattr(self, name, None)
            clean_name = name.replace("_", " ").title()

            # False == 0 in Python, so booleans are excluded explicitly.
            is_missing = value is None or (
                not isinstance(value, bool) and value in ("", 0)
            )

            if is_missing:
                lines.append(f"⚪ <b>{clean_name}:</b> <i>Not provided</i>")
            elif hasattr(value, 'items') and isinstance(getattr(value, 'items'), list):
                # Logic for nested ExoModel list containers
                item_list = value.items
                if not item_list:
                    lines.append(f"⚪ <b>{clean_name}:</b> <i>Empty list</i>")
                else:
                    lines.append(f"🔵 <b>{clean_name}:</b> {len(item_list)} items registered")
                    limit = min(len(item_list), 5)
                    for idx, item in enumerate(item_list[:limit]):
                        item_label = getattr(item, 'name', getattr(item, 'title', f"Item {idx+1}"))
                        prefix = "└" if idx == limit - 1 else "├"
                        safe_label = html.escape(str(item_label), quote=False)
                        lines.append(f"    {prefix} 🔸 <i>{safe_label}</i>")
                    if len(item_list) > 5:
                        lines.append(f"    └ <i>...and {len(item_list) - 5} more</i>")
            else:
                # Truncate before escaping so an entity such as "&lt;" is never cut in half.
                raw_value = str(value)
                if len(raw_value) > 300:
                    raw_value = raw_value[:297] + "..."
                str_value = html.escape(raw_value, quote=False)
                lines.append(f"🟢 <b>{clean_name}:</b> {str_value}")

        lines.append("\n━━━━━━━━━━━━━━━━━━━━")
        return "\n".join(lines)

    def __repr__(self) -> str:
        """Short representation showing the class name and the ``name`` attribute, if any."""
        name_val = getattr(self, 'name', 'unnamed')
        return f"<{self.__class__.__name__} name='{name_val}'>"

`update_object(prompt)`

Updates the entity's fields based on a natural language prompt using LLM Structured Output.

This method builds a dynamic extraction schema, runs the LLM, and synchronizes the returned data with the current instance fields. It is designed to be resilient, handling both Pydantic objects and raw JSON strings returned by the LLM.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `prompt` | `str` | The user's instruction or data for updating the object. | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `dict` | `dict` | A dictionary containing the fields that were successfully updated. |

Source code in exomodel/exomodel.py

def update_object(self, prompt: str) -> dict:
    """
    Updates the entity's fields based on a natural language prompt using 
    LLM Structured Output.

    This method builds a dynamic extraction schema, runs the LLM, and 
    synchronizes the returned data with the current instance fields. It
    accepts a Pydantic object, a plain dict, or a raw JSON string
    (optionally wrapped in a ```json fence) as LLM output.

    Args:
        prompt (str): The user's instruction or data for updating the object.

    Returns:
        dict: A dictionary containing the fields that were successfully
        updated. Empty when the LLM returned nothing or the output could
        not be processed (the error is printed, not raised).
    """
    import json
    import re

    # 1. Prepare context and schema
    specialized_prompt = self.__get_prompt_update_object(prompt)
    extraction_schema = self.build_extraction_schema()

    # 2. Execute LLM call
    structured_output = self.run_llm(
        prompt=specialized_prompt, 
        response_schema=extraction_schema, 
        mode="hybrid"
    )

    print(f"[ExoModel] Structured output: {structured_output}")

    if not structured_output:
        print("[ExoModel] Warning: LLM returned no output for update.")
        return {}

    try:
        # 3. Normalise the output to a plain dict
        if isinstance(structured_output, str):
            # Attempt to extract JSON from markdown blocks if present
            clean_json = re.sub(r"```json\s?|\s?```", "", structured_output).strip()
            updates = json.loads(clean_json)
        elif isinstance(structured_output, dict):
            updates = structured_output
        else:
            # Standard Pydantic model handling
            updates = structured_output.model_dump()

        if not isinstance(updates, dict):
            # e.g. the model answered with a JSON array or scalar
            raise TypeError(f"expected a JSON object, got {type(updates).__name__}")

        # 4. Synchronize updates with instance fields
        # We use type(self).model_fields to avoid Pydantic V2.11+ deprecation warnings
        updated_data = {}
        cls_fields = type(self).model_fields

        for field_name, field_value in updates.items():
            if field_name in cls_fields:
                setattr(self, field_name, field_value)
                updated_data[field_name] = field_value

        return updated_data

    except Exception as e:
        print(f"[ExoModel] Error synchronizing update: {e}")
        # Log the raw output for debugging in development environments
        if getattr(self, '_debug', False):
            print(f"[DEBUG] Raw output: {structured_output}")
        return {}

`update_field(field_name, prompt)`

Updates a specific field based on a prompt.

Source code in exomodel/exomodel.py

def update_field(self, field_name: str, prompt: str):
    """Regenerate a single field from a natural-language instruction.

    Args:
        field_name: Name of a field declared on the model.
        prompt: Instruction describing the desired value.

    Returns:
        The LLM result, which is also stored on the field unchanged.

    Raises:
        ValueError: If the model declares no field called ``field_name``.
    """
    declared_fields = type(self).model_fields
    if field_name in declared_fields:
        # Build the field-specific prompt, ask the LLM, store its answer.
        new_value = self.run_llm(
            self.__get_prompt_update_field(field_name, prompt),
            mode="hybrid",
        )
        setattr(self, field_name, new_value)
        return new_value

    raise ValueError(f"Field '{field_name}' does not exist in the model.")

`run_analysis()`

Performs a critical analysis of the object using RAG context.

Source code in exomodel/exomodel.py

def run_analysis(self):
    """Run the analysis prompt for this object through the LLM in "specialist" mode.

    Returns:
        Whatever ``run_llm`` returns for the rendered analysis prompt.
    """
    analysis_prompt = self.__get_prompt_run_analysis()
    analysis = self.run_llm(analysis_prompt, mode="specialist")
    return analysis

`run_object_prompt(prompt)`

Executes a general prompt regarding the object state.

Source code in exomodel/exomodel.py

def run_object_prompt(self, prompt: str):
    """Answer a free-form request about the object via the LLM in "hybrid" mode.

    Args:
        prompt: The user's request.

    Returns:
        Whatever ``run_llm`` returns for the rendered prompt.
    """
    rendered = self.__get_prompt_run_object_prompt(prompt)
    answer = self.run_llm(rendered, mode="hybrid")
    return answer

`run_filling_instructions()`

Retrieves filling guidelines and best practices.

Source code in exomodel/exomodel.py

def run_filling_instructions(self):
    """Ask the LLM, in "specialist" mode, for guidance on filling in the fields.

    Returns:
        Whatever ``run_llm`` returns for the rendered instructions prompt.
    """
    guidance_prompt = self.__get_prompt_filling_instructions()
    guidance = self.run_llm(guidance_prompt, mode="specialist")
    return guidance

`to_csv(delimiter=';', include_header=True)`

Converts the current instance to a CSV row.

Source code in exomodel/exomodel.py

def to_csv(self, delimiter: str = ";", include_header: bool = True) -> str:
    """Converts the current instance to a CSV row.

    Args:
        delimiter: Field separator.
        include_header: Whether a header row with the field names is emitted.

    Returns:
        str: The CSV text without the final line terminator.
    """
    data = self.model_dump(exclude_unset=False)
    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=list(data), delimiter=delimiter)
    if include_header:
        writer.writeheader()
    writer.writerow(data)
    # Drop only the trailing line terminator; a plain strip() would also
    # remove whitespace that belongs to the first or last field value.
    return output.getvalue().rstrip("\r\n")

`to_ui()`

Generates a formatted HTML/Markdown string for UI (Telegram/CLI).

Source code in exomodel/exomodel.py

def to_ui(self) -> str:
    """Generates a formatted HTML string for UI (Telegram/CLI).

    ``None``, ``""`` and numeric zero are shown as "Not provided"; boolean
    ``False`` is shown as a value. Objects exposing a list attribute named
    ``items`` are rendered as a summary of up to five entries. All dynamic
    text is HTML-escaped.

    Returns:
        str: The rendered card, one line per non-excluded field.
    """
    import html

    lines = [
        f"<b>{self.__class__.__name__.upper()}</b>",
        "━━━━━━━━━━━━━━━━━━━━\n"
    ]

    for name, field in type(self).model_fields.items():
        if field.exclude:
            continue

        value = getattr(self, name, None)
        clean_name = name.replace("_", " ").title()

        # False == 0 in Python, so booleans are excluded explicitly.
        is_missing = value is None or (
            not isinstance(value, bool) and value in ("", 0)
        )

        if is_missing:
            lines.append(f"⚪ <b>{clean_name}:</b> <i>Not provided</i>")
        elif hasattr(value, 'items') and isinstance(getattr(value, 'items'), list):
            # Logic for nested ExoModel list containers
            item_list = value.items
            if not item_list:
                lines.append(f"⚪ <b>{clean_name}:</b> <i>Empty list</i>")
            else:
                lines.append(f"🔵 <b>{clean_name}:</b> {len(item_list)} items registered")
                limit = min(len(item_list), 5)
                for idx, item in enumerate(item_list[:limit]):
                    item_label = getattr(item, 'name', getattr(item, 'title', f"Item {idx+1}"))
                    prefix = "└" if idx == limit - 1 else "├"
                    safe_label = html.escape(str(item_label), quote=False)
                    lines.append(f"    {prefix} 🔸 <i>{safe_label}</i>")
                if len(item_list) > 5:
                    lines.append(f"    └ <i>...and {len(item_list) - 5} more</i>")
        else:
            # Truncate before escaping so an entity such as "&lt;" is never cut in half.
            raw_value = str(value)
            if len(raw_value) > 300:
                raw_value = raw_value[:297] + "..."
            str_value = html.escape(raw_value, quote=False)
            lines.append(f"🟢 <b>{clean_name}:</b> {str_value}")

    lines.append("\n━━━━━━━━━━━━━━━━━━━━")
    return "\n".join(lines)

`get_rag_sources()` `classmethod`

Override this in child classes to return knowledge files/URLs.

Source code in exomodel/exomodel.py

@classmethod
def get_rag_sources(cls) -> list[str]:
    """Hook for subclasses: return the knowledge files/URLs to use. The base returns none."""
    no_sources: list[str] = []
    return no_sources

`add_rag_source(rag_source)`

Adds a source to the internal list (uniqueness enforced).

Source code in exomodel/exomodel.py

def add_rag_source(self, rag_source: str):
    """Register a knowledge source unless it is already in the internal list."""
    known_sources = self._rag_sources
    if rag_source in known_sources:
        return
    known_sources.append(rag_source)

ExoAgent

The ExoAgent manages LLM interactions, RAG (Retrieval-Augmented Generation) context, and tool orchestration.

`exomodel.exoagent.ExoAgent`

ExoAgent manages LLM interactions, RAG (Retrieval-Augmented Generation) context, and tool orchestration.

Source code in exomodel/exoagent.py

class ExoAgent:
    """
    ExoAgent manages LLM interactions, RAG (Retrieval-Augmented Generation) context,
    and tool orchestration.
    """

    def __init__(self):
        # Configuration
        self.model_id = os.getenv("MY_LLM_MODEL", "google_genai:gemini-2.5-flash-lite")

        # Universal embedding map by provider
        embedding_map = {
            "google_genai": "google_genai:gemini-embedding-001",
            "openai": "openai:text-embedding-3-small",
            "anthropic": "openai:text-embedding-3-small", # Anthropic usually uses OpenAI or Cohere
            "cohere": "cohere:embed-english-v3.0",
            "azure_openai": "azure_openai:text-embedding-3-small"
        }

        # Dynamic Embedding Model selection based on provider
        provider = self.model_id.split(":")[0]

        # Busca no mapa, ou usa o valor da ENV se o usuário quiser sobrescrever totalmente
        self.emb_model = os.getenv("MY_EMB_MODEL") or embedding_map.get(provider, "google_genai:gemini-embedding-001")

        # State Management
        self.sources_queue: List[str] = []
        self.vector_store: Optional[InMemoryVectorStore] = None
        self.rag_tools: List[Any] = []
        self.external_tools: List[Any] = []

        self._agent = None
        self._last_schema = None
        self._current_mode = "generalist"

    def add_rag_sources(self, sources: List[str]):
        """Schedule sources for indexing. Cost: 0 tokens until processed."""
        self.sources_queue.extend(sources)
        self._agent = None  # Force agent rebuild to include new context

    def _process_pending_rag(self):
        """Loads and embeds scheduled sources only when necessary.

        Does nothing when the queue is empty and a vector store already
        exists. Otherwise every queued source is loaded, tagged with metadata,
        split into chunks and added to the (lazily created) vector store, and
        ``rag_tools`` is rebuilt: it holds a single ``retrieve_context`` tool
        when a vector store exists and is empty otherwise.
        """
        if not self.sources_queue and self.vector_store is not None:
            return

        documents = []
        for source in self.sources_queue:
            loader = self._get_loader(source)
            docs = loader.load()

            # Same case-insensitive ".pdf" test as _get_loader, so the label
            # always matches the loader that was actually used.
            if source.lower().endswith(".pdf"):
                source_type = "pdf"
            elif source.startswith("http"):
                source_type = "web"
            else:
                source_type = "text"

            # Enrich metadata on every loaded document
            for doc in docs:
                doc.metadata["source"] = source
                doc.metadata["indexed_at"] = datetime.now().isoformat()
                doc.metadata["source_type"] = source_type
            documents.extend(docs)

        self.sources_queue = []

        if documents:
            splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            splits = splitter.split_documents(documents)

            if self.vector_store is None:
                self.vector_store = InMemoryVectorStore(init_embeddings(self.emb_model))

            self.vector_store.add_documents(splits)

        # Update RAG tool if vector store is initialized
        if self.vector_store is not None:
            @tool
            def retrieve_context(query: str) -> str:
                """Query the private knowledge base to retrieve factual context."""

                # similarity_search_with_score returns (doc, score) pairs
                results = self.vector_store.similarity_search_with_score(query, k=5)

                SCORE_THRESHOLD = 0.75  # tune according to the embedding model
                relevant = [
                    (doc, score) for doc, score in results 
                    if score >= SCORE_THRESHOLD
                ]

                if not relevant:
                    return "No sufficiently relevant content found in the knowledge base."

                chunks = []
                for doc, score in relevant:
                    source = doc.metadata.get("source", "unknown")
                    chunks.append(f"[Source: {source} | Relevance: {score:.2f}]\n{doc.page_content}")

                return "\n\n---\n\n".join(chunks)

            self.rag_tools = [retrieve_context]
        else:
            self.rag_tools = []

    @property
    def all_tools(self) -> List[Any]:
        """Combines RAG tools and external action tools."""
        return self.rag_tools + self.external_tools

    def set_external_tools(self, tools: List[Any]):
        """Register external tools and reset agent state."""
        self.external_tools = tools
        self._agent = None

    def _get_loader(self, source: str):
        """Pick a document loader for ``source``.

        Sources ending in ".pdf" (any letter case) get a ``PyPDFLoader``,
        sources starting with "http" get a ``WebBaseLoader`` with a browser-like
        User-Agent header, and everything else gets a ``TextLoader``.
        """
        is_pdf = source.lower().endswith(".pdf")
        is_web = source.startswith("http")

        if is_pdf:
            loader = PyPDFLoader(source)
        elif is_web:
            browser_headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/119.0.0.0 Safari/537.36'
            }
            loader = WebBaseLoader(web_paths=(source,), header_template=browser_headers)
        else:
            loader = TextLoader(source)

        return loader

    def _get_system_prompt(self, mode: str) -> str:
        """Centralized prompt repository for different agent personas."""
        prompts = {
            "generalist": (
                "You are a helpful and direct virtual assistant. "
                "Answer based on your trained knowledge, focusing on clarity and usefulness."
            ),
            "specialist": (
                "You are a Senior Domain Specialist. "
                "Your knowledge comes exclusively from the 'retrieve_context' tool. "
                "Never answer from general knowledge.\n\n"
                "WORKFLOW:\n"
                "1. Call 'retrieve_context' immediately before any response.\n"
                "2. Base your answer solely on the retrieved content.\n"
                "3. If 'retrieve_context' returns no relevant content, "
                "respond: 'No information found in the knowledge base for this query.'\n\n"
                "STYLE: Concise, direct, and objective. "
                "No conversational filler, no code snippets, no requests for more information."
            ),
            "hybrid": (
                "You are a Senior Domain Specialist. "
                "Your primary knowledge source is the 'retrieve_context' tool.\n\n"
                "WORKFLOW:\n"
                "1. Call 'retrieve_context' first.\n"
                "2. Build your answer from the retrieved content.\n"
                "3. If the retrieved content is incomplete, supplement with your general knowledge "
                "— but never contradict what was retrieved.\n"
                "4. When using general knowledge beyond the retrieved content, "
                "signal it explicitly: '[General knowledge]'.\n\n"
                "STYLE: Concise, direct, and objective. "
                "No conversational filler, no code snippets, no requests for more information."
            ),
            "orchestrator": (
                "You are an Orchestrator Agent. Your sole function is to evaluate the user "
                "request and route it to the correct action.\n\n"
                "WORKFLOW:\n"
                "1. TOOL CALL: If the intent matches an available tool, invoke it immediately. "
                "When a tool requires a 'prompt' argument, pass the user's original request "
                "verbatim — never paraphrase or summarize it.\n"
                "2. DIRECT ANSWER: If the request is a simple read-only question answerable "
                "from the current entity state, respond concisely with that data only.\n"
                "3. REJECT: If the request is out of scope or no tool matches, reply exactly: "
                "'I cannot fulfill this request based on the available tools and data.'\n\n"
                "RULES:\n"
                "- Never perform complex logic yourself. Always delegate to tools.\n"
                "- Never hallucinate tool capabilities or arguments.\n"
                "- Output the tool result as-is. No summaries, no filler, no commentary.\n\n"
                "STYLE: Professional, direct, and objective."
            )
        }
        return prompts.get(mode, prompts["generalist"])

    def _init_agent(self, response_schema: Optional[Type] = None, mode: str = "generalist"):
        """
        Reconstructs the agent with updated tools and context using the unified
        LangChain 0.3+ agent creation pattern.

        :param response_schema: Optional schema forwarded to ``create_agent`` as
            ``response_format``.
        :param mode: Persona key resolved through ``_get_system_prompt``. The
            retrieval-driven personas ("specialist", "hybrid") are downgraded to
            "generalist" when no RAG tools are available.
        """
        # 1. Process any pending RAG documents to update the vector store
        self._process_pending_rag()

        # 2. Fallback to generalist only for the personas whose prompts depend on
        #    the 'retrieve_context' tool. Other personas (e.g. "orchestrator",
        #    which routes to external tools) stay usable without RAG data.
        if mode in ("specialist", "hybrid") and not self.rag_tools:
            print(f"Warning: Mode '{mode}' requested without RAG context. Using 'generalist'.")
            mode = "generalist"

        # 3. Retrieve the localized system prompt instructions for the chosen mode
        system_prompt = self._get_system_prompt(mode)
        print(f"\n[INIT] Mode: {mode} | Instructions: {system_prompt[:80]}...")

        # 4. Initialize the unified agent with every registered tool.
        self._agent = create_agent(
            self.model_id,
            tools=self.all_tools,
            system_prompt=system_prompt,
            response_format=response_schema
        )

    def run(self, prompt: str, response_schema: Optional[Type] = None, mode: str = "generalist") -> Any:
        """Execution entry point for the agent.

        :param prompt: User message sent to the agent.
        :param response_schema: Optional schema; when the agent result carries a
            ``structured_response`` it is returned (a dict is passed to the schema
            as keyword arguments first) instead of text.
        :param mode: Persona key forwarded to ``_init_agent``.
        :return: The structured response, or the stripped text of the last message.
            When that text is empty, a warning string that embeds the most recent
            tool message content is returned instead.
        """
        print(f"\n[RUN] Mode: {mode} | Schema: {response_schema.__name__ if response_schema else 'None'}")

        # Re-initialize if parameters changed
        if not self._agent or mode != self._current_mode or response_schema != self._last_schema:
            # Discard the previous agent before rebuilding. If the rebuild raises,
            # the recorded mode/schema must not be paired with a stale agent built
            # for different parameters; leaving _agent empty forces a retry.
            self._agent = None
            self._current_mode = mode
            self._last_schema = response_schema
            self._init_agent(response_schema=response_schema, mode=mode)

        config = {
            "configurable": {"max_tokens": 4096, "temperature": 0},
            "recursion_limit": 10
        }

        print("Waiting for LLM response...")
        result = self._agent.invoke(
            {"messages": [{"role": "user", "content": prompt}]},
            config=config
        )

        # Handle Structured Output
        if response_schema and "structured_response" in result:
            raw_data = result["structured_response"]
            return response_schema(**raw_data) if isinstance(raw_data, dict) else raw_data

        # Extract content from the last message of the conversation
        final_message = result["messages"][-1]
        final_content = getattr(final_message, 'content', '')

        # Silent Output Fallback (Anti-Mute Trigger)
        if not final_content:
            print("[CRITICAL] Empty response detected. Triggering recovery fallback.")
            rag_text = "No context retrieved."
            # Walk backwards so the most recent tool output is the one surfaced.
            for msg in reversed(result['messages']):
                if getattr(msg, 'type', '').lower() in ['tool', 'toolmessage']:
                    rag_text = getattr(msg, 'content', rag_text)
                    break

            final_content = (
                "⚠️ **System Warning:** The AI failed to generate a response. "
                "Displaying raw context found:\n\n"
                f"_{rag_text}_"
            )

        return str(final_content).strip()

    def get_web_markdown(self, url: str) -> str:
        """Fetches a URL and converts its body to clean Markdown.

        :param url: Address of the page to download.
        :return: The Markdown rendering of the response body, truncated to the
            first 16000 characters.
        """
        # A timeout is required: without one, requests waits indefinitely on an
        # unresponsive host and blocks the caller.
        response = requests.get(
            url,
            headers={'User-Agent': 'ExoAgentApp/1.0'},
            timeout=30,
        )
        html_content = response.text

        converter = html2text.HTML2Text()
        converter.ignore_links = False
        converter.bypass_tables = False
        converter.body_width = 0  # 0 disables hard line wrapping
        converter.ignore_images = True

        markdown_content = converter.handle(html_content)
        # Cap the size of the returned text.
        return markdown_content[:16000]

`add_rag_sources(sources)`

Schedule sources for indexing. Cost: 0 tokens until processed.

Source code in exomodel/exoagent.py

def add_rag_sources(self, sources: List[str]):
    """Schedule sources for indexing. Cost: 0 tokens until processed.

    :param sources: Sources to append to the pending queue. A single string is
        accepted and queued as one source.
    """
    # A bare string is itself iterable, so extend() would queue it one
    # character at a time; wrap it to keep it as a single source.
    if isinstance(sources, str):
        sources = [sources]
    self.sources_queue.extend(sources)
    self._agent = None  # Force agent rebuild to include new context

`set_external_tools(tools)`

Register external tools and reset agent state.

Source code in exomodel/exoagent.py

def set_external_tools(self, tools: List[Any]):
    """Register external tools and reset agent state.

    :param tools: Tools to expose to the agent. Any iterable is accepted and
        ``None`` is treated as "no external tools".
    """
    # Store a private list: the combined tool list is built with `+`, which would
    # fail for a tuple or None, and a copy keeps later mutations of the caller's
    # list from silently changing what the next rebuilt agent receives.
    self.external_tools = list(tools) if tools is not None else []
    # Drop the cached agent so the next run rebuilds it with the new tool set.
    self._agent = None

`run(prompt, response_schema=None, mode='generalist')`

Execution entry point for the agent.

Source code in exomodel/exoagent.py

def run(self, prompt: str, response_schema: Optional[Type] = None, mode: str = "generalist") -> Any:
    """Execution entry point for the agent.

    :param prompt: User message sent to the agent.
    :param response_schema: Optional schema; when the agent result carries a
        ``structured_response`` it is returned (a dict is passed to the schema
        as keyword arguments first) instead of text.
    :param mode: Persona key forwarded to ``_init_agent``.
    :return: The structured response, or the stripped text of the last message.
        When that text is empty, a warning string that embeds the most recent
        tool message content is returned instead.
    """
    print(f"\n[RUN] Mode: {mode} | Schema: {response_schema.__name__ if response_schema else 'None'}")

    # Re-initialize if parameters changed
    if not self._agent or mode != self._current_mode or response_schema != self._last_schema:
        # Discard the previous agent before rebuilding. If the rebuild raises,
        # the recorded mode/schema must not be paired with a stale agent built
        # for different parameters; leaving _agent empty forces a retry.
        self._agent = None
        self._current_mode = mode
        self._last_schema = response_schema
        self._init_agent(response_schema=response_schema, mode=mode)

    config = {
        "configurable": {"max_tokens": 4096, "temperature": 0},
        "recursion_limit": 10
    }

    print("Waiting for LLM response...")
    result = self._agent.invoke(
        {"messages": [{"role": "user", "content": prompt}]},
        config=config
    )

    # Handle Structured Output
    if response_schema and "structured_response" in result:
        raw_data = result["structured_response"]
        return response_schema(**raw_data) if isinstance(raw_data, dict) else raw_data

    # Extract content from the last message of the conversation
    final_message = result["messages"][-1]
    final_content = getattr(final_message, 'content', '')

    # Silent Output Fallback (Anti-Mute Trigger)
    if not final_content:
        print("[CRITICAL] Empty response detected. Triggering recovery fallback.")
        rag_text = "No context retrieved."
        # Walk backwards so the most recent tool output is the one surfaced.
        for msg in reversed(result['messages']):
            if getattr(msg, 'type', '').lower() in ['tool', 'toolmessage']:
                rag_text = getattr(msg, 'content', rag_text)
                break

        final_content = (
            "⚠️ **System Warning:** The AI failed to generate a response. "
            "Displaying raw context found:\n\n"
            f"_{rag_text}_"
        )

    return str(final_content).strip()

`get_web_markdown(url)`

Fetches a URL and converts its body to clean Markdown.

Source code in exomodel/exoagent.py

def get_web_markdown(self, url: str) -> str:
    """Fetches a URL and converts its body to clean Markdown.

    :param url: Address of the page to download.
    :return: The Markdown rendering of the response body, truncated to the
        first 16000 characters.
    """
    # A timeout is required: without one, requests waits indefinitely on an
    # unresponsive host and blocks the caller.
    response = requests.get(
        url,
        headers={'User-Agent': 'ExoAgentApp/1.0'},
        timeout=30,
    )
    html_content = response.text

    converter = html2text.HTML2Text()
    converter.ignore_links = False
    converter.bypass_tables = False
    converter.body_width = 0  # 0 disables hard line wrapping
    converter.ignore_images = True

    markdown_content = converter.handle(html_content)
    # Cap the size of the returned text.
    return markdown_content[:16000]

ExoModelList

A specialized container for managing collections of ExoModel entities.

`exomodel.exomodel_list.ExoModelList`

Bases: ExoModel, Generic[T]

ExoModelList: A specialized ExoModel that manages collections of other ExoModels. It handles bulk creation, updates, and CSV/UI representations for lists.

Source code in exomodel/exomodel_list.py

class ExoModelList(ExoModel, Generic[T]):
    """
    ExoModelList: A specialized ExoModel that manages collections of other ExoModels.
    It handles bulk creation, updates, and CSV/UI representations for lists.

    Items are produced by asking the LLM for a structured response shaped by a
    schema generated from the item class, then re-instantiated as the item class.
    """
    items: List[T] = Field(default_factory=list)

    # Item class handled by this list; assigned in __init__.
    _item_class: Type[T] = PrivateAttr()

    def __init__(self, item_class: Type[T], prompt: str = "", **data):
        """
        Initializes the list manager for a specific ExoModel subclass.
        :param item_class: The ExoModel class that this list will contain.
        :param prompt: Optional prompt to immediately populate the list.
        :param data: Extra keyword arguments forwarded to the parent initializer.
        """
        super().__init__(**data)
        self._item_class = item_class

        # Inherit RAG sources from the item class itself
        self._rag_sources = self._item_class.get_rag_sources()

        if prompt:
            self.create_list(prompt)

    def _build_list_schema(self) -> Type[BaseModel]:
        """
        Dynamically constructs a list schema (envelope) containing only
        the allowed fields from the item class for LLM extraction.
        :return: A generated model with a single ``items`` field holding a list
            of the generated per-item schema.
        """
        # Keep only the fields that are not flagged as excluded
        fields_to_include = {
            name: (info.annotation, info)
            for name, info in self._item_class.model_fields.items()
            if not info.exclude
        }

        # Create the individual item schema (cleaned from technical fields)
        item_schema_name = f"{self._item_class.__name__}Extraction"
        ItemSchema = create_model(item_schema_name, **fields_to_include)

        # Create the list container that the LLM will fill
        container_name = f"{self._item_class.__name__}ListContainer"
        return create_model(
            container_name,
            items=(List[ItemSchema], Field(
                default_factory=list,
                description=f"A list of {self._item_class.__name__} objects"
            ))
        )

    def create_list(self, prompt: str) -> "ExoModelList[T]":
        """
        Populates the items list by processing a prompt through the LLM.
        :param prompt: Instruction describing the list to generate.
        :return: This instance, with ``items`` replaced by the generated items.
            ``items`` becomes empty when the LLM result is falsy or is neither a
            dict nor an object exposing ``items``.
        """
        response_schema = self._build_list_schema()
        prompt_llm = self._get_prompt_create_list(prompt=prompt)

        result = self.run_llm(
            prompt=prompt_llm,
            response_schema=response_schema,
            mode="hybrid"
        )

        extracted_items = []
        if result:
            if isinstance(result, dict):
                # Raw dict payload: validate it through the generated schema first.
                validated = response_schema(**result)
                extracted_items = validated.items
            elif hasattr(result, 'items'):
                extracted_items = result.items

        # Casting: Convert temporary schema objects back to the real ExoModel class
        self.items = [
            self._item_class(**item.model_dump())
            for item in extracted_items
        ]

        return self

    def update_list(self, prompt: str) -> "ExoModelList[T]":
        """
        Updates the internal list in-place based on a new instruction.
        :param prompt: Instruction describing the change to apply.
        :return: This instance, as returned by ``create_list``.
        """
        # Nothing to evolve yet: treat the instruction as a creation request.
        if not self.items:
            return self.create_list(prompt)

        # Give the LLM the current rows so it can regenerate the whole list.
        current_state_csv = self.to_csv()
        evolution_prompt = (
            f"CURRENT LIST STATE (CSV):\n{current_state_csv}\n\n"
            f"UPDATE INSTRUCTION: {prompt}"
        )

        return self.create_list(evolution_prompt)

    def _get_prompt_create_list(self, prompt: str) -> str:
        """
        Constructs the prompt for the LLM to generate the list.
        :param prompt: The caller's instruction, inserted into the template.
        :return: The ``create_list.md`` template filled with the entity name, the
            instruction and the field descriptions, or a short built-in prompt
            when that template file does not exist.
        """
        entity_name = self._item_class.__name__

        # Generate field descriptions for the LLM context
        field_descriptions = []
        for name, field in self._item_class.model_fields.items():
            if not field.exclude:
                desc = field.description or "No description provided."
                field_descriptions.append(f"- {name}: {desc}")

        fields_info = "\n".join(field_descriptions)

        # Note: In a final open-source version, this should move to a YAML prompt library
        file_path = self._get_prompt_path("create_list.md")
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                template = f.read()
            return template.format(entity_name=entity_name, prompt=prompt, obj_fields_info=fields_info)
        except FileNotFoundError:
            return f"Create a list of {entity_name} based on: {prompt}. Fields: {fields_info}"

    def to_csv(self, delimiter: str = ";") -> str:
        """
        Converts the entire list to a single CSV string with headers.
        :param delimiter: Separator forwarded to each item's ``to_csv``.
        :return: One newline-joined string in which only the first item is asked
            to include a header, or an empty string for an empty list.
        """
        if not self.items:
            return ""

        output = [self.items[0].to_csv(delimiter=delimiter, include_header=True)]
        for item in self.items[1:]:
            output.append(item.to_csv(delimiter=delimiter, include_header=False))

        return "\n".join(output)

    def to_ui(self) -> str:
        """
        Generates a high-quality UI representation for Telegram/CLI.
        :return: HTML-tagged text with a title, the item count and one section
            per item listing its non-excluded fields.
        """
        item_title = self._item_class.__name__.upper() if self._item_class else "ITEM"

        lines = [
            f"<b>{item_title} LIST</b>",
            f"<i>Total: {len(self.items)} items</i>",
            "━━━━━━━━━━━━━━━━━━━━\n"
        ]

        if not self.items:
            lines.append("⚪ <i>This list is currently empty.</i>")
        else:
            for i, item in enumerate(self.items, 1):
                lines.append(f"🔹 <b>ITEM #{i}</b>")
                # Read the field map from the class: instance access to
                # `model_fields` is deprecated in Pydantic 2.11+, and the rest of
                # this class already reads it from the class.
                for name, field in type(item).model_fields.items():
                    if field.exclude:
                        continue
                    val = getattr(item, name, "---")
                    clean_name = name.replace("_", " ").title()
                    lines.append(f"  ▪️ <b>{clean_name}:</b> {val}")

                # Separator between items, omitted after the last one.
                if i < len(self.items):
                    lines.append("  " + "┈" * 15)

        lines.append("\n━━━━━━━━━━━━━━━━━━━━")
        return "\n".join(lines)

    def __str__(self) -> str:
        """Return a short summary with the item class name and item count."""
        count = len(self.items)
        return f"ExoModelList<{self._item_class.__name__}> (Count: {count})"

    def __repr__(self) -> str:
        """Return the same summary as ``__str__``."""
        return self.__str__()

`create_list(prompt)`

Populates the items list by processing a prompt through the LLM.

Source code in exomodel/exomodel_list.py

def create_list(self, prompt: str) -> "ExoModelList[T]":
    """
    Fill ``items`` from the LLM's structured answer to ``prompt`` and return self.
    """
    envelope_schema = self._build_list_schema()
    llm_prompt = self._get_prompt_create_list(prompt=prompt)

    answer = self.run_llm(
        prompt=llm_prompt,
        response_schema=envelope_schema,
        mode="hybrid"
    )

    # Normalise the answer into a sequence of temporary schema objects.
    if not answer:
        raw_entries = []
    elif isinstance(answer, dict):
        # Raw dict payload: validate it through the generated schema first.
        raw_entries = envelope_schema(**answer).items
    elif hasattr(answer, 'items'):
        raw_entries = answer.items
    else:
        raw_entries = []

    # Re-instantiate every temporary object as the real item class.
    rebuilt = []
    for entry in raw_entries:
        rebuilt.append(self._item_class(**entry.model_dump()))
    self.items = rebuilt

    return self

`update_list(prompt)`

Updates the internal list in-place based on a new instruction.

Source code in exomodel/exomodel_list.py

def update_list(self, prompt: str) -> "ExoModelList[T]":
    """
    Regenerate the list from its current CSV state plus a new instruction.

    With no items yet, the instruction is passed to ``create_list`` unchanged.
    """
    instruction = prompt
    if self.items:
        # Prefix the instruction with a CSV snapshot of the current rows.
        snapshot = self.to_csv()
        instruction = "CURRENT LIST STATE (CSV):\n{}\n\nUPDATE INSTRUCTION: {}".format(
            snapshot, prompt
        )
    return self.create_list(instruction)

`to_csv(delimiter=';')`

Converts the entire list to a single CSV string with headers.

Source code in exomodel/exomodel_list.py

def to_csv(self, delimiter: str = ";") -> str:
    """Render every item as CSV, asking only the first item for a header row."""
    rows = self.items
    if not rows:
        return ""

    return "\n".join(
        row.to_csv(delimiter=delimiter, include_header=(position == 0))
        for position, row in enumerate(rows)
    )

`to_ui()`

Generates a high-quality UI representation for Telegram/CLI.

Source code in exomodel/exomodel_list.py

def to_ui(self) -> str:
    """
    Generates a high-quality UI representation for Telegram/CLI.
    :return: HTML-tagged text with a title, the item count and one section
        per item listing its non-excluded fields.
    """
    item_title = self._item_class.__name__.upper() if self._item_class else "ITEM"

    lines = [
        f"<b>{item_title} LIST</b>",
        f"<i>Total: {len(self.items)} items</i>",
        "━━━━━━━━━━━━━━━━━━━━\n"
    ]

    if not self.items:
        lines.append("⚪ <i>This list is currently empty.</i>")
    else:
        for i, item in enumerate(self.items, 1):
            lines.append(f"🔹 <b>ITEM #{i}</b>")
            # Read the field map from the class: instance access to
            # `model_fields` is deprecated in Pydantic 2.11+.
            for name, field in type(item).model_fields.items():
                if field.exclude:
                    continue
                val = getattr(item, name, "---")
                clean_name = name.replace("_", " ").title()
                lines.append(f"  ▪️ <b>{clean_name}:</b> {val}")

            # Separator between items, omitted after the last one.
            if i < len(self.items):
                lines.append("  " + "┈" * 15)

    lines.append("\n━━━━━━━━━━━━━━━━━━━━")
    return "\n".join(lines)

Reference Guide

ExoModel

exomodel.exomodel.ExoModel

update_object(prompt)

update_field(field_name, prompt)

run_analysis()

run_object_prompt(prompt)

run_filling_instructions()

to_csv(delimiter=';', include_header=True)

to_ui()

get_rag_sources() classmethod

add_rag_source(rag_source)

ExoAgent

exomodel.exoagent.ExoAgent

add_rag_sources(sources)

set_external_tools(tools)

run(prompt, response_schema=None, mode='generalist')

get_web_markdown(url)

ExoModelList

exomodel.exomodel_list.ExoModelList

create_list(prompt)

update_list(prompt)

to_csv(delimiter=';')

to_ui()

`exomodel.exomodel.ExoModel`

`update_object(prompt)`

`update_field(field_name, prompt)`

`run_analysis()`

`run_object_prompt(prompt)`

`run_filling_instructions()`

`to_csv(delimiter=';', include_header=True)`

`to_ui()`

`get_rag_sources()` `classmethod`

`add_rag_source(rag_source)`

`exomodel.exoagent.ExoAgent`

`add_rag_sources(sources)`

`set_external_tools(tools)`

`run(prompt, response_schema=None, mode='generalist')`

`get_web_markdown(url)`

`exomodel.exomodel_list.ExoModelList`

`create_list(prompt)`

`update_list(prompt)`

`to_csv(delimiter=';')`

`to_ui()`