Services

BaseService #

Bases: MessageQueuePublisherMixin, ABC, BaseModel

Base class for a service.

The general structure of a service is as follows: - A service has a name. - A service has a service definition. - A service uses a message queue to send/receive messages. - A service has a processing loop, for continuous processing of messages. - A service can process a message. - A service can publish a message to another service. - A service can be launched in-process. - A service can be launched as a server. - A service can be registered to the control plane. - A service can be registered to the message queue.

Source code in llama-agents/llama_deploy/services/base.py

class BaseService(MessageQueuePublisherMixin, ABC, BaseModel):
    """Base class for a service.

    The general structure of a service is as follows:
    - A service has a name.
    - A service has a service definition.
    - A service uses a message queue to send/receive messages.
    - A service has a processing loop, for continuous processing of messages.
    - A service can process a message.
    - A service can publish a message to another service.
    - A service can be launched in-process.
    - A service can be launched as a server.
    - A service can be registered to the control plane.
    - A service can be registered to the message queue.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)
    service_name: str

    @property
    @abstractmethod
    def service_definition(self) -> ServiceDefinition:
        """The service definition."""
        ...

    @abstractmethod
    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue."""
        ...

    @abstractmethod
    async def processing_loop(self) -> None:
        """The processing loop for the service."""
        ...

    @abstractmethod
    async def process_message(self, message: QueueMessage) -> Any:
        """Process a message."""
        ...

    @abstractmethod
    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        ...

    @abstractmethod
    async def launch_server(self) -> None:
        """Launch the service as a server."""
        ...

    async def register_to_control_plane(self, control_plane_url: str) -> None:
        """Register the service to the control plane."""
        service_def = self.service_definition
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{control_plane_url}/services/register",
                json=service_def.model_dump(),
            )
            response.raise_for_status()

    async def deregister_from_control_plane(self, control_plane_url: str) -> None:
        """Deregister the service from the control plane."""
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{control_plane_url}/services/deregister",
                json={"service_name": self.service_name},
            )
            response.raise_for_status()

    async def register_to_message_queue(self) -> StartConsumingCallable:
        """Register the service to the message queue."""
        return await self.message_queue.register_consumer(self.as_consumer(remote=True))

service_definition `abstractmethod` `property` #

service_definition: ServiceDefinition

The service definition.

as_consumer `abstractmethod` #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Source code in llama-agents/llama_deploy/services/base.py

@abstractmethod
def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue."""
    ...

processing_loop `abstractmethod` `async` #

processing_loop() -> None

The processing loop for the service.

Source code in llama-agents/llama_deploy/services/base.py

@abstractmethod
async def processing_loop(self) -> None:
    """The processing loop for the service."""
    ...

process_message `abstractmethod` `async` #

process_message(message: QueueMessage) -> Any

Process a message.

Source code in llama-agents/llama_deploy/services/base.py

@abstractmethod
async def process_message(self, message: QueueMessage) -> Any:
    """Process a message."""
    ...

launch_local `abstractmethod` `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama-agents/llama_deploy/services/base.py

@abstractmethod
async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    ...

launch_server `abstractmethod` `async` #

launch_server() -> None

Launch the service as a server.

Source code in llama-agents/llama_deploy/services/base.py

@abstractmethod
async def launch_server(self) -> None:
    """Launch the service as a server."""
    ...

register_to_control_plane `async` #

register_to_control_plane(control_plane_url: str) -> None

Register the service to the control plane.

Source code in llama-agents/llama_deploy/services/base.py

async def register_to_control_plane(self, control_plane_url: str) -> None:
    """Register the service to the control plane."""
    service_def = self.service_definition
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{control_plane_url}/services/register",
            json=service_def.model_dump(),
        )
        response.raise_for_status()

deregister_from_control_plane `async` #

deregister_from_control_plane(control_plane_url: str) -> None

Deregister the service from the control plane.

Source code in llama-agents/llama_deploy/services/base.py

async def deregister_from_control_plane(self, control_plane_url: str) -> None:
    """Deregister the service from the control plane."""
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{control_plane_url}/services/deregister",
            json={"service_name": self.service_name},
        )
        response.raise_for_status()

register_to_message_queue `async` #

register_to_message_queue() -> StartConsumingCallable

Register the service to the message queue.

Source code in llama-agents/llama_deploy/services/base.py

async def register_to_message_queue(self) -> StartConsumingCallable:
    """Register the service to the message queue."""
    return await self.message_queue.register_consumer(self.as_consumer(remote=True))

AgentService #

Bases: BaseService

Agent Service.

A service that runs an agent locally, processing incoming tasks step-wise in an endless loop.

Messages are published to the message queue, and the agent processes them in a loop, finally returning a message with the completed task.

This AgentService can either be run in a local loop or as a Fast-API server.

Exposes the following endpoints: - GET /: Home endpoint. - POST /process_message: Process a message. - POST /task: Create a task. - GET /messages: Get messages. - POST /toggle_agent_running: Toggle the agent running state. - GET /is_worker_running: Check if the agent is running. - POST /reset_agent: Reset the agent.

Since the agent can launch as a FastAPI server, you can visit /docs for full swagger documentation.

Attributes:

Name	Type	Description
`service_name`	`str`	The name of the service.
`agent`	`AgentRunner`	The agent to run.
`description`	`str`	The description of the service.
`prompt`	`Optional[List[ChatMessage]]`	The prompt messages, meant to be appended to the start of tasks (currently TODO).
`running`	`bool`	Whether the agent is running.
`step_interval`	`float`	The interval in seconds to poll for task completion. Defaults to 0.1s.
`host`	`Optional[str]`	The host to launch a FastAPI server on.
`port`	`Optional[int]`	The port to launch a FastAPI server on.
`raise_exceptions`	`bool`	Whether to raise exceptions in the processing loop.

Examples:

from llama_deploy import AgentService
from llama_index.core.agent import ReActAgent

agent = ReActAgent.from_tools([...], llm=llm)
agent_service = AgentService(
    agent,
    message_queue,
    service_name="my_agent_service",
    description="My Agent Service",
    host="127.0.0.1",
    port=8003,
)

# launch as a server for remote access or documentation
await agent_service.launch_server()

Source code in llama-agents/llama_deploy/services/agent.py

class AgentService(BaseService):
    """Agent Service.

    A service that runs an agent locally, processing incoming tasks step-wise in an endless loop.

    Messages are published to the message queue, and the agent processes them in a loop,
    finally returning a message with the completed task.

    This AgentService can either be run in a local loop or as a Fast-API server.

    Exposes the following endpoints:
    - GET `/`: Home endpoint.
    - POST `/process_message`: Process a message.
    - POST `/task`: Create a task.
    - GET `/messages`: Get messages.
    - POST `/toggle_agent_running`: Toggle the agent running state.
    - GET `/is_worker_running`: Check if the agent is running.
    - POST `/reset_agent`: Reset the agent.

    Since the agent can launch as a FastAPI server, you can visit `/docs` for full swagger documentation.

    Attributes:
        service_name (str):
            The name of the service.
        agent (AgentRunner):
            The agent to run.
        description (str):
            The description of the service.
        prompt (Optional[List[ChatMessage]]):
            The prompt messages, meant to be appended to the start of tasks (currently TODO).
        running (bool):
            Whether the agent is running.
        step_interval (float):
            The interval in seconds to poll for task completion. Defaults to 0.1s.
        host (Optional[str]):
            The host to launch a FastAPI server on.
        port (Optional[int]):
            The port to launch a FastAPI server on.
        raise_exceptions (bool):
            Whether to raise exceptions in the processing loop.

    Examples:
        ```python
        from llama_deploy import AgentService
        from llama_index.core.agent import ReActAgent

        agent = ReActAgent.from_tools([...], llm=llm)
        agent_service = AgentService(
            agent,
            message_queue,
            service_name="my_agent_service",
            description="My Agent Service",
            host="127.0.0.1",
            port=8003,
        )

        # launch as a server for remote access or documentation
        await agent_service.launch_server()
        ```
    """

    service_name: str
    agent: AgentRunner
    description: str = "Local Agent Service."
    prompt: Optional[List[ChatMessage]] = None
    running: bool = True
    step_interval: float = 0.1
    host: Optional[str] = None
    port: Optional[int] = None
    raise_exceptions: bool = False

    _message_queue: BaseMessageQueue = PrivateAttr()
    _app: FastAPI = PrivateAttr()
    _publisher_id: str = PrivateAttr()
    _publish_callback: Optional[PublishCallback] = PrivateAttr()
    _lock: asyncio.Lock = PrivateAttr()
    _tasks_as_tool_calls: Dict[str, ToolCall] = PrivateAttr()

    def __init__(
        self,
        agent: AgentRunner,
        message_queue: BaseMessageQueue,
        running: bool = True,
        description: str = "Agent Server",
        service_name: str = "default_agent",
        prompt: Optional[List[ChatMessage]] = None,
        publish_callback: Optional[PublishCallback] = None,
        step_interval: float = 0.1,
        host: Optional[str] = None,
        port: Optional[int] = None,
        raise_exceptions: bool = False,
    ) -> None:
        super().__init__(
            agent=agent,
            running=running,
            description=description,
            service_name=service_name,
            step_interval=step_interval,
            prompt=prompt,
            host=host,
            port=port,
            raise_exceptions=raise_exceptions,
        )

        self._lock = asyncio.Lock()
        self._tasks_as_tool_calls = {}
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback
        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route("/", self.home, methods=["GET"], tags=["Agent State"])

        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Message Processing"],
        )

        self._app.add_api_route(
            "/task", self.create_task, methods=["POST"], tags=["Tasks"]
        )

        self._app.add_api_route(
            "/messages", self.get_messages, methods=["GET"], tags=["Agent State"]
        )
        self._app.add_api_route(
            "/toggle_agent_running",
            self.toggle_agent_running,
            methods=["POST"],
            tags=["Agent State"],
        )
        self._app.add_api_route(
            "/is_worker_running",
            self.is_worker_running,
            methods=["GET"],
            tags=["Agent State"],
        )
        self._app.add_api_route(
            "/reset_agent", self.reset_agent, methods=["POST"], tags=["Agent State"]
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """The service definition."""
        return ServiceDefinition(
            service_name=self.service_name,
            description=self.description,
            prompt=self.prompt or [],
            host=self.host,
            port=self.port,
        )

    @property
    def message_queue(self) -> BaseMessageQueue:
        """The message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """The publisher id."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """The publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> asyncio.Lock:
        return self._lock

    @property
    def tool_name(self) -> str:
        """The name reserved when this service is used as a tool."""
        return get_tool_name_from_service_name(self.service_name)

    async def processing_loop(self) -> None:
        """The processing loop for the agent."""
        logger.info("Processing initiated.")
        while True:
            try:
                if not self.running:
                    await asyncio.sleep(self.step_interval)
                    continue

                current_tasks = self.agent.list_tasks()
                current_task_ids = [task.task_id for task in current_tasks]

                completed_tasks = self.agent.get_completed_tasks()
                completed_task_ids = [task.task_id for task in completed_tasks]

                for task_id in current_task_ids:
                    if task_id in completed_task_ids:
                        continue

                    step_output = await self.agent.arun_step(task_id)

                    if step_output.is_last:
                        # finalize the response
                        response = self.agent.finalize_response(
                            task_id, step_output=step_output
                        )

                        # convert memory chat messages
                        llama_messages = self.agent.memory.get()
                        history = [ChatMessage(**x.dict()) for x in llama_messages]

                        # publish the completed task
                        async with self.lock:
                            try:
                                tool_call = self._tasks_as_tool_calls.pop(task_id)
                            except KeyError:
                                tool_call = None

                        if tool_call:
                            await self.publish(
                                QueueMessage(
                                    type=tool_call.source_id,
                                    action=ActionTypes.COMPLETED_TOOL_CALL,
                                    data=ToolCallResult(
                                        id_=tool_call.id_,
                                        tool_message=ChatMessage(
                                            content=str(response.response),
                                            role=MessageRole.TOOL,
                                            additional_kwargs={
                                                "name": tool_call.tool_call_bundle.tool_name,
                                                "tool_call_id": tool_call.id_,
                                            },
                                        ),
                                        result=response.response,
                                    ).model_dump(),
                                )
                            )
                        else:
                            await self.publish(
                                QueueMessage(
                                    type=CONTROL_PLANE_NAME,
                                    action=ActionTypes.COMPLETED_TASK,
                                    data=TaskResult(
                                        task_id=task_id,
                                        history=history,
                                        result=response.response,
                                    ).model_dump(),
                                )
                            )
            except Exception as e:
                logger.error(f"Error in {self.service_name} processing_loop: {e}")
                if self.raise_exceptions:
                    # Kill everything
                    # TODO: is there a better way to do this?
                    import signal

                    signal.raise_signal(signal.SIGINT)
                else:
                    await self.message_queue.publish(
                        QueueMessage(
                            type=CONTROL_PLANE_NAME,
                            action=ActionTypes.COMPLETED_TASK,
                            data=TaskResult(
                                task_id=task_id,
                                history=[],
                                result=f"Error during processing: {e}",
                            ).model_dump(),
                        )
                    )

                continue

            await asyncio.sleep(self.step_interval)

    async def process_message(self, message: QueueMessage) -> None:
        """Handling for when a message is received."""
        if message.action == ActionTypes.NEW_TASK:
            new_task = NewTask(**message.data or {})
            task_def = new_task.task
            self.agent.create_task(task_def.input, task_id=task_def.task_id)
            logger.info(f"Created new task: {task_def.task_id}")
        elif message.action == ActionTypes.NEW_TOOL_CALL:
            task_def = TaskDefinition(**message.data or {})
            async with self.lock:
                tool_call_bundle = ToolCallBundle(
                    tool_name=self.tool_name,
                    tool_args=(),
                    tool_kwargs={"input": task_def.input},
                )
                task_as_tool_call = ToolCall(
                    id_=task_def.task_id,
                    source_id=message.publisher_id,
                    tool_call_bundle=tool_call_bundle,
                )
                self._tasks_as_tool_calls[task_def.task_id] = task_as_tool_call
            self.agent.create_task(task_def.input, task_id=task_def.task_id)
            logger.info(f"Created new tool call as task: {task_def.task_id}")
        else:
            raise ValueError(f"Unhandled action: {message.action}")

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue.

        Args:
            remote (bool):
                Whether to get a remote consumer or local.
                If remote, calls the `process_message` endpoint.
        """
        if remote:
            url = (
                f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
                if self.port
                else f"http://{self.host}{self._app.url_path_for('process_message')}"
            )
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=url,
                message_type=self.service_name,
            )

        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the agent locally."""
        logger.info(f"{self.service_name} launch_local")
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        asyncio.create_task(self.processing_loop())
        yield
        self.running = False

    async def home(self) -> Dict[str, str]:
        """Home endpoint. Gets general information about the agent service."""
        tasks = self.agent.list_tasks()

        task_strings = []
        for task in tasks:
            task_output = self.agent.get_task_output(task.task_id)
            status = "COMPLETE" if task_output.is_last else "IN PROGRESS"
            memory_str = "\n".join(
                [f"{x.role}: {x.content}" for x in task.memory.get_all()]
            )
            task_strings.append(f"Agent Task {task.task_id}: {status}\n{memory_str}")

        complete_task_string = "\n".join(task_strings)

        return {
            "service_name": self.service_name,
            "description": self.description,
            "running": str(self.running),
            "step_interval": str(self.step_interval),
            "num_tasks": str(len(tasks)),
            "num_completed_tasks": str(len(self.agent.get_completed_tasks())),
            "prompt": "\n".join([str(x) for x in self.prompt]) if self.prompt else "",
            "type": "agent_service",
            "tasks": complete_task_string,
        }

    async def create_task(self, task: TaskDefinition) -> Dict[str, str]:
        """Create a task."""
        task_id = self.agent.create_task(task, task_id=task.task_id)
        return {"task_id": task_id}

    async def get_messages(self) -> List[_ChatMessage]:
        """Get messages from the agent."""
        messages = self.agent.chat_history

        return [_ChatMessage.from_chat_message(message) for message in messages]

    async def toggle_agent_running(
        self, state: Literal["running", "stopped"]
    ) -> Dict[str, bool]:
        """Toggle the agent running state."""
        self.running = state == "running"

        return {"running": self.running}

    async def is_worker_running(self) -> Dict[str, bool]:
        """Check if the agent is running."""
        return {"running": self.running}

    async def reset_agent(self) -> Dict[str, str]:
        """Reset the agent."""
        self.agent.reset()

        return {"message": "Agent reset"}

    async def launch_server(self) -> None:
        """Launch the agent as a FastAPI server."""
        logger.info(f"Launching {self.service_name} server at {self.host}:{self.port}")
        # uvicorn.run(self._app, host=self.host, port=self.port)

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
        server = CustomServer(cfg)
        await server.serve()

service_definition `property` #

service_definition: ServiceDefinition

The service definition.

message_queue `property` #

message_queue: BaseMessageQueue

The message queue.

publisher_id `property` #

publisher_id: str

The publisher id.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

The publish callback, if any.

tool_name `property` #

tool_name: str

The name reserved when this service is used as a tool.

processing_loop `async` #

processing_loop() -> None

The processing loop for the agent.

Source code in llama-agents/llama_deploy/services/agent.py

async def processing_loop(self) -> None:
    """The processing loop for the agent."""
    logger.info("Processing initiated.")
    while True:
        try:
            if not self.running:
                await asyncio.sleep(self.step_interval)
                continue

            current_tasks = self.agent.list_tasks()
            current_task_ids = [task.task_id for task in current_tasks]

            completed_tasks = self.agent.get_completed_tasks()
            completed_task_ids = [task.task_id for task in completed_tasks]

            for task_id in current_task_ids:
                if task_id in completed_task_ids:
                    continue

                step_output = await self.agent.arun_step(task_id)

                if step_output.is_last:
                    # finalize the response
                    response = self.agent.finalize_response(
                        task_id, step_output=step_output
                    )

                    # convert memory chat messages
                    llama_messages = self.agent.memory.get()
                    history = [ChatMessage(**x.dict()) for x in llama_messages]

                    # publish the completed task
                    async with self.lock:
                        try:
                            tool_call = self._tasks_as_tool_calls.pop(task_id)
                        except KeyError:
                            tool_call = None

                    if tool_call:
                        await self.publish(
                            QueueMessage(
                                type=tool_call.source_id,
                                action=ActionTypes.COMPLETED_TOOL_CALL,
                                data=ToolCallResult(
                                    id_=tool_call.id_,
                                    tool_message=ChatMessage(
                                        content=str(response.response),
                                        role=MessageRole.TOOL,
                                        additional_kwargs={
                                            "name": tool_call.tool_call_bundle.tool_name,
                                            "tool_call_id": tool_call.id_,
                                        },
                                    ),
                                    result=response.response,
                                ).model_dump(),
                            )
                        )
                    else:
                        await self.publish(
                            QueueMessage(
                                type=CONTROL_PLANE_NAME,
                                action=ActionTypes.COMPLETED_TASK,
                                data=TaskResult(
                                    task_id=task_id,
                                    history=history,
                                    result=response.response,
                                ).model_dump(),
                            )
                        )
        except Exception as e:
            logger.error(f"Error in {self.service_name} processing_loop: {e}")
            if self.raise_exceptions:
                # Kill everything
                # TODO: is there a better way to do this?
                import signal

                signal.raise_signal(signal.SIGINT)
            else:
                await self.message_queue.publish(
                    QueueMessage(
                        type=CONTROL_PLANE_NAME,
                        action=ActionTypes.COMPLETED_TASK,
                        data=TaskResult(
                            task_id=task_id,
                            history=[],
                            result=f"Error during processing: {e}",
                        ).model_dump(),
                    )
                )

            continue

        await asyncio.sleep(self.step_interval)

process_message `async` #

process_message(message: QueueMessage) -> None

Handling for when a message is received.

Source code in llama-agents/llama_deploy/services/agent.py

async def process_message(self, message: QueueMessage) -> None:
    """Handling for when a message is received."""
    if message.action == ActionTypes.NEW_TASK:
        new_task = NewTask(**message.data or {})
        task_def = new_task.task
        self.agent.create_task(task_def.input, task_id=task_def.task_id)
        logger.info(f"Created new task: {task_def.task_id}")
    elif message.action == ActionTypes.NEW_TOOL_CALL:
        task_def = TaskDefinition(**message.data or {})
        async with self.lock:
            tool_call_bundle = ToolCallBundle(
                tool_name=self.tool_name,
                tool_args=(),
                tool_kwargs={"input": task_def.input},
            )
            task_as_tool_call = ToolCall(
                id_=task_def.task_id,
                source_id=message.publisher_id,
                tool_call_bundle=tool_call_bundle,
            )
            self._tasks_as_tool_calls[task_def.task_id] = task_as_tool_call
        self.agent.create_task(task_def.input, task_id=task_def.task_id)
        logger.info(f"Created new tool call as task: {task_def.task_id}")
    else:
        raise ValueError(f"Unhandled action: {message.action}")

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether to get a remote consumer or local. If remote, calls the `process_message` endpoint.	`False`

Source code in llama-agents/llama_deploy/services/agent.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue.

    Args:
        remote (bool):
            Whether to get a remote consumer or local.
            If remote, calls the `process_message` endpoint.
    """
    if remote:
        url = (
            f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
            if self.port
            else f"http://{self.host}{self._app.url_path_for('process_message')}"
        )
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=url,
            message_type=self.service_name,
        )

    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the agent locally.

Source code in llama-agents/llama_deploy/services/agent.py

async def launch_local(self) -> asyncio.Task:
    """Launch the agent locally."""
    logger.info(f"{self.service_name} launch_local")
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama-agents/llama_deploy/services/agent.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    asyncio.create_task(self.processing_loop())
    yield
    self.running = False

home `async` #

home() -> Dict[str, str]

Home endpoint. Gets general information about the agent service.

Source code in llama-agents/llama_deploy/services/agent.py

async def home(self) -> Dict[str, str]:
    """Home endpoint. Gets general information about the agent service."""
    tasks = self.agent.list_tasks()

    task_strings = []
    for task in tasks:
        task_output = self.agent.get_task_output(task.task_id)
        status = "COMPLETE" if task_output.is_last else "IN PROGRESS"
        memory_str = "\n".join(
            [f"{x.role}: {x.content}" for x in task.memory.get_all()]
        )
        task_strings.append(f"Agent Task {task.task_id}: {status}\n{memory_str}")

    complete_task_string = "\n".join(task_strings)

    return {
        "service_name": self.service_name,
        "description": self.description,
        "running": str(self.running),
        "step_interval": str(self.step_interval),
        "num_tasks": str(len(tasks)),
        "num_completed_tasks": str(len(self.agent.get_completed_tasks())),
        "prompt": "\n".join([str(x) for x in self.prompt]) if self.prompt else "",
        "type": "agent_service",
        "tasks": complete_task_string,
    }

create_task `async` #

create_task(task: TaskDefinition) -> Dict[str, str]

Create a task.

Source code in llama-agents/llama_deploy/services/agent.py

async def create_task(self, task: TaskDefinition) -> Dict[str, str]:
    """Create a task."""
    task_id = self.agent.create_task(task, task_id=task.task_id)
    return {"task_id": task_id}

get_messages `async` #

get_messages() -> List[_ChatMessage]

Get messages from the agent.

Source code in llama-agents/llama_deploy/services/agent.py

async def get_messages(self) -> List[_ChatMessage]:
    """Get messages from the agent."""
    messages = self.agent.chat_history

    return [_ChatMessage.from_chat_message(message) for message in messages]

toggle_agent_running `async` #

toggle_agent_running(state: Literal['running', 'stopped']) -> Dict[str, bool]

Toggle the agent running state.

Source code in llama-agents/llama_deploy/services/agent.py

async def toggle_agent_running(
    self, state: Literal["running", "stopped"]
) -> Dict[str, bool]:
    """Toggle the agent running state."""
    self.running = state == "running"

    return {"running": self.running}

is_worker_running `async` #

is_worker_running() -> Dict[str, bool]

Check if the agent is running.

Source code in llama-agents/llama_deploy/services/agent.py

async def is_worker_running(self) -> Dict[str, bool]:
    """Check if the agent is running."""
    return {"running": self.running}

reset_agent `async` #

reset_agent() -> Dict[str, str]

Reset the agent.

Source code in llama-agents/llama_deploy/services/agent.py

async def reset_agent(self) -> Dict[str, str]:
    """Reset the agent."""
    self.agent.reset()

    return {"message": "Agent reset"}

launch_server `async` #

launch_server() -> None

Launch the agent as a FastAPI server.

Source code in llama-agents/llama_deploy/services/agent.py

async def launch_server(self) -> None:
    """Launch the agent as a FastAPI server."""
    logger.info(f"Launching {self.service_name} server at {self.host}:{self.port}")
    # uvicorn.run(self._app, host=self.host, port=self.port)

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
    server = CustomServer(cfg)
    await server.serve()

HumanService #

Bases: BaseService

A human service for providing human-in-the-loop assistance.

When launched locally, it will prompt the user for input, which is blocking!

When launched as a server, it will provide an API for creating and handling tasks.

Exposes the following endpoints: - GET /: Get the service information. - POST /process_message: Process a message. - POST /tasks: Create a task. - GET /tasks: Get all tasks. - GET /tasks/{task_id}: Get a task. - POST /tasks/{task_id}/handle: Handle a task.

Attributes:

Name	Type	Description
`service_name`	`str`	The name of the service.
`description`	`str`	The description of the service.
`running`	`bool`	Whether the service is running.
`step_interval`	`float`	The interval in seconds to poll for tool call results. Defaults to 0.1s.
`host`	`Optional[str]`	The host of the service.
`port`	`Optional[int]`	The port of the service.

Source code in llama-agents/llama_deploy/services/human.py

class HumanService(BaseService):
    """A human service for providing human-in-the-loop assistance.

    When launched locally, it will prompt the user for input, which is blocking!

    When launched as a server, it will provide an API for creating and handling tasks.

    Exposes the following endpoints:
    - GET `/`: Get the service information.
    - POST `/process_message`: Process a message.
    - POST `/tasks`: Create a task.
    - GET `/tasks`: Get all tasks.
    - GET `/tasks/{task_id}`: Get a task.
    - POST `/tasks/{task_id}/handle`: Handle a task.

    Attributes:
        service_name (str): The name of the service.
        description (str): The description of the service.
        running (bool): Whether the service is running.
        step_interval (float): The interval in seconds to poll for tool call results. Defaults to 0.1s.
        host (Optional[str]): The host of the service.
        port (Optional[int]): The port of the service.


    """

    model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)
    service_name: str
    description: str = "Local Human Service."
    running: bool = True
    step_interval: float = 0.1
    fn_input: HumanInputFn = default_human_input_fn
    human_input_prompt: str = (
        HELP_REQUEST_TEMPLATE_STR  # TODO: use PromptMixin, PromptTemplate
    )
    host: Optional[str] = None
    port: Optional[int] = None

    _outstanding_human_tasks: List["HumanTask"] = PrivateAttr()
    _message_queue: BaseMessageQueue = PrivateAttr()
    _app: FastAPI = PrivateAttr()
    _publisher_id: str = PrivateAttr()
    _publish_callback: Optional[PublishCallback] = PrivateAttr()
    _lock: Lock = PrivateAttr()
    _tasks_as_tool_calls: Dict[str, ToolCall] = PrivateAttr()

    def __init__(
        self,
        message_queue: BaseMessageQueue,
        running: bool = True,
        description: str = "Local Human Service",
        service_name: str = "default_human_service",
        publish_callback: Optional[PublishCallback] = None,
        step_interval: float = 0.1,
        fn_input: HumanInputFn = default_human_input_fn,
        human_input_prompt: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ) -> None:
        human_input_prompt = human_input_prompt or HELP_REQUEST_TEMPLATE_STR
        super().__init__(
            running=running,
            description=description,
            service_name=service_name,
            step_interval=step_interval,
            fn_input=fn_input,
            human_input_prompt=human_input_prompt,
            host=host,
            port=port,
        )

        self._outstanding_human_tasks = []
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback
        self._lock = asyncio.Lock()
        self._tasks_as_tool_calls = {}
        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route("/", self.home, methods=["GET"], tags=["Human Service"])
        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Human Service"],
        )

        self._app.add_api_route(
            "/tasks", self.create_task, methods=["POST"], tags=["Tasks"]
        )
        self._app.add_api_route(
            "/tasks", self.get_tasks, methods=["GET"], tags=["Tasks"]
        )
        self._app.add_api_route(
            "/tasks/{task_id}", self.get_task, methods=["GET"], tags=["Tasks"]
        )
        self._app.add_api_route(
            "/tasks/{task_id}/handle",
            self.handle_task,
            methods=["POST"],
            tags=["Tasks"],
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """Get the service definition."""
        return ServiceDefinition(
            service_name=self.service_name,
            description=self.description,
            prompt=[],
            host=self.host,
            port=self.port,
        )

    @property
    def message_queue(self) -> BaseMessageQueue:
        """The message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """The publisher ID."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """The publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> Lock:
        return self._lock

    @property
    def tool_name(self) -> str:
        """The name reserved when this service is used as a tool."""
        return get_tool_name_from_service_name(self.service_name)

    async def processing_loop(self) -> None:
        """The processing loop for the service."""
        logger.info("Processing initiated.")
        while True:
            if not self.running:
                await asyncio.sleep(self.step_interval)
                continue

            async with self.lock:
                try:
                    human_task = self._outstanding_human_tasks.pop(0)
                    task_def = human_task.task_def
                    tool_call = human_task.tool_call
                except IndexError:
                    await asyncio.sleep(self.step_interval)
                    continue

                logger.info(
                    f"Processing request for human help for task: {task_def.task_id}"
                )

                # process req
                prompt = (
                    self.human_input_prompt.format(input_str=task_def.input)
                    if self.human_input_prompt
                    else task_def.input
                )
                result = await self.fn_input(prompt=prompt, task_id=task_def.task_id)

                # create history
                history = [
                    ChatMessage(
                        role=MessageRole.ASSISTANT,
                        content=HELP_REQUEST_TEMPLATE_STR.format(
                            input_str=task_def.input
                        ),
                    ),
                    ChatMessage(role=MessageRole.USER, content=result),
                ]

                if tool_call:
                    await self.publish(
                        QueueMessage(
                            type=tool_call.source_id,
                            action=ActionTypes.COMPLETED_TOOL_CALL,
                            data=ToolCallResult(
                                id_=tool_call.id_,
                                tool_message=ChatMessage(
                                    content=result,
                                    role=MessageRole.TOOL,
                                    additional_kwargs={
                                        "name": tool_call.tool_call_bundle.tool_name,
                                        "tool_call_id": tool_call.id_,
                                    },
                                ),
                                result=result,
                            ).model_dump(),
                        )
                    )
                else:
                    # publish the completed task
                    await self.publish(
                        QueueMessage(
                            type=CONTROL_PLANE_NAME,
                            action=ActionTypes.COMPLETED_TASK,
                            data=TaskResult(
                                task_id=task_def.task_id,
                                history=history,
                                result=result,
                            ).model_dump(),
                        )
                    )

            await asyncio.sleep(self.step_interval)

    class HumanTask(BaseModel):
        """Container for Tasks to be completed by HumanService."""

        task_def: TaskDefinition
        tool_call: Optional[ToolCall] = None

    async def process_message(self, message: QueueMessage) -> None:
        """Process a message received from the message queue."""
        if message.action == ActionTypes.NEW_TASK:
            new_task = NewTask(**message.data or {})
            task_def = new_task.task
            human_task = self.HumanTask(task_def=task_def)
            logger.info(f"Created new task: {task_def.task_id}")
        elif message.action == ActionTypes.NEW_TOOL_CALL:
            task_def = TaskDefinition(**message.data or {})
            tool_call_bundle = ToolCallBundle(
                tool_name=self.tool_name,
                tool_args=(),
                tool_kwargs={"input": task_def.input},
            )
            task_as_tool_call = ToolCall(
                id_=task_def.task_id,
                source_id=message.publisher_id,
                tool_call_bundle=tool_call_bundle,
            )
            human_task = self.HumanTask(task_def=task_def, tool_call=task_as_tool_call)
            logger.info(f"Created new tool call as task: {task_def.task_id}")
        else:
            raise ValueError(f"Unhandled action: {message.action}")
        async with self.lock:
            self._outstanding_human_tasks.append(human_task)

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the service.

        Args:
            remote (bool):
                Whether the consumer is remote. Defaults to False.
                If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
        """
        if remote:
            url = (
                f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
                if self.port
                else f"http://{self.host}{self._app.url_path_for('process_message')}"
            )
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=url,
                message_type=self.service_name,
            )

        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        logger.info(f"{self.service_name} launch_local")
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        asyncio.create_task(self.processing_loop())
        yield
        self.running = False

    async def home(self) -> Dict[str, str]:
        """Get general service information."""
        return {
            "service_name": self.service_name,
            "description": self.description,
            "running": str(self.running),
            "step_interval": str(self.step_interval),
            "num_tasks": str(len(self._outstanding_human_tasks)),
            "tasks": "\n".join([str(task) for task in self._outstanding_human_tasks]),
            "type": "human_service",
        }

    async def create_task(self, task: TaskDefinition) -> Dict[str, str]:
        """Create a task for the human service."""
        async with self.lock:
            human_task = self.HumanTask(task_def=task)
            self._outstanding_human_tasks.append(human_task)
        return {"task_id": task.task_id}

    async def get_tasks(self) -> List[TaskDefinition]:
        """Get all outstanding tasks."""
        async with self.lock:
            return [ht.task_def for ht in self._outstanding_human_tasks]

    async def get_task(self, task_id: str) -> Optional[TaskDefinition]:
        """Get a specific task by ID."""
        async with self.lock:
            for human_task in self._outstanding_human_tasks:
                if human_task.task_def.task_id == task_id:
                    return human_task.task_def
        return None

    async def handle_task(self, task_id: str, result: HumanResponse) -> None:
        """Handle a task by providing a result."""
        async with self.lock:
            for human_task in self._outstanding_human_tasks:
                task_def = human_task.task_def
                if task_def.task_id == task_id:
                    self._outstanding_human_tasks.remove(human_task)
                    break

        logger.info(f"Processing request for human help for task: {task_def.task_id}")

        # create history
        history = [
            ChatMessage(
                role=MessageRole.ASSISTANT,
                content=HELP_REQUEST_TEMPLATE_STR.format(input_str=task_def.input),
            ),
            ChatMessage(role=MessageRole.USER, content=result.result),
        ]

        # publish the completed task
        await self.publish(
            QueueMessage(
                type=CONTROL_PLANE_NAME,
                action=ActionTypes.COMPLETED_TASK,
                data=TaskResult(
                    task_id=task_def.task_id,
                    history=history,
                    result=result.result,
                ).model_dump(),
            )
        )

    async def launch_server(self) -> None:
        """Launch the service as a FastAPI server."""
        logger.info(
            f"Lanching server for {self.service_name} at {self.host}:{self.port}"
        )

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
        server = CustomServer(cfg)
        await server.serve()

    @field_validator("human_input_prompt")
    @classmethod
    def validate_human_input_prompt(cls, v: str) -> str:
        """Check if `input_str` is a prompt key."""
        prompt_params = get_prompt_params(v)
        if "input_str" not in prompt_params:
            raise ValueError(
                "`input_str` should be the only param in the prompt template."
            )
        return v

service_definition `property` #

service_definition: ServiceDefinition

Get the service definition.

message_queue `property` #

message_queue: BaseMessageQueue

The message queue.

publisher_id `property` #

publisher_id: str

The publisher ID.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

The publish callback, if any.

tool_name `property` #

tool_name: str

The name reserved when this service is used as a tool.

HumanTask #

Bases: BaseModel

Container for Tasks to be completed by HumanService.

Source code in llama-agents/llama_deploy/services/human.py

class HumanTask(BaseModel):
    """Container for Tasks to be completed by HumanService."""

    task_def: TaskDefinition
    tool_call: Optional[ToolCall] = None

processing_loop `async` #

processing_loop() -> None

The processing loop for the service.

Source code in llama-agents/llama_deploy/services/human.py

async def processing_loop(self) -> None:
    """The processing loop for the service."""
    logger.info("Processing initiated.")
    while True:
        if not self.running:
            await asyncio.sleep(self.step_interval)
            continue

        async with self.lock:
            try:
                human_task = self._outstanding_human_tasks.pop(0)
                task_def = human_task.task_def
                tool_call = human_task.tool_call
            except IndexError:
                await asyncio.sleep(self.step_interval)
                continue

            logger.info(
                f"Processing request for human help for task: {task_def.task_id}"
            )

            # process req
            prompt = (
                self.human_input_prompt.format(input_str=task_def.input)
                if self.human_input_prompt
                else task_def.input
            )
            result = await self.fn_input(prompt=prompt, task_id=task_def.task_id)

            # create history
            history = [
                ChatMessage(
                    role=MessageRole.ASSISTANT,
                    content=HELP_REQUEST_TEMPLATE_STR.format(
                        input_str=task_def.input
                    ),
                ),
                ChatMessage(role=MessageRole.USER, content=result),
            ]

            if tool_call:
                await self.publish(
                    QueueMessage(
                        type=tool_call.source_id,
                        action=ActionTypes.COMPLETED_TOOL_CALL,
                        data=ToolCallResult(
                            id_=tool_call.id_,
                            tool_message=ChatMessage(
                                content=result,
                                role=MessageRole.TOOL,
                                additional_kwargs={
                                    "name": tool_call.tool_call_bundle.tool_name,
                                    "tool_call_id": tool_call.id_,
                                },
                            ),
                            result=result,
                        ).model_dump(),
                    )
                )
            else:
                # publish the completed task
                await self.publish(
                    QueueMessage(
                        type=CONTROL_PLANE_NAME,
                        action=ActionTypes.COMPLETED_TASK,
                        data=TaskResult(
                            task_id=task_def.task_id,
                            history=history,
                            result=result,
                        ).model_dump(),
                    )
                )

        await asyncio.sleep(self.step_interval)

process_message `async` #

process_message(message: QueueMessage) -> None

Process a message received from the message queue.

Source code in llama-agents/llama_deploy/services/human.py

async def process_message(self, message: QueueMessage) -> None:
    """Process a message received from the message queue."""
    if message.action == ActionTypes.NEW_TASK:
        new_task = NewTask(**message.data or {})
        task_def = new_task.task
        human_task = self.HumanTask(task_def=task_def)
        logger.info(f"Created new task: {task_def.task_id}")
    elif message.action == ActionTypes.NEW_TOOL_CALL:
        task_def = TaskDefinition(**message.data or {})
        tool_call_bundle = ToolCallBundle(
            tool_name=self.tool_name,
            tool_args=(),
            tool_kwargs={"input": task_def.input},
        )
        task_as_tool_call = ToolCall(
            id_=task_def.task_id,
            source_id=message.publisher_id,
            tool_call_bundle=tool_call_bundle,
        )
        human_task = self.HumanTask(task_def=task_def, tool_call=task_as_tool_call)
        logger.info(f"Created new tool call as task: {task_def.task_id}")
    else:
        raise ValueError(f"Unhandled action: {message.action}")
    async with self.lock:
        self._outstanding_human_tasks.append(human_task)

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the service.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether the consumer is remote. Defaults to False. If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.	`False`

Source code in llama-agents/llama_deploy/services/human.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the service.

    Args:
        remote (bool):
            Whether the consumer is remote. Defaults to False.
            If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
    """
    if remote:
        url = (
            f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
            if self.port
            else f"http://{self.host}{self._app.url_path_for('process_message')}"
        )
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=url,
            message_type=self.service_name,
        )

    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama-agents/llama_deploy/services/human.py

async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    logger.info(f"{self.service_name} launch_local")
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama-agents/llama_deploy/services/human.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    asyncio.create_task(self.processing_loop())
    yield
    self.running = False

home `async` #

home() -> Dict[str, str]

Get general service information.

Source code in llama-agents/llama_deploy/services/human.py

async def home(self) -> Dict[str, str]:
    """Get general service information."""
    return {
        "service_name": self.service_name,
        "description": self.description,
        "running": str(self.running),
        "step_interval": str(self.step_interval),
        "num_tasks": str(len(self._outstanding_human_tasks)),
        "tasks": "\n".join([str(task) for task in self._outstanding_human_tasks]),
        "type": "human_service",
    }

create_task `async` #

create_task(task: TaskDefinition) -> Dict[str, str]

Create a task for the human service.

Source code in llama-agents/llama_deploy/services/human.py

async def create_task(self, task: TaskDefinition) -> Dict[str, str]:
    """Create a task for the human service."""
    async with self.lock:
        human_task = self.HumanTask(task_def=task)
        self._outstanding_human_tasks.append(human_task)
    return {"task_id": task.task_id}

get_tasks `async` #

get_tasks() -> List[TaskDefinition]

Get all outstanding tasks.

Source code in llama-agents/llama_deploy/services/human.py

async def get_tasks(self) -> List[TaskDefinition]:
    """Get all outstanding tasks."""
    async with self.lock:
        return [ht.task_def for ht in self._outstanding_human_tasks]

get_task `async` #

get_task(task_id: str) -> Optional[TaskDefinition]

Get a specific task by ID.

Source code in llama-agents/llama_deploy/services/human.py

async def get_task(self, task_id: str) -> Optional[TaskDefinition]:
    """Get a specific task by ID."""
    async with self.lock:
        for human_task in self._outstanding_human_tasks:
            if human_task.task_def.task_id == task_id:
                return human_task.task_def
    return None

handle_task `async` #

handle_task(task_id: str, result: HumanResponse) -> None

Handle a task by providing a result.

Source code in llama-agents/llama_deploy/services/human.py

async def handle_task(self, task_id: str, result: HumanResponse) -> None:
    """Handle a task by providing a result."""
    async with self.lock:
        for human_task in self._outstanding_human_tasks:
            task_def = human_task.task_def
            if task_def.task_id == task_id:
                self._outstanding_human_tasks.remove(human_task)
                break

    logger.info(f"Processing request for human help for task: {task_def.task_id}")

    # create history
    history = [
        ChatMessage(
            role=MessageRole.ASSISTANT,
            content=HELP_REQUEST_TEMPLATE_STR.format(input_str=task_def.input),
        ),
        ChatMessage(role=MessageRole.USER, content=result.result),
    ]

    # publish the completed task
    await self.publish(
        QueueMessage(
            type=CONTROL_PLANE_NAME,
            action=ActionTypes.COMPLETED_TASK,
            data=TaskResult(
                task_id=task_def.task_id,
                history=history,
                result=result.result,
            ).model_dump(),
        )
    )

launch_server `async` #

launch_server() -> None

Launch the service as a FastAPI server.

Source code in llama-agents/llama_deploy/services/human.py

async def launch_server(self) -> None:
    """Launch the service as a FastAPI server."""
    logger.info(
        f"Lanching server for {self.service_name} at {self.host}:{self.port}"
    )

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
    server = CustomServer(cfg)
    await server.serve()

validate_human_input_prompt `classmethod` #

validate_human_input_prompt(v: str) -> str

Check if input_str is a prompt key.

Source code in llama-agents/llama_deploy/services/human.py

@field_validator("human_input_prompt")
@classmethod
def validate_human_input_prompt(cls, v: str) -> str:
    """Check if `input_str` is a prompt key."""
    prompt_params = get_prompt_params(v)
    if "input_str" not in prompt_params:
        raise ValueError(
            "`input_str` should be the only param in the prompt template."
        )
    return v

ToolService #

Bases: BaseService

A service that executes tools remotely for other services.

This service is responsible for executing tools remotely for other services and agents.

Exposes the following endpoints: - GET /: Home endpoint. - POST /tool_call: Create a tool call. - GET /tool: Get a tool by name. - POST /process_message: Process a message.

Attributes:

Name	Type	Description
`tools`	`List[AsyncBaseTool]`	A list of tools to execute.
`description`	`str`	The description of the tool service.
`running`	`bool`	Whether the service is running.
`step_interval`	`float`	The interval in seconds to poll for tool call results. Defaults to 0.1s.
`host`	`Optional[str]`	The host of the service.
`port`	`Optional[int]`	The port of the service.

Examples:

from llama_deploy import ToolService, MetaServiceTool, SimpleMessageQueue
from llama_index.core.llms import OpenAI
from llama_index.core.agent import FunctionCallingAgentWorker

message_queue = SimpleMessageQueue()

tool_service = ToolService(
    message_queue=message_queue,
    tools=[tool],
    running=True,
    step_interval=0.5,
)

# create a meta tool and use it in any other agent
# this allows remote execution of that tool
meta_tool = MetaServiceTool(
    tool_metadata=tool.metadata,
    message_queue=message_queue,
    tool_service_name=tool_service.service_name,
)
agent = FunctionCallingAgentWorker.from_tools(
    [meta_tool],
    llm=OpenAI(),
).as_agent()

Source code in llama-agents/llama_deploy/services/tool.py

class ToolService(BaseService):
    """A service that executes tools remotely for other services.

    This service is responsible for executing tools remotely for other services and agents.

    Exposes the following endpoints:
    - GET `/`: Home endpoint.
    - POST `/tool_call`: Create a tool call.
    - GET `/tool`: Get a tool by name.
    - POST `/process_message`: Process a message.

    Attributes:
        tools (List[AsyncBaseTool]):
            A list of tools to execute.
        description (str):
            The description of the tool service.
        running (bool):
            Whether the service is running.
        step_interval (float):
            The interval in seconds to poll for tool call results. Defaults to 0.1s.
        host (Optional[str]):
            The host of the service.
        port (Optional[int]):
            The port of the service.

    Examples:
        ```python
        from llama_deploy import ToolService, MetaServiceTool, SimpleMessageQueue
        from llama_index.core.llms import OpenAI
        from llama_index.core.agent import FunctionCallingAgentWorker

        message_queue = SimpleMessageQueue()

        tool_service = ToolService(
            message_queue=message_queue,
            tools=[tool],
            running=True,
            step_interval=0.5,
        )

        # create a meta tool and use it in any other agent
        # this allows remote execution of that tool
        meta_tool = MetaServiceTool(
            tool_metadata=tool.metadata,
            message_queue=message_queue,
            tool_service_name=tool_service.service_name,
        )
        agent = FunctionCallingAgentWorker.from_tools(
            [meta_tool],
            llm=OpenAI(),
        ).as_agent()
        ```
    """

    service_name: str
    tools: List[AsyncBaseTool]
    description: str = "Local Tool Service."
    running: bool = True
    step_interval: float = 0.1
    host: Optional[str] = None
    port: Optional[int] = None

    _outstanding_tool_calls: Dict[str, ToolCall] = PrivateAttr()
    _message_queue: BaseMessageQueue = PrivateAttr()
    _app: FastAPI = PrivateAttr()
    _publisher_id: str = PrivateAttr()
    _publish_callback: Optional[PublishCallback] = PrivateAttr()
    _lock: Lock = PrivateAttr()

    def __init__(
        self,
        message_queue: BaseMessageQueue,
        tools: Optional[List[BaseTool]] = None,
        running: bool = True,
        description: str = "Tool Server",
        service_name: str = "default_tool_service",
        publish_callback: Optional[PublishCallback] = None,
        step_interval: float = 0.1,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ) -> None:
        tools = tools or []
        tools = [adapt_to_async_tool(t) for t in tools]
        super().__init__(
            tools=tools,
            running=running,
            description=description,
            service_name=service_name,
            step_interval=step_interval,
            host=host,
            port=port,
        )

        self._outstanding_tool_calls = {}
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback
        self._lock = asyncio.Lock()
        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route("/", self.home, methods=["GET"], tags=["Tool Service"])

        self._app.add_api_route(
            "/tool_call", self.create_tool_call, methods=["POST"], tags=["Tool Call"]
        )

        self._app.add_api_route(
            "/tool", self.get_tool_by_name, methods=["GET"], tags=["Tool"]
        )

        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Message Processing"],
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """The service definition."""
        return ServiceDefinition(
            service_name=self.service_name,
            description=self.description,
            prompt=[],
            host=self.host,
            port=self.port,
        )

    @property
    def message_queue(self) -> BaseMessageQueue:
        """The message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """The publisher ID."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """The publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> Lock:
        return self._lock

    async def processing_loop(self) -> None:
        """The processing loop for the service."""
        logger.info("Processing initiated.")
        while True:
            if not self.running:
                await asyncio.sleep(self.step_interval)
                continue

            async with self.lock:
                current_tool_calls: List[ToolCall] = [
                    *self._outstanding_tool_calls.values()
                ]
            for tool_call in current_tool_calls:
                tool = get_function_by_name(
                    self.tools, tool_call.tool_call_bundle.tool_name
                )

                logger.info(
                    f"Processing tool call id {tool_call.id_} with {tool.metadata.name}"
                )
                tool_output = await tool.acall(
                    *tool_call.tool_call_bundle.tool_args,
                    **tool_call.tool_call_bundle.tool_kwargs,
                )

                # execute function call
                tool_message = ChatMessage(
                    content=str(tool_output),
                    role=MessageRole.TOOL,
                    additional_kwargs={
                        "name": tool_call.tool_call_bundle.tool_name,
                        "tool_call_id": tool_call.id_,
                    },
                )

                # publish the completed task
                await self.publish(
                    QueueMessage(
                        type=tool_call.source_id,
                        action=ActionTypes.COMPLETED_TOOL_CALL,
                        data=ToolCallResult(
                            id_=tool_call.id_,
                            tool_message=tool_message,
                            result=str(tool_output),
                        ).model_dump(),
                    )
                )

                # clean up
                async with self.lock:
                    del self._outstanding_tool_calls[tool_call.id_]

            await asyncio.sleep(self.step_interval)

    async def process_message(self, message: QueueMessage) -> None:
        """Process a message."""
        if message.action == ActionTypes.NEW_TOOL_CALL:
            tool_call_data = {"source_id": message.publisher_id}
            tool_call_data.update(message.data or {})
            tool_call = ToolCall(**tool_call_data)
            async with self.lock:
                self._outstanding_tool_calls.update({tool_call.id_: tool_call})
        else:
            raise ValueError(f"Unhandled action: {message.action}")

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the service.

        Args:
            remote (bool):
                Whether the consumer is remote. Defaults to False.
                If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
        """
        if remote:
            url = (
                f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
                if self.port
                else f"http://{self.host}{self._app.url_path_for('process_message')}"
            )
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=url,
                message_type=self.service_name,
            )
        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        asyncio.create_task(self.processing_loop())
        yield
        self.running = False

    async def home(self) -> Dict[str, str]:
        """Home endpoint. Returns the general information about the service."""
        return {
            "service_name": self.service_name,
            "description": self.description,
            "running": str(self.running),
            "step_interval": str(self.step_interval),
            "num_tools": str(len(self.tools)),
            "num_outstanding_tool_calls": str(len(self._outstanding_tool_calls)),
            "tool_calls": "\n".join(
                [str(tool_call) for tool_call in self._outstanding_tool_calls.values()]
            ),
            "type": "tool_service",
        }

    async def create_tool_call(self, tool_call: ToolCall) -> Dict[str, str]:
        """Create a tool call."""
        async with self.lock:
            self._outstanding_tool_calls.update({tool_call.id_: tool_call})
        return {"tool_call_id": tool_call.id_}

    async def get_tool_by_name(self, name: str) -> Dict[str, Any]:
        """Get a tool by name."""
        name_to_tool = {tool.metadata.name: tool for tool in self.tools}
        if name not in name_to_tool:
            raise ValueError(f"Tool with name {name} not found")
        return {"tool_metadata": name_to_tool[name].metadata}

    async def launch_server(self) -> None:
        """Launch the service as a FastAPI server."""
        logger.info(f"Launching tool service server at {self.host}:{self.port}")
        # uvicorn.run(self._app, host=self.host, port=self.port)

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
        server = CustomServer(cfg)
        await server.serve()

service_definition `property` #

service_definition: ServiceDefinition

The service definition.

message_queue `property` #

message_queue: BaseMessageQueue

The message queue.

publisher_id `property` #

publisher_id: str

The publisher ID.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

The publish callback, if any.

processing_loop `async` #

processing_loop() -> None

The processing loop for the service.

Source code in llama-agents/llama_deploy/services/tool.py

async def processing_loop(self) -> None:
    """The processing loop for the service."""
    logger.info("Processing initiated.")
    while True:
        if not self.running:
            await asyncio.sleep(self.step_interval)
            continue

        async with self.lock:
            current_tool_calls: List[ToolCall] = [
                *self._outstanding_tool_calls.values()
            ]
        for tool_call in current_tool_calls:
            tool = get_function_by_name(
                self.tools, tool_call.tool_call_bundle.tool_name
            )

            logger.info(
                f"Processing tool call id {tool_call.id_} with {tool.metadata.name}"
            )
            tool_output = await tool.acall(
                *tool_call.tool_call_bundle.tool_args,
                **tool_call.tool_call_bundle.tool_kwargs,
            )

            # execute function call
            tool_message = ChatMessage(
                content=str(tool_output),
                role=MessageRole.TOOL,
                additional_kwargs={
                    "name": tool_call.tool_call_bundle.tool_name,
                    "tool_call_id": tool_call.id_,
                },
            )

            # publish the completed task
            await self.publish(
                QueueMessage(
                    type=tool_call.source_id,
                    action=ActionTypes.COMPLETED_TOOL_CALL,
                    data=ToolCallResult(
                        id_=tool_call.id_,
                        tool_message=tool_message,
                        result=str(tool_output),
                    ).model_dump(),
                )
            )

            # clean up
            async with self.lock:
                del self._outstanding_tool_calls[tool_call.id_]

        await asyncio.sleep(self.step_interval)

process_message `async` #

process_message(message: QueueMessage) -> None

Process a message.

Source code in llama-agents/llama_deploy/services/tool.py

async def process_message(self, message: QueueMessage) -> None:
    """Process a message."""
    if message.action == ActionTypes.NEW_TOOL_CALL:
        tool_call_data = {"source_id": message.publisher_id}
        tool_call_data.update(message.data or {})
        tool_call = ToolCall(**tool_call_data)
        async with self.lock:
            self._outstanding_tool_calls.update({tool_call.id_: tool_call})
    else:
        raise ValueError(f"Unhandled action: {message.action}")

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the service.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether the consumer is remote. Defaults to False. If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.	`False`

Source code in llama-agents/llama_deploy/services/tool.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the service.

    Args:
        remote (bool):
            Whether the consumer is remote. Defaults to False.
            If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
    """
    if remote:
        url = (
            f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
            if self.port
            else f"http://{self.host}{self._app.url_path_for('process_message')}"
        )
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=url,
            message_type=self.service_name,
        )
    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama-agents/llama_deploy/services/tool.py

async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama-agents/llama_deploy/services/tool.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    asyncio.create_task(self.processing_loop())
    yield
    self.running = False

home `async` #

home() -> Dict[str, str]

Home endpoint. Returns the general information about the service.

Source code in llama-agents/llama_deploy/services/tool.py

async def home(self) -> Dict[str, str]:
    """Home endpoint. Returns the general information about the service."""
    return {
        "service_name": self.service_name,
        "description": self.description,
        "running": str(self.running),
        "step_interval": str(self.step_interval),
        "num_tools": str(len(self.tools)),
        "num_outstanding_tool_calls": str(len(self._outstanding_tool_calls)),
        "tool_calls": "\n".join(
            [str(tool_call) for tool_call in self._outstanding_tool_calls.values()]
        ),
        "type": "tool_service",
    }

create_tool_call `async` #

create_tool_call(tool_call: ToolCall) -> Dict[str, str]

Create a tool call.

Source code in llama-agents/llama_deploy/services/tool.py

async def create_tool_call(self, tool_call: ToolCall) -> Dict[str, str]:
    """Create a tool call."""
    async with self.lock:
        self._outstanding_tool_calls.update({tool_call.id_: tool_call})
    return {"tool_call_id": tool_call.id_}

get_tool_by_name `async` #

get_tool_by_name(name: str) -> Dict[str, Any]

Get a tool by name.

Source code in llama-agents/llama_deploy/services/tool.py

async def get_tool_by_name(self, name: str) -> Dict[str, Any]:
    """Get a tool by name."""
    name_to_tool = {tool.metadata.name: tool for tool in self.tools}
    if name not in name_to_tool:
        raise ValueError(f"Tool with name {name} not found")
    return {"tool_metadata": name_to_tool[name].metadata}

launch_server `async` #

launch_server() -> None

Launch the service as a FastAPI server.

Source code in llama-agents/llama_deploy/services/tool.py

async def launch_server(self) -> None:
    """Launch the service as a FastAPI server."""
    logger.info(f"Launching tool service server at {self.host}:{self.port}")
    # uvicorn.run(self._app, host=self.host, port=self.port)

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
    server = CustomServer(cfg)
    await server.serve()

ComponentService #

Bases: BaseService

Component service.

Wraps a query pipeline component into a service.

Exposes the following endpoints: - GET /: Home endpoint. - POST /process_message: Process a message.

Attributes:

Name	Type	Description
`component`	`Any`	The query pipeline component.
`description`	`str`	The description of the service.
`running`	`bool`	Whether the service is running.
`step_interval`	`float`	The interval in seconds to poll for tool call results. Defaults to 0.1s.
`host`	`Optional[str]`	The host of the service.
`port`	`Optional[int]`	The port of the service.
`raise_exceptions`	`bool`	Whether to raise exceptions.

Examples:

from llama_deploy import ComponentService
from llama_index.core.query_pipeline import QueryComponent

component_service = ComponentService(
    component=query_component,
    message_queue=message_queue,
    description="component_service",
    service_name="my_component_service",
)

Source code in llama-agents/llama_deploy/services/component.py

class ComponentService(BaseService):
    """Component service.

    Wraps a query pipeline component into a service.

    Exposes the following endpoints:
    - GET `/`: Home endpoint.
    - POST `/process_message`: Process a message.

    Attributes:
        component (Any): The query pipeline component.
        description (str): The description of the service.
        running (bool): Whether the service is running.
        step_interval (float): The interval in seconds to poll for tool call results. Defaults to 0.1s.
        host (Optional[str]): The host of the service.
        port (Optional[int]): The port of the service.
        raise_exceptions (bool): Whether to raise exceptions.

    Examples:
        ```python
        from llama_deploy import ComponentService
        from llama_index.core.query_pipeline import QueryComponent

        component_service = ComponentService(
            component=query_component,
            message_queue=message_queue,
            description="component_service",
            service_name="my_component_service",
        )
        ```
    """

    service_name: str
    component: Any

    description: str = "Component service."
    running: bool = True
    step_interval: float = 0.1
    host: Optional[str] = None
    port: Optional[int] = None
    raise_exceptions: bool = False

    _message_queue: BaseMessageQueue = PrivateAttr()
    _app: FastAPI = PrivateAttr()
    _publisher_id: str = PrivateAttr()
    _publish_callback: Optional[PublishCallback] = PrivateAttr()
    _lock: asyncio.Lock = PrivateAttr()
    _outstanding_calls: Dict[str, Any] = PrivateAttr()

    def __init__(
        self,
        component: Any,
        message_queue: BaseMessageQueue,
        running: bool = True,
        description: str = "Component Server",
        service_name: str = "default_component_service",
        publish_callback: Optional[PublishCallback] = None,
        step_interval: float = 0.1,
        host: Optional[str] = None,
        port: Optional[int] = None,
        raise_exceptions: bool = False,
    ) -> None:
        # HACK: QueryComponent is on pydantic v1
        if not isinstance(component, QueryComponent):
            raise ValueError("Component must be a QueryComponent")

        super().__init__(
            component=component,
            running=running,
            description=description,
            service_name=service_name,
            step_interval=step_interval,
            host=host,
            port=port,
            raise_exceptions=raise_exceptions,
        )

        self._lock = asyncio.Lock()
        # self._tasks_as_tool_calls = {}
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback

        self._outstanding_calls: Dict[str, Any] = {}

        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route(
            "/", self.home, methods=["GET"], tags=["Component Service"]
        )

        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Message Processing"],
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """Service definition."""
        return ServiceDefinition(
            service_name=self.service_name,
            description=self.description,
            host=self.host,
            port=self.port,
        )

    @property
    def message_queue(self) -> BaseMessageQueue:
        """Message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """Publisher ID."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """Publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> asyncio.Lock:
        return self._lock

    async def processing_loop(self) -> None:
        """The processing loop for the service."""
        logger.info("Processing initiated.")
        while True:
            if not self.running:
                await asyncio.sleep(self.step_interval)
                continue

            async with self.lock:
                current_calls = [(t, c) for t, c in self._outstanding_calls.items()]

            for task_id, current_call in current_calls:
                output_dict = await self.component.arun_component(**current_call)

                await self.message_queue.publish(
                    QueueMessage(
                        type=CONTROL_PLANE_NAME,
                        action=ActionTypes.COMPLETED_TASK,
                        data=TaskResult(
                            task_id=task_id,
                            history=[],
                            result=json.dumps(output_dict),
                            data=output_dict,
                        ).model_dump(),
                    )
                )

                # clean up
                async with self.lock:
                    del self._outstanding_calls[task_id]

            await asyncio.sleep(self.step_interval)

    async def process_message(self, message: QueueMessage) -> None:
        """Process a message received from the message queue."""
        if message.action == ActionTypes.NEW_TASK:
            new_task = NewTask(**message.data or {})
            task_def = new_task.task
            async with self.lock:
                self._outstanding_calls[task_def.task_id] = new_task.state[
                    "__input_dict__"
                ]
        else:
            raise ValueError(f"Unhandled action: {message.action}")

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue.

        Args:
            remote (bool):
                Whether the consumer is remote. Defaults to False.
                If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
        """
        if remote:
            url = (
                f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
                if self.port
                else f"http://{self.host}{self._app.url_path_for('process_message')}"
            )
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=url,
                message_type=self.service_name,
            )

        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        logger.info(f"{self.service_name} launch_local")
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        asyncio.create_task(self.processing_loop())
        yield
        self.running = False

    async def home(self) -> Dict[str, str]:
        """Home endpoint. Returns general information about the service."""
        return {
            "service_name": self.service_name,
            "description": self.description,
            "running": str(self.running),
            "step_interval": str(self.step_interval),
            "num_outstanding_calls": str(len(self._outstanding_calls)),
            "type": "component_service",
        }

    async def launch_server(self) -> None:
        """Launch the service as a FastAPI server."""
        logger.info(f"Launching {self.service_name} server at {self.host}:{self.port}")
        # uvicorn.run(self._app, host=self.host, port=self.port)

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
        server = CustomServer(cfg)
        await server.serve()

service_definition `property` #

service_definition: ServiceDefinition

Service definition.

message_queue `property` #

message_queue: BaseMessageQueue

Message queue.

publisher_id `property` #

publisher_id: str

Publisher ID.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

Publish callback, if any.

processing_loop `async` #

processing_loop() -> None

The processing loop for the service.

Source code in llama-agents/llama_deploy/services/component.py

async def processing_loop(self) -> None:
    """The processing loop for the service."""
    logger.info("Processing initiated.")
    while True:
        if not self.running:
            await asyncio.sleep(self.step_interval)
            continue

        async with self.lock:
            current_calls = [(t, c) for t, c in self._outstanding_calls.items()]

        for task_id, current_call in current_calls:
            output_dict = await self.component.arun_component(**current_call)

            await self.message_queue.publish(
                QueueMessage(
                    type=CONTROL_PLANE_NAME,
                    action=ActionTypes.COMPLETED_TASK,
                    data=TaskResult(
                        task_id=task_id,
                        history=[],
                        result=json.dumps(output_dict),
                        data=output_dict,
                    ).model_dump(),
                )
            )

            # clean up
            async with self.lock:
                del self._outstanding_calls[task_id]

        await asyncio.sleep(self.step_interval)

process_message `async` #

process_message(message: QueueMessage) -> None

Process a message received from the message queue.

Source code in llama-agents/llama_deploy/services/component.py

async def process_message(self, message: QueueMessage) -> None:
    """Process a message received from the message queue."""
    if message.action == ActionTypes.NEW_TASK:
        new_task = NewTask(**message.data or {})
        task_def = new_task.task
        async with self.lock:
            self._outstanding_calls[task_def.task_id] = new_task.state[
                "__input_dict__"
            ]
    else:
        raise ValueError(f"Unhandled action: {message.action}")

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether the consumer is remote. Defaults to False. If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.	`False`

Source code in llama-agents/llama_deploy/services/component.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue.

    Args:
        remote (bool):
            Whether the consumer is remote. Defaults to False.
            If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
    """
    if remote:
        url = (
            f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
            if self.port
            else f"http://{self.host}{self._app.url_path_for('process_message')}"
        )
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=url,
            message_type=self.service_name,
        )

    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama-agents/llama_deploy/services/component.py

async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    logger.info(f"{self.service_name} launch_local")
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama-agents/llama_deploy/services/component.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    asyncio.create_task(self.processing_loop())
    yield
    self.running = False

home `async` #

home() -> Dict[str, str]

Home endpoint. Returns general information about the service.

Source code in llama-agents/llama_deploy/services/component.py

async def home(self) -> Dict[str, str]:
    """Home endpoint. Returns general information about the service."""
    return {
        "service_name": self.service_name,
        "description": self.description,
        "running": str(self.running),
        "step_interval": str(self.step_interval),
        "num_outstanding_calls": str(len(self._outstanding_calls)),
        "type": "component_service",
    }

launch_server `async` #

launch_server() -> None

Launch the service as a FastAPI server.

Source code in llama-agents/llama_deploy/services/component.py

async def launch_server(self) -> None:
    """Launch the service as a FastAPI server."""
    logger.info(f"Launching {self.service_name} server at {self.host}:{self.port}")
    # uvicorn.run(self._app, host=self.host, port=self.port)

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=self.host, port=self.port)
    server = CustomServer(cfg)
    await server.serve()

WorkflowService #

Bases: BaseService

Workflow service.

Wraps a llama-index workflow into a service.

Exposes the following endpoints: - GET /: Home endpoint. - POST /process_message: Process a message.

Attributes:

Name	Type	Description
`workflow`	`Workflow`	The workflow itself.
`description`	`str`	The description of the service.
`running`	`bool`	Whether the service is running.
`step_interval`	`float`	The interval in seconds to poll for tool call results. Defaults to 0.1s.
`host`	`Optional[str]`	The host of the service.
`port`	`Optional[int]`	The port of the service.
`raise_exceptions`	`bool`	Whether to raise exceptions.

Examples:

from llama_deploy import WorkflowService
from llama_index.core.workflow import Workflow

workflow_service = WorkflowService(
    workflow,
    message_queue=message_queue,
    description="workflow_service",
    service_name="my_workflow_service",
)

Source code in llama-agents/llama_deploy/services/workflow.py

class WorkflowService(BaseService):
    """Workflow service.

    Wraps a llama-index workflow into a service.

    Exposes the following endpoints:
    - GET `/`: Home endpoint.
    - POST `/process_message`: Process a message.

    Attributes:
        workflow (Workflow): The workflow itself.
        description (str): The description of the service.
        running (bool): Whether the service is running.
        step_interval (float): The interval in seconds to poll for tool call results. Defaults to 0.1s.
        host (Optional[str]): The host of the service.
        port (Optional[int]): The port of the service.
        raise_exceptions (bool): Whether to raise exceptions.

    Examples:
        ```python
        from llama_deploy import WorkflowService
        from llama_index.core.workflow import Workflow

        workflow_service = WorkflowService(
            workflow,
            message_queue=message_queue,
            description="workflow_service",
            service_name="my_workflow_service",
        )
        ```
    """

    service_name: str
    workflow: Workflow

    description: str = "Workflow service."
    running: bool = True
    step_interval: float = 0.1
    host: Optional[str] = None
    port: Optional[int] = None
    internal_host: Optional[str] = None
    internal_port: Optional[int] = None
    raise_exceptions: bool = False

    _message_queue: BaseMessageQueue = PrivateAttr()
    _app: FastAPI = PrivateAttr()
    _publisher_id: str = PrivateAttr()
    _publish_callback: Optional[PublishCallback] = PrivateAttr()
    _lock: asyncio.Lock = PrivateAttr()
    _outstanding_calls: Dict[str, WorkflowState] = PrivateAttr()

    def __init__(
        self,
        workflow: Workflow,
        message_queue: BaseMessageQueue,
        running: bool = True,
        description: str = "Component Server",
        service_name: str = "default_workflow_service",
        publish_callback: Optional[PublishCallback] = None,
        step_interval: float = 0.1,
        host: Optional[str] = None,
        port: Optional[int] = None,
        internal_host: Optional[str] = None,
        internal_port: Optional[int] = None,
        raise_exceptions: bool = False,
    ) -> None:
        super().__init__(
            workflow=workflow,
            running=running,
            description=description,
            service_name=service_name,
            step_interval=step_interval,
            host=host,
            port=port,
            internal_host=internal_host,
            internal_port=internal_port,
            raise_exceptions=raise_exceptions,
        )

        self._lock = asyncio.Lock()
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback

        self._outstanding_calls: Dict[str, WorkflowState] = {}

        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route(
            "/", self.home, methods=["GET"], tags=["Workflow Service"]
        )

        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Message Processing"],
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """Service definition."""
        return ServiceDefinition(
            service_name=self.service_name,
            description=self.description,
            host=self.host,
            port=self.port,
        )

    @property
    def message_queue(self) -> BaseMessageQueue:
        """Message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """Publisher ID."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """Publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> asyncio.Lock:
        return self._lock

    def load_workflow_state(self, workflow: Workflow, state: WorkflowState) -> Workflow:
        """Fork the workflow with the given state.

        TODO: Support managing the workflow state.
        """
        context_hash = state.hash
        context_str = state.state
        if not context_str:
            return workflow

        if hash(context_str + hash_secret) != context_hash:
            raise ValueError("Context hash does not match. Possible data corruption.")

        # only load context once it's been verified(?)

        return workflow

    def dump_workflow_state(
        self, workflow: Workflow, run_kawrgs: dict
    ) -> WorkflowState:
        """Dump the workflow state.

        TODO: Support managing the workflow state.
        """
        context_bytes = pickle.dumps({})
        context_str = base64.b64encode(context_bytes).decode("ascii")
        context_hash = hash(context_str + hash_secret)

        return WorkflowState(
            hash=context_hash, state=context_str, run_kwargs=run_kawrgs
        )

    async def processing_loop(self) -> None:
        """The processing loop for the service.

        TODO: How do we handle any errors that occur during processing?
        """
        logger.info("Processing initiated.")
        while True:
            if not self.running:
                await asyncio.sleep(self.step_interval)
                continue

            async with self.lock:
                current_calls = [(t, c) for t, c in self._outstanding_calls.items()]

            for task_id, current_call in current_calls:
                # TODO: resume a workflow session?

                # load the state
                self.load_workflow_state(self.workflow, current_call)

                # run the workflow
                # TODO: How do we handle streaming? Websockets?
                result = await self.workflow.run(**current_call.run_kwargs)

                # dump the state
                updated_state = self.dump_workflow_state(
                    self.workflow, current_call.run_kwargs
                )

                await self.message_queue.publish(
                    QueueMessage(
                        type=CONTROL_PLANE_NAME,
                        action=ActionTypes.COMPLETED_TASK,
                        data=TaskResult(
                            task_id=task_id,
                            history=[],
                            result=str(result),
                            data=updated_state.dict(),
                        ).model_dump(),
                    )
                )

                # clean up
                async with self.lock:
                    self._outstanding_calls.pop(task_id, None)

            await asyncio.sleep(self.step_interval)

    async def process_message(self, message: QueueMessage) -> None:
        """Process a message received from the message queue."""
        if message.action == ActionTypes.NEW_TASK:
            new_task = NewTask(**message.data or {})
            async with self.lock:
                new_task.state["run_kwargs"] = json.loads(new_task.task.input)
                workflow_state = WorkflowState(
                    **new_task.state,
                )
                self._outstanding_calls[new_task.task.task_id] = workflow_state
        else:
            raise ValueError(f"Unhandled action: {message.action}")

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue.

        Args:
            remote (bool):
                Whether the consumer is remote. Defaults to False.
                If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
        """
        if remote:
            url = (
                f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
                if self.port
                else f"http://{self.host}{self._app.url_path_for('process_message')}"
            )
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=url,
                message_type=self.service_name,
            )

        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        logger.info(f"{self.service_name} launch_local")
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        asyncio.create_task(self.processing_loop())
        yield
        self.running = False

    async def home(self) -> Dict[str, str]:
        """Home endpoint. Returns general information about the service."""
        return {
            "service_name": self.service_name,
            "description": self.description,
            "running": str(self.running),
            "step_interval": str(self.step_interval),
            "num_outstanding_calls": str(len(self._outstanding_calls)),
            "type": "workflow_service",
        }

    async def launch_server(self) -> None:
        """Launch the service as a FastAPI server."""
        host = self.internal_host or self.host
        port = self.internal_port or self.port
        logger.info(f"Launching {self.service_name} server at {host}:{port}")

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=host, port=port)
        server = CustomServer(cfg)
        await server.serve()

service_definition `property` #

service_definition: ServiceDefinition

Service definition.

message_queue `property` #

message_queue: BaseMessageQueue

Message queue.

publisher_id `property` #

publisher_id: str

Publisher ID.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

Publish callback, if any.

load_workflow_state #

load_workflow_state(workflow: Workflow, state: WorkflowState) -> Workflow

Fork the workflow with the given state.

TODO: Support managing the workflow state.

Source code in llama-agents/llama_deploy/services/workflow.py

def load_workflow_state(self, workflow: Workflow, state: WorkflowState) -> Workflow:
    """Fork the workflow with the given state.

    TODO: Support managing the workflow state.
    """
    context_hash = state.hash
    context_str = state.state
    if not context_str:
        return workflow

    if hash(context_str + hash_secret) != context_hash:
        raise ValueError("Context hash does not match. Possible data corruption.")

    # only load context once it's been verified(?)

    return workflow

dump_workflow_state #

dump_workflow_state(workflow: Workflow, run_kawrgs: dict) -> WorkflowState

Dump the workflow state.

TODO: Support managing the workflow state.

Source code in llama-agents/llama_deploy/services/workflow.py

def dump_workflow_state(
    self, workflow: Workflow, run_kawrgs: dict
) -> WorkflowState:
    """Dump the workflow state.

    TODO: Support managing the workflow state.
    """
    context_bytes = pickle.dumps({})
    context_str = base64.b64encode(context_bytes).decode("ascii")
    context_hash = hash(context_str + hash_secret)

    return WorkflowState(
        hash=context_hash, state=context_str, run_kwargs=run_kawrgs
    )

processing_loop `async` #

processing_loop() -> None

The processing loop for the service.

TODO: How do we handle any errors that occur during processing?

Source code in llama-agents/llama_deploy/services/workflow.py

async def processing_loop(self) -> None:
    """The processing loop for the service.

    TODO: How do we handle any errors that occur during processing?
    """
    logger.info("Processing initiated.")
    while True:
        if not self.running:
            await asyncio.sleep(self.step_interval)
            continue

        async with self.lock:
            current_calls = [(t, c) for t, c in self._outstanding_calls.items()]

        for task_id, current_call in current_calls:
            # TODO: resume a workflow session?

            # load the state
            self.load_workflow_state(self.workflow, current_call)

            # run the workflow
            # TODO: How do we handle streaming? Websockets?
            result = await self.workflow.run(**current_call.run_kwargs)

            # dump the state
            updated_state = self.dump_workflow_state(
                self.workflow, current_call.run_kwargs
            )

            await self.message_queue.publish(
                QueueMessage(
                    type=CONTROL_PLANE_NAME,
                    action=ActionTypes.COMPLETED_TASK,
                    data=TaskResult(
                        task_id=task_id,
                        history=[],
                        result=str(result),
                        data=updated_state.dict(),
                    ).model_dump(),
                )
            )

            # clean up
            async with self.lock:
                self._outstanding_calls.pop(task_id, None)

        await asyncio.sleep(self.step_interval)

process_message `async` #

process_message(message: QueueMessage) -> None

Process a message received from the message queue.

Source code in llama-agents/llama_deploy/services/workflow.py

async def process_message(self, message: QueueMessage) -> None:
    """Process a message received from the message queue."""
    if message.action == ActionTypes.NEW_TASK:
        new_task = NewTask(**message.data or {})
        async with self.lock:
            new_task.state["run_kwargs"] = json.loads(new_task.task.input)
            workflow_state = WorkflowState(
                **new_task.state,
            )
            self._outstanding_calls[new_task.task.task_id] = workflow_state
    else:
        raise ValueError(f"Unhandled action: {message.action}")

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether the consumer is remote. Defaults to False. If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.	`False`

Source code in llama-agents/llama_deploy/services/workflow.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue.

    Args:
        remote (bool):
            Whether the consumer is remote. Defaults to False.
            If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
    """
    if remote:
        url = (
            f"http://{self.host}:{self.port}{self._app.url_path_for('process_message')}"
            if self.port
            else f"http://{self.host}{self._app.url_path_for('process_message')}"
        )
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=url,
            message_type=self.service_name,
        )

    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama-agents/llama_deploy/services/workflow.py

async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    logger.info(f"{self.service_name} launch_local")
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama-agents/llama_deploy/services/workflow.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    asyncio.create_task(self.processing_loop())
    yield
    self.running = False

home `async` #

home() -> Dict[str, str]

Home endpoint. Returns general information about the service.

Source code in llama-agents/llama_deploy/services/workflow.py

async def home(self) -> Dict[str, str]:
    """Home endpoint. Returns general information about the service."""
    return {
        "service_name": self.service_name,
        "description": self.description,
        "running": str(self.running),
        "step_interval": str(self.step_interval),
        "num_outstanding_calls": str(len(self._outstanding_calls)),
        "type": "workflow_service",
    }

launch_server `async` #

launch_server() -> None

Launch the service as a FastAPI server.

Source code in llama-agents/llama_deploy/services/workflow.py

async def launch_server(self) -> None:
    """Launch the service as a FastAPI server."""
    host = self.internal_host or self.host
    port = self.internal_port or self.port
    logger.info(f"Launching {self.service_name} server at {host}:{port}")

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=host, port=port)
    server = CustomServer(cfg)
    await server.serve()

WorkflowServiceConfig #

Bases: BaseSettings

Workflow service configuration.

Source code in llama-agents/llama_deploy/services/workflow.py

class WorkflowServiceConfig(BaseSettings):
    """Workflow service configuration."""

    model_config = SettingsConfigDict(env_prefix="WORKFLOW_SERVICE_")

    host: str
    port: int
    internal_host: Optional[str] = None
    internal_port: Optional[int] = None
    service_name: str
    description: str = "A service that wraps a llama-index workflow."
    running: bool = True
    step_interval: float = 0.1
    raise_exceptions: bool = False

options: members: - BaseService - WorkflowService - WorkflowServiceConfig

Services

BaseService #

service_definition abstractmethod property #

as_consumer abstractmethod #

processing_loop abstractmethod async #

process_message abstractmethod async #

launch_local abstractmethod async #

launch_server abstractmethod async #

register_to_control_plane async #

deregister_from_control_plane async #

register_to_message_queue async #

AgentService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

tool_name property #

processing_loop async #

process_message async #

as_consumer #

launch_local async #

lifespan async #

home async #

create_task async #

get_messages async #

toggle_agent_running async #

is_worker_running async #

reset_agent async #

launch_server async #

HumanService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

tool_name property #

HumanTask #

processing_loop async #

process_message async #

as_consumer #

launch_local async #

lifespan async #

home async #

create_task async #

get_tasks async #

get_task async #

handle_task async #

launch_server async #

validate_human_input_prompt classmethod #

ToolService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

processing_loop async #

process_message async #

as_consumer #

launch_local async #

lifespan async #

home async #

create_tool_call async #

get_tool_by_name async #

launch_server async #

ComponentService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

processing_loop async #

process_message async #

as_consumer #

launch_local async #

lifespan async #

home async #

launch_server async #

WorkflowService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

load_workflow_state #

service_definition `abstractmethod` `property` #

as_consumer `abstractmethod` #

processing_loop `abstractmethod` `async` #

process_message `abstractmethod` `async` #

launch_local `abstractmethod` `async` #

launch_server `abstractmethod` `async` #

register_to_control_plane `async` #

deregister_from_control_plane `async` #

register_to_message_queue `async` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

tool_name `property` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

create_task `async` #

get_messages `async` #

toggle_agent_running `async` #

is_worker_running `async` #

reset_agent `async` #

launch_server `async` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

tool_name `property` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

create_task `async` #

get_tasks `async` #

get_task `async` #

handle_task `async` #

launch_server `async` #

validate_human_input_prompt `classmethod` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

create_tool_call `async` #

get_tool_by_name `async` #

launch_server `async` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

launch_server `async` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

launch_server `async` #