`services`#

BaseService #

Bases: MessageQueuePublisherMixin, ABC

Base class for a service.

The general structure of a service is as follows: - A service has a name. - A service has a service definition. - A service uses a message queue to send/receive messages. - A service has a processing loop, for continuous processing of messages. - A service can process a message. - A service can publish a message to another service. - A service can be launched in-process. - A service can be launched as a server. - A service can be registered to the control plane. - A service can be registered to the message queue.

Source code in llama_deploy/services/base.py

class BaseService(MessageQueuePublisherMixin, ABC):
    """Base class for a service.

    The general structure of a service is as follows:
    - A service has a name.
    - A service has a service definition.
    - A service uses a message queue to send/receive messages.
    - A service has a processing loop, for continuous processing of messages.
    - A service can process a message.
    - A service can publish a message to another service.
    - A service can be launched in-process.
    - A service can be launched as a server.
    - A service can be registered to the control plane.
    - A service can be registered to the message queue.
    """

    def __init__(
        self,
        name: str,
        control_plane_url: str | None = None,  # deprecated
        control_plane_config: ControlPlaneConfig | None = None,
    ) -> None:
        self._service_name = name
        self._control_plane_config = control_plane_config or ControlPlaneConfig()
        self._control_plane_url = self._control_plane_config.url

    @property
    def service_name(self) -> str:
        return self._service_name

    @property
    @abstractmethod
    def service_definition(self) -> ServiceDefinition:
        """The service definition."""
        ...

    @abstractmethod
    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue."""
        ...

    @abstractmethod
    async def processing_loop(self) -> None:
        """The processing loop for the service."""
        ...

    @abstractmethod
    async def process_message(self, message: QueueMessage) -> Any:
        """Process a message."""
        ...

    @abstractmethod
    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        ...

    @abstractmethod
    async def launch_server(self) -> None:
        """Launch the service as a server."""
        ...

    async def register_to_control_plane(self, control_plane_url: str) -> None:
        """Register the service to the control plane."""
        self._control_plane_url = control_plane_url
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{control_plane_url}/services/register",
                json=self.service_definition.model_dump(),
            )
            response.raise_for_status()
            self._control_plane_config = ControlPlaneConfig(**response.json())

    async def deregister_from_control_plane(self) -> None:
        """Deregister the service from the control plane."""
        if not self._control_plane_url:
            raise ValueError(
                "Control plane URL not set. Call register_to_control_plane first."
            )
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self._control_plane_url}/services/deregister",
                params={"service_name": self.service_name},
            )
            response.raise_for_status()

    async def get_session_state(self, session_id: str) -> dict[str, Any] | None:
        """Get the session state from the control plane."""
        try:
            async with httpx.AsyncClient() as client:
                response = await client.get(
                    f"{self._control_plane_url}/sessions/{session_id}/state"
                )
                if response.status_code == 404:
                    return None
                else:
                    response.raise_for_status()

                return response.json()
        except httpx.ConnectError:
            # Let the service live without a control plane running
            return None

    async def update_session_state(
        self, session_id: str, state: dict[str, Any]
    ) -> None:
        """Update the session state in the control plane."""
        if not self._control_plane_url:
            return

        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{self._control_plane_url}/sessions/{session_id}/state",
                json=state,
            )
            response.raise_for_status()

    async def register_to_message_queue(self) -> StartConsumingCallable:
        """Register the service to the message queue."""
        return await self.message_queue.register_consumer(
            self.as_consumer(remote=True), topic=self.get_topic(self.service_name)
        )

    async def deregister_from_message_queue(self) -> None:
        return await self.message_queue.deregister_consumer(
            self.as_consumer(remote=True)
        )

    def get_topic(self, msg_type: str) -> str:
        return f"{self._control_plane_config.topic_namespace}.{msg_type}"

service_definition `abstractmethod` `property` #

service_definition: ServiceDefinition

The service definition.

as_consumer `abstractmethod` #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Source code in llama_deploy/services/base.py

@abstractmethod
def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue."""
    ...

processing_loop `abstractmethod` `async` #

processing_loop() -> None

The processing loop for the service.

Source code in llama_deploy/services/base.py

@abstractmethod
async def processing_loop(self) -> None:
    """The processing loop for the service."""
    ...

process_message `abstractmethod` `async` #

process_message(message: QueueMessage) -> Any

Process a message.

Source code in llama_deploy/services/base.py

@abstractmethod
async def process_message(self, message: QueueMessage) -> Any:
    """Process a message."""
    ...

launch_local `abstractmethod` `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama_deploy/services/base.py

@abstractmethod
async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    ...

launch_server `abstractmethod` `async` #

launch_server() -> None

Launch the service as a server.

Source code in llama_deploy/services/base.py

@abstractmethod
async def launch_server(self) -> None:
    """Launch the service as a server."""
    ...

register_to_control_plane `async` #

register_to_control_plane(control_plane_url: str) -> None

Register the service to the control plane.

Source code in llama_deploy/services/base.py

async def register_to_control_plane(self, control_plane_url: str) -> None:
    """Register the service to the control plane."""
    self._control_plane_url = control_plane_url
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{control_plane_url}/services/register",
            json=self.service_definition.model_dump(),
        )
        response.raise_for_status()
        self._control_plane_config = ControlPlaneConfig(**response.json())

deregister_from_control_plane `async` #

deregister_from_control_plane() -> None

Deregister the service from the control plane.

Source code in llama_deploy/services/base.py

async def deregister_from_control_plane(self) -> None:
    """Deregister the service from the control plane."""
    if not self._control_plane_url:
        raise ValueError(
            "Control plane URL not set. Call register_to_control_plane first."
        )
    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{self._control_plane_url}/services/deregister",
            params={"service_name": self.service_name},
        )
        response.raise_for_status()

get_session_state `async` #

get_session_state(session_id: str) -> dict[str, Any] | None

Get the session state from the control plane.

Source code in llama_deploy/services/base.py

async def get_session_state(self, session_id: str) -> dict[str, Any] | None:
    """Get the session state from the control plane."""
    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self._control_plane_url}/sessions/{session_id}/state"
            )
            if response.status_code == 404:
                return None
            else:
                response.raise_for_status()

            return response.json()
    except httpx.ConnectError:
        # Let the service live without a control plane running
        return None

update_session_state `async` #

update_session_state(session_id: str, state: dict[str, Any]) -> None

Update the session state in the control plane.

Source code in llama_deploy/services/base.py

async def update_session_state(
    self, session_id: str, state: dict[str, Any]
) -> None:
    """Update the session state in the control plane."""
    if not self._control_plane_url:
        return

    async with httpx.AsyncClient() as client:
        response = await client.post(
            f"{self._control_plane_url}/sessions/{session_id}/state",
            json=state,
        )
        response.raise_for_status()

register_to_message_queue `async` #

register_to_message_queue() -> StartConsumingCallable

Register the service to the message queue.

Source code in llama_deploy/services/base.py

async def register_to_message_queue(self) -> StartConsumingCallable:
    """Register the service to the message queue."""
    return await self.message_queue.register_consumer(
        self.as_consumer(remote=True), topic=self.get_topic(self.service_name)
    )

WorkflowService #

Bases: BaseService

Workflow service.

Wraps a llama-index workflow into a service.

Exposes the following endpoints: - GET /: Home endpoint. - POST /process_message: Process a message.

Examples:

from llama_deploy import WorkflowService, WorkflowServiceConfig
from llama_index.core.workflow import Workflow

workflow_service = WorkflowService(
    workflow,
    message_queue=message_queue,
    config=WorkflowServiceConfig(description="workflow_service")
)

Source code in llama_deploy/services/workflow.py

class WorkflowService(BaseService):
    """Workflow service.

    Wraps a llama-index workflow into a service.

    Exposes the following endpoints:
    - GET `/`: Home endpoint.
    - POST `/process_message`: Process a message.

    Examples:
        ```python
        from llama_deploy import WorkflowService, WorkflowServiceConfig
        from llama_index.core.workflow import Workflow

        workflow_service = WorkflowService(
            workflow,
            message_queue=message_queue,
            config=WorkflowServiceConfig(description="workflow_service")
        )
        ```
    """

    def __init__(
        self,
        workflow: Workflow,
        message_queue: AbstractMessageQueue,
        config: WorkflowServiceConfig,
        publish_callback: Optional[PublishCallback] = None,
    ) -> None:
        super().__init__(config.service_name)

        self.workflow = workflow
        self.config = config

        self._lock = asyncio.Lock()
        self._running = True
        self._message_queue = message_queue
        self._publisher_id = f"{self.__class__.__qualname__}-{uuid.uuid4()}"
        self._publish_callback = publish_callback

        self._outstanding_calls: Dict[str, WorkflowState] = {}
        self._ongoing_tasks: Dict[str, asyncio.Task] = {}
        self._events_buffer: Dict[str, asyncio.Queue] = defaultdict(asyncio.Queue)

        self._app = FastAPI(lifespan=self.lifespan)

        self._app.add_api_route(
            "/", self.home, methods=["GET"], tags=["Workflow Service"]
        )

        self._app.add_api_route(
            "/process_message",
            self.process_message,
            methods=["POST"],
            tags=["Message Processing"],
        )

    @property
    def service_definition(self) -> ServiceDefinition:
        """Service definition."""
        return ServiceDefinition(
            service_name=self.config.service_name,
            description=self.config.description,
            host=self.config.host,
            port=self.config.port,
        )

    @property
    def message_queue(self) -> AbstractMessageQueue:
        """Message queue."""
        return self._message_queue

    @property
    def publisher_id(self) -> str:
        """Publisher ID."""
        return self._publisher_id

    @property
    def publish_callback(self) -> Optional[PublishCallback]:
        """Publish callback, if any."""
        return self._publish_callback

    @property
    def lock(self) -> asyncio.Lock:
        return self._lock

    async def get_workflow_state(self, state: WorkflowState) -> Optional[Context]:
        """Load the existing context from the workflow state.

        TODO: Support managing the workflow state?
        """
        if state.session_id is None:
            return None

        state_dict = await self.get_session_state(state.session_id)
        if state_dict is None:
            return None

        workflow_state_json = state_dict.get(state.session_id, None)

        if workflow_state_json is None:
            return None

        workflow_state = WorkflowState.model_validate_json(workflow_state_json)
        if workflow_state.state is None:
            return None

        context_dict = workflow_state.state
        context_str = json.dumps(context_dict)
        context_hash = hash(context_str + hash_secret)

        if workflow_state.hash is not None and context_hash != workflow_state.hash:
            raise ValueError("Context hash does not match!")

        return Context.from_dict(
            self.workflow,
            workflow_state.state,
            serializer=JsonPickleSerializer(),
        )

    async def set_workflow_state(
        self, ctx: Context, current_state: WorkflowState
    ) -> None:
        """Set the workflow state for this session."""
        context_dict = ctx.to_dict(serializer=JsonPickleSerializer())
        context_str = json.dumps(context_dict)
        context_hash = hash(context_str + hash_secret)

        workflow_state = WorkflowState(
            hash=context_hash,
            state=context_dict,
            run_kwargs=current_state.run_kwargs,
            session_id=current_state.session_id,
            task_id=current_state.task_id,
        )

        if current_state.session_id is None:
            raise ValueError("Session ID is None! Cannot set workflow state.")

        session_state = await self.get_session_state(current_state.session_id)
        if session_state:
            session_state[current_state.session_id] = workflow_state.model_dump_json()

            # Store the state in the control plane
            await self.update_session_state(current_state.session_id, session_state)

    async def process_call(self, current_call: WorkflowState) -> None:
        """Processes a given task, and writes a response to the message queue.

        Handles errors with a generic try/except, and publishes the error message
        as the result.

        Args:
            current_call (WorkflowState):
                The state of the current task, including run_kwargs and other session state.
        """
        # create send_event background task
        close_send_events = asyncio.Event()
        handler = None

        try:
            # load the state
            ctx = await self.get_workflow_state(current_call)

            # run the workflow
            handler = self.workflow.run(ctx=ctx, **current_call.run_kwargs)

            async def send_events(
                handler: WorkflowHandler, close_event: asyncio.Event
            ) -> None:
                if handler.ctx is None:
                    raise ValueError("handler does not have a valid Context.")

                while not close_event.is_set():
                    try:
                        event = self._events_buffer[current_call.task_id].get_nowait()
                        handler.ctx.send_event(event)
                    except asyncio.QueueEmpty:
                        pass
                    await asyncio.sleep(self.config.step_interval)

            _ = asyncio.create_task(send_events(handler, close_send_events))

            index = 0
            async for ev in handler.stream_events():
                # send the event to control plane for client / api server streaming
                logger.debug(f"Publishing event: {ev}")
                await self.message_queue.publish(
                    QueueMessage(
                        type=CONTROL_PLANE_MESSAGE_TYPE,
                        action=ActionTypes.TASK_STREAM,
                        data=TaskStream(
                            task_id=current_call.task_id,
                            session_id=current_call.session_id,
                            data=ev.model_dump(),
                            index=index,
                        ).model_dump(),
                    ),
                    self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
                )
                index += 1

            final_result = await handler

            # dump the state # dump the state
            await self.set_workflow_state(handler.ctx, current_call)

            logger.info(
                f"Publishing final result: {final_result} to '{self.get_topic(CONTROL_PLANE_MESSAGE_TYPE)}'"
            )
            await self.message_queue.publish(
                QueueMessage(
                    type=CONTROL_PLANE_MESSAGE_TYPE,
                    action=ActionTypes.COMPLETED_TASK,
                    data=TaskResult(
                        task_id=current_call.task_id,
                        history=[],
                        result=str(final_result),
                        data={},
                    ).model_dump(),
                ),
                self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
            )
        except Exception as e:
            if self.config.raise_exceptions:
                raise e

            logger.error(
                f"Encountered error in task {current_call.task_id}! {str(e)}",
                exc_info=True,
            )
            # dump the state
            if handler is not None:
                await self.set_workflow_state(handler.ctx, current_call)

            # return failure
            await self.message_queue.publish(
                QueueMessage(
                    type=CONTROL_PLANE_MESSAGE_TYPE,
                    action=ActionTypes.COMPLETED_TASK,
                    data=TaskResult(
                        task_id=current_call.task_id,
                        history=[],
                        result=str(e),
                        data={},
                    ).model_dump(),
                ),
                self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
            )
        finally:
            # clean up
            close_send_events.set()
            async with self.lock:
                self._outstanding_calls.pop(current_call.task_id, None)
            self._ongoing_tasks.pop(current_call.task_id, None)

    async def manage_tasks(self) -> None:
        """Acts as a manager to process outstanding tasks from a queue.

        Limits number of tasks in progress to `self.max_concurrent_tasks`.

        If the number of ongoing tasks is greater than or equal to `self.max_concurrent_tasks`,
        they are buffered until there is room to run it.
        """
        while True:
            if not self._running:
                await asyncio.sleep(self.config.step_interval)
                continue

            # Check for completed tasks
            completed_tasks = [
                task for task in self._ongoing_tasks.values() if task.done()
            ]
            for task in completed_tasks:
                task_id = next(k for k, v in self._ongoing_tasks.items() if v == task)
                self._ongoing_tasks.pop(task_id, None)

            # Start new tasks
            async with self.lock:
                new_calls = [
                    (t, c)
                    for t, c in self._outstanding_calls.items()
                    if t not in self._ongoing_tasks
                ]

            for task_id, current_call in new_calls:
                if len(self._ongoing_tasks) >= self.config.max_concurrent_tasks:
                    break
                task = asyncio.create_task(self.process_call(current_call))
                self._ongoing_tasks[task_id] = task

            await asyncio.sleep(0.1)  # Small sleep to prevent busy-waiting

    async def processing_loop(self) -> None:
        """The processing loop for the service with non-blocking concurrent task execution."""
        logger.info("Processing initiated.")
        try:
            await self.manage_tasks()
        except CancelledError:
            return

    async def process_message(self, message: QueueMessage) -> None:
        """Process a message received from the message queue."""
        if message.action == ActionTypes.NEW_TASK:
            task_def = TaskDefinition(**message.data or {})

            run_kwargs = json.loads(task_def.input)
            workflow_state = WorkflowState(
                session_id=task_def.session_id,
                task_id=task_def.task_id,
                run_kwargs=run_kwargs,
            )

            async with self.lock:
                self._outstanding_calls[task_def.task_id] = workflow_state
        elif message.action == ActionTypes.SEND_EVENT:
            serializer = JsonSerializer()

            task_def = TaskDefinition(**message.data or {})
            event = serializer.deserialize(task_def.input)
            async with self.lock:
                self._events_buffer[task_def.task_id].put_nowait(event)

        else:
            raise ValueError(f"Unhandled action: {message.action}")

    def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
        """Get the consumer for the message queue.

        Args:
            remote (bool):
                Whether the consumer is remote. Defaults to False.
                If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
        """
        if remote:
            return RemoteMessageConsumer(
                id_=self.publisher_id,
                url=f"{self.config.url}{self._app.url_path_for('process_message')}",
                message_type=self.service_name,
            )

        return CallableMessageConsumer(
            id_=self.publisher_id,
            message_type=self.service_name,
            handler=self.process_message,
        )

    async def launch_local(self) -> asyncio.Task:
        """Launch the service in-process."""
        logger.info(f"{self.service_name} launch_local")
        return asyncio.create_task(self.processing_loop())

    # ---- Server based methods ----

    @asynccontextmanager
    async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
        """Starts the processing loop when the fastapi app starts."""
        t = asyncio.create_task(self.processing_loop())

        yield

        t.cancel()
        await t
        self._running = False

    async def home(self) -> Dict[str, str]:
        """Home endpoint. Returns general information about the service."""
        return {
            "service_name": self.service_name,
            "description": self.config.description,
            "running": str(self._running),
            "step_interval": str(self.config.step_interval),
            "num_outstanding_calls": str(len(self._outstanding_calls)),
            "type": "workflow_service",
        }

    async def launch_server(self) -> None:
        """Launch the service as a FastAPI server."""
        host = self.config.internal_host or self.config.host
        port = self.config.internal_port or self.config.port
        logger.info(f"Launching {self.service_name} server at {host}:{port}")

        class CustomServer(uvicorn.Server):
            def install_signal_handlers(self) -> None:
                pass

        cfg = uvicorn.Config(self._app, host=host, port=port)
        server = CustomServer(cfg)

        try:
            await server.serve()
        except asyncio.CancelledError:
            await asyncio.gather(server.shutdown(), return_exceptions=True)

service_definition `property` #

service_definition: ServiceDefinition

Service definition.

message_queue `property` #

message_queue: AbstractMessageQueue

Message queue.

publisher_id `property` #

publisher_id: str

Publisher ID.

publish_callback `property` #

publish_callback: Optional[PublishCallback]

Publish callback, if any.

get_workflow_state `async` #

get_workflow_state(state: WorkflowState) -> Optional[Context]

Load the existing context from the workflow state.

TODO: Support managing the workflow state?

Source code in llama_deploy/services/workflow.py

async def get_workflow_state(self, state: WorkflowState) -> Optional[Context]:
    """Load the existing context from the workflow state.

    TODO: Support managing the workflow state?
    """
    if state.session_id is None:
        return None

    state_dict = await self.get_session_state(state.session_id)
    if state_dict is None:
        return None

    workflow_state_json = state_dict.get(state.session_id, None)

    if workflow_state_json is None:
        return None

    workflow_state = WorkflowState.model_validate_json(workflow_state_json)
    if workflow_state.state is None:
        return None

    context_dict = workflow_state.state
    context_str = json.dumps(context_dict)
    context_hash = hash(context_str + hash_secret)

    if workflow_state.hash is not None and context_hash != workflow_state.hash:
        raise ValueError("Context hash does not match!")

    return Context.from_dict(
        self.workflow,
        workflow_state.state,
        serializer=JsonPickleSerializer(),
    )

set_workflow_state `async` #

set_workflow_state(ctx: Context, current_state: WorkflowState) -> None

Set the workflow state for this session.

Source code in llama_deploy/services/workflow.py

async def set_workflow_state(
    self, ctx: Context, current_state: WorkflowState
) -> None:
    """Set the workflow state for this session."""
    context_dict = ctx.to_dict(serializer=JsonPickleSerializer())
    context_str = json.dumps(context_dict)
    context_hash = hash(context_str + hash_secret)

    workflow_state = WorkflowState(
        hash=context_hash,
        state=context_dict,
        run_kwargs=current_state.run_kwargs,
        session_id=current_state.session_id,
        task_id=current_state.task_id,
    )

    if current_state.session_id is None:
        raise ValueError("Session ID is None! Cannot set workflow state.")

    session_state = await self.get_session_state(current_state.session_id)
    if session_state:
        session_state[current_state.session_id] = workflow_state.model_dump_json()

        # Store the state in the control plane
        await self.update_session_state(current_state.session_id, session_state)

process_call `async` #

process_call(current_call: WorkflowState) -> None

Processes a given task, and writes a response to the message queue.

Handles errors with a generic try/except, and publishes the error message as the result.

Parameters:

Name	Type	Description	Default
`current_call`	`WorkflowState`	The state of the current task, including run_kwargs and other session state.	required

Source code in llama_deploy/services/workflow.py

async def process_call(self, current_call: WorkflowState) -> None:
    """Processes a given task, and writes a response to the message queue.

    Handles errors with a generic try/except, and publishes the error message
    as the result.

    Args:
        current_call (WorkflowState):
            The state of the current task, including run_kwargs and other session state.
    """
    # create send_event background task
    close_send_events = asyncio.Event()
    handler = None

    try:
        # load the state
        ctx = await self.get_workflow_state(current_call)

        # run the workflow
        handler = self.workflow.run(ctx=ctx, **current_call.run_kwargs)

        async def send_events(
            handler: WorkflowHandler, close_event: asyncio.Event
        ) -> None:
            if handler.ctx is None:
                raise ValueError("handler does not have a valid Context.")

            while not close_event.is_set():
                try:
                    event = self._events_buffer[current_call.task_id].get_nowait()
                    handler.ctx.send_event(event)
                except asyncio.QueueEmpty:
                    pass
                await asyncio.sleep(self.config.step_interval)

        _ = asyncio.create_task(send_events(handler, close_send_events))

        index = 0
        async for ev in handler.stream_events():
            # send the event to control plane for client / api server streaming
            logger.debug(f"Publishing event: {ev}")
            await self.message_queue.publish(
                QueueMessage(
                    type=CONTROL_PLANE_MESSAGE_TYPE,
                    action=ActionTypes.TASK_STREAM,
                    data=TaskStream(
                        task_id=current_call.task_id,
                        session_id=current_call.session_id,
                        data=ev.model_dump(),
                        index=index,
                    ).model_dump(),
                ),
                self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
            )
            index += 1

        final_result = await handler

        # dump the state # dump the state
        await self.set_workflow_state(handler.ctx, current_call)

        logger.info(
            f"Publishing final result: {final_result} to '{self.get_topic(CONTROL_PLANE_MESSAGE_TYPE)}'"
        )
        await self.message_queue.publish(
            QueueMessage(
                type=CONTROL_PLANE_MESSAGE_TYPE,
                action=ActionTypes.COMPLETED_TASK,
                data=TaskResult(
                    task_id=current_call.task_id,
                    history=[],
                    result=str(final_result),
                    data={},
                ).model_dump(),
            ),
            self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
        )
    except Exception as e:
        if self.config.raise_exceptions:
            raise e

        logger.error(
            f"Encountered error in task {current_call.task_id}! {str(e)}",
            exc_info=True,
        )
        # dump the state
        if handler is not None:
            await self.set_workflow_state(handler.ctx, current_call)

        # return failure
        await self.message_queue.publish(
            QueueMessage(
                type=CONTROL_PLANE_MESSAGE_TYPE,
                action=ActionTypes.COMPLETED_TASK,
                data=TaskResult(
                    task_id=current_call.task_id,
                    history=[],
                    result=str(e),
                    data={},
                ).model_dump(),
            ),
            self.get_topic(CONTROL_PLANE_MESSAGE_TYPE),
        )
    finally:
        # clean up
        close_send_events.set()
        async with self.lock:
            self._outstanding_calls.pop(current_call.task_id, None)
        self._ongoing_tasks.pop(current_call.task_id, None)

manage_tasks `async` #

manage_tasks() -> None

Acts as a manager to process outstanding tasks from a queue.

Limits number of tasks in progress to self.max_concurrent_tasks.

If the number of ongoing tasks is greater than or equal to self.max_concurrent_tasks, they are buffered until there is room to run it.

Source code in llama_deploy/services/workflow.py

async def manage_tasks(self) -> None:
    """Acts as a manager to process outstanding tasks from a queue.

    Limits number of tasks in progress to `self.max_concurrent_tasks`.

    If the number of ongoing tasks is greater than or equal to `self.max_concurrent_tasks`,
    they are buffered until there is room to run it.
    """
    while True:
        if not self._running:
            await asyncio.sleep(self.config.step_interval)
            continue

        # Check for completed tasks
        completed_tasks = [
            task for task in self._ongoing_tasks.values() if task.done()
        ]
        for task in completed_tasks:
            task_id = next(k for k, v in self._ongoing_tasks.items() if v == task)
            self._ongoing_tasks.pop(task_id, None)

        # Start new tasks
        async with self.lock:
            new_calls = [
                (t, c)
                for t, c in self._outstanding_calls.items()
                if t not in self._ongoing_tasks
            ]

        for task_id, current_call in new_calls:
            if len(self._ongoing_tasks) >= self.config.max_concurrent_tasks:
                break
            task = asyncio.create_task(self.process_call(current_call))
            self._ongoing_tasks[task_id] = task

        await asyncio.sleep(0.1)  # Small sleep to prevent busy-waiting

processing_loop `async` #

processing_loop() -> None

The processing loop for the service with non-blocking concurrent task execution.

Source code in llama_deploy/services/workflow.py

async def processing_loop(self) -> None:
    """The processing loop for the service with non-blocking concurrent task execution."""
    logger.info("Processing initiated.")
    try:
        await self.manage_tasks()
    except CancelledError:
        return

process_message `async` #

process_message(message: QueueMessage) -> None

Process a message received from the message queue.

Source code in llama_deploy/services/workflow.py

async def process_message(self, message: QueueMessage) -> None:
    """Process a message received from the message queue."""
    if message.action == ActionTypes.NEW_TASK:
        task_def = TaskDefinition(**message.data or {})

        run_kwargs = json.loads(task_def.input)
        workflow_state = WorkflowState(
            session_id=task_def.session_id,
            task_id=task_def.task_id,
            run_kwargs=run_kwargs,
        )

        async with self.lock:
            self._outstanding_calls[task_def.task_id] = workflow_state
    elif message.action == ActionTypes.SEND_EVENT:
        serializer = JsonSerializer()

        task_def = TaskDefinition(**message.data or {})
        event = serializer.deserialize(task_def.input)
        async with self.lock:
            self._events_buffer[task_def.task_id].put_nowait(event)

    else:
        raise ValueError(f"Unhandled action: {message.action}")

as_consumer #

as_consumer(remote: bool = False) -> BaseMessageQueueConsumer

Get the consumer for the message queue.

Parameters:

Name	Type	Description	Default
`remote`	`bool`	Whether the consumer is remote. Defaults to False. If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.	`False`

Source code in llama_deploy/services/workflow.py

def as_consumer(self, remote: bool = False) -> BaseMessageQueueConsumer:
    """Get the consumer for the message queue.

    Args:
        remote (bool):
            Whether the consumer is remote. Defaults to False.
            If True, the consumer will be a RemoteMessageConsumer that uses the `process_message` endpoint.
    """
    if remote:
        return RemoteMessageConsumer(
            id_=self.publisher_id,
            url=f"{self.config.url}{self._app.url_path_for('process_message')}",
            message_type=self.service_name,
        )

    return CallableMessageConsumer(
        id_=self.publisher_id,
        message_type=self.service_name,
        handler=self.process_message,
    )

launch_local `async` #

launch_local() -> Task

Launch the service in-process.

Source code in llama_deploy/services/workflow.py

async def launch_local(self) -> asyncio.Task:
    """Launch the service in-process."""
    logger.info(f"{self.service_name} launch_local")
    return asyncio.create_task(self.processing_loop())

lifespan `async` #

lifespan(app: FastAPI) -> AsyncGenerator[None, None]

Starts the processing loop when the fastapi app starts.

Source code in llama_deploy/services/workflow.py

@asynccontextmanager
async def lifespan(self, app: FastAPI) -> AsyncGenerator[None, None]:
    """Starts the processing loop when the fastapi app starts."""
    t = asyncio.create_task(self.processing_loop())

    yield

    t.cancel()
    await t
    self._running = False

home `async` #

home() -> Dict[str, str]

Home endpoint. Returns general information about the service.

Source code in llama_deploy/services/workflow.py

async def home(self) -> Dict[str, str]:
    """Home endpoint. Returns general information about the service."""
    return {
        "service_name": self.service_name,
        "description": self.config.description,
        "running": str(self._running),
        "step_interval": str(self.config.step_interval),
        "num_outstanding_calls": str(len(self._outstanding_calls)),
        "type": "workflow_service",
    }

launch_server `async` #

launch_server() -> None

Launch the service as a FastAPI server.

Source code in llama_deploy/services/workflow.py

async def launch_server(self) -> None:
    """Launch the service as a FastAPI server."""
    host = self.config.internal_host or self.config.host
    port = self.config.internal_port or self.config.port
    logger.info(f"Launching {self.service_name} server at {host}:{port}")

    class CustomServer(uvicorn.Server):
        def install_signal_handlers(self) -> None:
            pass

    cfg = uvicorn.Config(self._app, host=host, port=port)
    server = CustomServer(cfg)

    try:
        await server.serve()
    except asyncio.CancelledError:
        await asyncio.gather(server.shutdown(), return_exceptions=True)

WorkflowServiceConfig #

Bases: BaseSettings

Workflow service configuration.

Parameters:

Name	Type	Default
`host`	`str`	required
`port`	`int`	required
`internal_host`	`str \| None`	`None`
`internal_port`	`int \| None`	`None`
`service_name`	`str`	`'default_workflow_service'`
`description`	`str`	`'A service that wraps a llama-index workflow.'`
`step_interval`	`float`	`0.1`
`max_concurrent_tasks`	`int`	`8`
`raise_exceptions`	`bool`	`False`
`use_tls`	`bool`	`False`

Source code in llama_deploy/services/workflow.py

class WorkflowServiceConfig(BaseSettings):
    """Workflow service configuration."""

    model_config = SettingsConfigDict(env_prefix="LLAMA_DEPLOY_WF_SERVICE_")

    host: str
    port: int
    internal_host: Optional[str] = None
    internal_port: Optional[int] = None
    service_name: str = "default_workflow_service"
    description: str = "A service that wraps a llama-index workflow."
    step_interval: float = 0.1
    max_concurrent_tasks: int = 8
    raise_exceptions: bool = False
    use_tls: bool = False

    @property
    def url(self) -> str:
        if self.use_tls:
            return f"https://{self.host}:{self.port}"
        return f"http://{self.host}:{self.port}"

services#

BaseService #

service_definition abstractmethod property #

as_consumer abstractmethod #

processing_loop abstractmethod async #

process_message abstractmethod async #

launch_local abstractmethod async #

launch_server abstractmethod async #

register_to_control_plane async #

deregister_from_control_plane async #

get_session_state async #

update_session_state async #

register_to_message_queue async #

WorkflowService #

service_definition property #

message_queue property #

publisher_id property #

publish_callback property #

get_workflow_state async #

set_workflow_state async #

process_call async #

manage_tasks async #

processing_loop async #

process_message async #

as_consumer #

launch_local async #

lifespan async #

home async #

launch_server async #

WorkflowServiceConfig #

`services`#

service_definition `abstractmethod` `property` #

as_consumer `abstractmethod` #

processing_loop `abstractmethod` `async` #

process_message `abstractmethod` `async` #

launch_local `abstractmethod` `async` #

launch_server `abstractmethod` `async` #

register_to_control_plane `async` #

deregister_from_control_plane `async` #

get_session_state `async` #

update_session_state `async` #

register_to_message_queue `async` #

service_definition `property` #

message_queue `property` #

publisher_id `property` #

publish_callback `property` #

get_workflow_state `async` #

set_workflow_state `async` #

process_call `async` #

manage_tasks `async` #

processing_loop `async` #

process_message `async` #

launch_local `async` #

lifespan `async` #

home `async` #

launch_server `async` #