Skip to content

Index

MultiModalLLMMetadata #

Bases: BaseModel

Parameters:

Name Type Description Default
context_window int | None

Total number of tokens the model can be input when generating a response.

3900
num_output int | None

Number of tokens the model can output when generating a response.

256
num_input_files int | None

Number of input files the model can take when generating a response.

10
is_function_calling_model bool | None

Set True if the model supports function calling messages, similar to OpenAI's function calling API. For example, converting 'Email Anya to see if she wants to get coffee next Friday' to a function call like send_email(to: string, body: string).

False
model_name str

The model's name used for logging, testing, and sanity checking. For some models this can be automatically discerned. For other models, like locally loaded models, this must be manually specified.

'unknown'
is_chat_model bool

Set True if the model exposes a chat interface (i.e. can be passed a sequence of messages, rather than text), like OpenAI's /v1/chat/completions endpoint.

False
Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Declarative pydantic model describing the limits and capabilities of a
# multi-modal LLM. Every field declares a default, so no field is required.
# NOTE: documented with `#` comments rather than a class docstring, since
# pydantic can surface a model's docstring in its generated schema.
class MultiModalLLMMetadata(BaseModel):
    # Overrides pydantic's protected namespaces — presumably so the
    # `model_name` field below can be declared without colliding with the
    # default protection of the `model_` prefix (confirm against pydantic docs).
    model_config = ConfigDict(protected_namespaces=("pydantic_model_",))
    # Input token budget; defaults to DEFAULT_CONTEXT_WINDOW (the rendered
    # parameter table for this class lists 3900). `None` is accepted.
    context_window: Optional[int] = Field(
        default=DEFAULT_CONTEXT_WINDOW,
        description=(
            "Total number of tokens the model can be input when generating a response."
        ),
    )
    # Output token budget; defaults to DEFAULT_NUM_OUTPUTS (the rendered
    # parameter table lists 256). `None` is accepted.
    num_output: Optional[int] = Field(
        default=DEFAULT_NUM_OUTPUTS,
        description="Number of tokens the model can output when generating a response.",
    )
    # Input file limit; defaults to DEFAULT_NUM_INPUT_FILES (the rendered
    # parameter table lists 10). `None` is accepted.
    num_input_files: Optional[int] = Field(
        default=DEFAULT_NUM_INPUT_FILES,
        description="Number of input files the model can take when generating a response.",
    )
    # Capability flag for function calling; off by default. `None` is accepted.
    is_function_calling_model: Optional[bool] = Field(
        default=False,
        # SEE: https://openai.com/blog/function-calling-and-other-api-updates
        description=(
            "Set True if the model supports function calling messages, similar to"
            " OpenAI's function calling API. For example, converting 'Email Anya to"
            " see if she wants to get coffee next Friday' to a function call like"
            " `send_email(to: string, body: string)`."
        ),
    )
    # Human-readable model identifier; falls back to "unknown" when not given.
    model_name: str = Field(
        default="unknown",
        description=(
            "The model's name used for logging, testing, and sanity checking. For some"
            " models this can be automatically discerned. For other models, like"
            " locally loaded models, this must be manually specified."
        ),
    )

    # Capability flag for a chat-style (message sequence) interface; off by
    # default. Unlike the optional fields above, this one is a plain `bool`.
    is_chat_model: bool = Field(
        default=False,
        description=(
            "Set True if the model exposes a chat interface (i.e. can be passed a"
            " sequence of messages, rather than text), like OpenAI's"
            " /v1/chat/completions endpoint."
        ),
    )

MultiModalLLM #

Bases: BaseComponent, DispatcherSpanMixin

Multi-Modal LLM interface.

Parameters:

Name Type Description Default
callback_manager CallbackManager

Callback manager that handles callbacks for events within LlamaIndex.

The callback manager provides a way to call handlers on event starts/ends.

Additionally, the callback manager traces the current stack of events. It does this by using a few key attributes:

- trace_stack - The current stack of events that have not ended yet. When an event ends, it's removed from the stack. Since this is a contextvar, it is unique to each thread/task.
- trace_map - A mapping of event ids to their children events. On the start of events, the bottom of the trace stack is used as the current parent event for the trace map.
- trace_id - A simple name for the current trace, usually denoting the entrypoint (query, index_construction, insert, etc.)

Args: handlers (List[BaseCallbackHandler]): list of handlers to use.

Usage:

    with callback_manager.event(CBEventType.QUERY) as event:
        event.on_start(payload={key: val})
        ...
        event.on_end(payload={key: val})

<dynamic>
Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
class MultiModalLLM(BaseComponent, DispatcherSpanMixin):
    """Multi-Modal LLM interface."""

    # Abstract contract for multi-modal LLM integrations: a metadata property
    # plus eight endpoints (completion/chat x blocking/streaming x sync/async).
    # Endpoints defined by a subclass are wrapped with callback decorators in
    # ``__init_subclass__`` below.

    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Excluded from serialization; a fresh CallbackManager is built by default.
    callback_manager: CallbackManager = Field(
        exclude=True, default_factory=CallbackManager
    )

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        # Pure pass-through; it exists so static checkers can follow the
        # constructor through this class hierarchy.
        super().__init__(*args, **kwargs)

    @property
    @abstractmethod
    def metadata(self) -> MultiModalLLMMetadata:
        """Return the ``MultiModalLLMMetadata`` describing this model."""

    # ===== Sync Endpoints =====

    @abstractmethod
    def complete(
        self, prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any
    ) -> CompletionResponse:
        """
        Run a completion for a text prompt plus image documents.

        Args:
            prompt: Text prompt for the completion.
            image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
                alongside the prompt.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            CompletionResponse: The completion result.
        """

    @abstractmethod
    def stream_complete(
        self, prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any
    ) -> CompletionResponseGen:
        """
        Run a streaming completion for a text prompt plus image documents.

        Args:
            prompt: Text prompt for the completion.
            image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
                alongside the prompt.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            CompletionResponseGen: The streamed completion.
        """

    @abstractmethod
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        """
        Run a chat exchange over a sequence of messages.

        Args:
            messages: The ``ChatMessage`` objects making up the conversation.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            ChatResponse: The chat result.
        """

    @abstractmethod
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        """
        Run a streaming chat exchange over a sequence of messages.

        Args:
            messages: The ``ChatMessage`` objects making up the conversation.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            ChatResponseGen: The streamed chat result.
        """

    # ===== Async Endpoints =====

    @abstractmethod
    async def acomplete(
        self, prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any
    ) -> CompletionResponse:
        """
        Async counterpart of ``complete``.

        Args:
            prompt: Text prompt for the completion.
            image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
                alongside the prompt.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            CompletionResponse: The completion result.
        """

    @abstractmethod
    async def astream_complete(
        self, prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        """
        Async counterpart of ``stream_complete``.

        Args:
            prompt: Text prompt for the completion.
            image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
                alongside the prompt.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            CompletionResponseAsyncGen: The streamed completion.
        """

    @abstractmethod
    async def achat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponse:
        """
        Async counterpart of ``chat``.

        Args:
            messages: The ``ChatMessage`` objects making up the conversation.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            ChatResponse: The chat result.
        """

    @abstractmethod
    async def astream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseAsyncGen:
        """
        Async counterpart of ``stream_chat``.

        Args:
            messages: The ``ChatMessage`` objects making up the conversation.
            **kwargs: Extra keyword arguments; meaning is left to the
                implementing subclass.

        Returns:
            ChatResponseAsyncGen: The streamed chat result.
        """

    def __init_subclass__(cls, **kwargs: Any) -> None:
        """
        Wrap the endpoints a subclass defines with the LLM callback decorators.

        The callback decorators install events, so they must be applied before
        the span decorators; otherwise the spans would not contain the events.
        That is why the wrapping happens before delegating to ``super()``.

        Args:
            **kwargs: Forwarded unchanged to the parent ``__init_subclass__``.
        """
        endpoint_names = (
            "complete",
            "acomplete",
            "stream_complete",
            "astream_complete",
            "chat",
            "achat",
            "stream_chat",
            "astream_chat",
        )
        for endpoint in endpoint_names:
            # Only look at what this subclass itself defines; inherited
            # attributes are not in ``cls.__dict__`` and are left untouched.
            implementation = cls.__dict__.get(endpoint)
            if not callable(implementation):
                continue
            # Names ending in "chat" get the chat callback; all others get
            # the completion callback.
            make_decorator = (
                llm_chat_callback
                if endpoint.endswith("chat")
                else llm_completion_callback
            )
            setattr(cls, endpoint, make_decorator()(implementation))
        super().__init_subclass__(**kwargs)

metadata abstractmethod property #

Multi-Modal LLM metadata.

complete abstractmethod #

complete(prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any) -> CompletionResponse

Completion endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
92
93
94
95
96
97
98
99
@abstractmethod
def complete(
    self,
    prompt: str,
    image_documents: List[Union[ImageNode, ImageBlock]],
    **kwargs: Any,
) -> CompletionResponse:
    """
    Completion endpoint for Multi-Modal LLM.

    Abstract: the body is empty here and must be supplied by a subclass.

    Args:
        prompt: Text prompt for the completion.
        image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
            alongside the prompt.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        CompletionResponse: The completion result.
    """

stream_complete abstractmethod #

stream_complete(prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any) -> CompletionResponseGen

Streaming completion endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
101
102
103
104
105
106
107
108
@abstractmethod
def stream_complete(
    self,
    prompt: str,
    image_documents: List[Union[ImageNode, ImageBlock]],
    **kwargs: Any,
) -> CompletionResponseGen:
    """
    Streaming completion endpoint for Multi-Modal LLM.

    Abstract: the body is empty here and must be supplied by a subclass.

    Args:
        prompt: Text prompt for the completion.
        image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
            alongside the prompt.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        CompletionResponseGen: The streamed completion.
    """

chat abstractmethod #

chat(messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse

Chat endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
110
111
112
113
114
115
116
@abstractmethod
def chat(
    self,
    messages: Sequence[ChatMessage],
    **kwargs: Any,
) -> ChatResponse:
    """
    Chat endpoint for Multi-Modal LLM.

    Abstract: the body is empty here and must be supplied by a subclass.

    Args:
        messages: The ``ChatMessage`` objects making up the conversation.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        ChatResponse: The chat result.
    """

stream_chat abstractmethod #

stream_chat(messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponseGen

Stream chat endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
118
119
120
121
122
123
124
@abstractmethod
def stream_chat(
    self,
    messages: Sequence[ChatMessage],
    **kwargs: Any,
) -> ChatResponseGen:
    """
    Stream chat endpoint for Multi-Modal LLM.

    Abstract: the body is empty here and must be supplied by a subclass.

    Args:
        messages: The ``ChatMessage`` objects making up the conversation.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        ChatResponseGen: The streamed chat result.
    """

acomplete abstractmethod async #

acomplete(prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any) -> CompletionResponse

Async completion endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
128
129
130
131
132
133
134
135
@abstractmethod
async def acomplete(
    self,
    prompt: str,
    image_documents: List[Union[ImageNode, ImageBlock]],
    **kwargs: Any,
) -> CompletionResponse:
    """
    Async completion endpoint for Multi-Modal LLM.

    Abstract coroutine: the body is empty here and must be supplied by a
    subclass.

    Args:
        prompt: Text prompt for the completion.
        image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
            alongside the prompt.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        CompletionResponse: The completion result.
    """

astream_complete abstractmethod async #

astream_complete(prompt: str, image_documents: List[Union[ImageNode, ImageBlock]], **kwargs: Any) -> CompletionResponseAsyncGen

Async streaming completion endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
137
138
139
140
141
142
143
144
@abstractmethod
async def astream_complete(
    self,
    prompt: str,
    image_documents: List[Union[ImageNode, ImageBlock]],
    **kwargs: Any,
) -> CompletionResponseAsyncGen:
    """
    Async streaming completion endpoint for Multi-Modal LLM.

    Abstract coroutine: the body is empty here and must be supplied by a
    subclass.

    Args:
        prompt: Text prompt for the completion.
        image_documents: ``ImageNode`` / ``ImageBlock`` objects passed
            alongside the prompt.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        CompletionResponseAsyncGen: The streamed completion.
    """

achat abstractmethod async #

achat(messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse

Async chat endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
146
147
148
149
150
151
152
@abstractmethod
async def achat(
    self,
    messages: Sequence[ChatMessage],
    **kwargs: Any,
) -> ChatResponse:
    """
    Async chat endpoint for Multi-Modal LLM.

    Abstract coroutine: the body is empty here and must be supplied by a
    subclass.

    Args:
        messages: The ``ChatMessage`` objects making up the conversation.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        ChatResponse: The chat result.
    """

astream_chat abstractmethod async #

astream_chat(messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponseAsyncGen

Async streaming chat endpoint for Multi-Modal LLM.

Source code in llama-index-core/llama_index/core/multi_modal_llms/base.py
154
155
156
157
158
159
160
@abstractmethod
async def astream_chat(
    self,
    messages: Sequence[ChatMessage],
    **kwargs: Any,
) -> ChatResponseAsyncGen:
    """
    Async streaming chat endpoint for Multi-Modal LLM.

    Abstract coroutine: the body is empty here and must be supplied by a
    subclass.

    Args:
        messages: The ``ChatMessage`` objects making up the conversation.
        **kwargs: Extra keyword arguments; meaning is left to the
            implementing subclass.

    Returns:
        ChatResponseAsyncGen: The streamed chat result.
    """