Replicate#
- pydantic model llama_index.multi_modal_llms.replicate_multi_modal.ReplicateMultiModal#
Show JSON schema
```json
{
  "title": "ReplicateMultiModal",
  "description": "Multi-Modal LLM interface.",
  "type": "object",
  "properties": {
    "model": {
      "title": "Model",
      "description": "The Multi-Modal model to use from Replicate.",
      "type": "string"
    },
    "temperature": {
      "title": "Temperature",
      "description": "The temperature to use for sampling. Adjusts the randomness of outputs: values greater than 1 are more random, and 0 is deterministic.",
      "type": "number"
    },
    "max_new_tokens": {
      "title": "Max New Tokens",
      "description": "The maximum number of tokens to generate, ignoring the number of tokens in the prompt.",
      "type": "integer"
    },
    "context_window": {
      "title": "Context Window",
      "description": "The maximum number of context tokens for the model.",
      "type": "integer"
    },
    "prompt_key": {
      "title": "Prompt Key",
      "description": "The key to use for the prompt in API calls.",
      "type": "string"
    },
    "image_key": {
      "title": "Image Key",
      "description": "The key to use for the image in API calls.",
      "type": "string"
    },
    "top_p": {
      "title": "Top P",
      "description": "When decoding text, samples from the smallest set of most likely tokens whose cumulative probability reaches top_p; lower values ignore less likely tokens.",
      "type": "number"
    },
    "num_beams": {
      "title": "Num Beams",
      "description": "Number of beams for beam search decoding.",
      "type": "integer"
    },
    "repetition_penalty": {
      "title": "Repetition Penalty",
      "description": "Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, less than 1 encourage it.",
      "type": "number"
    },
    "additional_kwargs": {
      "title": "Additional Kwargs",
      "description": "Additional kwargs for the Replicate API.",
      "type": "object"
    },
    "class_name": {
      "title": "Class Name",
      "type": "string",
      "default": "replicate_multi_modal_llm"
    }
  },
  "required": [
    "model",
    "temperature",
    "max_new_tokens",
    "context_window",
    "prompt_key",
    "image_key",
    "top_p",
    "num_beams",
    "repetition_penalty"
  ]
}
```
- Config
arbitrary_types_allowed: bool = True
- Fields
- field additional_kwargs: Dict[str, Any] [Optional]#
Additional kwargs for the Replicate API.
- field context_window: int [Required]#
The maximum number of context tokens for the model.
- field image_key: str [Required]#
The key to use for the image in API calls.
- field max_new_tokens: int [Required]#
The maximum number of tokens to generate, ignoring the number of tokens in the prompt.
- field model: str [Required]#
The Multi-Modal model to use from Replicate.
- field num_beams: int [Required]#
Number of beams for beam search decoding.
- field prompt_key: str [Required]#
The key to use for the prompt in API calls.
- field repetition_penalty: float [Required]#
Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, less than 1 encourage it.
- field temperature: float [Required]#
The temperature to use for sampling. Adjusts the randomness of outputs: values greater than 1 are more random, and 0 is deterministic.
- field top_p: float [Required]#
When decoding text, samples from the smallest set of most likely tokens whose cumulative probability reaches top_p; lower values ignore less likely tokens.
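To make the sampling fields above concrete, here is a minimal pure-Python sketch of how `temperature` and `top_p` shape a token distribution. This is an illustration of the documented semantics only, not Replicate's actual decoding implementation:

```python
import math

def softmax(logits):
    """Convert raw logits into a probability distribution."""
    m = max(logits)
    exps = [math.exp(x - m) for x in logits]
    total = sum(exps)
    return [e / total for e in exps]

def top_p_filter(probs, top_p):
    """Keep the smallest set of most-likely tokens whose cumulative
    probability reaches top_p; zero out and renormalize the rest."""
    order = sorted(range(len(probs)), key=lambda i: probs[i], reverse=True)
    kept, cum = set(), 0.0
    for i in order:
        kept.add(i)
        cum += probs[i]
        if cum >= top_p:
            break
    total = sum(probs[i] for i in kept)
    return [probs[i] / total if i in kept else 0.0 for i in range(len(probs))]

def sample_distribution(logits, temperature=1.0, top_p=1.0):
    """Temperature scales logits before softmax; temperature -> 0
    collapses to greedy (argmax) decoding, matching '0 is deterministic'."""
    if temperature <= 0:
        probs = [0.0] * len(logits)
        probs[max(range(len(logits)), key=lambda i: logits[i])] = 1.0
        return probs
    return top_p_filter(softmax([x / temperature for x in logits]), top_p)
```

With `top_p=0.9`, the least-likely tokens are pruned entirely; with `temperature=0`, all probability mass lands on the single most likely token.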
- async achat(messages: Sequence[ChatMessage], **kwargs: Any) ChatResponse #
Async chat endpoint for Multi-Modal LLM.
- async acomplete(prompt: str, image_documents: Sequence[ImageDocument], **kwargs: Any) CompletionResponse #
Async completion endpoint for Multi-Modal LLM.
- async astream_chat(messages: Sequence[ChatMessage], **kwargs: Any) AsyncGenerator[ChatResponse, None] #
Async streaming chat endpoint for Multi-Modal LLM.
- async astream_complete(prompt: str, image_documents: Sequence[ImageDocument], **kwargs: Any) AsyncGenerator[CompletionResponse, None] #
Async streaming completion endpoint for Multi-Modal LLM.
- chat(messages: Sequence[ChatMessage], **kwargs: Any) ChatResponse #
Chat endpoint for Multi-Modal LLM.
- classmethod class_name() str #
Get the class name, used as a unique ID in serialization.
This provides a key that makes serialization robust against actual class name changes.
- complete(prompt: str, image_documents: Sequence[ImageDocument], **kwargs: Any) CompletionResponse #
Completion endpoint for Multi-Modal LLM.
- stream_chat(messages: Sequence[ChatMessage], **kwargs: Any) Generator[ChatResponse, None, None] #
Stream chat endpoint for Multi-Modal LLM.
- stream_complete(prompt: str, image_documents: Sequence[ImageDocument], **kwargs: Any) Generator[CompletionResponse, None, None] #
Streaming completion endpoint for Multi-Modal LLM.
- property metadata: MultiModalLLMMetadata#
Multi-Modal LLM metadata.