Skip to content

LlamaIndex

Replicate

Replicate

ReplicateMultiModal #

Bases: Replicate

Source code in

llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-replicate/llama_index/multi_modal_llms/replicate/base.py

@deprecated(
    reason="This package has been deprecated and will no longer be maintained. Please feel free to contribute to multi modal support in llama-index-llms-replicate instead. See Multi Modal LLMs documentation for a complete guide on migration: https://docs.llamaindex.ai/en/stable/understanding/using_llms/using_llms/#multi-modal-llms",
    version="0.3.2",
)
class ReplicateMultiModal(Replicate):
    """
    Multi-modal completion client for models hosted on Replicate.

    A completion request combines one text prompt with one image (a local
    file path or a remote URL) and is sent through ``replicate.run``. Only
    the first entry of ``image_documents`` is used. The chat endpoints are
    not implemented and raise ``NotImplementedError``.
    """

    model: str = Field(description="The Multi-Modal model to use from Replicate.")
    temperature: float = Field(
        description="The temperature to use for sampling. Adjusts randomness of outputs, greater than 1 is random and 0 is deterministic."
    )
    max_new_tokens: int = Field(
        description=" The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt"
    )
    context_window: int = Field(
        description="The maximum number of context tokens for the model."
    )
    prompt_key: str = Field(description="The key to use for the prompt in API calls.")
    image_key: str = Field(description="The key to use for the image in API calls.")
    top_p: float = Field(
        description="When decoding text, samples from the top p percentage of most likely tokens; lower to ignore less likely tokens."
    )
    num_beams: int = Field(description="Number of beams for beam search decoding.")
    repetition_penalty: float = Field(
        description="Penalty for repeated words in generated text; 1 is no penalty, values greater than 1 discourage repetition, less than 1 encourage it."
    )
    additional_kwargs: Dict[str, Any] = Field(
        default_factory=dict, description="Additional kwargs for the Replicate API."
    )

    _messages_to_prompt: Callable = PrivateAttr()
    _completion_to_prompt: Callable = PrivateAttr()

    def __init__(
        self,
        model: str = REPLICATE_MULTI_MODAL_LLM_MODELS["fuyu-8b"],
        temperature: float = 0.75,
        max_new_tokens: int = 512,
        num_input_files: int = 1,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        context_window: int = DEFAULT_CONTEXT_WINDOW,
        prompt_key: str = "prompt",
        image_key: str = "image",
        repetition_penalty: Optional[float] = 1.0,
        num_beams: Optional[int] = 1,
        top_p: Optional[float] = 0.9,
        messages_to_prompt: Optional[Callable] = None,
        completion_to_prompt: Optional[Callable] = None,
        callback_manager: Optional[CallbackManager] = None,
    ) -> None:
        """
        Forward the sampling/configuration values to the parent initializer.

        ``additional_kwargs`` defaults to a fresh empty dict,
        ``messages_to_prompt`` falls back to ``generic_messages_to_prompt``
        and ``completion_to_prompt`` falls back to the identity function.
        """
        super().__init__(
            model=model,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
            num_input_files=num_input_files,
            repetition_penalty=repetition_penalty,
            num_beams=num_beams,
            top_p=top_p,
            additional_kwargs=additional_kwargs or {},
            context_window=context_window,
            prompt_key=prompt_key,
            image_key=image_key,
            callback_manager=callback_manager,
        )
        self._messages_to_prompt = messages_to_prompt or generic_messages_to_prompt
        self._completion_to_prompt = completion_to_prompt or (lambda x: x)

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "replicate_multi_modal_llm"

    @property
    def metadata(self) -> MultiModalLLMMetadata:
        """Multi Modal LLM metadata."""
        return MultiModalLLMMetadata(
            context_window=self.context_window,
            num_output=DEFAULT_NUM_OUTPUTS,
            model_name=self.model,
        )

    @property
    def _model_kwargs(self) -> Dict[str, Any]:
        """Sampling parameters for the request; ``additional_kwargs`` win on key clashes."""
        base_kwargs: Dict[str, Any] = {
            "temperature": self.temperature,
            "max_length": self.context_window,
            "max_new_tokens": self.max_new_tokens,
            "num_beams": self.num_beams,
            "repetition_penalty": self.repetition_penalty,
            "top_p": self.top_p,
        }
        return {
            **base_kwargs,
            **self.additional_kwargs,
        }

    def _get_multi_modal_chat_messages(
        self, prompt: str, image_document: Union[ImageBlock, ImageNode], **kwargs: Any
    ) -> Dict[str, Any]:
        """
        Build the input payload for a single prompt/image pair.

        Args:
            prompt: Text stored under ``self.prompt_key``.
            image_document: Image source; an ``ImageNode`` is converted with
                ``image_node_to_image_block`` first. A truthy ``path`` takes
                precedence over ``url``.
            **kwargs: Extra payload entries; they override the model kwargs
                (and the prompt/image entries) on key clashes.

        Returns:
            The payload dict. For a local path the image value is a binary
            file object opened here, which the caller is responsible for
            closing; for a URL it is the URL itself.

        Raises:
            FileNotFoundError: If the local file does not exist, or if the
                image has neither a path nor a URL.
        """
        if isinstance(image_document, ImageNode):
            image_doc: ImageBlock = image_node_to_image_block(image_document)
        else:
            image_doc = image_document

        if image_doc.path:
            # Keep the try body limited to the one call that can raise
            # FileNotFoundError, and chain the original error for debugging.
            try:
                image_value: Any = open(image_doc.path, "rb")
            except FileNotFoundError as err:
                raise FileNotFoundError(
                    "Could not load local image file. Please check whether the file exists"
                ) from err
        elif image_doc.url:
            # Remote images are passed to Replicate by URL.
            image_value = image_doc.url
        else:
            raise FileNotFoundError(
                "Could not load image file. Please check whether the file exists"
            )

        return {
            self.prompt_key: prompt,
            self.image_key: image_value,
            **self._model_kwargs,
            **kwargs,
        }

    @staticmethod
    def _import_replicate() -> Any:
        """Import and return the ``replicate`` module, or raise a helpful ``ImportError``."""
        try:
            import replicate
        except ImportError as err:
            raise ImportError(
                "Could not import replicate library. "
                "Please install replicate with `pip install replicate`"
            ) from err
        return replicate

    def _build_request_input(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Validate the request, then assemble the payload from the first image."""
        if not image_documents:
            raise ValueError(
                "ReplicateMultiModal requires at least one image document for completion."
            )

        # TODO: at the current moment, only support uploading one image document
        if len(image_documents) > 1:
            _logger.warning(
                "ReplicateMultiModal currently only supports uploading one image document; "
                "we are using the first image document for completion."
            )

        # Check the model before the payload is built so that an invalid model
        # never leaves a freshly opened image file behind.
        if self.model not in REPLICATE_MULTI_MODAL_LLM_MODELS.values():
            raise ValueError(
                f"Unknown model {self.model!r}. Please provide a valid Replicate Multi-Modal model name in:"
                f" {', '.join(REPLICATE_MULTI_MODAL_LLM_MODELS.values())}"
            )

        return self._get_multi_modal_chat_messages(
            self._completion_to_prompt(prompt),
            # using the first image for single image completion
            image_documents[0],
            **kwargs,
        )

    def _start_prediction(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> Any:
        """Call ``replicate.run`` for the request and return whatever it returns."""
        replicate = self._import_replicate()
        input_dict = self._build_request_input(prompt, image_documents, **kwargs)
        try:
            return replicate.run(self.model, input=input_dict)
        finally:
            # Close the local image file opened while building the payload.
            # NOTE(review): assumes replicate.run has finished reading file
            # inputs by the time it returns - confirm against the client version.
            # A caller-supplied value under image_key (via kwargs) is left alone.
            if self.image_key not in kwargs:
                image_value = input_dict.get(self.image_key)
                if hasattr(image_value, "close"):
                    image_value.close()

    @staticmethod
    def _last_response(response_gen: CompletionResponseGen) -> CompletionResponse:
        """Drain the stream and return its final response with ``delta`` cleared."""
        final_response: Optional[CompletionResponse] = None
        for final_response in response_gen:
            pass
        if final_response is None:
            # The model produced no output at all.
            return CompletionResponse(text="")
        final_response.delta = None
        return final_response

    def complete(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> CompletionResponse:
        """
        Run a completion and return the fully accumulated response.

        Consumes ``stream_complete`` and returns its last response with
        ``delta`` set to ``None``; an empty stream yields an empty-text
        response.
        """
        return self._last_response(
            self.stream_complete(prompt, image_documents, **kwargs)
        )

    def stream_complete(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> CompletionResponseGen:
        """
        Start a completion and return a generator of incremental responses.

        The request is validated and sent before this method returns; each
        yielded response carries the newest chunk in ``delta`` and the text
        accumulated so far in ``text``.

        Raises:
            ImportError: If the ``replicate`` package is not installed.
            ValueError: If ``image_documents`` is empty or the model is not
                one of ``REPLICATE_MULTI_MODAL_LLM_MODELS``.
            FileNotFoundError: If the image cannot be located.
        """
        response_iter = self._start_prediction(prompt, image_documents, **kwargs)

        def gen() -> CompletionResponseGen:
            text = ""
            for delta in response_iter:
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                )

        return gen()

    def chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        """Not implemented."""
        raise NotImplementedError

    def stream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseGen:
        """Not implemented."""
        raise NotImplementedError

    # ===== Async Endpoints =====

    async def acomplete(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> CompletionResponse:
        """
        Coroutine counterpart of ``complete``.

        This delegates to the synchronous ``stream_complete`` and consumes it
        inline; nothing is awaited inside this coroutine.
        """
        return self._last_response(
            self.stream_complete(prompt, image_documents, **kwargs)
        )

    async def astream_complete(
        self,
        prompt: str,
        image_documents: Sequence[Union[ImageNode, ImageBlock]],
        **kwargs: Any,
    ) -> CompletionResponseAsyncGen:
        """
        Coroutine counterpart of ``stream_complete``; returns an async generator.

        The request is issued through the same synchronous ``replicate.run``
        call, and the returned async generator iterates its result with a
        plain ``for`` loop. Raises the same exceptions as ``stream_complete``.
        """
        response_iter = self._start_prediction(prompt, image_documents, **kwargs)

        async def gen() -> CompletionResponseAsyncGen:
            text = ""
            for delta in response_iter:
                text += delta
                yield CompletionResponse(
                    delta=delta,
                    text=text,
                )

        return gen()

    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        """Not implemented."""
        raise NotImplementedError

    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseAsyncGen:
        """Not implemented."""
        raise NotImplementedError

metadata `property` #

metadata: MultiModalLLMMetadata

Multi Modal LLM metadata.