Skip to content

Gemini

GeminiMultiModal #

Bases: Gemini

Gemini multimodal.

This class is a thin wrapper around Gemini to support legacy multimodal completion methods.

Source code in llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-gemini/llama_index/multi_modal_llms/gemini/base.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
class GeminiMultiModal(Gemini):
    """
    Gemini multimodal.

    This class is a thin wrapper around Gemini to support legacy multimodal
    completion methods: each ``*complete`` variant converts the prompt plus a
    sequence of image nodes into a single USER chat message and delegates to
    the corresponding ``*chat`` method on the base class.
    """

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier used by the LlamaIndex registry."""
        return "Gemini_MultiModal_LLM"

    @staticmethod
    def _build_user_message(prompt: str, image_documents: Sequence[ImageNode]) -> Any:
        """
        Build a single USER-role multimodal chat message.

        Resolves each image node's raw bytes into an ``ImageDocument`` and
        combines them with the text prompt. Shared by all four completion
        methods, which previously duplicated this logic inline.

        Args:
            prompt: Text portion of the request.
            image_documents: Image nodes whose resolved bytes are attached.

        Returns:
            The chat message produced by
            ``generate_gemini_multi_modal_chat_message`` (project type).
        """
        documents = [
            ImageDocument(image=node.resolve_image().read())
            for node in image_documents
        ]
        return generate_gemini_multi_modal_chat_message(
            prompt=prompt, role=MessageRole.USER, image_documents=documents
        )

    def complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponse:
        """Synchronous multimodal completion via ``self.chat``."""
        msg = self._build_user_message(prompt, image_documents)
        response = self.chat(messages=[msg], **kwargs)
        # message.content may be None; normalize to "" for CompletionResponse.
        return CompletionResponse(text=response.message.content or "")

    async def acomplete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponse:
        """Asynchronous multimodal completion via ``self.achat``."""
        msg = self._build_user_message(prompt, image_documents)
        response = await self.achat(messages=[msg], **kwargs)
        return CompletionResponse(text=response.message.content or "")

    def stream_complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponseGen:
        """Streaming multimodal completion; yields incremental responses."""
        msg = self._build_user_message(prompt, image_documents)

        def gen() -> CompletionResponseGen:
            for s in self.stream_chat(messages=[msg], **kwargs):
                yield CompletionResponse(
                    text=s.message.content or "", delta=s.delta or ""
                )

        return gen()

    async def astream_complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        """Async streaming multimodal completion; yields incremental responses."""
        msg = self._build_user_message(prompt, image_documents)

        async def gen() -> CompletionResponseAsyncGen:
            streaming_handler = await self.astream_chat(messages=[msg], **kwargs)
            async for chunk in streaming_handler:
                yield CompletionResponse(
                    text=chunk.message.content or "", delta=chunk.delta or ""
                )

        return gen()