from pathlib import Path
from typing import Any, Optional, Sequence

# NOTE: import paths below assume a recent llama-index-core package layout.
from llama_index.core.base.llms.generic_utils import (
    astream_chat_response_to_completion_response,
    chat_response_to_completion_response,
    stream_chat_response_to_completion_response,
)
from llama_index.core.base.llms.types import (
    ChatMessage,
    CompletionResponse,
    CompletionResponseAsyncGen,
    CompletionResponseGen,
    ImageBlock,
    MessageRole,
)
from llama_index.core.schema import ImageNode
from llama_index.llms.azure_openai import AzureOpenAI


class AzureOpenAIMultiModal(AzureOpenAI):
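    """Azure OpenAI multi-modal LLM.

    Extends the AzureOpenAI LLM so that image documents are converted into
    image content blocks on the outgoing chat message.

    Example (illustrative; the constructor arguments come from the AzureOpenAI
    base class and depend on your Azure deployment):

        llm = AzureOpenAIMultiModal(
            engine="my-gpt-4o-deployment",
            azure_endpoint="https://my-resource.openai.azure.com/",
            api_key="...",
            api_version="2024-02-01",
        )
        # image_node is an ImageNode, e.g. produced by an image reader
        response = llm.complete("Describe the image.", image_documents=[image_node])
    """
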
    @classmethod
    def class_name(cls) -> str:
        return "azure_openai_multi_modal_llm"

    def _get_multi_modal_chat_message(
        self,
        prompt: str,
        role: str,
        image_documents: Sequence[ImageNode],
        image_detail: Optional[str] = "low",
        **kwargs: Any,
    ) -> ChatMessage:
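        """Build a ChatMessage carrying the prompt text plus one block per image.

        Each ImageNode is mapped to an ImageBlock, preferring inline image data,
        then an image URL, then a local file path (direct or from metadata).
        """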
        chat_msg = ChatMessage(role=role, content=prompt)
        if not image_documents:
            # if image_documents is empty, return text only chat message
            return chat_msg

        for image_document in image_documents:
            # Create the appropriate ContentBlock depending on the document content
            if image_document.image:
                chat_msg.blocks.append(
                    ImageBlock(
                        image=bytes(image_document.image, encoding="utf-8"),
                        detail=image_detail,
                    )
                )
            elif image_document.image_url:
                chat_msg.blocks.append(
                    ImageBlock(url=image_document.image_url, detail=image_detail)
                )
            elif image_document.image_path:
                chat_msg.blocks.append(
                    ImageBlock(
                        path=Path(image_document.image_path),
                        detail=image_detail,
                        image_mimetype=image_document.image_mimetype
                        or image_document.metadata.get("file_type"),
                    )
                )
            elif f_path := image_document.metadata.get("file_path"):
                chat_msg.blocks.append(
                    ImageBlock(
                        path=Path(f_path),
                        detail=image_detail,
                        image_mimetype=image_document.metadata.get("file_type"),
                    )
                )
        return chat_msg

    def complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponse:
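        """Complete the prompt against the given images with a single chat call."""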
        chat_message = self._get_multi_modal_chat_message(
            prompt=prompt,
            role=MessageRole.USER,
            image_documents=image_documents,
        )
        chat_response = self.chat([chat_message], **kwargs)
        return chat_response_to_completion_response(chat_response)

    def stream_complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponseGen:
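        """Stream the completion for the prompt and images as it is generated."""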
        chat_message = self._get_multi_modal_chat_message(
            prompt=prompt,
            role=MessageRole.USER,
            image_documents=image_documents,
        )
        chat_response = self.stream_chat([chat_message], **kwargs)
        return stream_chat_response_to_completion_response(chat_response)

    # ===== Async Endpoints =====

    async def acomplete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponse:
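        """Async variant of complete."""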
        chat_message = self._get_multi_modal_chat_message(
            prompt=prompt,
            role=MessageRole.USER,
            image_documents=image_documents,
        )
        chat_response = await self.achat([chat_message], **kwargs)
        return chat_response_to_completion_response(chat_response)

    async def astream_complete(
        self, prompt: str, image_documents: Sequence[ImageNode], **kwargs: Any
    ) -> CompletionResponseAsyncGen:
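        """Async variant of stream_complete."""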
        chat_message = self._get_multi_modal_chat_message(
            prompt=prompt,
            role=MessageRole.USER,
            image_documents=image_documents,
        )
        chat_response = await self.astream_chat([chat_message], **kwargs)
        return astream_chat_response_to_completion_response(chat_response)