classWatsonxLLM(FunctionCallingLLM):""" IBM watsonx.ai large language models. Example: `pip install llama-index-llms-ibm` ```python from llama_index.llms.ibm import WatsonxLLM watsonx_llm = WatsonxLLM( model_id="google/flan-ul2", url="https://us-south.ml.cloud.ibm.com", apikey="*****", project_id="*****", ) ``` """model_id:Optional[str]=Field(default=None,description="Type of model to use.",frozen=True)deployment_id:Optional[str]=Field(default=None,description="Id of deployed model to use.",frozen=True)temperature:Optional[float]=Field(default=None,description="The temperature to use for sampling.",)max_new_tokens:Optional[int]=Field(default=None,description="The maximum number of tokens to generate.",)additional_params:Optional[Dict[str,Any]]=Field(default_factory=None,description="Additional generation params for the watsonx.ai models.",)project_id:Optional[str]=Field(default=None,description="ID of the Watson Studio project.",frozen=True,)space_id:Optional[str]=Field(default=None,description="ID of the Watson Studio space.",frozen=True)url:Optional[SecretStr]=Field(default=None,description="Url to Watson Machine Learning or CPD instance",frozen=True,)apikey:Optional[SecretStr]=Field(default=None,description="Apikey to Watson Machine Learning or CPD instance",frozen=True,)token:Optional[SecretStr]=Field(default=None,description="Token to CPD instance",frozen=True)password:Optional[SecretStr]=Field(default=None,description="Password to CPD instance",frozen=True)username:Optional[SecretStr]=Field(default=None,description="Username to CPD instance",frozen=True)instance_id:Optional[SecretStr]=Field(default=None,description="Instance_id of CPD instance",frozen=True)version:Optional[SecretStr]=Field(default=None,description="Version of CPD instance",frozen=True)verify:Union[str,bool,None]=Field(default=None,description=""" User can pass as verify one of following: the path to a CA_BUNDLE file the path of directory with certificates of trusted CAs True - default path to truststore will be taken False - no verification will be made """,frozen=True,)validate_model:bool=Field(default=True,description="Model id validation",frozen=True)_model:ModelInference=PrivateAttr()_client:Optional[APIClient]=PrivateAttr()_model_info:Optional[Dict[str,Any]]=PrivateAttr()_deployment_info:Optional[Dict[str,Any]]=PrivateAttr()_context_window:Optional[int]=PrivateAttr()_text_generation_params:Dict[str,Any]|None=PrivateAttr()def__init__(self,model_id:Optional[str]=None,deployment_id:Optional[str]=None,temperature:Optional[float]=None,max_new_tokens:Optional[int]=None,additional_params:Optional[Dict[str,Any]]=None,project_id:Optional[str]=None,space_id:Optional[str]=None,url:Optional[str]=None,apikey:Optional[str]=None,token:Optional[str]=None,password:Optional[str]=None,username:Optional[str]=None,instance_id:Optional[str]=None,version:Optional[str]=None,verify:Union[str,bool,None]=None,api_client:Optional[APIClient]=None,validate_model:bool=True,callback_manager:Optional[CallbackManager]=None,**kwargs:Any,)->None:""" Initialize LLM and watsonx.ai ModelInference. """callback_manager=callback_managerorCallbackManager([])additional_params=additional_paramsor{}creds=(resolve_watsonx_credentials(url=url,apikey=apikey,token=token,username=username,password=password,instance_id=instance_id,)ifnotisinstance(api_client,APIClient)else{})super().__init__(model_id=model_id,deployment_id=deployment_id,temperature=temperature,max_new_tokens=max_new_tokens,additional_params=additional_params,project_id=project_id,space_id=space_id,url=creds.get("url"),apikey=creds.get("apikey"),token=creds.get("token"),password=creds.get("password"),username=creds.get("username"),instance_id=creds.get("instance_id"),version=version,verify=verify,_client=api_client,validate_model=validate_model,callback_manager=callback_manager,**kwargs,)self._context_window=kwargs.get("context_window")generation_params={}ifself.temperatureisnotNone:generation_params["temperature"]=self.temperatureifself.max_new_tokensisnotNone:generation_params["max_new_tokens"]=self.max_new_tokensgeneration_params={**generation_params,**additional_params}ifgeneration_params:self._text_generation_params,_=self._split_generation_params(generation_params)else:self._text_generation_params=Noneself._client=api_clientself._model=ModelInference(model_id=model_id,deployment_id=deployment_id,credentials=(Credentials.from_dict({key:value.get_secret_value()ifvalueelseNoneforkey,valueinself._get_credential_kwargs().items()},_verify=self.verify,)ifcredselseNone),params=self._text_generation_params,project_id=self.project_id,space_id=self.space_id,api_client=api_client,validate=validate_model,)self._model_info=Noneself._deployment_info=Nonemodel_config=ConfigDict(protected_namespaces=(),validate_assignment=True)@propertydefmodel_info(self):ifself._model.model_idandself._model_infoisNone:self._model_info=self._model.get_details()returnself._model_info@propertydefdeployment_info(self):ifself._model.deployment_idandself._deployment_infoisNone:self._deployment_info=self._model.get_details()returnself._deployment_info@classmethoddefclass_name(cls)->str:"""Get Class Name."""return"WatsonxLLM"def_get_credential_kwargs(self)->Dict[str,SecretStr|None]:return{"url":self.url,"apikey":self.apikey,"token":self.token,"password":self.password,"username":self.username,"instance_id":self.instance_id,"version":self.version,}@propertydefmetadata(self)->LLMMetadata:ifself.model_id:model_id=self.model_idcontext_window=self.model_info.get("model_limits",{}).get("max_sequence_length")else:model_id=self.deployment_info.get("entity",{}).get("base_model_id")context_window=(self._model._client.foundation_models.get_model_specs(model_id=model_id).get("model_limits",{}).get("max_sequence_length"))returnLLMMetadata(context_window=context_windoworself._context_windoworDEFAULT_CONTEXT_WINDOW,num_output=self.max_new_tokensorDEFAULT_MAX_TOKENS,model_name=model_idorself._model.deployment_id,)@propertydefsample_generation_text_params(self)->Dict[str,Any]:"""Example of Model generation text kwargs that a user can pass to the model."""returnGenTextParamsMetaNames().get_example_values()@propertydefsample_chat_generation_params(self)->Dict[str,Any]:"""Example of Model chat generation kwargs that a user can pass to the model."""returnGenChatParamsMetaNames().get_example_values()def_split_generation_params(self,data:Dict[str,Any])->Tuple[Dict[str,Any]|None,Dict[str,Any]]:params={}kwargs={}sample_generation_kwargs_keys=set(self.sample_generation_text_params.keys())sample_generation_kwargs_keys.add("prompt_variables")forkey,valueindata.items():ifkeyinsample_generation_kwargs_keys:params.update({key:value})else:kwargs.update({key:value})returnparamsifparamselseNone,kwargsdef_split_chat_generation_params(self,data:Dict[str,Any])->Tuple[Dict[str,Any]|None,Dict[str,Any]]:params={}kwargs={}sample_generation_kwargs_keys=set(self.sample_chat_generation_params.keys())forkey,valueindata.items():ifkeyinsample_generation_kwargs_keys:params.update({key:value})else:kwargs.update({key:value})returnparamsifparamselseNone,kwargs@llm_completion_callback()defcomplete(self,prompt:str,formatted:bool=False,**kwargs:Any)->CompletionResponse:params,generation_kwargs=self._split_generation_params(kwargs)if"use_completions"ingeneration_kwargs:delgeneration_kwargs["use_completions"]response=self._model.generate(prompt=prompt,params=self._text_generation_paramsorparams,**generation_kwargs,)returnCompletionResponse(text=self._model._return_guardrails_stats(response).get("generated_text"),raw=response,)@llm_completion_callback()asyncdefacomplete(self,prompt:str,formatted:bool=False,**kwargs:Any)->CompletionResponse:params,generation_kwargs=self._split_generation_params(kwargs)if"use_completions"ingeneration_kwargs:delgeneration_kwargs["use_completions"]response=awaitself._model.agenerate(prompt=prompt,params=self._text_generation_paramsorparams,**generation_kwargs,)returnCompletionResponse(text=self._model._return_guardrails_stats(response).get("generated_text"),raw=response,)@llm_completion_callback()defstream_complete(self,prompt:str,formatted:bool=False,**kwargs:Any)->CompletionResponseGen:params,generation_kwargs=self._split_generation_params(kwargs)stream_response=self._model.generate_text_stream(prompt=prompt,params=self._text_generation_paramsorparams,**generation_kwargs,)defgen()->CompletionResponseGen:content=""ifkwargs.get("raw_response"):forstream_deltainstream_response:stream_delta_text=self._model._return_guardrails_stats(stream_delta).get("generated_text","")content+=stream_delta_textyieldCompletionResponse(text=content,delta=stream_delta_text,raw=stream_delta)else:forstream_deltainstream_response:content+=stream_deltayieldCompletionResponse(text=content,delta=stream_delta)returngen()@llm_completion_callback()asyncdefastream_complete(self,prompt:str,formatted:bool=False,**kwargs:Any)->CompletionResponseAsyncGen:asyncdefgen()->CompletionResponseAsyncGen:formessageinself.stream_complete(prompt,formatted=formatted,**kwargs):yieldmessage# NOTE: convert generator to async generatorreturngen()def_chat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponse:message_dicts=[to_watsonx_message_dict(message)formessageinmessages]params,generation_kwargs=self._split_chat_generation_params(kwargs)response=self._model.chat(messages=message_dicts,params=params,tools=generation_kwargs.get("tools"),tool_choice=generation_kwargs.get("tool_choice"),tool_choice_option=generation_kwargs.get("tool_choice_option"),)wx_message=response["choices"][0]["message"]message=from_watsonx_message(wx_message)returnChatResponse(message=message,raw=response,)@llm_chat_callback()defchat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponse:ifkwargs.get("use_completions"):chat_fn=completion_to_chat_decorator(self.complete)else:chat_fn=self._chatreturnchat_fn(messages,**kwargs)asyncdef_achat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponse:message_dicts=[to_watsonx_message_dict(message)formessageinmessages]params,generation_kwargs=self._split_chat_generation_params(kwargs)response=awaitself._model.achat(messages=message_dicts,params=params,tools=generation_kwargs.get("tools"),tool_choice=generation_kwargs.get("tool_choice"),tool_choice_option=generation_kwargs.get("tool_choice_option"),)wx_message=response["choices"][0]["message"]message=from_watsonx_message(wx_message)returnChatResponse(message=message,raw=response,)@llm_chat_callback()asyncdefachat(self,messages:Sequence[ChatMessage],**kwargs:Any,)->ChatResponse:ifkwargs.get("use_completions"):achat_fn=acompletion_to_chat_decorator(self.acomplete)else:achat_fn=self._achatreturnawaitachat_fn(messages,**kwargs)def_stream_chat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponseGen:message_dicts=[to_watsonx_message_dict(message)formessageinmessages]params,generation_kwargs=self._split_chat_generation_params(kwargs)stream_response=self._model.chat_stream(messages=message_dicts,params=params,tools=generation_kwargs.get("tools"),tool_choice=generation_kwargs.get("tool_choice"),tool_choice_option=generation_kwargs.get("tool_choice_option"),)defstream_gen()->ChatResponseGen:content=""role=Nonetool_calls=[]forresponseinstream_response:tools_available=Falsedelta=""additional_kwargs={}ifresponse["choices"]:wx_message=response["choices"][0]["delta"]role=wx_message.get("role")orroleorMessageRole.ASSISTANTdelta=wx_message.get("content","")content+=deltaif"tool_calls"inwx_message:tools_available=Trueiftools_available:tool_calls=update_tool_calls(tool_calls,wx_message["tool_calls"])iftool_calls:additional_kwargs["tool_calls"]=tool_callsyieldChatResponse(message=ChatMessage(role=role,content=content,additional_kwargs=additional_kwargs,),delta=delta,raw=response,additional_kwargs=self._get_response_token_counts(response),)returnstream_gen()@llm_chat_callback()defstream_chat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponseGen:ifkwargs.get("use_completions"):chat_stream_fn=stream_completion_to_chat_decorator(self.stream_complete)else:chat_stream_fn=self._stream_chatreturnchat_stream_fn(messages,**kwargs)@llm_chat_callback()asyncdefastream_chat(self,messages:Sequence[ChatMessage],**kwargs:Any)->ChatResponseAsyncGen:asyncdefgen()->ChatResponseAsyncGen:formessageinself.stream_chat(messages,**kwargs):yieldmessage# NOTE: convert generator to async generatorreturngen()def_prepare_chat_with_tools(self,tools:List["BaseTool"],user_msg:Optional[Union[str,ChatMessage]]=None,chat_history:Optional[List[ChatMessage]]=None,verbose:bool=False,allow_parallel_tool_calls:bool=False,tool_choice:Optional[str]=None,**kwargs:Any,)->Dict[str,Any]:"""Predict and call the tool."""# watsonx uses the same openai tool formattool_specs=[tool.metadata.to_openai_tool()fortoolintools]ifisinstance(user_msg,str):user_msg=ChatMessage(role=MessageRole.USER,content=user_msg)messages=chat_historyor[]ifuser_msg:messages.append(user_msg)chat_with_tools_payload={"messages":messages,"tools":tool_specsorNone,**kwargs,}iftool_choiceisnotNone:chat_with_tools_payload.update({"tool_choice":{"type":"function","function":{"name":tool_choice}}})returnchat_with_tools_payloaddefget_tool_calls_from_response(self,response:ChatResponse,error_on_no_tool_call:bool=True,**kwargs:Any,)->List[ToolSelection]:"""Predict and call the tool."""tool_calls=response.message.additional_kwargs.get("tool_calls",[])iflen(tool_calls)<1:iferror_on_no_tool_call:raiseValueError(f"Expected at least one tool call, but got {len(tool_calls)} tool calls.")else:return[]tool_selections=[]fortool_callintool_calls:ifnotisinstance(tool_call,dict):raiseValueError("Invalid tool_call object")iftool_call.get("type")!="function":raiseValueError("Invalid tool type. Unsupported by watsonx.ai")# this should handle both complete and partial jsonstry:argument_dict=parse_partial_json(tool_call.get("function",{}).get("arguments"))exceptValueError:argument_dict={}tool_selections.append(ToolSelection(tool_id=tool_call.get("id"),tool_name=tool_call.get("function").get("name"),tool_kwargs=argument_dict,))returntool_selectionsdef_get_response_token_counts(self,raw_response:Any)->dict:"""Get the token usage reported by the response."""ifisinstance(raw_response,dict):usage=raw_response.get("usage",{})ifnotusage:return{}prompt_tokens=usage.get("prompt_tokens",0)completion_tokens=usage.get("completion_tokens",0)total_tokens=usage.get("total_tokens",0)else:return{}return{"prompt_tokens":prompt_tokens,"completion_tokens":completion_tokens,"total_tokens":total_tokens,}
defget_tool_calls_from_response(self,response:ChatResponse,error_on_no_tool_call:bool=True,**kwargs:Any,)->List[ToolSelection]:"""Predict and call the tool."""tool_calls=response.message.additional_kwargs.get("tool_calls",[])iflen(tool_calls)<1:iferror_on_no_tool_call:raiseValueError(f"Expected at least one tool call, but got {len(tool_calls)} tool calls.")else:return[]tool_selections=[]fortool_callintool_calls:ifnotisinstance(tool_call,dict):raiseValueError("Invalid tool_call object")iftool_call.get("type")!="function":raiseValueError("Invalid tool type. Unsupported by watsonx.ai")# this should handle both complete and partial jsonstry:argument_dict=parse_partial_json(tool_call.get("function",{}).get("arguments"))exceptValueError:argument_dict={}tool_selections.append(ToolSelection(tool_id=tool_call.get("id"),tool_name=tool_call.get("function").get("name"),tool_kwargs=argument_dict,))returntool_selections