import os

from llama_index.core.llms import ChatMessage
from llama_index.llms.litellm import LiteLLM

# Provider credentials are read from the environment by litellm.
os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
os.environ["COHERE_API_KEY"] = "your-cohere-api-key"

# Build the single-turn conversation to send.
user_message = ChatMessage(role="user", content="Hey! how's it going?")

# Any model litellm routes to can be named here.
client = LiteLLM(model="gpt-3.5-turbo")

# Send the conversation and show the assistant's reply.
response = client.chat([user_message])
print(response)
Source code in llama-index-integrations/llms/llama-index-llms-litellm/llama_index/llms/litellm/base.py
class LiteLLM(LLM):
    """LiteLLM.

    LLM wrapper that routes chat/completion calls through the `litellm`
    library, which proxies many providers behind an OpenAI-style API.

    Examples:
        `pip install llama-index-llms-litellm`

        ```python
        import os
        from llama_index.core.llms import ChatMessage
        from llama_index.llms.litellm import LiteLLM

        # Set environment variables
        os.environ["OPENAI_API_KEY"] = "your-openai-api-key"
        os.environ["COHERE_API_KEY"] = "your-cohere-api-key"

        # Define a chat message
        message = ChatMessage(role="user", content="Hey! how's it going?")

        # Initialize LiteLLM with the desired model
        llm = LiteLLM(model="gpt-3.5-turbo")

        # Call the chat method with the message
        chat_response = llm.chat([message])

        # Print the response
        print(chat_response)
        ```
    """

    model: str = Field(
        default=DEFAULT_LITELLM_MODEL,
        description=(
            "The LiteLLM model to use. "
            "For complete list of providers https://docs.litellm.ai/docs/providers"
        ),
    )
    temperature: float = Field(
        default=DEFAULT_TEMPERATURE,
        description="The temperature to use during generation.",
        # fix: pydantic constraint kwargs are ge/le (gte/lte are not
        # recognized, so the 0..1 bound was previously not enforced)
        ge=0.0,
        le=1.0,
    )
    max_tokens: Optional[int] = Field(
        # fix: explicit default=None to match the __init__ default; the
        # field was previously declared Optional but had no default
        default=None,
        description="The maximum number of tokens to generate.",
        gt=0,
    )
    additional_kwargs: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional kwargs for the LLM API.",
        # for all inputs https://docs.litellm.ai/docs/completion/input
    )
    max_retries: int = Field(
        default=10, description="The maximum number of API retries."
    )

    def __init__(
        self,
        model: str = DEFAULT_LITELLM_MODEL,
        temperature: float = DEFAULT_TEMPERATURE,
        max_tokens: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        max_retries: int = 10,
        api_key: Optional[str] = None,
        api_type: Optional[str] = None,
        api_base: Optional[str] = None,
        callback_manager: Optional[CallbackManager] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
        completion_to_prompt: Optional[Callable[[str], str]] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
        **kwargs: Any,
    ) -> None:
        if "custom_llm_provider" in kwargs:
            if (
                kwargs["custom_llm_provider"] != "ollama"
                and kwargs["custom_llm_provider"] != "vllm"
            ):  # don't check keys for local models
                validate_litellm_api_key(api_key, api_type)
        else:  # by default assume it's a hosted endpoint
            validate_litellm_api_key(api_key, api_type)

        additional_kwargs = additional_kwargs or {}
        # Credentials/endpoint are forwarded to litellm via per-call kwargs.
        if api_key is not None:
            additional_kwargs["api_key"] = api_key
        if api_type is not None:
            additional_kwargs["api_type"] = api_type
        if api_base is not None:
            additional_kwargs["api_base"] = api_base

        super().__init__(
            model=model,
            temperature=temperature,
            max_tokens=max_tokens,
            additional_kwargs=additional_kwargs,
            max_retries=max_retries,
            callback_manager=callback_manager,
            system_prompt=system_prompt,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            pydantic_program_mode=pydantic_program_mode,
            output_parser=output_parser,
            **kwargs,
        )

    def _get_model_name(self) -> str:
        """Return the base model name, stripping fine-tune suffixes."""
        model_name = self.model
        if "ft-" in model_name:  # legacy fine-tuning
            model_name = model_name.split(":")[0]
        elif model_name.startswith("ft:"):
            model_name = model_name.split(":")[1]
        return model_name

    @classmethod
    def class_name(cls) -> str:
        return "litellm_llm"

    @property
    def metadata(self) -> LLMMetadata:
        """Describe the underlying model's context window and capabilities."""
        return LLMMetadata(
            context_window=openai_modelname_to_contextsize(self._get_model_name()),
            num_output=self.max_tokens or -1,
            is_chat_model=True,
            is_function_calling_model=is_function_calling_model(
                self._get_model_name()
            ),
            model_name=self.model,
        )

    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        """Send a chat request; falls back to completion API for non-chat models."""
        if self._is_chat_model:
            chat_fn = self._chat
        else:
            chat_fn = completion_to_chat_decorator(self._complete)
        return chat_fn(messages, **kwargs)

    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        """Streaming variant of `chat`."""
        if self._is_chat_model:
            stream_chat_fn = self._stream_chat
        else:
            stream_chat_fn = stream_completion_to_chat_decorator(self._stream_complete)
        return stream_chat_fn(messages, **kwargs)

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        # litellm assumes all llms are chat llms
        if self._is_chat_model:
            complete_fn = chat_to_completion_decorator(self._chat)
        else:
            complete_fn = self._complete
        return complete_fn(prompt, **kwargs)

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        """Streaming variant of `complete`."""
        if self._is_chat_model:
            stream_complete_fn = stream_chat_to_completion_decorator(self._stream_chat)
        else:
            stream_complete_fn = self._stream_complete
        return stream_complete_fn(prompt, **kwargs)

    @property
    def _is_chat_model(self) -> bool:
        # litellm assumes all llms are chat llms
        return True

    @property
    def _model_kwargs(self) -> Dict[str, Any]:
        """Per-call kwargs derived from this instance's configuration."""
        base_kwargs = {
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        return {
            **base_kwargs,
            **self.additional_kwargs,
        }

    def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        """Merge model kwargs with call-site overrides (call-site wins)."""
        return {
            **self._model_kwargs,
            **kwargs,
        }

    def _chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        if not self._is_chat_model:
            raise ValueError("This model is not a chat model.")

        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        if "max_tokens" in all_kwargs and all_kwargs["max_tokens"] is None:
            # don't send max_tokens == None, this throws errors for Non OpenAI providers
            all_kwargs.pop("max_tokens")

        response = completion_with_retry(
            is_chat_model=self._is_chat_model,
            max_retries=self.max_retries,
            messages=message_dicts,
            stream=False,
            **all_kwargs,
        )
        message_dict = response["choices"][0]["message"]
        message = from_litellm_message(message_dict)

        return ChatResponse(
            message=message,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    def _stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        if not self._is_chat_model:
            raise ValueError("This model is not a chat model.")

        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        if "max_tokens" in all_kwargs and all_kwargs["max_tokens"] is None:
            # don't send max_tokens == None, this throws errors for Non OpenAI providers
            all_kwargs.pop("max_tokens")

        def gen() -> ChatResponseGen:
            content = ""
            function_call: Optional[dict] = None
            for response in completion_with_retry(
                is_chat_model=self._is_chat_model,
                max_retries=self.max_retries,
                messages=message_dicts,
                stream=True,
                **all_kwargs,
            ):
                delta = response["choices"][0]["delta"]
                role = delta.get("role", "assistant")
                content_delta = delta.get("content", "") or ""
                content += content_delta

                function_call_delta = delta.get("function_call", None)
                if function_call_delta is not None:
                    if function_call is None:
                        function_call = function_call_delta

                        ## ensure we do not add a blank function call
                        # NOTE(review): get("function_name", "") can only be
                        # None if the key exists with a None value — confirm
                        # the intended key name against litellm's delta schema
                        if function_call.get("function_name", "") is None:
                            del function_call["function_name"]
                    else:
                        function_call["arguments"] += function_call_delta["arguments"]

                additional_kwargs = {}
                if function_call is not None:
                    additional_kwargs["function_call"] = function_call

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                        additional_kwargs=additional_kwargs,
                    ),
                    delta=content_delta,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()

    def _complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        raise NotImplementedError("litellm assumes all llms are chat llms.")

    def _stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
        raise NotImplementedError("litellm assumes all llms are chat llms.")

    def _get_max_token_for_prompt(self, prompt: str) -> int:
        """Return the token budget left in the context window after *prompt*.

        Raises:
            ImportError: if tiktoken is not installed.
            ValueError: if the prompt already fills the context window.
        """
        try:
            import tiktoken
        except ImportError:
            raise ImportError(
                "Please install tiktoken to use the max_tokens=None feature."
            )
        context_window = self.metadata.context_window
        try:
            encoding = tiktoken.encoding_for_model(self._get_model_name())
        except KeyError:
            # fix: was a duplicated `encoding = encoding = ...` assignment;
            # default to cl100k_base for unknown model names
            encoding = tiktoken.get_encoding("cl100k_base")
        tokens = encoding.encode(prompt)
        max_token = context_window - len(tokens)
        if max_token <= 0:
            raise ValueError(
                f"The prompt is too long for the model. "
                f"Please use a prompt that is less than {context_window} tokens."
            )
        return max_token

    def _get_response_token_counts(self, raw_response: Any) -> dict:
        """Get the token usage reported by the response."""
        if not isinstance(raw_response, dict):
            return {}

        usage = raw_response.get("usage", {})
        return {
            "prompt_tokens": usage.get("prompt_tokens", 0),
            "completion_tokens": usage.get("completion_tokens", 0),
            "total_tokens": usage.get("total_tokens", 0),
        }

    # ===== Async Endpoints =====
    @llm_chat_callback()
    async def achat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponse:
        """Async variant of `chat`."""
        achat_fn: Callable[..., Awaitable[ChatResponse]]
        if self._is_chat_model:
            achat_fn = self._achat
        else:
            achat_fn = acompletion_to_chat_decorator(self._acomplete)
        return await achat_fn(messages, **kwargs)

    @llm_chat_callback()
    async def astream_chat(
        self,
        messages: Sequence[ChatMessage],
        **kwargs: Any,
    ) -> ChatResponseAsyncGen:
        """Async variant of `stream_chat`."""
        astream_chat_fn: Callable[..., Awaitable[ChatResponseAsyncGen]]
        if self._is_chat_model:
            astream_chat_fn = self._astream_chat
        else:
            astream_chat_fn = astream_completion_to_chat_decorator(
                self._astream_complete
            )
        return await astream_chat_fn(messages, **kwargs)

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        """Async variant of `complete`."""
        if self._is_chat_model:
            acomplete_fn = achat_to_completion_decorator(self._achat)
        else:
            acomplete_fn = self._acomplete
        return await acomplete_fn(prompt, **kwargs)

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        """Async variant of `stream_complete`."""
        if self._is_chat_model:
            astream_complete_fn = astream_chat_to_completion_decorator(
                self._astream_chat
            )
        else:
            astream_complete_fn = self._astream_complete
        return await astream_complete_fn(prompt, **kwargs)

    async def _achat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponse:
        if not self._is_chat_model:
            raise ValueError("This model is not a chat model.")

        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        if "max_tokens" in all_kwargs and all_kwargs["max_tokens"] is None:
            # fix (consistency with _chat): don't send max_tokens == None,
            # this throws errors for Non OpenAI providers
            all_kwargs.pop("max_tokens")

        response = await acompletion_with_retry(
            is_chat_model=self._is_chat_model,
            max_retries=self.max_retries,
            messages=message_dicts,
            stream=False,
            **all_kwargs,
        )
        message_dict = response["choices"][0]["message"]
        message = from_litellm_message(message_dict)

        return ChatResponse(
            message=message,
            raw=response,
            additional_kwargs=self._get_response_token_counts(response),
        )

    async def _astream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseAsyncGen:
        if not self._is_chat_model:
            raise ValueError("This model is not a chat model.")

        message_dicts = to_openai_message_dicts(messages)
        all_kwargs = self._get_all_kwargs(**kwargs)
        if "max_tokens" in all_kwargs and all_kwargs["max_tokens"] is None:
            # fix (consistency with _stream_chat): don't send max_tokens == None,
            # this throws errors for Non OpenAI providers
            all_kwargs.pop("max_tokens")

        async def gen() -> ChatResponseAsyncGen:
            content = ""
            function_call: Optional[dict] = None
            async for response in await acompletion_with_retry(
                is_chat_model=self._is_chat_model,
                max_retries=self.max_retries,
                messages=message_dicts,
                stream=True,
                **all_kwargs,
            ):
                delta = response["choices"][0]["delta"]
                role = delta.get("role", "assistant")
                content_delta = delta.get("content", "") or ""
                content += content_delta

                function_call_delta = delta.get("function_call", None)
                if function_call_delta is not None:
                    if function_call is None:
                        function_call = function_call_delta

                        ## ensure we do not add a blank function call
                        # NOTE(review): same suspicious key as _stream_chat —
                        # confirm "function_name" against litellm's delta schema
                        if function_call.get("function_name", "") is None:
                            del function_call["function_name"]
                    else:
                        function_call["arguments"] += function_call_delta["arguments"]

                additional_kwargs = {}
                if function_call is not None:
                    additional_kwargs["function_call"] = function_call

                yield ChatResponse(
                    message=ChatMessage(
                        role=role,
                        content=content,
                        additional_kwargs=additional_kwargs,
                    ),
                    delta=content_delta,
                    raw=response,
                    additional_kwargs=self._get_response_token_counts(response),
                )

        return gen()

    async def _acomplete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
        raise NotImplementedError("litellm assumes all llms are chat llms.")

    async def _astream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        raise NotImplementedError("litellm assumes all llms are chat llms.")