```python
from llama_index.llms.anthropic import Anthropic

llm = Anthropic(model="claude-instant-1")
resp = llm.stream_complete("Paul Graham is ")
for r in resp:
    print(r.delta, end="")
```
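The streaming example above has non-streaming counterparts via `complete` and `chat`; a minimal sketch, assuming `ANTHROPIC_API_KEY` is set in the environment:

```python
from llama_index.core.llms import ChatMessage
from llama_index.llms.anthropic import Anthropic

llm = Anthropic(model="claude-instant-1")

# One-shot completion: returns a CompletionResponse with the full text.
resp = llm.complete("Paul Graham is ")
print(resp.text)

# Chat interface over the same model.
messages = [ChatMessage(role="user", content="Who is Paul Graham?")]
print(llm.chat(messages).message.content)
```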
Source code in llama-index-integrations/llms/llama-index-llms-anthropic/llama_index/llms/anthropic/base.py
````python
class Anthropic(FunctionCallingLLM):
    """
    Anthropic LLM.

    Examples:
        `pip install llama-index-llms-anthropic`

        ```python
        from llama_index.llms.anthropic import Anthropic

        llm = Anthropic(model="claude-instant-1")
        resp = llm.stream_complete("Paul Graham is ")
        for r in resp:
            print(r.delta, end="")
        ```

    """

    model: str = Field(
        default=DEFAULT_ANTHROPIC_MODEL, description="The anthropic model to use."
    )
    temperature: float = Field(
        default=DEFAULT_TEMPERATURE,
        description="The temperature to use for sampling.",
        ge=0.0,
        le=1.0,
    )
    max_tokens: int = Field(
        default=DEFAULT_ANTHROPIC_MAX_TOKENS,
        description="The maximum number of tokens to generate.",
        gt=0,
    )
    base_url: Optional[str] = Field(default=None, description="The base URL to use.")
    timeout: Optional[float] = Field(
        default=None, description="The timeout to use in seconds.", ge=0
    )
    max_retries: int = Field(
        default=10, description="The maximum number of API retries.", ge=0
    )
    additional_kwargs: Dict[str, Any] = Field(
        default_factory=dict, description="Additional kwargs for the anthropic API."
    )
    cache_idx: Optional[int] = Field(
        default=None,
        description=(
            "Set the cache_control for every message up to and including this index. "
            "Set to -1 to cache all messages. "
            "Set to None to disable caching."
        ),
    )
    thinking_dict: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Configure thinking controls for the LLM. See the Anthropic API docs for more details. "
            "For example: thinking_dict={'type': 'enabled', 'budget_tokens': 16000}"
        ),
    )

    _client: Union[
        anthropic.Anthropic, anthropic.AnthropicVertex, anthropic.AnthropicBedrock
    ] = PrivateAttr()
    _aclient: Union[
        anthropic.AsyncAnthropic,
        anthropic.AsyncAnthropicVertex,
        anthropic.AsyncAnthropicBedrock,
    ] = PrivateAttr()

    def __init__(
        self,
        model: str = DEFAULT_ANTHROPIC_MODEL,
        temperature: float = DEFAULT_TEMPERATURE,
        max_tokens: int = DEFAULT_ANTHROPIC_MAX_TOKENS,
        base_url: Optional[str] = None,
        timeout: Optional[float] = None,
        max_retries: int = 10,
        api_key: Optional[str] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        callback_manager: Optional[CallbackManager] = None,
        default_headers: Optional[Dict[str, str]] = None,
        system_prompt: Optional[str] = None,
        messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None,
        completion_to_prompt: Optional[Callable[[str], str]] = None,
        pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT,
        output_parser: Optional[BaseOutputParser] = None,
        region: Optional[str] = None,
        project_id: Optional[str] = None,
        aws_region: Optional[str] = None,
        cache_idx: Optional[int] = None,
        thinking_dict: Optional[Dict[str, Any]] = None,
    ) -> None:
        additional_kwargs = additional_kwargs or {}
        callback_manager = callback_manager or CallbackManager([])

        super().__init__(
            temperature=temperature,
            max_tokens=max_tokens,
            additional_kwargs=additional_kwargs,
            base_url=base_url,
            timeout=timeout,
            max_retries=max_retries,
            model=model,
            callback_manager=callback_manager,
            system_prompt=system_prompt,
            messages_to_prompt=messages_to_prompt,
            completion_to_prompt=completion_to_prompt,
            pydantic_program_mode=pydantic_program_mode,
            output_parser=output_parser,
            cache_idx=cache_idx,
            thinking_dict=thinking_dict,
        )

        if region and project_id and not aws_region:
            self._client = anthropic.AnthropicVertex(
                region=region,
                project_id=project_id,
                timeout=timeout,
                max_retries=max_retries,
                default_headers=default_headers,
            )
            self._aclient = anthropic.AsyncAnthropicVertex(
                region=region,
                project_id=project_id,
                timeout=timeout,
                max_retries=max_retries,
                default_headers=default_headers,
            )
        elif aws_region:
            # Note: this assumes you have AWS credentials configured.
            self._client = anthropic.AnthropicBedrock(
                aws_region=aws_region,
            )
            self._aclient = anthropic.AsyncAnthropicBedrock(
                aws_region=aws_region,
            )
        else:
            self._client = anthropic.Anthropic(
                api_key=api_key,
                base_url=base_url,
                timeout=timeout,
                max_retries=max_retries,
                default_headers=default_headers,
            )
            self._aclient = anthropic.AsyncAnthropic(
                api_key=api_key,
                base_url=base_url,
                timeout=timeout,
                max_retries=max_retries,
                default_headers=default_headers,
            )

    @classmethod
    def class_name(cls) -> str:
        return "Anthropic_LLM"

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            context_window=anthropic_modelname_to_contextsize(self.model),
            num_output=self.max_tokens,
            is_chat_model=True,
            model_name=self.model,
            is_function_calling_model=is_function_calling_model(self.model),
        )

    @property
    def tokenizer(self) -> Tokenizer:
        return AnthropicTokenizer(self._client, self.model)

    @property
    def _model_kwargs(self) -> Dict[str, Any]:
        base_kwargs = {
            "model": self.model,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        }
        return {
            **base_kwargs,
            **self.additional_kwargs,
        }

    def _get_all_kwargs(self, **kwargs: Any) -> Dict[str, Any]:
        kwargs = {
            **self._model_kwargs,
            **kwargs,
        }
        if self.thinking_dict and "thinking" not in kwargs:
            kwargs["thinking"] = self.thinking_dict
        return kwargs

    def _get_content_and_tool_calls_and_thinking(
        self, response: Any
    ) -> Tuple[str, List[Dict[str, Any]], Dict[str, Any]]:
        tool_calls = []
        thinking = None
        content = ""
        for content_block in response.content:
            if isinstance(content_block, TextBlock):
                content += content_block.text
            # this assumes a single thinking block, which as of 2025-03-06, is always true
            elif isinstance(content_block, ThinkingBlock):
                thinking = content_block.model_dump()
            elif isinstance(content_block, ToolUseBlock):
                tool_calls.append(content_block.model_dump())

        return content, tool_calls, thinking

    @llm_chat_callback()
    def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
        anthropic_messages, system_prompt = messages_to_anthropic_messages(
            messages, self.cache_idx
        )
        all_kwargs = self._get_all_kwargs(**kwargs)

        response = self._client.messages.create(
            messages=anthropic_messages,
            stream=False,
            system=system_prompt,
            **all_kwargs,
        )

        content, tool_calls, thinking = self._get_content_and_tool_calls_and_thinking(
            response
        )

        return ChatResponse(
            message=ChatMessage(
                role=MessageRole.ASSISTANT,
                content=content,
                additional_kwargs={"tool_calls": tool_calls, "thinking": thinking},
            ),
            raw=dict(response),
        )

    @llm_completion_callback()
    def complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        complete_fn = chat_to_completion_decorator(self.chat)
        return complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    def stream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseGen:
        anthropic_messages, system_prompt = messages_to_anthropic_messages(
            messages, self.cache_idx
        )
        all_kwargs = self._get_all_kwargs(**kwargs)

        response = self._client.messages.create(
            messages=anthropic_messages, system=system_prompt, stream=True, **all_kwargs
        )

        def gen() -> ChatResponseGen:
            content = ""
            content_delta = ""
            thinking = None
            cur_tool_calls: List[ToolUseBlock] = []
            cur_tool_call: Optional[ToolUseBlock] = None
            cur_tool_json: str = ""
            role = MessageRole.ASSISTANT
            for r in response:
                if isinstance(r, ContentBlockDeltaEvent):
                    if isinstance(r.delta, TextDelta):
                        content_delta = r.delta.text
                        content += content_delta
                    elif isinstance(r.delta, SignatureDelta):
                        if thinking is None:
                            thinking = ThinkingBlock(
                                signature=r.delta.signature,
                                thinking="",
                                type="thinking",
                            )
                        else:
                            thinking.signature += r.delta.signature
                    elif isinstance(r.delta, ThinkingDelta):
                        if thinking is None:
                            thinking = ThinkingBlock(
                                signature="",
                                thinking=r.delta.thinking,
                                type="thinking",
                            )
                        else:
                            thinking.thinking += r.delta.thinking
                    else:
                        if not isinstance(cur_tool_call, ToolUseBlock):
                            raise ValueError("Tool call not started")
                        content_delta = r.delta.partial_json
                        cur_tool_json += content_delta
                        try:
                            argument_dict = parse_partial_json(cur_tool_json)
                            cur_tool_call.input = argument_dict
                        except ValueError:
                            pass

                    if cur_tool_call is not None:
                        tool_calls_to_send = [*cur_tool_calls, cur_tool_call]
                    else:
                        tool_calls_to_send = cur_tool_calls
                    yield ChatResponse(
                        message=ChatMessage(
                            role=role,
                            content=content,
                            additional_kwargs={
                                "tool_calls": [t.dict() for t in tool_calls_to_send],
                                "thinking": thinking.model_dump() if thinking else None,
                            },
                        ),
                        delta=content_delta,
                        raw=r,
                    )
                elif isinstance(r, ContentBlockStartEvent):
                    if isinstance(r.content_block, ToolUseBlock):
                        cur_tool_call = r.content_block
                        cur_tool_json = ""
                elif isinstance(r, ContentBlockStopEvent):
                    if isinstance(cur_tool_call, ToolUseBlock):
                        cur_tool_calls.append(cur_tool_call)

        return gen()

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseGen:
        stream_complete_fn = stream_chat_to_completion_decorator(self.stream_chat)
        return stream_complete_fn(prompt, **kwargs)

    @llm_chat_callback()
    async def achat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponse:
        anthropic_messages, system_prompt = messages_to_anthropic_messages(
            messages, self.cache_idx
        )
        all_kwargs = self._get_all_kwargs(**kwargs)

        response = await self._aclient.messages.create(
            messages=anthropic_messages,
            system=system_prompt,
            stream=False,
            **all_kwargs,
        )

        content, tool_calls, thinking = self._get_content_and_tool_calls_and_thinking(
            response
        )

        return ChatResponse(
            message=ChatMessage(
                role=MessageRole.ASSISTANT,
                content=content,
                additional_kwargs={"tool_calls": tool_calls, "thinking": thinking},
            ),
            raw=dict(response),
        )

    @llm_completion_callback()
    async def acomplete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponse:
        acomplete_fn = achat_to_completion_decorator(self.achat)
        return await acomplete_fn(prompt, **kwargs)

    @llm_chat_callback()
    async def astream_chat(
        self, messages: Sequence[ChatMessage], **kwargs: Any
    ) -> ChatResponseAsyncGen:
        anthropic_messages, system_prompt = messages_to_anthropic_messages(
            messages, self.cache_idx
        )
        all_kwargs = self._get_all_kwargs(**kwargs)

        response = await self._aclient.messages.create(
            messages=anthropic_messages, system=system_prompt, stream=True, **all_kwargs
        )

        async def gen() -> ChatResponseAsyncGen:
            content = ""
            content_delta = ""
            thinking = None
            cur_tool_calls: List[ToolUseBlock] = []
            cur_tool_call: Optional[ToolUseBlock] = None
            cur_tool_json: str = ""
            role = MessageRole.ASSISTANT
            async for r in response:
                if isinstance(r, ContentBlockDeltaEvent):
                    if isinstance(r.delta, TextDelta):
                        content_delta = r.delta.text
                        content += content_delta
                    elif isinstance(r.delta, SignatureDelta):
                        if thinking is None:
                            thinking = ThinkingBlock(
                                signature=r.delta.signature,
                                thinking="",
                                type="thinking",
                            )
                        else:
                            thinking.signature += r.delta.signature
                    elif isinstance(r.delta, ThinkingDelta):
                        if thinking is None:
                            thinking = ThinkingBlock(
                                signature="",
                                thinking=r.delta.thinking,
                                type="thinking",
                            )
                        else:
                            thinking.thinking += r.delta.thinking
                    else:
                        if not isinstance(cur_tool_call, ToolUseBlock):
                            raise ValueError("Tool call not started")
                        content_delta = r.delta.partial_json
                        cur_tool_json += content_delta
                        try:
                            argument_dict = parse_partial_json(cur_tool_json)
                            cur_tool_call.input = argument_dict
                        except ValueError:
                            pass

                    if cur_tool_call is not None:
                        tool_calls_to_send = [*cur_tool_calls, cur_tool_call]
                    else:
                        tool_calls_to_send = cur_tool_calls
                    yield ChatResponse(
                        message=ChatMessage(
                            role=role,
                            content=content,
                            additional_kwargs={
                                "tool_calls": [t.dict() for t in tool_calls_to_send],
                                "thinking": thinking.model_dump() if thinking else None,
                            },
                        ),
                        delta=content_delta,
                        raw=r,
                    )
                elif isinstance(r, ContentBlockStartEvent):
                    if isinstance(r.content_block, ToolUseBlock):
                        cur_tool_call = r.content_block
                        cur_tool_json = ""
                elif isinstance(r, ContentBlockStopEvent):
                    if isinstance(cur_tool_call, ToolUseBlock):
                        cur_tool_calls.append(cur_tool_call)

        return gen()

    @llm_completion_callback()
    async def astream_complete(
        self, prompt: str, formatted: bool = False, **kwargs: Any
    ) -> CompletionResponseAsyncGen:
        astream_complete_fn = astream_chat_to_completion_decorator(self.astream_chat)
        return await astream_complete_fn(prompt, **kwargs)

    def _prepare_chat_with_tools(
        self,
        tools: List["BaseTool"],
        user_msg: Optional[Union[str, ChatMessage]] = None,
        chat_history: Optional[List[ChatMessage]] = None,
        verbose: bool = False,
        allow_parallel_tool_calls: bool = False,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Prepare the chat with tools."""
        chat_history = chat_history or []

        if isinstance(user_msg, str):
            user_msg = ChatMessage(role=MessageRole.USER, content=user_msg)
            chat_history.append(user_msg)

        tool_dicts = []
        if tools:
            for tool in tools:
                tool_dicts.append(
                    {
                        "name": tool.metadata.name,
                        "description": tool.metadata.description,
                        "input_schema": tool.metadata.get_parameters_dict(),
                    }
                )
            if "prompt-caching" in kwargs.get("extra_headers", {}).get(
                "anthropic-beta", ""
            ):
                tool_dicts[-1]["cache_control"] = {"type": "ephemeral"}

        return {"messages": chat_history, "tools": tool_dicts, **kwargs}

    def _validate_chat_with_tools_response(
        self,
        response: ChatResponse,
        tools: List["BaseTool"],
        allow_parallel_tool_calls: bool = False,
        **kwargs: Any,
    ) -> ChatResponse:
        """Validate the response from chat_with_tools."""
        if not allow_parallel_tool_calls:
            force_single_tool_call(response)
        return response

    def get_tool_calls_from_response(
        self,
        response: "ChatResponse",
        error_on_no_tool_call: bool = True,
        **kwargs: Any,
    ) -> List[ToolSelection]:
        """Predict and call the tool."""
        tool_calls = response.message.additional_kwargs.get("tool_calls", [])

        if len(tool_calls) < 1:
            if error_on_no_tool_call:
                raise ValueError(
                    f"Expected at least one tool call, but got {len(tool_calls)} tool calls."
                )
            else:
                return []

        tool_selections = []
        for tool_call in tool_calls:
            if (
                "input" not in tool_call
                or "id" not in tool_call
                or "name" not in tool_call
            ):
                raise ValueError("Invalid tool call.")
            if tool_call["type"] != "tool_use":
                raise ValueError("Invalid tool type. Unsupported by Anthropic")
            argument_dict = (
                json.loads(tool_call["input"])
                if isinstance(tool_call["input"], str)
                else tool_call["input"]
            )

            tool_selections.append(
                ToolSelection(
                    tool_id=tool_call["id"],
                    tool_name=tool_call["name"],
                    tool_kwargs=argument_dict,
                )
            )

        return tool_selections
````