Source code in llama-index-integrations/embeddings/llama-index-embeddings-text-embeddings-inference/llama_index/embeddings/text_embeddings_inference/base.py
class TextEmbeddingsInference(BaseEmbedding):
    """Embedding model backed by an HTTP text-embeddings service.

    Every embedding request is a POST to ``{base_url}/embed`` with the JSON
    body ``{"inputs": [...], "truncate": truncate_text}``; the decoded JSON
    response is returned as the embeddings.

    Authentication is optional.  When ``auth_token`` is a string it is sent
    verbatim as the ``Authorization`` header value (no scheme prefix such as
    ``Bearer`` is added here).  When it is a callable it is invoked with
    ``base_url`` on every request and its return value is used as the header
    value.
    """

    base_url: str = Field(
        default=DEFAULT_URL,
        description="Base URL for the text embeddings service.",
    )
    query_instruction: Optional[str] = Field(
        default=None, description="Instruction to prepend to query text."
    )
    text_instruction: Optional[str] = Field(
        default=None, description="Instruction to prepend to text."
    )
    timeout: float = Field(
        default=60.0,
        description="Timeout in seconds for the request.",
    )
    truncate_text: bool = Field(
        default=True,
        description="Whether to truncate text or not when generating embeddings.",
    )
    auth_token: Optional[Union[str, Callable[[str], str]]] = Field(
        default=None,
        description="Authentication token or authentication token generating function for authenticated requests",
    )

    def __init__(
        self,
        model_name: str,
        base_url: str = DEFAULT_URL,
        text_instruction: Optional[str] = None,
        query_instruction: Optional[str] = None,
        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
        timeout: float = 60.0,
        truncate_text: bool = True,
        callback_manager: Optional[CallbackManager] = None,
        auth_token: Optional[Union[str, Callable[[str], str]]] = None,
    ):
        """Store the service configuration on the model.

        All arguments are forwarded unchanged to the base class initializer.
        """
        super().__init__(
            base_url=base_url,
            model_name=model_name,
            text_instruction=text_instruction,
            query_instruction=query_instruction,
            embed_batch_size=embed_batch_size,
            timeout=timeout,
            truncate_text=truncate_text,
            callback_manager=callback_manager,
            auth_token=auth_token,
        )

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this class."""
        return "TextEmbeddingsInference"

    def _build_headers(self) -> dict:
        """Build the request headers shared by the sync and async API calls.

        Returns:
            A dict with ``Content-Type: application/json`` and, when
            ``auth_token`` is set, an ``Authorization`` entry.
        """
        headers = {"Content-Type": "application/json"}
        if self.auth_token is not None:
            if callable(self.auth_token):
                # Token factories receive the configured base URL as-is so
                # they can mint a token for that specific service.
                headers["Authorization"] = self.auth_token(self.base_url)
            else:
                headers["Authorization"] = self.auth_token
        return headers

    def _embed_url(self) -> str:
        """Return the ``/embed`` endpoint URL.

        A trailing slash on ``base_url`` is dropped so the result never
        contains ``//embed``.
        """
        return f"{self.base_url.rstrip('/')}/embed"

    def _call_api(self, texts: List[str]) -> List[List[float]]:
        """POST ``texts`` to the embed endpoint and return the decoded JSON.

        Args:
            texts: Already-formatted strings to embed.

        Returns:
            The JSON-decoded response body; an empty list when ``texts`` is
            empty (no request is made in that case).

        Raises:
            httpx.HTTPStatusError: If the service answers with a 4xx/5xx
                status, instead of returning the error payload as if it
                were embeddings.
        """
        if not texts:
            return []

        # Imported lazily, as in the original, so importing this module does
        # not require httpx until a request is actually made.
        import httpx

        with httpx.Client() as client:
            response = client.post(
                self._embed_url(),
                headers=self._build_headers(),
                json={"inputs": texts, "truncate": self.truncate_text},
                timeout=self.timeout,
            )
        response.raise_for_status()
        return response.json()

    async def _acall_api(self, texts: List[str]) -> List[List[float]]:
        """Async counterpart of ``_call_api``.

        Args:
            texts: Already-formatted strings to embed.

        Returns:
            The JSON-decoded response body; an empty list when ``texts`` is
            empty (no request is made in that case).

        Raises:
            httpx.HTTPStatusError: If the service answers with a 4xx/5xx
                status.
        """
        if not texts:
            return []

        import httpx

        async with httpx.AsyncClient() as client:
            response = await client.post(
                self._embed_url(),
                headers=self._build_headers(),
                json={"inputs": texts, "truncate": self.truncate_text},
                timeout=self.timeout,
            )
        response.raise_for_status()
        return response.json()

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        query = format_query(query, self.model_name, self.query_instruction)
        return self._call_api([query])[0]

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        text = format_text(text, self.model_name, self.text_instruction)
        return self._call_api([text])[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings."""
        texts = [
            format_text(text, self.model_name, self.text_instruction)
            for text in texts
        ]
        return self._call_api(texts)

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Get query embedding async."""
        query = format_query(query, self.model_name, self.query_instruction)
        return (await self._acall_api([query]))[0]

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Get text embedding async."""
        text = format_text(text, self.model_name, self.text_instruction)
        return (await self._acall_api([text]))[0]

    async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings async."""
        texts = [
            format_text(text, self.model_name, self.text_instruction)
            for text in texts
        ]
        return await self._acall_api(texts)