Source code in llama-index-integrations/embeddings/llama-index-embeddings-huggingface-optimum-intel/llama_index/embeddings/huggingface_optimum_intel/base.py
class IntelEmbedding(BaseEmbedding):
    """Embedding model backed by an Optimum-Intel (IPEX) exported HuggingFace model.

    Loads a model and tokenizer from ``folder_name`` and produces sentence
    embeddings using either CLS-token or attention-mask-aware mean pooling,
    optionally L2-normalized. Inference runs under ``torch.inference_mode``
    with CPU autocast.
    """

    folder_name: str = Field(description="Folder name to load from.")
    max_length: int = Field(description="Maximum length of input.")
    pooling: str = Field(description="Pooling strategy. One of ['cls', 'mean'].")
    # FIX: annotation was `str`, but the default and every use of this field
    # are boolean (it gates L2 normalization in `_embed`).
    normalize: bool = Field(default=True, description="Normalize embeddings or not.")
    query_instruction: Optional[str] = Field(
        description="Instruction to prepend to query text."
    )
    text_instruction: Optional[str] = Field(
        description="Instruction to prepend to text."
    )
    cache_folder: Optional[str] = Field(
        description="Cache folder for huggingface files."
    )

    _model: Any = PrivateAttr()
    _tokenizer: Any = PrivateAttr()
    _device: Any = PrivateAttr()

    def __init__(
        self,
        folder_name: str,
        pooling: str = "cls",
        max_length: Optional[int] = None,
        normalize: bool = True,
        query_instruction: Optional[str] = None,
        text_instruction: Optional[str] = None,
        model: Optional[Any] = None,
        tokenizer: Optional[Any] = None,
        embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE,
        callback_manager: Optional[CallbackManager] = None,
        device: Optional[str] = None,
    ):
        """Load (or accept pre-built) model/tokenizer and initialize the embedding.

        Raises:
            ImportError: if optimum-intel and its dependencies are not installed.
            ValueError: if ``pooling`` is not 'cls'/'mean', or ``max_length`` is
                neither given nor discoverable from the model config.
        """
        try:
            from optimum.intel import IPEXModel
        except ImportError as exc:
            # FIX: chain the original ImportError so the root cause stays visible.
            raise ImportError(
                "Optimum-Intel requires the following dependencies; please install with "
                "`pip install optimum[exporters] "
                "optimum-intel neural-compressor intel_extension_for_pytorch`"
            ) from exc

        # FIX: validate the cheap argument before any expensive model loading.
        if pooling not in ["cls", "mean"]:
            raise ValueError(f"Pooling {pooling} not supported.")

        model = model or IPEXModel.from_pretrained(folder_name)
        tokenizer = tokenizer or AutoTokenizer.from_pretrained(folder_name)

        if max_length is None:
            try:
                max_length = int(model.config.max_position_embeddings)
            except Exception as exc:
                # FIX: chain the original error (missing attribute, bad value, ...).
                raise ValueError(
                    "Unable to find max_length from model config. "
                    "Please provide max_length."
                ) from exc

        # FIX: initialize the pydantic model BEFORE assigning private attributes.
        # Under pydantic v2, private-attribute storage is only set up by
        # __init__; assigning self._model earlier raises AttributeError.
        super().__init__(
            embed_batch_size=embed_batch_size,
            callback_manager=callback_manager,
            folder_name=folder_name,
            max_length=max_length,
            pooling=pooling,
            normalize=normalize,
            query_instruction=query_instruction,
            text_instruction=text_instruction,
        )
        self._model = model
        self._tokenizer = tokenizer
        self._device = device or infer_torch_device()

    @classmethod
    def class_name(cls) -> str:
        return "IntelEmbedding"

    def _mean_pooling(self, model_output: Any, attention_mask: Any) -> Any:
        """Mean Pooling - Take attention mask into account for correct averaging."""
        import torch

        # First element of model_output contains all token embeddings.
        token_embeddings = model_output[0]
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        # Sum of unmasked token vectors / count of unmasked tokens; the clamp
        # guards against division by zero for fully-masked rows.
        return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
            input_mask_expanded.sum(1), min=1e-9
        )

    def _cls_pooling(self, model_output: list) -> Any:
        """Use the CLS token (position 0) as the pooled sentence representation."""
        if isinstance(model_output, dict):
            token_embeddings = model_output["last_hidden_state"]
        else:
            token_embeddings = model_output[0]
        return token_embeddings[:, 0]

    def _embed(self, sentences: List[str]) -> List[List[float]]:
        """Tokenize, run the model, pool, optionally normalize, and return lists."""
        encoded_input = self._tokenizer(
            sentences,
            padding=True,
            max_length=self.max_length,
            truncation=True,
            return_tensors="pt",
        )

        import torch

        with torch.inference_mode(), torch.cpu.amp.autocast():
            model_output = self._model(**encoded_input)

        if self.pooling == "cls":
            embeddings = self._cls_pooling(model_output)
        else:
            embeddings = self._mean_pooling(
                model_output, encoded_input["attention_mask"].to(self._device)
            )

        if self.normalize:
            embeddings = torch.nn.functional.normalize(embeddings, p=2, dim=1)

        return embeddings.tolist()

    def _get_query_embedding(self, query: str) -> List[float]:
        """Get query embedding."""
        query = format_query(query, self.model_name, self.query_instruction)
        return self._embed([query])[0]

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Get query embedding async."""
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Get text embedding async."""
        return self._get_text_embedding(text)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Get text embedding."""
        text = format_text(text, self.model_name, self.text_instruction)
        return self._embed([text])[0]

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Get text embeddings."""
        texts = [
            format_text(text, self.model_name, self.text_instruction)
            for text in texts
        ]
        return self._embed(texts)