class FirestoreVectorStore(BasePydanticVectorStore):
    """Vector store backed by a Google Cloud Firestore collection.

    Each node is written as one Firestore document whose document ID is the
    node ID. The document holds the embedding (wrapped in a Firestore
    ``Vector``) under ``embedding_key`` and the node metadata under
    ``metadata_key``.

    Attributes:
        stores_text: When true, node text is kept in the serialized metadata
            (``remove_text`` is passed as ``not stores_text``).
        flat_metadata: Forwarded to ``node_to_metadata_dict``.
        collection_name: Firestore collection used for all reads and writes.
        batch_size: Maximum number of writes committed per Firestore batch.
            Falls back to ``DEFAULT_BATCH_SIZE`` when unset or zero.
        embedding_key: Document field that stores the embedding vector.
        text_key: Document field read as the node text at query time.
        metadata_key: Document field that stores the node metadata.
        distance_strategy: Distance measure passed to ``find_nearest`` and
            used to pick the similarity mode for the returned scores.
    """

    stores_text: bool = True
    flat_metadata: bool = True

    collection_name: str
    batch_size: Optional[int] = DEFAULT_BATCH_SIZE
    embedding_key: str = "embedding"
    text_key: str = "text"
    metadata_key: str = "metadata"
    distance_strategy: DistanceMeasure = DistanceMeasure.COSINE

    _client: Client

    def __init__(
        self,
        client: Optional[Client] = None,
        **kwargs: Any,
    ) -> None:
        """Initialize params.

        Args:
            client: Optional Firestore client; it is passed through
                ``client_with_user_agent`` before being stored.
            **kwargs: Field values forwarded to the base model constructor.
        """
        super().__init__(**kwargs)
        # Bypass the model's own __setattr__ for the private client attribute.
        object.__setattr__(self, "_client", client_with_user_agent(client))

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "FirestoreVectorStore"

    @property
    def client(self) -> Any:
        """Return the underlying Firestore client."""
        return self._client

    def add(
        self,
        nodes: List[BaseNode],
    ) -> List[str]:
        """Add nodes to vector store.

        Args:
            nodes: Nodes to persist; each must provide an embedding.

        Returns:
            The node IDs, in the same order as ``nodes``.
        """
        ids = []
        entries = []
        for node in nodes:
            node_id = node.node_id
            metadata = node_to_metadata_dict(
                node,
                remove_text=not self.stores_text,
                flat_metadata=self.flat_metadata,
            )
            entry = {
                self.embedding_key: node.get_embedding(),
                self.metadata_key: metadata,
            }
            ids.append(node_id)
            entries.append(entry)
        self._upsert_batch(entries, ids)
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """Delete all nodes whose metadata references ``ref_doc_id``.

        Args:
            ref_doc_id: ID of the source document whose nodes are removed.
            **delete_kwargs: Accepted for interface compatibility; unused.
        """
        # Build the field path from metadata_key so that it matches the field
        # add() writes to, even when metadata_key has been customized.
        docs = (
            self._client.collection(self.collection_name)
            .where(f"{self.metadata_key}.ref_doc_id", "==", ref_doc_id)
            .stream()
        )
        self._delete_batch([doc.id for doc in docs])

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query vector store.

        Args:
            query: Query carrying the embedding, ``similarity_top_k`` and
                optional metadata filters.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            Nodes, document IDs and similarity scores in the order returned
            by Firestore.

        Raises:
            ValueError: If the query has no embedding, or a returned document
                has no embedding field.
        """
        if query.query_embedding is None:
            raise ValueError("Query embedding is required.")

        filters = _to_firestore_filter(query.filters) if query.filters else None

        # kwargs are deliberately not forwarded: _similarity_search accepts no
        # extra keyword arguments, so forwarding them raised TypeError.
        results = self._similarity_search(
            query.query_embedding, query.similarity_top_k, filters=filters
        )

        top_k_ids = []
        top_k_nodes = []
        top_k_similarities = []

        LOGGER.debug("Found %d results.", len(results))

        # The similarity mode depends only on the store configuration.
        similarity_mode = self._distance_to_similarity_mode(self.distance_strategy)

        for result in results:
            # Convert the Firestore document to dict
            result_dict = result.to_dict() or {}
            metadata = result_dict.get(self.metadata_key) or {}

            fir_vec: Optional[Vector] = result_dict.get(self.embedding_key)
            if fir_vec is None:
                raise ValueError(
                    "Embedding is missing in Firestore document.", result.id
                )
            embedding = list(fir_vec.to_map_value()["value"])

            # Convert metadata to node, and add text if available
            node = metadata_dict_to_node(metadata, text=result_dict.get(self.text_key))

            # Keep track of the top k ids and nodes
            top_k_ids.append(result.id)
            top_k_nodes.append(node)
            top_k_similarities.append(
                similarity(query.query_embedding, embedding, similarity_mode)
            )

        return VectorStoreQueryResult(
            nodes=top_k_nodes, ids=top_k_ids, similarities=top_k_similarities
        )

    def _distance_to_similarity_mode(self, distance: DistanceMeasure) -> SimilarityMode:
        """Convert Firestore's distance measure to similarity mode.

        Unknown measures map to ``SimilarityMode.DEFAULT``.
        """
        return {
            DistanceMeasure.COSINE: SimilarityMode.DEFAULT,
            DistanceMeasure.EUCLIDEAN: SimilarityMode.EUCLIDEAN,
            DistanceMeasure.DOT_PRODUCT: SimilarityMode.DOT_PRODUCT,
        }.get(distance, SimilarityMode.DEFAULT)

    def _write_chunk_size(self) -> int:
        """Return the number of writes to group into one Firestore batch."""
        return self.batch_size or DEFAULT_BATCH_SIZE

    def _delete_batch(self, ids: List[str]) -> None:
        """Delete the documents with the given IDs, one batch per chunk."""
        collection = self._client.collection(self.collection_name)
        for chunk in more_itertools.chunked(ids, self._write_chunk_size()):
            db_batch = self._client.batch()
            for doc_id in chunk:
                db_batch.delete(collection.document(doc_id))
            db_batch.commit()

    def _upsert_batch(self, entries: List[dict], ids: Optional[List[str]]) -> None:
        """Upsert entries into Firestore, one batch per chunk.

        Args:
            entries: Document payloads; each must contain ``embedding_key``.
            ids: Document IDs aligned with ``entries``. When empty or None,
                ``None`` is passed as the document ID for every entry.

        Raises:
            ValueError: If ``ids`` is given and its length differs from
                ``entries``.
        """
        if ids and len(ids) != len(entries):
            raise ValueError("Length of ids and entries should be the same.")

        collection = self._client.collection(self.collection_name)
        doc_ids = ids if ids else [None] * len(entries)

        # Pair every entry with its own ID *before* chunking. Indexing ids
        # with the position inside a chunk would reuse the first chunk's IDs
        # for every later chunk and overwrite those documents.
        paired = zip(doc_ids, entries)
        for chunk in more_itertools.chunked(paired, self._write_chunk_size()):
            db_batch = self._client.batch()
            for doc_id, entry in chunk:
                # Convert the embedding array to a Firestore Vector without
                # mutating the caller's dict.
                payload = {
                    **entry,
                    self.embedding_key: Vector(entry[self.embedding_key]),
                }
                db_batch.set(collection.document(doc_id), payload, merge=True)
            db_batch.commit()

    def _similarity_search(
        self,
        query: List[float],
        k: int,
        filters: Union[BaseFilter, BaseCompositeFilter, None] = None,
    ) -> List[DocumentSnapshot]:
        """Run a nearest-neighbour search on the collection.

        Args:
            query: Query embedding.
            k: Maximum number of documents to return.
            filters: Optional Firestore filter applied before the search.

        Returns:
            The matching document snapshots.
        """
        collection = self._client.collection(self.collection_name)
        base_query = collection.where(filter=filters) if filters else collection

        results = base_query.find_nearest(
            vector_field=self.embedding_key,
            query_vector=Vector(query),
            distance_measure=self.distance_strategy,
            limit=k,
        )

        return results.get()
def add(
    self,
    nodes: List[BaseNode],
) -> List[str]:
    """Persist the given nodes and return their IDs.

    For every node a payload is built that maps ``embedding_key`` to the
    node's embedding and ``metadata_key`` to its serialized metadata. All
    payloads are then handed to ``_upsert_batch`` together with the node IDs.

    Args:
        nodes: Nodes to store.

    Returns:
        The node IDs, in the same order as ``nodes``.
    """
    strip_text = not self.stores_text
    stored_ids: List[str] = []
    payloads: List[dict] = []

    for item in nodes:
        serialized = node_to_metadata_dict(
            item,
            remove_text=strip_text,
            flat_metadata=self.flat_metadata,
        )
        stored_ids.append(item.node_id)
        payloads.append(
            {
                self.embedding_key: item.get_embedding(),
                self.metadata_key: serialized,
            }
        )

    self._upsert_batch(payloads, stored_ids)
    return stored_ids
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-firestore/llama_index/vector_stores/firestore/base.py
142143144145146147148149150
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """Delete all nodes whose metadata references ``ref_doc_id``.

    Streams the documents in ``collection_name`` whose
    ``<metadata_key>.ref_doc_id`` field equals ``ref_doc_id`` and passes
    their IDs to ``_delete_batch``.

    Args:
        ref_doc_id: ID of the source document whose nodes are removed.
        **delete_kwargs: Accepted for interface compatibility; unused.
    """
    # Build the field path from metadata_key rather than a literal
    # "metadata" so a customized metadata_key still matches stored documents.
    field_path = f"{self.metadata_key}.ref_doc_id"
    docs = (
        self._client.collection(self.collection_name)
        .where(field_path, "==", ref_doc_id)
        .stream()
    )
    self._delete_batch([doc.id for doc in docs])
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Query vector store.

    Runs ``_similarity_search`` with the query embedding, converts each
    returned document into a node and scores it against the query
    embedding with ``similarity``.

    Args:
        query: Query carrying the embedding, ``similarity_top_k`` and
            optional metadata filters.
        **kwargs: Accepted for interface compatibility; unused.

    Returns:
        Nodes, document IDs and similarity scores in the order returned by
        the search.

    Raises:
        ValueError: If the query has no embedding, or a returned document
            has no embedding field.
    """
    if query.query_embedding is None:
        raise ValueError("Query embedding is required.")

    filters = _to_firestore_filter(query.filters) if query.filters else None

    # kwargs are deliberately not forwarded: _similarity_search accepts no
    # extra keyword arguments, so forwarding them raised TypeError.
    results = self._similarity_search(
        query.query_embedding, query.similarity_top_k, filters=filters
    )

    top_k_ids = []
    top_k_nodes = []
    top_k_similarities = []

    LOGGER.debug("Found %d results.", len(results))

    # The similarity mode depends only on the store configuration, so
    # resolve it once instead of once per result.
    similarity_mode = self._distance_to_similarity_mode(self.distance_strategy)

    for result in results:
        # Convert the Firestore document to dict
        result_dict = result.to_dict() or {}
        metadata = result_dict.get(self.metadata_key) or {}

        fir_vec: Optional[Vector] = result_dict.get(self.embedding_key)
        if fir_vec is None:
            raise ValueError("Embedding is missing in Firestore document.", result.id)
        embedding = list(fir_vec.to_map_value()["value"])

        # Convert metadata to node, and add text if available
        node = metadata_dict_to_node(metadata, text=result_dict.get(self.text_key))

        # Keep track of the top k ids and nodes
        top_k_ids.append(result.id)
        top_k_nodes.append(node)
        top_k_similarities.append(
            similarity(query.query_embedding, embedding, similarity_mode)
        )

    return VectorStoreQueryResult(
        nodes=top_k_nodes, ids=top_k_ids, similarities=top_k_similarities
    )