
Vertexaivectorsearch

VertexAIVectorStore #

Bases: BasePydanticVectorStore

Vertex AI Vector Search vector store.

In this vector store, embeddings are stored in Vertex AI Vector Search and docs are stored within a Cloud Storage bucket.

During query time, the index uses Vertex AI Vector Search to query for the top k most similar nodes.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `project_id` | `str` | The Google Cloud Project ID. | `None` |
| `region` | `str` | The default location for making API calls. It must be the same location where the Vector Search index was created, and it must be regional. | `None` |
| `index_id` | `str` | The fully qualified resource name of the created index in Vertex AI Vector Search. | `None` |
| `endpoint_id` | `str` | The fully qualified resource name of the created index endpoint in Vertex AI Vector Search. | `None` |
| `gcs_bucket_name` | `Optional[str]` | The Cloud Storage bucket where vectors are staged when the index is updated in batch mode. | `None` |
| `credentials_path` | `Optional[str]` | The path to the Google credentials file on the local file system. | `None` |

Examples:

pip install llama-index-vector-stores-vertexaivectorsearch

from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore

vector_store = VertexAIVectorStore(
    project_id=PROJECT_ID,
    region=REGION,
    index_id="<index_resource_name>",
    endpoint_id="<index_endpoint_resource_name>",
)
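
Once constructed, the store can be plugged into a LlamaIndex `StorageContext` like any other vector store. The following is a minimal sketch (not from the source), assuming an embedding model is configured via `Settings` and the placeholder resource names above are filled in:

```python
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.core.schema import TextNode

# Wrap the vector store in a storage context so an index can persist to it.
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build an index over a couple of example nodes; embeddings are computed by
# the globally configured embedding model.
nodes = [
    TextNode(text="The cat sat on the mat."),
    TextNode(text="Vertex AI Vector Search serves similarity queries."),
]
index = VectorStoreIndex(nodes, storage_context=storage_context)

# Retrieve the top match for a natural-language query.
retriever = index.as_retriever(similarity_top_k=1)
print(retriever.retrieve("Where did the cat sit?"))
```
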
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/llama_index/vector_stores/vertexaivectorsearch/base.py
class VertexAIVectorStore(BasePydanticVectorStore):
    """Vertex AI Vector Search vector store.

    In this vector store, embeddings are stored in Vertex AI Vector Search and
    docs are stored within a Cloud Storage bucket.

    During query time, the index uses Vertex AI Vector Search to query for the
    top k most similar nodes.

    Args:
        project_id (str): The Google Cloud Project ID.
        region (str): The default location for making API calls. It must be
            the same location where the Vector Search index was created, and
            it must be regional.
        index_id (str): The fully qualified resource name of the created
            index in Vertex AI Vector Search.
        endpoint_id (str): The fully qualified resource name of the created
            index endpoint in Vertex AI Vector Search.
        gcs_bucket_name (Optional[str]): The Cloud Storage bucket where
            vectors are staged when the index is updated in batch mode.
        credentials_path (Optional[str]): The path to the Google credentials
            file on the local file system.

    Examples:
        `pip install llama-index-vector-stores-vertexaivectorsearch`

        ```python
        from llama_index.vector_stores.vertexaivectorsearch import (
            VertexAIVectorStore,
        )

        vector_store = VertexAIVectorStore(
            project_id=PROJECT_ID,
            region=REGION,
            index_id="<index_resource_name>",
            endpoint_id="<index_endpoint_resource_name>",
        )
        ```
    """

    stores_text: bool = True
    remove_text_from_metadata: bool = True
    flat_metadata: bool = False

    text_key: str

    project_id: str
    region: str
    index_id: str
    endpoint_id: str
    gcs_bucket_name: Optional[str] = None
    credentials_path: Optional[str] = None

    _index: MatchingEngineIndex = PrivateAttr()
    _endpoint: MatchingEngineIndexEndpoint = PrivateAttr()
    _index_metadata: dict = PrivateAttr()
    _stream_update: bool = PrivateAttr()
    _staging_bucket: storage.Bucket = PrivateAttr()
    # _document_storage: GCSDocumentStorage = PrivateAttr()

    def __init__(
        self,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        index_id: Optional[str] = None,
        endpoint_id: Optional[str] = None,
        gcs_bucket_name: Optional[str] = None,
        credentials_path: Optional[str] = None,
        text_key: str = DEFAULT_TEXT_KEY,
        remove_text_from_metadata: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        super().__init__(
            project_id=project_id,
            region=region,
            index_id=index_id,
            endpoint_id=endpoint_id,
            gcs_bucket_name=gcs_bucket_name,
            credentials_path=credentials_path,
            text_key=text_key,
            remove_text_from_metadata=remove_text_from_metadata,
        )
        _sdk_manager = VectorSearchSDKManager(
            project_id=project_id, region=region, credentials_path=credentials_path
        )

        # get index and endpoint resource names including metadata
        self._index = _sdk_manager.get_index(index_id=index_id)
        self._endpoint = _sdk_manager.get_endpoint(endpoint_id=endpoint_id)
        self._index_metadata = self._index.to_dict()

        # get index update method from index metadata
        self._stream_update = False
        if self._index_metadata["indexUpdateMethod"] == "STREAM_UPDATE":
            self._stream_update = True

        # get bucket object when available
        if self.gcs_bucket_name:
            self._staging_bucket = _sdk_manager.get_gcs_bucket(
                bucket_name=gcs_bucket_name
            )
        else:
            self._staging_bucket = None

    @classmethod
    def from_params(
        cls,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        index_id: Optional[str] = None,
        endpoint_id: Optional[str] = None,
        gcs_bucket_name: Optional[str] = None,
        credentials_path: Optional[str] = None,
        text_key: str = DEFAULT_TEXT_KEY,
        **kwargs: Any,
    ) -> "VertexAIVectorStore":
        """Create VertexAIVectorStore from config."""
        return cls(
            project_id=project_id,
            region=region,
            index_id=index_id,
            endpoint_id=endpoint_id,
            gcs_bucket_name=gcs_bucket_name,
            credentials_path=credentials_path,
            text_key=text_key,
            **kwargs,
        )

    @classmethod
    def class_name(cls) -> str:
        return "VertexAIVectorStore"

    @property
    def client(self) -> Any:
        """Get client."""
        return self._index

    @property
    def index(self) -> Any:
        """Get client."""
        return self._index

    @property
    def endpoint(self) -> Any:
        """Get client."""
        return self._endpoint

    @property
    def staging_bucket(self) -> Any:
        """Get client."""
        return self._staging_bucket

    def add(
        self,
        nodes: List[BaseNode],
        is_complete_overwrite: bool = False,
        **add_kwargs: Any,
    ) -> List[str]:
        """Add nodes to index.

        Args:
            nodes (List[BaseNode]): List of nodes with embeddings.

        """
        ids = []
        embeddings = []
        metadatas = []
        for node in nodes:
            node_id = node.node_id
            metadata = node_to_metadata_dict(
                node, remove_text=False, flat_metadata=False
            )
            embedding = node.get_embedding()

            ids.append(node_id)
            embeddings.append(embedding)
            metadatas.append(metadata)

        data_points = utils.to_data_points(ids, embeddings, metadatas)
        # self._document_storage.add_documents(list(zip(ids, nodes)))

        if self._stream_update:
            utils.stream_update_index(index=self._index, data_points=data_points)
        else:
            if self._staging_bucket is None:
                raise ValueError(
                    "To update a Vector Search index a staging bucket must"
                    " be defined."
                )
            utils.batch_update_index(
                index=self._index,
                data_points=data_points,
                staging_bucket=self._staging_bucket,
                is_complete_overwrite=is_complete_overwrite,
            )
        return ids

    def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
        """
        Delete nodes matching the given ref_doc_id.

        Args:
            ref_doc_id (str): The doc_id of the document to delete.
        """
        # get datapoint ids by filter
        filter = {"ref_doc_id": ref_doc_id}
        ids = utils.get_datapoints_by_filter(
            index=self.index, endpoint=self.endpoint, metadata=filter
        )
        # remove datapoints
        self._index.remove_datapoints(datapoint_ids=ids)

    def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
        """Query index for top k most similar nodes."""
        query_embedding = None
        if query.mode == VectorStoreQueryMode.DEFAULT:
            query_embedding = [cast(List[float], query.query_embedding)]

        if query.filters is not None:
            if "filter" in kwargs and kwargs["filter"] is not None:
                raise ValueError(
                    "Cannot specify filter via both query and kwargs. "
                    "Use kwargs only for Vertex AI Vector Search specific items that are "
                    "not supported via the generic query interface such as numeric filters."
                )
            filter, num_filter = utils.to_vectorsearch_filter(query.filters)
        else:
            filter = None
            num_filter = None

        matches = utils.find_neighbors(
            index=self._index,
            endpoint=self._endpoint,
            embeddings=query_embedding,
            top_k=query.similarity_top_k,
            filter=filter,
            numeric_filter=num_filter,
        )

        top_k_nodes = []
        top_k_ids = []
        top_k_scores = []

        for match in matches:
            node = utils.to_node(match, self.text_key)
            top_k_ids.append(match.id)
            top_k_scores.append(match.distance)
            top_k_nodes.append(node)

        return VectorStoreQueryResult(
            nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
        )

client property #

client: Any

Get client.

index property #

index: Any

Get index.

endpoint property #

endpoint: Any

Get endpoint.

staging_bucket property #

staging_bucket: Any

Get staging bucket.

from_params classmethod #

from_params(project_id: Optional[str] = None, region: Optional[str] = None, index_id: Optional[str] = None, endpoint_id: Optional[str] = None, gcs_bucket_name: Optional[str] = None, credentials_path: Optional[str] = None, text_key: str = DEFAULT_TEXT_KEY, **kwargs: Any) -> VertexAIVectorStore

Create VertexAIVectorStore from config.
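
For illustration, a hypothetical call with placeholder values; the fully qualified resource names come from your own index and endpoint:

```python
vector_store = VertexAIVectorStore.from_params(
    project_id="my-project",  # placeholder project ID
    region="us-central1",  # must match the region of the index
    index_id="<index_resource_name>",
    endpoint_id="<index_endpoint_resource_name>",
)
```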

Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/llama_index/vector_stores/vertexaivectorsearch/base.py
@classmethod
def from_params(
    cls,
    project_id: Optional[str] = None,
    region: Optional[str] = None,
    index_id: Optional[str] = None,
    endpoint_id: Optional[str] = None,
    gcs_bucket_name: Optional[str] = None,
    credentials_path: Optional[str] = None,
    text_key: str = DEFAULT_TEXT_KEY,
    **kwargs: Any,
) -> "VertexAIVectorStore":
    """Create VertexAIVectorStore from config."""
    return cls(
        project_id=project_id,
        region=region,
        index_id=index_id,
        endpoint_id=endpoint_id,
        gcs_bucket_name=gcs_bucket_name,
        credentials_path=credentials_path,
        text_key=text_key,
        **kwargs,
    )

add #

add(nodes: List[BaseNode], is_complete_overwrite: bool = False, **add_kwargs: Any) -> List[str]

Add nodes to index.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `nodes` | `List[BaseNode]` | List of nodes with embeddings. | *required* |
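
As a sketch, nodes carrying precomputed embeddings can be upserted directly; the text and the 768-dimensional dummy vector below are placeholders and must match your index's configured dimensionality:

```python
from llama_index.core.schema import TextNode

# Each node must already carry an embedding of the index's dimensionality.
nodes = [
    TextNode(text="hello world", embedding=[0.1] * 768),
]
ids = vector_store.add(nodes)
print(ids)  # the node IDs under which the datapoints were stored
```

Note that if the index uses batch updates rather than stream updates, a `gcs_bucket_name` must have been supplied at construction time; otherwise `add` raises a `ValueError`.
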
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/llama_index/vector_stores/vertexaivectorsearch/base.py
def add(
    self,
    nodes: List[BaseNode],
    is_complete_overwrite: bool = False,
    **add_kwargs: Any,
) -> List[str]:
    """Add nodes to index.

    Args:
        nodes (List[BaseNode]): List of nodes with embeddings.

    """
    ids = []
    embeddings = []
    metadatas = []
    for node in nodes:
        node_id = node.node_id
        metadata = node_to_metadata_dict(
            node, remove_text=False, flat_metadata=False
        )
        embedding = node.get_embedding()

        ids.append(node_id)
        embeddings.append(embedding)
        metadatas.append(metadata)

    data_points = utils.to_data_points(ids, embeddings, metadatas)
    # self._document_storage.add_documents(list(zip(ids, nodes)))

    if self._stream_update:
        utils.stream_update_index(index=self._index, data_points=data_points)
    else:
        if self._staging_bucket is None:
            raise ValueError(
                "To update a Vector Search index a staging bucket must"
                " be defined."
            )
        utils.batch_update_index(
            index=self._index,
            data_points=data_points,
            staging_bucket=self._staging_bucket,
            is_complete_overwrite=is_complete_overwrite,
        )
    return ids

delete #

delete(ref_doc_id: str, **delete_kwargs: Any) -> None

Delete nodes matching the given ref_doc_id.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| `ref_doc_id` | `str` | The doc_id of the document whose nodes should be deleted. | *required* |
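
A minimal sketch; the document ID is a placeholder:

```python
# Removes every datapoint whose metadata references this source document.
vector_store.delete(ref_doc_id="<ref_doc_id>")
```
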
Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/llama_index/vector_stores/vertexaivectorsearch/base.py
def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
    """
    Delete nodes matching the given ref_doc_id.

    Args:
        ref_doc_id (str): The doc_id of the document to delete.
    """
    # get datapoint ids by filter
    filter = {"ref_doc_id": ref_doc_id}
    ids = utils.get_datapoints_by_filter(
        index=self.index, endpoint=self.endpoint, metadata=filter
    )
    # remove datapoints
    self._index.remove_datapoints(datapoint_ids=ids)

query #

query(query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult

Query index for top k most similar nodes.
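
A short sketch of a direct low-level query (bypassing any LlamaIndex retriever); the dummy embedding is a placeholder and must match the index's dimensionality:

```python
from llama_index.core.vector_stores.types import VectorStoreQuery

# Placeholder 768-dimensional query embedding; use a real one in practice.
query = VectorStoreQuery(query_embedding=[0.1] * 768, similarity_top_k=3)
result = vector_store.query(query)

for node, score in zip(result.nodes, result.similarities):
    print(node.node_id, score)
```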

Source code in llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/llama_index/vector_stores/vertexaivectorsearch/base.py
def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult:
    """Query index for top k most similar nodes."""
    query_embedding = None
    if query.mode == VectorStoreQueryMode.DEFAULT:
        query_embedding = [cast(List[float], query.query_embedding)]

    if query.filters is not None:
        if "filter" in kwargs and kwargs["filter"] is not None:
            raise ValueError(
                "Cannot specify filter via both query and kwargs. "
                "Use kwargs only for Vertex AI Vector Search specific items that are "
                "not supported via the generic query interface such as numeric filters."
            )
        filter, num_filter = utils.to_vectorsearch_filter(query.filters)
    else:
        filter = None
        num_filter = None

    matches = utils.find_neighbors(
        index=self._index,
        endpoint=self._endpoint,
        embeddings=query_embedding,
        top_k=query.similarity_top_k,
        filter=filter,
        numeric_filter=num_filter,
    )

    top_k_nodes = []
    top_k_ids = []
    top_k_scores = []

    for match in matches:
        node = utils.to_node(match, self.text_key)
        top_k_ids.append(match.id)
        top_k_scores.append(match.distance)
        top_k_nodes.append(node)

    return VectorStoreQueryResult(
        nodes=top_k_nodes, similarities=top_k_scores, ids=top_k_ids
    )