TablestoreVectorStore¶
Tablestore is a fully managed NoSQL cloud database service that enables storage of a massive amount of structured and semi-structured data.
This notebook shows how to use functionality related to the Tablestore
vector database.
To use Tablestore, you must create an instance. Here are the creating instance instructions.
Install¶
In [ ]:
Copied!
%pip install llama-index-vector-stores-tablestore
%pip install llama-index-vector-stores-tablestore
In [ ]:
Copied!
import getpass
import os
os.environ["end_point"] = getpass.getpass("Tablestore end_point:")
os.environ["instance_name"] = getpass.getpass("Tablestore instance_name:")
os.environ["access_key_id"] = getpass.getpass("Tablestore access_key_id:")
os.environ["access_key_secret"] = getpass.getpass(
"Tablestore access_key_secret:"
)
import getpass
import os
os.environ["end_point"] = getpass.getpass("Tablestore end_point:")
os.environ["instance_name"] = getpass.getpass("Tablestore instance_name:")
os.environ["access_key_id"] = getpass.getpass("Tablestore access_key_id:")
os.environ["access_key_secret"] = getpass.getpass(
"Tablestore access_key_secret:"
)
Example¶
Create vector store.
In [ ]:
Copied!
import os
from llama_index.core import MockEmbedding
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import (
VectorStoreQuery,
MetadataFilters,
MetadataFilter,
FilterCondition,
FilterOperator,
)
from tablestore import FieldSchema, FieldType, VectorMetricType
from llama_index.vector_stores.tablestore import TablestoreVectorStore
vector_dimension = 4
store = TablestoreVectorStore(
endpoint=os.getenv("end_point"),
instance_name=os.getenv("instance_name"),
access_key_id=os.getenv("access_key_id"),
access_key_secret=os.getenv("access_key_secret"),
vector_dimension=vector_dimension,
vector_metric_type=VectorMetricType.VM_COSINE,
# metadata mapping is used to filter non-vector fields.
metadata_mappings=[
FieldSchema(
"type", FieldType.KEYWORD, index=True, enable_sort_and_agg=True
),
FieldSchema(
"time", FieldType.LONG, index=True, enable_sort_and_agg=True
),
],
)
import os
from llama_index.core import MockEmbedding
from llama_index.core.schema import TextNode
from llama_index.core.vector_stores import (
VectorStoreQuery,
MetadataFilters,
MetadataFilter,
FilterCondition,
FilterOperator,
)
from tablestore import FieldSchema, FieldType, VectorMetricType
from llama_index.vector_stores.tablestore import TablestoreVectorStore
vector_dimension = 4
store = TablestoreVectorStore(
endpoint=os.getenv("end_point"),
instance_name=os.getenv("instance_name"),
access_key_id=os.getenv("access_key_id"),
access_key_secret=os.getenv("access_key_secret"),
vector_dimension=vector_dimension,
vector_metric_type=VectorMetricType.VM_COSINE,
# metadata mapping is used to filter non-vector fields.
metadata_mappings=[
FieldSchema(
"type", FieldType.KEYWORD, index=True, enable_sort_and_agg=True
),
FieldSchema(
"time", FieldType.LONG, index=True, enable_sort_and_agg=True
),
],
)
Create table and index.
In [ ]:
Copied!
store.create_table_if_not_exist()
store.create_search_index_if_not_exist()
store.create_table_if_not_exist()
store.create_search_index_if_not_exist()
New a mock embedding for test.
In [ ]:
Copied!
embedder = MockEmbedding(vector_dimension)
embedder = MockEmbedding(vector_dimension)
Prepare some docs.
In [ ]:
Copied!
texts = [
TextNode(
id_="1",
text="The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.",
metadata={"type": "a", "time": 1995},
),
TextNode(
id_="2",
text="When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.",
metadata={"type": "a", "time": 1990},
),
TextNode(
id_="3",
text="An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.",
metadata={"type": "a", "time": 2009},
),
TextNode(
id_="4",
text="A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into thed of a C.E.O.",
metadata={"type": "a", "time": 2023},
),
TextNode(
id_="5",
text="A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.",
metadata={"type": "b", "time": 2018},
),
TextNode(
id_="6",
text="Two detectives, a rookie and a veteran, hunt a serial killer who uses the seven deadly sins as his motives.",
metadata={"type": "c", "time": 2010},
),
TextNode(
id_="7",
text="An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.",
metadata={"type": "a", "time": 2023},
),
]
for t in texts:
t.embedding = embedder.get_text_embedding(t.text)
texts = [
TextNode(
id_="1",
text="The lives of two mob hitmen, a boxer, a gangster and his wife, and a pair of diner bandits intertwine in four tales of violence and redemption.",
metadata={"type": "a", "time": 1995},
),
TextNode(
id_="2",
text="When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.",
metadata={"type": "a", "time": 1990},
),
TextNode(
id_="3",
text="An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.",
metadata={"type": "a", "time": 2009},
),
TextNode(
id_="4",
text="A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into thed of a C.E.O.",
metadata={"type": "a", "time": 2023},
),
TextNode(
id_="5",
text="A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.",
metadata={"type": "b", "time": 2018},
),
TextNode(
id_="6",
text="Two detectives, a rookie and a veteran, hunt a serial killer who uses the seven deadly sins as his motives.",
metadata={"type": "c", "time": 2010},
),
TextNode(
id_="7",
text="An organized crime dynasty's aging patriarch transfers control of his clandestine empire to his reluctant son.",
metadata={"type": "a", "time": 2023},
),
]
for t in texts:
t.embedding = embedder.get_text_embedding(t.text)
Write some docs.
In [ ]:
Copied!
store.add(texts)
store.add(texts)
Out[ ]:
['1', '2', '3', '4', '5', '6', '7']
Delete docs.
In [ ]:
Copied!
store.delete("1")
store.delete("1")
Query with filters.
In [ ]:
Copied!
store.query(
query=VectorStoreQuery(
query_embedding=embedder.get_text_embedding("nature fight physical"),
similarity_top_k=5,
filters=MetadataFilters(
filters=[
MetadataFilter(
key="type", value="a", operator=FilterOperator.EQ
),
MetadataFilter(
key="time", value=2020, operator=FilterOperator.LTE
),
],
condition=FilterCondition.AND,
),
),
)
store.query(
query=VectorStoreQuery(
query_embedding=embedder.get_text_embedding("nature fight physical"),
similarity_top_k=5,
filters=MetadataFilters(
filters=[
MetadataFilter(
key="type", value="a", operator=FilterOperator.EQ
),
MetadataFilter(
key="time", value=2020, operator=FilterOperator.LTE
),
],
condition=FilterCondition.AND,
),
),
)
Out[ ]:
VectorStoreQueryResult(nodes=[TextNode(id_='2', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 1990, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='When the menace known as the Joker wreaks havoc and chaos on the people of Gotham, Batman must accept one of the greatest psychological and physical tests of his ability to fight injustice.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), TextNode(id_='3', embedding=[0.5, 0.5, 0.5, 0.5], metadata={'time': 2009, 'type': 'a'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='An insomniac office worker and a devil-may-care soapmaker form an underground fight club that evolves into something much, much more.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n')], similarities=None, ids=['2', '3'])