Document Store#
- class llama_index.storage.docstore.BaseDocumentStore#
- abstract async adelete_document(doc_id: str, raise_error: bool = True) None #
Delete a document from the store.
- abstract async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- abstract async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- abstract async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- abstract delete_document(doc_id: str, raise_error: bool = True) None #
Delete a document from the store.
- abstract delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- abstract get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- abstract get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- llama_index.storage.docstore.DocumentStore#
alias of
SimpleDocumentStore
- class llama_index.storage.docstore.DynamoDBDocumentStore(dynamodb_kvstore: DynamoDBKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- class llama_index.storage.docstore.FirestoreDocumentStore(firestore_kvstore: FirestoreKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
Firestore Document (Node) store.
A Firestore store for Document and Node objects.
- Parameters
firestore_kvstore (FirestoreKVStore) – Firestore key-value store
namespace (str) – namespace for the docstore
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- classmethod from_database(project: str, database: str, namespace: Optional[str] = None) FirestoreDocumentStore #
- Parameters
project (str) – The project which the client acts on behalf of.
database (str) – The database name that the client targets.
namespace (str) – namespace for the docstore.
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- class llama_index.storage.docstore.KVDocumentStore(kvstore: BaseKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
Document (Node) store.
NOTE: at the moment, this store is primarily used to store Node objects. Each node will be assigned an ID.
The same docstore can be reused across index structures. This allows you to reuse the same storage for multiple index structures; otherwise, each index would create a docstore under the hood.
This will use the same docstore for multiple index structures.
- Parameters
kvstore (BaseKVStore) – key-value store
namespace (str) – namespace for the docstore
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- class llama_index.storage.docstore.MongoDocumentStore(mongo_kvstore: MongoDBKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
Mongo Document (Node) store.
A MongoDB store for Document and Node objects.
- Parameters
mongo_kvstore (MongoDBKVStore) – MongoDB key-value store
namespace (str) – namespace for the docstore
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- classmethod from_host_and_port(host: str, port: int, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore #
Load a MongoDocumentStore from a MongoDB host and port.
- classmethod from_uri(uri: str, db_name: Optional[str] = None, namespace: Optional[str] = None) MongoDocumentStore #
Load a MongoDocumentStore from a MongoDB URI.
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- class llama_index.storage.docstore.RedisDocumentStore(redis_kvstore: RedisKVStore, namespace: Optional[str] = None, batch_size: int = 1)#
Redis Document (Node) store.
A Redis store for Document and Node objects.
- Parameters
redis_kvstore (RedisKVStore) – Redis key-value store
namespace (str) – namespace for the docstore
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- classmethod from_host_and_port(host: str, port: int, namespace: Optional[str] = None) RedisDocumentStore #
Load a RedisDocumentStore from a Redis host and port.
- classmethod from_redis_client(redis_client: Any, namespace: Optional[str] = None) RedisDocumentStore #
Load a RedisDocumentStore from a Redis Client.
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the docstore to a file.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- class llama_index.storage.docstore.SimpleDocumentStore(simple_kvstore: Optional[SimpleKVStore] = None, namespace: Optional[str] = None, batch_size: int = 1)#
Simple Document (Node) store.
An in-memory store for Document and Node objects.
- Parameters
simple_kvstore (SimpleKVStore) – simple key-value store
namespace (str) – namespace for the docstore
- add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- async adelete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- async adelete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- async adocument_exists(doc_id: str) bool #
Check if document exists.
- async aget_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- async aget_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- async aget_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- async aget_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- async aget_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- async aget_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- async aget_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- async aget_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- async aref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- async aset_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- async aset_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.
- async async_add_documents(nodes: Sequence[BaseNode], allow_update: bool = True, batch_size: Optional[int] = None, store_text: bool = True) None #
Add documents to the store.
- Parameters
nodes (Sequence[BaseNode]) – documents
allow_update (bool) – allow update of docstore from document
- delete_document(doc_id: str, raise_error: bool = True, remove_ref_doc_node: bool = True) None #
Delete a document from the store.
- delete_ref_doc(ref_doc_id: str, raise_error: bool = True) None #
Delete a ref_doc and all its associated nodes.
- property docs: Dict[str, BaseNode]#
Get all documents.
- Returns
documents
- Return type
Dict[str, BaseNode]
- document_exists(doc_id: str) bool #
Check if document exists.
- classmethod from_persist_dir(persist_dir: str = './storage', namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore #
Create a SimpleDocumentStore from a persist directory.
- Parameters
persist_dir (str) – directory to persist the store
namespace (Optional[str]) – namespace for the docstore
fs (Optional[fsspec.AbstractFileSystem]) – filesystem to use
- classmethod from_persist_path(persist_path: str, namespace: Optional[str] = None, fs: Optional[AbstractFileSystem] = None) SimpleDocumentStore #
Create a SimpleDocumentStore from a persist path.
- Parameters
persist_path (str) – Path to persist the store
namespace (Optional[str]) – namespace for the docstore
fs (Optional[fsspec.AbstractFileSystem]) – filesystem to use
- get_all_document_hashes() Dict[str, str] #
Get the stored hash for all documents.
- get_all_ref_doc_info() Optional[Dict[str, RefDocInfo]] #
Get a mapping of ref_doc_id -> RefDocInfo for all ingested documents.
- get_document(doc_id: str, raise_error: bool = True) Optional[BaseNode] #
Get a document from the store.
- Parameters
doc_id (str) – document id
raise_error (bool) – raise error if doc_id not found
- get_document_hash(doc_id: str) Optional[str] #
Get the stored hash for a document, if it exists.
- get_node(node_id: str, raise_error: bool = True) BaseNode #
Get node from docstore.
- Parameters
node_id (str) – node id
raise_error (bool) – raise error if node_id not found
- get_node_dict(node_id_dict: Dict[int, str]) Dict[int, BaseNode] #
Get node dict from docstore given a mapping of index to node ids.
- Parameters
node_id_dict (Dict[int, str]) – mapping of index to node ids
- get_nodes(node_ids: List[str], raise_error: bool = True) List[BaseNode] #
Get nodes from docstore.
- Parameters
node_ids (List[str]) – node ids
raise_error (bool) – raise error if node_id not found
- get_ref_doc_info(ref_doc_id: str) Optional[RefDocInfo] #
Get the RefDocInfo for a given ref_doc_id.
- persist(persist_path: str = './storage/docstore.json', fs: Optional[AbstractFileSystem] = None) None #
Persist the store.
- ref_doc_exists(ref_doc_id: str) bool #
Check if a ref_doc_id has been ingested.
- set_document_hash(doc_id: str, doc_hash: str) None #
Set the hash for a given doc_id.
- set_document_hashes(doc_hashes: Dict[str, str]) None #
Set the hashes for multiple doc_ids.