28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
class SummaryIndex(BaseIndex[IndexList]):
    """Summary Index.

    The summary index is a simple data structure where nodes are stored in
    a sequence. During index construction, the document texts are
    chunked up, converted to nodes, and stored in a list.

    During query time, the summary index iterates through the nodes
    with some optional filter parameters, and synthesizes an
    answer from all the nodes.

    Args:
        text_qa_template (Optional[BasePromptTemplate]): A Question-Answer Prompt
            (see :ref:`Prompt-Templates`).
            NOTE: this is a deprecated field.
        show_progress (bool): Whether to show tqdm progress bars. Defaults to False.
    """

    index_struct_cls = IndexList

    def __init__(
        self,
        nodes: Optional[Sequence[BaseNode]] = None,
        objects: Optional[Sequence[IndexNode]] = None,
        index_struct: Optional[IndexList] = None,
        show_progress: bool = False,
        **kwargs: Any,
    ) -> None:
        """Initialize params."""
        super().__init__(
            nodes=nodes,
            index_struct=index_struct,
            show_progress=show_progress,
            objects=objects,
            **kwargs,
        )

    def as_retriever(
        self,
        retriever_mode: Union[str, ListRetrieverMode] = ListRetrieverMode.DEFAULT,
        llm: Optional[LLM] = None,
        embed_model: Optional[BaseEmbedding] = None,
        **kwargs: Any,
    ) -> BaseRetriever:
        """Return a retriever for this index.

        Args:
            retriever_mode (Union[str, ListRetrieverMode]): One of DEFAULT
                (iterate all nodes), EMBEDDING (embedding-based top-k), or
                LLM (LLM-based selection).
            llm (Optional[LLM]): LLM for the LLM retriever mode; falls back
                to ``Settings.llm``.
            embed_model (Optional[BaseEmbedding]): Embed model for the
                embedding retriever mode; falls back to ``Settings.embed_model``.

        Raises:
            ValueError: If ``retriever_mode`` is not a recognized mode.
        """
        # Imported locally to avoid a circular import with the retrievers module.
        from llama_index.core.indices.list.retrievers import (
            SummaryIndexEmbeddingRetriever,
            SummaryIndexLLMRetriever,
            SummaryIndexRetriever,
        )

        if retriever_mode == ListRetrieverMode.DEFAULT:
            return SummaryIndexRetriever(self, object_map=self._object_map, **kwargs)
        elif retriever_mode == ListRetrieverMode.EMBEDDING:
            embed_model = embed_model or Settings.embed_model
            return SummaryIndexEmbeddingRetriever(
                self, object_map=self._object_map, embed_model=embed_model, **kwargs
            )
        elif retriever_mode == ListRetrieverMode.LLM:
            llm = llm or Settings.llm
            return SummaryIndexLLMRetriever(
                self, object_map=self._object_map, llm=llm, **kwargs
            )
        else:
            raise ValueError(f"Unknown retriever mode: {retriever_mode}")

    def _build_index_from_nodes(
        self,
        nodes: Sequence[BaseNode],
        show_progress: bool = False,
        **build_kwargs: Any,
    ) -> IndexList:
        """Build the index from nodes.

        Args:
            nodes (Sequence[BaseNode]): Nodes to add to the index struct.
            show_progress (bool): Whether to show a tqdm progress bar.

        Returns:
            IndexList: The created summary index struct (an ordered list
            of node ids).
        """
        index_struct = IndexList()
        nodes_with_progress = get_tqdm_iterable(
            nodes, show_progress, "Processing nodes"
        )
        for n in nodes_with_progress:
            index_struct.add_node(n)
        return index_struct

    def _insert(self, nodes: Sequence[BaseNode], **insert_kwargs: Any) -> None:
        """Insert nodes by appending their ids to the index struct."""
        for n in nodes:
            self._index_struct.add_node(n)

    def _delete_node(self, node_id: str, **delete_kwargs: Any) -> None:
        """Delete a node from the index struct.

        The struct stores node ids directly, so the id is filtered out
        in place — no docstore round-trip is needed (this also avoids
        failing when a stale id is no longer present in the docstore).
        """
        self._index_struct.nodes = [
            existing_id
            for existing_id in self._index_struct.nodes
            if existing_id != node_id
        ]

    @property
    def ref_doc_info(self) -> Dict[str, RefDocInfo]:
        """Retrieve a dict mapping of ingested documents and their nodes+metadata."""
        node_doc_ids = self._index_struct.nodes
        nodes = self.docstore.get_nodes(node_doc_ids)

        all_ref_doc_info = {}
        for node in nodes:
            ref_node = node.source_node
            # Skip nodes with no source document reference.
            if not ref_node:
                continue

            ref_doc_info = self.docstore.get_ref_doc_info(ref_node.node_id)
            if not ref_doc_info:
                continue

            all_ref_doc_info[ref_node.node_id] = ref_doc_info
        return all_ref_doc_info