Composable Graph Basic#
# NOTE: This is ONLY necessary in Jupyter notebooks.
# Details: Jupyter runs an event loop behind the scenes.
# This results in nested event loops when we start an event loop to make async queries.
# This is normally not allowed; we use nest_asyncio to allow it for convenience.
import nest_asyncio
nest_asyncio.apply()
import logging
import sys
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import (
VectorStoreIndex,
EmptyIndex,
TreeIndex,
SummaryIndex,
SimpleDirectoryReader,
ServiceContext,
StorageContext,
)
Download Data#
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
Load Datasets#
Load PG’s essay
# load PG's essay
essay_documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
Building the document indices#
Build a vector index for PG’s essay
Also build an empty index (to store prior knowledge)
# configure: chunk documents into 512-token nodes
service_context = ServiceContext.from_defaults(chunk_size=512)
storage_context = StorageContext.from_defaults()
# build essay index
essay_index = VectorStoreIndex.from_documents(
essay_documents,
service_context=service_context,
storage_context=storage_context,
)
# build an empty index; it holds no nodes and answers purely from the LLM
empty_index = EmptyIndex(
service_context=service_context, storage_context=storage_context
)
Query Indices#
See the response from querying each index individually.
query_engine = essay_index.as_query_engine(
similarity_top_k=3,
response_mode="tree_summarize",
)
response = query_engine.query(
"Tell me about what Sam Altman did during his time in YC",
)
print(str(response))
query_engine = empty_index.as_query_engine(response_mode="generation")
response = query_engine.query(
"Tell me about what Sam Altman did during his time in YC",
)
print(str(response))
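Because nest_asyncio was applied above, the async variant of the query call also works inside the notebook. A minimal sketch, assuming the standard aquery coroutine on the query engine:
import asyncio
# async variant of query(); nest_asyncio (applied above) permits this
# nested event loop inside Jupyter
response = asyncio.run(
    query_engine.aquery(
        "Tell me about what Sam Altman did during his time in YC",
    )
)
print(str(response))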
Define a summary for each index. The parent index uses these summaries to decide how to route queries to (and synthesize responses across) the child indices.
essay_index_summary = (
"This document describes Paul Graham's life, from early adulthood to the"
" present day."
)
empty_index_summary = "This can be used for general knowledge purposes."
Define Graph (Summary Index as Parent Index)#
This allows us to synthesize responses using both a knowledge corpus and the LLM’s prior knowledge.
from llama_index.indices.composability import ComposableGraph
graph = ComposableGraph.from_indices(
SummaryIndex,
[essay_index, empty_index],
index_summaries=[essay_index_summary, empty_index_summary],
service_context=service_context,
storage_context=storage_context,
)
# [optional] persist to disk
storage_context.persist()
root_id = graph.root_id
# [optional] load from disk
from llama_index.indices.loading import load_graph_from_storage
graph = load_graph_from_storage(storage_context, root_id=root_id)
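In a completely fresh session, the storage context itself must first be rebuilt from the persist directory before the graph can be loaded. A minimal sketch, assuming the default ./storage persist directory and that root_id was saved somewhere:
# rebuild the storage context from disk ("./storage" is the default
# persist directory), then load the graph by its saved root_id
storage_context = StorageContext.from_defaults(persist_dir="./storage")
graph = load_graph_from_storage(storage_context, root_id=root_id)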
# configure query engines
custom_query_engines = {
essay_index.index_id: essay_index.as_query_engine(
similarity_top_k=3,
response_mode="tree_summarize",
)
}
# set Logging to DEBUG for more detailed outputs
# ask it a question about Sam Altman
query_engine = graph.as_query_engine(custom_query_engines=custom_query_engines)
response = query_engine.query(
"Tell me about what Sam Altman did during his time in YC",
)
print(str(response))
# Get source of response
print(response.get_formatted_sources())
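Beyond the formatted string, the source nodes can also be inspected programmatically. A short sketch, assuming the usual source_nodes list of scored nodes on the response:
# each source entry pairs a retrieved node with an optional similarity score
for node_with_score in response.source_nodes:
    print(node_with_score.score, node_with_score.node.get_text()[:100])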
Define Graph (Tree Index as Parent Index)#
This allows us to “route” a query to either a knowledge-augmented index, or to the LLM itself.
from llama_index.indices.composability import ComposableGraph
# configure query engines
custom_query_engines = {
essay_index.index_id: essay_index.as_query_engine(
similarity_top_k=3,
response_mode="tree_summarize",
)
}
graph2 = ComposableGraph.from_indices(
TreeIndex,
[essay_index, empty_index],
index_summaries=[essay_index_summary, empty_index_summary],
)
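Optionally, the root TreeIndex’s query engine can be customized as well, keyed by the graph’s root_id. A sketch, assuming the child_branch_factor keyword accepted by the tree retriever:
# [optional] let the root tree consider two children per level
# (child_branch_factor is an assumption about the tree retriever's kwargs)
custom_query_engines[graph2.root_id] = graph2.root_index.as_query_engine(
    child_branch_factor=2
)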
# set Logging to DEBUG for more detailed outputs
# ask it a question about Paul Graham
query_engine = graph2.as_query_engine(
custom_query_engines=custom_query_engines
)
response = query_engine.query(
"Tell me about what Paul Graham did growing up?",
)
print(str(response))
print(response.get_formatted_sources())
response = query_engine.query(
"Tell me about Barack Obama",
)
print(str(response))
print(response.get_formatted_sources())