Qdrant Vector Store¶
Creating a Qdrant client¶
In [ ]:
Copied!
%pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed llama-index-llms-openai
%pip install llama-index-vector-stores-qdrant llama-index-readers-file llama-index-embeddings-fastembed llama-index-llms-openai
In [ ]:
Copied!
import logging
import sys
import os
import qdrant_client
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")
import logging
import sys
import os
import qdrant_client
from IPython.display import Markdown, display
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.core import Settings
Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-base-en-v1.5")
If running for the first, time, install the dependencies using:
!pip install -U qdrant_client fastembed
Set your OpenAI key for authenticating the LLM
In [ ]:
Copied!
%env OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
%env OPENAI_API_KEY=
In [ ]:
Copied!
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
Download Data
In [ ]:
Copied!
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
Load the documents¶
In [ ]:
Copied!
# load documents
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
# load documents
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
Build the VectorStoreIndex¶
In [ ]:
Copied!
client = qdrant_client.QdrantClient(
# you can use :memory: mode for fast and light-weight experiments,
# it does not require to have Qdrant deployed anywhere
# but requires qdrant-client >= 1.1.1
location=":memory:"
# otherwise set Qdrant instance address with:
# uri="http://<host>:<port>"
# set API KEY for Qdrant Cloud
# api_key="<qdrant-api-key>",
)
client = qdrant_client.QdrantClient(
# you can use :memory: mode for fast and light-weight experiments,
# it does not require to have Qdrant deployed anywhere
# but requires qdrant-client >= 1.1.1
location=":memory:"
# otherwise set Qdrant instance address with:
# uri="http://:"
# set API KEY for Qdrant Cloud
# api_key="",
)
In [ ]:
Copied!
vector_store = QdrantVectorStore(client=client, collection_name="paul_graham")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
vector_store = QdrantVectorStore(client=client, collection_name="paul_graham")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
)
Query Index¶
In [ ]:
Copied!
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
In [ ]:
Copied!
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"{response}"))
The author worked on writing and programming before college.
In [ ]:
Copied!
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query(
"What did the author do after his time at Viaweb?"
)
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query(
"What did the author do after his time at Viaweb?"
)
In [ ]:
Copied!
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"{response}"))
The author arranged to do freelance work for a group that did projects for customers after his time at Viaweb.
Build the VectorStoreIndex asynchronously¶
In [ ]:
Copied!
# To connect to the same event-loop,
# allows async events to run on notebook
import nest_asyncio
nest_asyncio.apply()
# To connect to the same event-loop,
# allows async events to run on notebook
import nest_asyncio
nest_asyncio.apply()
In [ ]:
Copied!
aclient = qdrant_client.AsyncQdrantClient(
# you can use :memory: mode for fast and light-weight experiments,
# it does not require to have Qdrant deployed anywhere
# but requires qdrant-client >= 1.1.1
location=":memory:"
# otherwise set Qdrant instance address with:
# uri="http://<host>:<port>"
# set API KEY for Qdrant Cloud
# api_key="<qdrant-api-key>",
)
aclient = qdrant_client.AsyncQdrantClient(
# you can use :memory: mode for fast and light-weight experiments,
# it does not require to have Qdrant deployed anywhere
# but requires qdrant-client >= 1.1.1
location=":memory:"
# otherwise set Qdrant instance address with:
# uri="http://:"
# set API KEY for Qdrant Cloud
# api_key="",
)
In [ ]:
Copied!
vector_store = QdrantVectorStore(
aclient=aclient, collection_name="paul_graham", prefer_grpc=True
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
use_async=True,
)
vector_store = QdrantVectorStore(
aclient=aclient, collection_name="paul_graham", prefer_grpc=True
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
documents,
storage_context=storage_context,
use_async=True,
)
Async Query Index¶
In [ ]:
Copied!
query_engine = index.as_query_engine(use_async=True)
response = await query_engine.aquery("What did the author do growing up?")
query_engine = index.as_query_engine(use_async=True)
response = await query_engine.aquery("What did the author do growing up?")
In [ ]:
Copied!
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"{response}"))
The author worked on writing short stories and programming, particularly on an IBM 1401 computer in 9th grade using an early version of Fortran. Later, the author transitioned to working on microcomputers, starting with a TRS-80 in about 1980, where they wrote simple games, programs, and a word processor.
In [ ]:
Copied!
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(use_async=True)
response = await query_engine.aquery(
"What did the author do after his time at Viaweb?"
)
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine(use_async=True)
response = await query_engine.aquery(
"What did the author do after his time at Viaweb?"
)
In [ ]:
Copied!
display(Markdown(f"<b>{response}</b>"))
display(Markdown(f"{response}"))
The author went on to co-found Y Combinator after his time at Viaweb.