Gradient Base Model#
If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙 along with the Gradient and LangChain embedding integrations used below.
%pip install llama-index-embeddings-langchain
%pip install llama-index-llms-gradient
%pip install llama-index --quiet
%pip install gradientai --quiet
import os

# Set your Gradient access token and workspace id before constructing the LLM.
os.environ["GRADIENT_ACCESS_TOKEN"] = "{GRADIENT_ACCESS_TOKEN}"
os.environ["GRADIENT_WORKSPACE_ID"] = "{GRADIENT_WORKSPACE_ID}"
Flow 1: Query Gradient LLM directly#
from llama_index.llms.gradient import GradientBaseModelLLM

# Instantiate a Gradient-hosted base model; max_tokens caps the completion length.
llm = GradientBaseModelLLM(
    base_model_slug="llama2-7b-chat",
    max_tokens=400,
)
result = llm.complete("Can you tell me about large language models?")
print(result)
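GradientBaseModelLLM implements the standard LlamaIndex LLM interface, so the chat API should work as well; a minimal sketch reusing the llm instance from above:

from llama_index.core.llms import ChatMessage

# Build a short conversation and send it through the chat interface.
messages = [
    ChatMessage(role="system", content="You are a helpful assistant."),
    ChatMessage(role="user", content="What are large language models used for?"),
]
print(llm.chat(messages))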
Flow 2: Retrieval Augmented Generation (RAG) with Gradient LLM#
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain.embeddings import HuggingFaceEmbeddings
from llama_index.core.node_parser import SentenceSplitter
Download Data#
!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'
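If shell commands are unavailable in your environment, the same file can be fetched in pure Python; a sketch using only the standard library, with the same URL and target path as the cell above:

import os
import urllib.request

# Create the data directory and download the essay.
os.makedirs("data/paul_graham", exist_ok=True)
url = "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/examples/data/paul_graham/paul_graham_essay.txt"
urllib.request.urlretrieve(url, "data/paul_graham/paul_graham_essay.txt")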
Load Documents#
documents = SimpleDirectoryReader("./data/paul_graham/").load_data()
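As a quick sanity check before building the index, confirm the essay was actually loaded:

# Each file in the directory becomes one Document object.
print(f"Loaded {len(documents)} document(s)")
print(documents[0].text[:200])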
Configure Embedding Model and Node Parser#
# Wrap a LangChain embedding model for use with LlamaIndex; HuggingFaceEmbeddings
# defaults to sentence-transformers/all-mpnet-base-v2.
embed_model = LangchainEmbedding(HuggingFaceEmbeddings())
splitter = SentenceSplitter(chunk_size=1024)
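To preview what the splitter will feed the index, you can run it on the documents directly via get_nodes_from_documents, the standard node-parser entry point:

# Split the documents into ~1024-token chunks (nodes).
nodes = splitter.get_nodes_from_documents(documents)
print(f"Split {len(documents)} document(s) into {len(nodes)} nodes")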
Set Up and Query the Index#
index = VectorStoreIndex.from_documents(
    documents, transformations=[splitter], embed_model=embed_model
)
# Reuse the Gradient LLM constructed in Flow 1 for response synthesis.
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query(
    "What did the author do after his time at Y Combinator?"
)
print(response)
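The response object also carries the retrieved context, which is useful for checking what the answer was grounded on:

# Inspect the retrieved chunks and their similarity scores.
for node_with_score in response.source_nodes:
    print(node_with_score.score, node_with_score.node.get_content()[:100])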