Github Repo Reader#
If you’re opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.
%pip install llama-index-readers-github
!pip install llama-index
# This is due to the fact that we use asyncio.loop_until_complete in
# the DiscordReader. Since the Jupyter kernel itself runs on
# an event loop, we need to add some help with nesting
!pip install nest_asyncio httpx
import nest_asyncio
nest_asyncio.apply()
%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader
from IPython.display import Markdown, display
import os
%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"
documents = GithubRepositoryReader(
github_token=github_token,
owner=owner,
repo=repo,
use_parser=False,
verbose=False,
ignore_directories=["examples"],
).load_data(branch=branch)
index = VectorStoreIndex.from_documents(documents)
# import time
# for document in documents:
# print(document.metadata)
# time.sleep(.25)
query_engine = index.as_query_engine()
response = query_engine.query(
"What is the difference between VectorStoreIndex and SummaryIndex?",
verbose=True,
)
display(Markdown(f"<b>{response}</b>"))