Github Repo Reader#

If you’re opening this Notebook on colab, you will probably need to install LlamaIndex 🦙.

%pip install llama-index-readers-github

!pip install llama-index

# This is due to the fact that we use asyncio.loop_until_complete in
# the DiscordReader. Since the Jupyter kernel itself runs on
# an event loop, we need to add some help with nesting
!pip install nest_asyncio httpx
import nest_asyncio

nest_asyncio.apply()

%env OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
from llama_index.core import VectorStoreIndex
from llama_index.readers.github import GithubRepositoryReader
from IPython.display import Markdown, display
import os

%env GITHUB_TOKEN=github_pat_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
github_token = os.environ.get("GITHUB_TOKEN")
owner = "jerryjliu"
repo = "llama_index"
branch = "main"

documents = GithubRepositoryReader(
    github_token=github_token,
    owner=owner,
    repo=repo,
    use_parser=False,
    verbose=False,
    ignore_directories=["examples"],
).load_data(branch=branch)

index = VectorStoreIndex.from_documents(documents)

# import time
# for document in documents:
#     print(document.metadata)
#     time.sleep(.25)
query_engine = index.as_query_engine()
response = query_engine.query(
    "What is the difference between VectorStoreIndex and SummaryIndex?",
    verbose=True,
)

display(Markdown(f"<b>{response}</b>"))