import os
import gc
import tempfile
import uuid
import logging

import streamlit as st
from dotenv import load_dotenv
from gitingest import ingest
from llama_index.core import (
    Settings,
    PromptTemplate,
    VectorStoreIndex,
    SimpleDirectoryReader,
)
from llama_index.core.node_parser import MarkdownNodeParser
from llama_index.llms.sambanovasystems import SambaNovaCloud
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Load environment variables from .env
load_dotenv()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class GitHubRAGError(Exception):
    """Custom exception for GitHub RAG application errors."""
    pass


# Fetch the SambaNova API key and fail fast if it is missing
SAMBANOVA_API_KEY = os.getenv("SAMBANOVA_API_KEY")
if not SAMBANOVA_API_KEY:
    raise ValueError("SAMBANOVA_API_KEY is not set in environment variables")

# Initialize Streamlit session state on first run
if "id" not in st.session_state:
    st.session_state.id = uuid.uuid4()
    st.session_state.file_cache = {}
    st.session_state.messages = []

session_id = st.session_state.id


@st.cache_resource
def load_llm():
    """Load and cache the SambaNova LLM."""
    return SambaNovaCloud(
        api_key=SAMBANOVA_API_KEY,
        model="DeepSeek-R1-Distill-Llama-70B",
        temperature=0.1,
        top_p=0.1,
    )


def reset_chat():
    """Clear chat history and free resources."""
    st.session_state.messages = []
    gc.collect()


def process_with_gitingest(github_url: str):
    """Use gitingest to fetch and summarize the GitHub repository."""
    summary, tree, content = ingest(github_url)
    return summary, tree, content


# --- Sidebar: Load Repository ---
with st.sidebar:
    st.header("Add your GitHub repository!")
    github_url = st.text_input(
        "GitHub repo URL", placeholder="https://github.com/user/repo"
    )
    load_btn = st.button("Load Repository")

    if github_url and load_btn:
        try:
            repo_name = github_url.rstrip("/").split("/")[-1]
            cache_key = f"{session_id}-{repo_name}"

            # Only process the repository if it is not already cached
            if cache_key not in st.session_state.file_cache:
                with st.spinner("Processing repository..."):
                    summary, tree, content = process_with_gitingest(github_url)

                    # Write the ingested content to a temporary markdown file
                    # so SimpleDirectoryReader can load it as a document
                    with tempfile.TemporaryDirectory() as tmpdir:
                        md_path = os.path.join(tmpdir, f"{repo_name}.md")
                        with open(md_path, "w", encoding="utf-8") as f:
                            f.write(content)

                        loader = SimpleDirectoryReader(input_dir=tmpdir)
                        docs = loader.load_data()

                    # Configure the global embedding model and LLM
                    embed_model = HuggingFaceEmbedding(
                        model_name="nomic-ai/nomic-embed-text-v2-moe",
                        trust_remote_code=True,
                    )
                    Settings.embed_model = embed_model
                    llm = load_llm()
                    Settings.llm = llm

                    # Parse the markdown into nodes and build the vector index
                    node_parser = MarkdownNodeParser()
                    index = VectorStoreIndex.from_documents(
                        documents=docs,
                        transformations=[node_parser],
                        show_progress=True,
                    )

                    # Pre-fill {tree} with partial_format so the query engine
                    # only needs to supply {context_str} and {query_str} at
                    # query time; otherwise formatting raises a KeyError
                    qa_prompt = PromptTemplate(
                        "You are an AI assistant specialized in analyzing GitHub repositories.\n"
                        "Repository structure:\n{tree}\n---\n"
                        "Context:\n{context_str}\n---\n"
                        "Question: {query_str}\nAnswer:"
                    ).partial_format(tree=tree)

                    query_engine = index.as_query_engine(streaming=True)
                    query_engine.update_prompts(
                        {"response_synthesizer:text_qa_template": qa_prompt}
                    )

                    st.session_state.file_cache[cache_key] = (query_engine, tree)

                st.success("Repository loaded and indexed. Ready to chat!")
            else:
                st.info("Repository already loaded.")
        except Exception as e:
            st.error(f"Error loading repository: {e}")
            logger.error(f"Load error: {e}")

# --- Main UI: Chat Interface ---
col1, col2 = st.columns([6, 1])
with col1:
    st.header("Chat with GitHub RAG")
with col2:
    st.button("Clear Chat ↺", on_click=reset_chat)

# Display chat history
for msg in st.session_state.messages:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])

# Chat input box
if prompt := st.chat_input("Ask a question about the repository..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    repo_name = github_url.rstrip("/").split("/")[-1]
    cache_key = f"{session_id}-{repo_name}"

    if cache_key not in st.session_state.file_cache:
        st.error("Please load a repository first!")
    else:
        query_engine, tree = st.session_state.file_cache[cache_key]
        with st.chat_message("assistant"):
            placeholder = st.empty()
            response_text = ""
            try:
                response = query_engine.query(prompt)
                # Stream tokens when the engine supports it; otherwise
                # render the full response at once
                if hasattr(response, "response_gen"):
                    for chunk in response.response_gen:
                        response_text += chunk
                        placeholder.markdown(response_text + "▌")
                else:
                    response_text = str(response)
            except GitHubRAGError as e:
                st.error(str(e))
                logger.error(f"Error in chat processing: {e}")
                response_text = "Sorry, I couldn't process that request."
            except Exception as e:
                st.error("An unexpected error occurred while processing your query")
                logger.error(f"Unexpected error in chat: {e}")
                response_text = "Sorry, something went wrong."
            # Final render without the streaming cursor (also shows the
            # fallback text when an exception was caught above)
            placeholder.markdown(response_text)

        st.session_state.messages.append(
            {"role": "assistant", "content": response_text}
        )