Spaces:

GuhanAein
/

program-solver-rag

Sleeping

App Files Files Community

GuhanAein committed on Mar 23

Commit

2e97b24

verified ·

1 Parent(s): c3e7791

Update main.py

Browse files

Files changed (1) hide show

main.py +70 -0

main.py CHANGED Viewed

	@@ -0,0 +1,70 @@

+import json
+import os
+
+from fastapi import FastAPI
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+import torch
+from datasets import load_dataset
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings, StorageContext
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.vector_stores.faiss import FaissVectorStore
+import faiss
+from huggingface_hub import login
+app = FastAPI()
+# Log in to Hugging Face using environment variable
+hf_token = os.getenv("HF_TOKEN")
+if not hf_token:
+    raise ValueError("HF_TOKEN environment variable not set")
+login(hf_token)
+# Load Dataset and Prepare Knowledge Base
+ds = load_dataset("codeparrot/apps", "all", split="train")
+os.makedirs("knowledge_base", exist_ok=True)
+for i, example in enumerate(ds.select(range(100))):  # Reduced to 100 for free tier
+    solution = example['solutions'][0] if example['solutions'] else "No solution available"
+    with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
+        f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")
+documents = SimpleDirectoryReader("knowledge_base").load_data()
+# Setup RAG
+embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
+Settings.embed_model = embed_model
+d = 384
+faiss_index = faiss.IndexFlatL2(d)
+vector_store = FaissVectorStore(faiss_index=faiss_index)
+index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
+# Load LLaMA Model
+model_name = "meta-llama/Llama-3.2-1B-Instruct"
+quant_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_use_double_quant=True
+)
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    quantization_config=quant_config,
+    device_map="auto" if device == "cuda" else None
+)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+@app.get("/solve")
+async def solve_problem(problem: str, top_k: int = 1):
+    retriever = index.as_retriever(similarity_top_k=top_k)
+    retrieved_nodes = retriever.retrieve(problem)
+    context = retrieved_nodes[0].text if retrieved_nodes else "No relevant context found."
+    prompt = f"Given the following competitive programming problem:\n\n{problem}\n\nRelevant context:\n{context}\n\nGenerate a solution in Python:"
+    inputs = tokenizer(prompt, return_tensors="pt").to(device)
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=200,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    solution = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return {"solution": solution, "context": context}