mehakkhan committed
Commit 12d44b9 · verified · Parent: 6535dc9

Update app.py

Files changed (1)
  1. app.py +30 -23
app.py CHANGED
@@ -7,7 +7,6 @@ from langchain_groq import ChatGroq
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chains import RetrievalQA
 from langchain.docstore.document import Document
-from tempfile import NamedTemporaryFile
 import nbformat

 # Load Groq API Key securely
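
The dropped tempfile import matches how Gradio hands uploads to callbacks: with the upload components used here, each file argument already wraps a copy saved on disk, and its .name attribute is that on-disk path, so re-buffering through NamedTemporaryFile is redundant. A minimal sketch of that assumption (exact behavior varies by Gradio version; some versions pass plain path strings instead):

import gradio as gr

def show_upload_paths(files):
    # Each element is not raw bytes: Gradio has already written the upload
    # to a temp file, and .name holds that path.
    return "\n".join(f.name for f in files)

demo = gr.Interface(fn=show_upload_paths, inputs=gr.Files(), outputs="text")

if __name__ == "__main__":
    demo.launch()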
@@ -15,39 +14,42 @@ os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

 # Helper: Read .ipynb file and extract text
 def load_ipynb(file):
-    with NamedTemporaryFile(delete=False, suffix=".ipynb") as temp_file:
-        temp_file.write(file.read())
-        temp_file.flush()
-        with open(temp_file.name, "r", encoding="utf-8") as f:
+    try:
+        with open(file.name, "r", encoding="utf-8") as f:
             nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
             text = ""
             for cell in nb.cells:
                 if cell.cell_type in ["markdown", "code"]:
                     text += cell.source + "\n\n"
-    return [Document(page_content=text)]
+        return [Document(page_content=text)]
+    except Exception as e:
+        print("Error loading .ipynb:", e)
+        return []

 # Helper: Read PDF or IPYNB and build retriever chain
 def process_files(files):
-    all_docs = []
-    for file in files:
-        if file.name.endswith(".pdf"):
-            with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-                temp_file.write(file.read())
-                loader = PyPDFLoader(temp_file.name)
+    try:
+        all_docs = []
+        for file in files:
+            if file.name.endswith(".pdf"):
+                loader = PyPDFLoader(file.name)
                 all_docs.extend(loader.load())
-        elif file.name.endswith(".ipynb"):
-            all_docs.extend(load_ipynb(file))
+            elif file.name.endswith(".ipynb"):
+                all_docs.extend(load_ipynb(file))

-    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
-    chunks = splitter.split_documents(all_docs)
+        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+        chunks = splitter.split_documents(all_docs)

-    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
-    vectorstore = FAISS.from_documents(chunks, embeddings)
-    retriever = vectorstore.as_retriever()
+        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+        vectorstore = FAISS.from_documents(chunks, embeddings)
+        retriever = vectorstore.as_retriever()

-    llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
-    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
-    return qa_chain
+        llm = ChatGroq(model_name="llama3-70b-8192", temperature=0)
+        qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
+        return qa_chain
+    except Exception as e:
+        print("Error in processing files:", e)
+        return None

 # Global chain
 qa_chain = None
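
For reference, the notebook-extraction step that this hunk wraps in try/except can be exercised on its own. A standalone sketch mirroring load_ipynb (the notebook path below is a placeholder): read the notebook without version conversion, then keep only markdown and code cell sources.

import nbformat

def notebook_to_text(path):
    # Same logic as load_ipynb: markdown and code cells only,
    # joined with blank lines.
    with open(path, "r", encoding="utf-8") as f:
        nb = nbformat.read(f, as_version=nbformat.NO_CONVERT)
    return "\n\n".join(
        cell.source for cell in nb.cells
        if cell.cell_type in ("markdown", "code")
    )

# Placeholder usage:
# print(notebook_to_text("example.ipynb")[:200])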
@@ -55,12 +57,17 @@ qa_chain = None
 def upload_docs(files):
     global qa_chain
     qa_chain = process_files(files)
+    if qa_chain is None:
+        return "❌ Error processing files. Please make sure the file format is correct."
     return "✅ PDFs or Notebooks uploaded and processed. Now ask your questions."

 def ask_question(query):
     if qa_chain is None:
         return "❌ Please upload PDFs or Kaggle Notebooks first."
-    return qa_chain.run(query)
+    try:
+        return qa_chain.run(query)
+    except Exception as e:
+        return f"⚠ Error answering question: {e}"

 # Gradio UI
 with gr.Blocks() as app:
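
With the None check and try/except in place, the pipeline can be smoke-tested without launching the UI. A hypothetical check (placeholder file path; GROQ_API_KEY must be set), relying on the fact that plain file handles also expose a .name path:

# Run from the same module as process_files.
with open("sample.pdf", "rb") as pdf:  # placeholder path
    chain = process_files([pdf])

if chain is None:
    print("Processing failed; see the printed error above.")
else:
    # RetrievalQA's run() takes the query string and returns the answer text.
    print(chain.run("Summarize the document in one sentence."))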