Update app.py
app.py
CHANGED
@@ -9,9 +9,18 @@ import PyPDF2
 # Model Setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model_path = "ibm-granite/granite-3.1-1b-a400m-instruct"
-
 tokenizer = AutoTokenizer.from_pretrained(model_path)
-
+
+# Load the model with a conditional to avoid meta tensor issues on CPU vs GPU
+if device == "cpu":
+    model = AutoModelForCausalLM.from_pretrained(model_path)
+else:
+    model = AutoModelForCausalLM.from_pretrained(
+        model_path,
+        device_map="auto",
+        low_cpu_mem_usage=True,
+        torch_dtype=torch.float16,
+    )
 model.eval()
 
 # Embedding Model for FAISS
@@ -23,14 +32,12 @@ index = faiss.IndexFlatL2(dimension)
 docs = []  # Store document texts
 summary = ""  # Store book summary
 
-
 # Function to extract text from PDF
 def extract_text_from_pdf(uploaded_file):
     reader = PyPDF2.PdfReader(uploaded_file)
     text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
     return text
 
-
 # Function to process uploaded documents and generate summary
 def process_documents(files):
     global docs, index, summary
@@ -41,7 +48,6 @@ def process_documents(files):
             text = extract_text_from_pdf(file)
         else:
             text = file.getvalue().decode("utf-8")
-
         docs.append(text)
 
     embeddings = embedding_model.encode(docs)
@@ -50,22 +56,18 @@ def process_documents(files):
     # Generate summary after processing documents
     summary = generate_summary("\n".join(docs))
 
-
 # Function to generate a book summary
 def generate_summary(text):
     chat = [
         {"role": "system", "content": "You are a helpful AI that summarizes books."},
-        {"role": "user", "content": f"Summarize this book in a short paragraph:\n{text[:4000]}"}  # Limiting input size
+        {"role": "user", "content": f"Summarize this book in a short paragraph:\n{text[:4000]}"}  # Limiting input size for summarization
     ]
     chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-
     input_tokens = tokenizer(chat, return_tensors="pt").to(device)
     output = model.generate(**input_tokens, max_new_tokens=300)
-
     return tokenizer.batch_decode(output, skip_special_tokens=True)[0]
 
-
-# Function to retrieve relevant context
+# Function to retrieve relevant context using FAISS
 def retrieve_context(query):
     if index.ntotal == 0:
         return "No documents available. Please upload files first."
@@ -75,36 +77,30 @@ def retrieve_context(query):
 
     if len(indices) == 0 or indices[0][0] >= len(docs):
         return "No relevant context found."
-
     return docs[indices[0][0]]
 
-
-# Function to generate response using IBM Granite
+# Function to generate response using IBM Granite model
 def generate_response(query, context):
     chat = [
         {"role": "system", "content": "You are a helpful assistant using retrieved knowledge."},
         {"role": "user", "content": f"Context: {context}\nQuestion: {query}\nAnswer based on context:"},
     ]
     chat = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
-
     input_tokens = tokenizer(chat, return_tensors="pt").to(device)
     output = model.generate(**input_tokens, max_new_tokens=200)
-
     return tokenizer.batch_decode(output, skip_special_tokens=True)[0]
 
-
 # Streamlit UI
 st.set_page_config(page_title="📖 AI Book Assistant", page_icon="📖")
 st.title("📖 AI-Powered Book Assistant")
 st.subheader("Upload a book and get its summary or ask questions!")
 
-
+uploaded_file = st.file_uploader("Upload a book (PDF or TXT)", accept_multiple_files=False)
 
-if
+if uploaded_file:
     with st.spinner("Processing book and generating summary..."):
-        process_documents([
+        process_documents([uploaded_file])
     st.success("Book uploaded and processed!")
-
     st.markdown("### 📖 Book Summary:")
     st.write(summary)
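For readers trying the change outside the Space: the key edit in this commit is the device-conditional model load, intended (per the code comment) to avoid meta tensor issues when the app runs on CPU-only hardware, while still using fp16 and automatic device placement when a GPU is available. Below is a minimal standalone sketch of that pattern. The checkpoint id is the one from app.py, but the helper name load_granite and the snippet itself are illustrative assumptions, not part of the commit; device_map="auto" also assumes the accelerate package is installed.

# Standalone sketch of the loading pattern introduced above (illustrative, not from the commit).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "ibm-granite/granite-3.1-1b-a400m-instruct"  # same checkpoint as app.py

def load_granite():
    # Pick the device the same way app.py does.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    if device == "cpu":
        # Plain load: weights are materialized directly on CPU, so no meta tensors are involved.
        model = AutoModelForCausalLM.from_pretrained(MODEL_PATH)
    else:
        # GPU path: let accelerate place the weights and reduce memory with fp16.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_PATH,
            device_map="auto",
            low_cpu_mem_usage=True,
            torch_dtype=torch.float16,
        )
    model.eval()
    return tokenizer, model, device

if __name__ == "__main__":
    tokenizer, model, device = load_granite()
    print(f"Loaded {MODEL_PATH} on {device}")

Loading once at module level, as app.py does, keeps the tokenizer, model, and device together for the later generate calls in generate_summary and generate_response.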