Spaces:

TabasumDev
/

GraniteByte

Sleeping

App Files Community

TabasumDev committed on Feb 22

Commit

d1b0bff

verified ·

1 Parent(s): d627145

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -48

app.py CHANGED Viewed

@@ -444,15 +444,18 @@
 # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
 import streamlit as st
 import os
 import re
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
-from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
-# ✅ Force CPU execution for Hugging Face Spaces
 device = torch.device("cpu")
 # 🔹 Load IBM Granite Model (CPU-Compatible)
@@ -460,8 +463,8 @@ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="cpu",
-    torch_dtype=torch.float32
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -480,35 +483,18 @@ model.eval()
 # 🛠 Function to Read & Extract Text from PDFs
 def read_files(uploaded_file):
-    try:
-        # 🔥 Step 1: Save file to disk first
-        temp_pdf_path = "temp_uploaded_file.pdf"
-        with open(temp_pdf_path, "wb") as f:
-            f.write(uploaded_file.getbuffer())  # Save the file
-        # 🔥 Step 2: Open the saved file and extract text
-        st.write("📂 Processing saved PDF file...")  # Debugging
-        file_context = ""
-        reader = PdfReader(temp_pdf_path)
-        for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
-        # 🔥 Step 3: Delete the temp file after reading
-        os.remove(temp_pdf_path)
-        if not file_context.strip():
-            st.error("⚠️ No text found. The document might be scanned or encrypted.")
-            return ""
-        st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
-        return file_context.strip()
-    except Exception as e:
-        st.error(f"⚠️ Error reading PDF: {e}")
-        return ""
 # 🛠 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
@@ -538,25 +524,18 @@ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
 # 🛠 Function to Clean AI Output
 def post_process(text):
-    cleaned = re.sub(r'戥+', '', text)
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
     return "\n".join(unique_lines)
 # 🛠 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
-    if not file:
-        st.error("⚠️ No file detected. Please upload a document.")
-        return ""
-    file_context = read_files(file)
-    if not file_context:
-        st.error("⚠️ No valid text extracted from the document.")
-        return ""
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-    messages = format_prompt(system_message, prompt, file_context)
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     response = generate_response(input_text)
@@ -580,8 +559,19 @@ def main():
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
     if uploaded_file:
-        st.success(f"✅ File uploaded: {uploaded_file.name}")
-        st.write(f"📏 File Size: {uploaded_file.size / 1024:.2f} KB")
         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
@@ -590,11 +580,9 @@ def main():
                     uploaded_file
                 )
-            if final_answer:
-                st.subheader("📑 Analysis Result")
-                st.write(final_answer)
-            else:
-                st.error("⚠️ No response generated. Please check your input.")
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
@@ -605,6 +593,7 @@ if __name__ == '__main__':
 # import streamlit as st
 # from PyPDF2 import PdfReader

 # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
 import streamlit as st
 import os
 import re
 import torch
+import pdfplumber
 from transformers import AutoModelForCausalLM, AutoTokenizer
 from peft import get_peft_model, LoraConfig, TaskType
+# ✅ Force CPU execution
 device = torch.device("cpu")
 # 🔹 Load IBM Granite Model (CPU-Compatible)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
+    device_map="cpu",  # Force CPU execution
+    torch_dtype=torch.float32
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 # 🛠 Function to Read & Extract Text from PDFs
 def read_files(uploaded_file):
+    file_context = ""
+    with pdfplumber.open(uploaded_file) as pdf:
+        for page in pdf.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
+    if not file_context.strip():
+        st.error("⚠️ No text extracted. This document may be scanned or encrypted.")
+    return file_context.strip()
 # 🛠 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
 # 🛠 Function to Clean AI Output
 def post_process(text):
+    cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
     return "\n".join(unique_lines)
 # 🛠 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
+    file_context = read_files(file) if file else ""
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+    messages = format_prompt(system_message, prompt, file_context)
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     response = generate_response(input_text)
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
     if uploaded_file:
+        # ✅ Debugging: Show file info
+        st.success(f"✅ File uploaded: {uploaded_file.name}, Size: {uploaded_file.size / 1024:.2f} KB")
+        # ✅ Extract and preview text
+        extracted_text = read_files(uploaded_file)
+        if extracted_text:
+            st.write("📜 Extracted Text Preview:")
+            st.text_area("Extracted Text", extracted_text[:2000], height=200)  # Show first 2000 chars
+        st.write("Click the button below to analyze the contract.")
+        # Force button to always render
+        st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
                     uploaded_file
                 )
+            # 🔹 Display Analysis Result
+            st.subheader("📑 Analysis Result")
+            st.write(final_answer)
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
 # import streamlit as st
 # from PyPDF2 import PdfReader