Update app.py
app.py CHANGED
@@ -445,8 +445,6 @@
 # if __name__ == '__main__':
 # main()
 
-
-
 import streamlit as st
 import os
 import re
@@ -455,7 +453,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
 
-# ✅ Force CPU execution for
+# ✅ Force CPU execution for Hugging Face Spaces
 device = torch.device("cpu")
 
 # 🔹 Load IBM Granite Model (CPU-Compatible)
@@ -463,8 +461,8 @@ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="cpu",
-    torch_dtype=torch.float32
+    device_map="cpu",
+    torch_dtype=torch.float32
 )
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -481,17 +479,27 @@ lora_config = LoraConfig(
 model = get_peft_model(model, lora_config)
 model.eval()
 
-# π Function to Read & Extract Text from PDFs
+# π Function to Read & Extract Text from PDFs (With Debugging)
 def read_files(file):
+    st.write("π Processing uploaded file...")  # Debugging
     file_context = ""
-
-
-
-
-
-
+    try:
+        reader = PdfReader(file)
+        for page in reader.pages:
+            text = page.extract_text()
+            if text:
+                file_context += text + "\n"
+
+        if not file_context.strip():
+            st.error("⚠️ No text found. The document might be scanned or encrypted.")
+            return ""
+
+        st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
+        return file_context.strip()
 
-
+    except Exception as e:
+        st.error(f"⚠️ Error reading PDF: {e}")
+        return ""
 
 # π Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
@@ -521,18 +529,25 @@ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
 
 # π Function to Clean AI Output
 def post_process(text):
-    cleaned = re.sub(r'日+', '', text)
+    cleaned = re.sub(r'日+', '', text)
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
    return "\n".join(unique_lines)
 
 # π Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
-
-
+    if not file:
+        st.error("⚠️ No file detected. Please upload a document.")
+        return ""
+
+    file_context = read_files(file)
+    if not file_context:
+        st.error("⚠️ No valid text extracted from the document.")
+        return ""
+
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-
     messages = format_prompt(system_message, prompt, file_context)
+
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
     response = generate_response(input_text)
@@ -540,7 +555,7 @@ def granite_simple(prompt, file):
 
 # 🔹 Streamlit UI
 def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="π"
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="π")
 
     st.title("π AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -555,26 +570,22 @@ def main():
     # 🔹 File Upload Section
     uploaded_file = st.file_uploader("π Upload a contract document (PDF)", type="pdf")
 
-    if uploaded_file
-
-
-        f.write(uploaded_file.getbuffer())
-
-        st.success("✅ File uploaded successfully!")
-
-    # 🔹 User Input for Analysis
-    user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
+    if uploaded_file:
+        st.success(f"✅ File uploaded: {uploaded_file.name}")
+        st.write(f"π File Size: {uploaded_file.size / 1024:.2f} KB")
 
     if st.button("π Analyze Document"):
         with st.spinner("Analyzing contract document... ⏳"):
-            final_answer = granite_simple(
-
-
-
-
-
-
+            final_answer = granite_simple(
+                "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+                uploaded_file
+            )
+
+            if final_answer:
+                st.subheader("π Analysis Result")
+                st.write(final_answer)
+            else:
+                st.error("⚠️ No response generated. Please check your input.")
 
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
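
Note: the rewritten read_files above routes all feedback through Streamlit widgets, but the underlying extraction can be checked outside the app. Below is a minimal standalone sketch of the same PyPDF2 flow, assuming PyPDF2 >= 3.0; extract_pdf_text and contract.pdf are illustrative names, not part of this commit.

# Standalone sketch of the PDF-extraction path added in read_files.
from PyPDF2 import PdfReader

def extract_pdf_text(path):
    try:
        reader = PdfReader(path)
        text = ""
        for page in reader.pages:
            page_text = page.extract_text()  # may be empty for scanned/image-only pages
            if page_text:
                text += page_text + "\n"
        return text.strip()
    except Exception as exc:  # encrypted or corrupt PDFs raise here
        print(f"Error reading PDF: {exc}")
        return ""

if __name__ == "__main__":
    content = extract_pdf_text("contract.pdf")  # hypothetical sample file
    if content:
        print(f"Extracted {len(content)} characters.")
    else:
        print("No text found; the document may be scanned or encrypted.")

As in the committed code, an empty return value is the signal for "scanned, encrypted, or unreadable", which the Streamlit layer then surfaces via st.error.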
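
Two smaller fixes in this commit are easy to miss. First, the previous revision's st.set_page_config(...) call was missing its closing parenthesis, a SyntaxError that prevented the script from loading at all; Streamlit also requires set_page_config to be the first Streamlit command the script executes, which its placement at the top of main() satisfies. Second, post_process de-duplicates repeated output lines with an order-preserving dict.fromkeys pass. A sketch of just that step follows, with an invented sample input (the committed function additionally strips runs of a stray CJK character via re.sub before de-duplicating):

# Sketch of post_process's de-duplication step; dedupe_lines is an
# illustrative name and the sample input is invented.
def dedupe_lines(text):
    lines = text.splitlines()
    # dict.fromkeys keeps first occurrences in insertion order (Python 3.7+)
    unique_lines = list(dict.fromkeys(line.strip() for line in lines if line.strip()))
    return "\n".join(unique_lines)

print(dedupe_lines("Clause 1\nClause 1\n\nClause 2"))
# Clause 1
# Clause 2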