Update app.py
app.py CHANGED
@@ -445,29 +445,31 @@
 # if __name__ == '__main__':
 #     main()

-
-
 import streamlit as st
 import os
 import re
 import torch
-import pdfplumber
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType

-# ✅ Force CPU execution
-
+# ✅ Force CPU execution and disable bitsandbytes
+os.environ["BITSANDBYTES_NOWELCOME"] = "1"
+os.environ["USE_TORCH_CPP_BACKEND"] = "1"

 # 🔹 Load IBM Granite Model (CPU-Compatible)
 MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"

-
-
-
-
-
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="cpu",
+        torch_dtype=torch.float32
+    )
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+except Exception as e:
+    st.error(f"🚨 Model loading failed: {str(e)}")
+    st.stop()

 # 🔹 Apply LoRA Fine-Tuning Configuration
 lora_config = LoraConfig(
@@ -478,28 +480,34 @@ lora_config = LoraConfig(
     bias="none",
     task_type=TaskType.CAUSAL_LM
 )
-
-
+
+try:
+    model = get_peft_model(model, lora_config)
+    model.eval()
+except Exception as e:
+    st.error(f"🚨 LoRA model initialization failed: {str(e)}")
+    st.stop()

 # 📜 Function to Read & Extract Text from PDFs
-def read_files(
+def read_files(file):
+    """Extracts text from uploaded PDF file."""
     file_context = ""
-
-
-    for page in
+    try:
+        reader = PdfReader(file)
+        for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
+    except Exception as e:
+        st.error(f"🚨 PDF reading failed: {str(e)}")
+        return ""

-
-        st.error("⚠️ No text extracted. This document may be scanned or encrypted.")
-
-    return file_context.strip()
+    return file_context.strip() if file_context else "No readable text found in the document."

 # 📜 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
-        system_msg +=
+        system_msg += " The user has provided a contract document. Analyze it and extract key insights, but do not summarize it."
     return [
         {"role": "system", "content": system_msg},
         {"role": "user", "content": user_msg}
@@ -507,20 +515,25 @@ def format_prompt(system_msg, user_msg, file_context=""):

 # 📜 Function to Generate AI Responses
 def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-
-
-
-
-
-
-
-
-
-
-
-
+    """Generates response using IBM Granite."""
+    try:
+        model_inputs = tokenizer([input_text], return_tensors="pt").to("cpu")
+
+        with torch.no_grad():
+            output = model.generate(
+                **model_inputs,
+                max_new_tokens=max_tokens,
+                do_sample=True,
+                top_p=top_p,
+                temperature=temperature,
+                num_return_sequences=1,
+                pad_token_id=tokenizer.eos_token_id
+            )
+
+        return tokenizer.decode(output[0], skip_special_tokens=True)

-
+    except Exception as e:
+        return f"🚨 Model inference error: {str(e)}"

 # 📜 Function to Clean AI Output
 def post_process(text):
@@ -531,6 +544,7 @@ def post_process(text):

 # 📜 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
+    """Processes PDF and AI response."""
     file_context = read_files(file) if file else ""

     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
@@ -559,24 +573,12 @@ def main():
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")

     if uploaded_file:
-
-        st.success(f"✅ File uploaded: {uploaded_file.name}, Size: {uploaded_file.size / 1024:.2f} KB")
-
-        # ✅ Extract and preview text
-        extracted_text = read_files(uploaded_file)
-        if extracted_text:
-            st.write("📄 Extracted Text Preview:")
-            st.text_area("Extracted Text", extracted_text[:2000], height=200)  # Show first 2000 chars
-
-        st.write("Click the button below to analyze the contract.")
-
-        # Force button to always render
-        st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
+        st.success("✅ File uploaded successfully! Click below to analyze.")

         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
                 final_answer = granite_simple(
-                    "Perform a detailed
+                    "Perform a detailed analysis of the contract, highlighting risks, legal pitfalls, compliance issues, and potential disputes.",
                     uploaded_file
                 )
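A note on the LoRA section: the diff only shows the tail of the LoraConfig call, so the rank and target modules chosen for this commit are not visible here. For orientation, a complete configuration for a causal LM of this kind might look like the sketch below; the r, lora_alpha, lora_dropout, and target_modules values are illustrative assumptions, not taken from the commit.

# Illustrative only -- r, lora_alpha, lora_dropout, and target_modules
# are placeholder values, not the ones used in this commit.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    r=8,                                  # adapter rank
    lora_alpha=16,                        # scaling factor
    lora_dropout=0.05,                    # dropout on adapter inputs
    target_modules=["q_proj", "v_proj"],  # attention projections to adapt
    bias="none",
    task_type=TaskType.CAUSAL_LM
)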
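Also worth flagging: get_peft_model(model, lora_config) attaches freshly initialized adapters, which is the setup step for fine-tuning. If previously trained LoRA weights were meant to be applied at inference time, they would be loaded instead, roughly as below; the adapter path is a placeholder.

# Sketch: loading trained adapter weights instead of initializing
# new ones. "path/to/lora-adapter" is a hypothetical location.
from peft import PeftModel

model = PeftModel.from_pretrained(model, "path/to/lora-adapter")
model.eval()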
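The switch from pdfplumber to PyPDF2 keeps the same page-by-page extraction pattern; PdfReader accepts a file-like object, so Streamlit's UploadedFile can be passed to read_files directly. A minimal standalone check of this extraction path, with a placeholder filename:

# Quick check of the PyPDF2 path outside Streamlit.
# "sample_contract.pdf" is a placeholder filename.
from PyPDF2 import PdfReader

reader = PdfReader("sample_contract.pdf")
text = "\n".join(page.extract_text() or "" for page in reader.pages)
print(text[:500])  # scanned or encrypted PDFs typically yield no text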
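One detail of generate_response as committed: tokenizer.decode(output[0], ...) decodes the whole sequence, so the prompt is echoed back along with the completion (presumably part of what post_process cleans up). An alternative that decodes only the newly generated tokens, reusing the variable names from the function, would be:

# Decode only the tokens generated after the prompt; model_inputs and
# output are the variables from generate_response above.
prompt_len = model_inputs["input_ids"].shape[1]
completion = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)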