Spaces:

TabasumDev
/

GraniteByte

Sleeping

App Files Files Community

TabasumDev committed on Feb 22

Commit

a493d1c

verified ·

1 Parent(s): 57038dd

Update app.py

Browse files

Files changed (1) hide show

app.py +193 -54

app.py CHANGED Viewed

@@ -278,6 +278,175 @@
 # ###################################################################################
 import streamlit as st
 import os
 import re
@@ -286,7 +455,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
-# ✅ Force CPU execution for Hugging Face Spaces
 device = torch.device("cpu")
 # 🔹 Load IBM Granite Model (CPU-Compatible)
@@ -295,7 +464,7 @@ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     device_map="cpu",  # Force CPU execution
-    torch_dtype=torch.float32  # Use float32 since Hugging Face runs on CPU
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -312,21 +481,6 @@ lora_config = LoraConfig(
 model = get_peft_model(model, lora_config)
 model.eval()
-# 🛠 Function to Read & Extract Text from PDFs
-# def read_files(file):
-#     file_context = ""
-#     try:
-#         reader = PdfReader(file)
-#         for page in reader.pages:
-#             text = page.extract_text()
-#             if text:
-#                 file_context += text + "\n"
-#     except Exception as e:
-#         st.error(f"⚠️ Error reading PDF file: {e}")
-#         return ""
-#     return file_context.strip()
 # 🛠 Function to Read & Extract Text from PDFs
 def read_files(file):
     file_context = ""
@@ -337,12 +491,8 @@ def read_files(file):
         if text:
             file_context += text + "\n"
-    if not file_context.strip():
-        return "⚠️ No text found. The document might be scanned or encrypted."
     return file_context.strip()
 # 🛠 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
@@ -354,9 +504,8 @@ def format_prompt(system_msg, user_msg, file_context=""):
 # 🛠 Function to Generate AI Responses
 def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-    st.write("🔍 Generating response...")  # Debugging message
     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
     with torch.no_grad():
         output = model.generate(
             **model_inputs,
@@ -367,10 +516,8 @@ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
             num_return_sequences=1,
             pad_token_id=tokenizer.eos_token_id
         )
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-    st.write("✅ Response Generated!")  # Debugging message
-    return response
 # 🛠 Function to Clean AI Output
 def post_process(text):
@@ -382,23 +529,18 @@ def post_process(text):
 # 🛠 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
     file_context = read_files(file) if file else ""
-    # Debugging: Show extracted file content preview
-    if not file_context:
-        st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.")
-        return "Error: No content found in the document."
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
     messages = format_prompt(system_message, prompt, file_context)
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     response = generate_response(input_text)
     return post_process(response)
 # 🔹 Streamlit UI
 def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="📜")
     st.title("📜 AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -413,38 +555,35 @@ def main():
     # 🔹 File Upload Section
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
-    if uploaded_file:
-        st.success(f"✅ File uploaded successfully! File Name: {uploaded_file.name}")
-        st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB")
-        # Debugging: Show extracted text preview
-        pdf_text = read_files(uploaded_file)
-        if pdf_text:
-            st.write("**Extracted Sample Text:**")
-            st.code(pdf_text[:500])  # Show first 500 characters
-        else:
-            st.error("⚠️ No readable text found in the document.")
-        st.write("Click the button below to analyze the contract.")
-        # Force button to always render
-        st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
-                final_answer = granite_simple(
-                    "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
-                    uploaded_file
-                )
             # 🔹 Display Analysis Result
             st.subheader("📑 Analysis Result")
             st.write(final_answer)
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
     main()
 # import streamlit as st
 # from PyPDF2 import PdfReader

 # ###################################################################################
+# import streamlit as st
+# import os
+# import re
+# import torch
+# from transformers import AutoModelForCausalLM, AutoTokenizer
+# from PyPDF2 import PdfReader
+# from peft import get_peft_model, LoraConfig, TaskType
+# # ✅ Force CPU execution for Hugging Face Spaces
+# device = torch.device("cpu")
+# # 🔹 Load IBM Granite Model (CPU-Compatible)
+# MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
+# model = AutoModelForCausalLM.from_pretrained(
+#     MODEL_NAME,
+#     device_map="cpu",  # Force CPU execution
+#     torch_dtype=torch.float32  # Use float32 since Hugging Face runs on CPU
+# )
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# # 🔹 Apply LoRA Fine-Tuning Configuration
+# lora_config = LoraConfig(
+#     r=8,
+#     lora_alpha=32,
+#     target_modules=["q_proj", "v_proj"],
+#     lora_dropout=0.1,
+#     bias="none",
+#     task_type=TaskType.CAUSAL_LM
+# )
+# model = get_peft_model(model, lora_config)
+# model.eval()
+# # 🛠 Function to Read & Extract Text from PDFs
+# # def read_files(file):
+# #     file_context = ""
+# #     try:
+# #         reader = PdfReader(file)
+# #         for page in reader.pages:
+# #             text = page.extract_text()
+# #             if text:
+# #                 file_context += text + "\n"
+# #     except Exception as e:
+# #         st.error(f"⚠️ Error reading PDF file: {e}")
+# #         return ""
+# #     return file_context.strip()
+# # 🛠 Function to Read & Extract Text from PDFs
+# def read_files(file):
+#     file_context = ""
+#     reader = PdfReader(file)
+#     for page in reader.pages:
+#         text = page.extract_text()
+#         if text:
+#             file_context += text + "\n"
+#     if not file_context.strip():
+#         return "⚠️ No text found. The document might be scanned or encrypted."
+#     return file_context.strip()
+# # 🛠 Function to Format AI Prompts
+# def format_prompt(system_msg, user_msg, file_context=""):
+#     if file_context:
+#         system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+#     return [
+#         {"role": "system", "content": system_msg},
+#         {"role": "user", "content": user_msg}
+#     ]
+# # 🛠 Function to Generate AI Responses
+# def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
+#     st.write("🔍 Generating response...")  # Debugging message
+#     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+#     with torch.no_grad():
+#         output = model.generate(
+#             **model_inputs,
+#             max_new_tokens=max_tokens,
+#             do_sample=True,
+#             top_p=top_p,
+#             temperature=temperature,
+#             num_return_sequences=1,
+#             pad_token_id=tokenizer.eos_token_id
+#         )
+#     response = tokenizer.decode(output[0], skip_special_tokens=True)
+#     st.write("✅ Response Generated!")  # Debugging message
+#     return response
+# # 🛠 Function to Clean AI Output
+# def post_process(text):
+#     cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
+#     lines = cleaned.splitlines()
+#     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+#     return "\n".join(unique_lines)
+# # 🛠 Function to Handle RAG with IBM Granite & Streamlit
+# def granite_simple(prompt, file):
+#     file_context = read_files(file) if file else ""
+#     # Debugging: Show extracted file content preview
+#     if not file_context:
+#         st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.")
+#         return "Error: No content found in the document."
+#     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+#     messages = format_prompt(system_message, prompt, file_context)
+#     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+#     response = generate_response(input_text)
+#     return post_process(response)
+# # 🔹 Streamlit UI
+# def main():
+#     st.set_page_config(page_title="Contract Analysis AI", page_icon="📜")
+#     st.title("📜 AI-Powered Contract Analysis Tool")
+#     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
+#     # 🔹 Sidebar Settings
+#     with st.sidebar:
+#         st.header("⚙️ Settings")
+#         max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
+#         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
+#         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
+#     # 🔹 File Upload Section
+#     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
+#     if uploaded_file:
+#         st.success(f"✅ File uploaded successfully! File Name: {uploaded_file.name}")
+#         st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB")
+#         # Debugging: Show extracted text preview
+#         pdf_text = read_files(uploaded_file)
+#         if pdf_text:
+#             st.write("**Extracted Sample Text:**")
+#             st.code(pdf_text[:500])  # Show first 500 characters
+#         else:
+#             st.error("⚠️ No readable text found in the document.")
+#         st.write("Click the button below to analyze the contract.")
+#         # Force button to always render
+#         st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
+#         if st.button("🔍 Analyze Document"):
+#             with st.spinner("Analyzing contract document... ⏳"):
+#                 final_answer = granite_simple(
+#                     "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+#                     uploaded_file
+#                 )
+#             # 🔹 Display Analysis Result
+#             st.subheader("📑 Analysis Result")
+#             st.write(final_answer)
+# 🔥 Run Streamlit App
+# if __name__ == '__main__':
+#     main()
 import streamlit as st
 import os
 import re
 from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
+# ✅ Force CPU execution for Streamlit Cloud
 device = torch.device("cpu")
 # 🔹 Load IBM Granite Model (CPU-Compatible)
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
     device_map="cpu",  # Force CPU execution
+    torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 model = get_peft_model(model, lora_config)
 model.eval()
 # 🛠 Function to Read & Extract Text from PDFs
 def read_files(file):
     file_context = ""
         if text:
             file_context += text + "\n"
     return file_context.strip()
 # 🛠 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
 # 🛠 Function to Generate AI Responses
 def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
     with torch.no_grad():
         output = model.generate(
             **model_inputs,
             num_return_sequences=1,
             pad_token_id=tokenizer.eos_token_id
         )
+    return tokenizer.decode(output[0], skip_special_tokens=True)
 # 🛠 Function to Clean AI Output
 def post_process(text):
 # 🛠 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
     """Run one contract-analysis query and return the cleaned model output.

     Text is extracted from ``file`` with ``read_files`` when ``file`` is
     truthy; otherwise an empty context is used. The chat messages are built
     by ``format_prompt``, rendered through the tokenizer's chat template,
     passed to ``generate_response``, and the generated text is returned
     after ``post_process``.
     """
     if file:
         document_text = read_files(file)
     else:
         document_text = ""

     chat_messages = format_prompt(
         "You are IBM Granite, a legal AI assistant specializing in contract analysis.",
         prompt,
         document_text,
     )
     # Render to a plain string (not token ids) with the generation prompt appended.
     rendered_prompt = tokenizer.apply_chat_template(
         chat_messages,
         tokenize=False,
         add_generation_prompt=True,
     )
     return post_process(generate_response(rendered_prompt))
 # 🔹 Streamlit UI
 def main():
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide")
     st.title("📜 AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
     # 🔹 File Upload Section
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
+    if uploaded_file is not None:
+        temp_file_path = "temp_uploaded_contract.pdf"
+        with open(temp_file_path, "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        st.success("✅ File uploaded successfully!")
+        # 🔹 User Input for Analysis
+        user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
         if st.button("🔍 Analyze Document"):
             with st.spinner("Analyzing contract document... ⏳"):
+                final_answer = granite_simple(user_prompt, temp_file_path)
             # 🔹 Display Analysis Result
             st.subheader("📑 Analysis Result")
             st.write(final_answer)
+            # 🔹 Remove Temporary File
+            os.remove(temp_file_path)
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
     main()
 # import streamlit as st
 # from PyPDF2 import PdfReader