TabasumDev committed on
Commit
57038dd
·
verified ·
1 Parent(s): b165b5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -148
app.py CHANGED
@@ -278,41 +278,41 @@
278
  # ###################################################################################
279
 
280
 
281
- # import streamlit as st
282
- # import os
283
- # import re
284
- # import torch
285
- # from transformers import AutoModelForCausalLM, AutoTokenizer
286
- # from PyPDF2 import PdfReader
287
- # from peft import get_peft_model, LoraConfig, TaskType
288
-
289
- # # βœ… Force CPU execution for Hugging Face Spaces
290
- # device = torch.device("cpu")
291
-
292
- # # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
293
- # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
294
-
295
- # model = AutoModelForCausalLM.from_pretrained(
296
- # MODEL_NAME,
297
- # device_map="cpu", # Force CPU execution
298
- # torch_dtype=torch.float32 # Use float32 since Hugging Face runs on CPU
299
- # )
300
-
301
- # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
302
-
303
- # # πŸ”Ή Apply LoRA Fine-Tuning Configuration
304
- # lora_config = LoraConfig(
305
- # r=8,
306
- # lora_alpha=32,
307
- # target_modules=["q_proj", "v_proj"],
308
- # lora_dropout=0.1,
309
- # bias="none",
310
- # task_type=TaskType.CAUSAL_LM
311
- # )
312
- # model = get_peft_model(model, lora_config)
313
- # model.eval()
314
-
315
- # # πŸ›  Function to Read & Extract Text from PDFs
316
  # def read_files(file):
317
  # file_context = ""
318
  # try:
@@ -327,131 +327,147 @@
327
 
328
  # return file_context.strip()
329
 
330
- # # πŸ›  Function to Format AI Prompts
331
- # def format_prompt(system_msg, user_msg, file_context=""):
332
- # if file_context:
333
- # system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
334
- # return [
335
- # {"role": "system", "content": system_msg},
336
- # {"role": "user", "content": user_msg}
337
- # ]
338
-
339
- # # πŸ›  Function to Generate AI Responses
340
- # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
341
- # st.write("πŸ” Generating response...") # Debugging message
342
- # model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
343
-
344
- # with torch.no_grad():
345
- # output = model.generate(
346
- # **model_inputs,
347
- # max_new_tokens=max_tokens,
348
- # do_sample=True,
349
- # top_p=top_p,
350
- # temperature=temperature,
351
- # num_return_sequences=1,
352
- # pad_token_id=tokenizer.eos_token_id
353
- # )
354
-
355
- # response = tokenizer.decode(output[0], skip_special_tokens=True)
356
- # st.write("βœ… Response Generated!") # Debugging message
357
- # return response
358
-
359
- # # πŸ›  Function to Clean AI Output
360
- # def post_process(text):
361
- # cleaned = re.sub(r'ζˆ₯+', '', text) # Remove unwanted symbols
362
- # lines = cleaned.splitlines()
363
- # unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
364
- # return "\n".join(unique_lines)
365
-
366
- # # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
367
- # def granite_simple(prompt, file):
368
- # file_context = read_files(file) if file else ""
369
-
370
- # # Debugging: Show extracted file content preview
371
- # if not file_context:
372
- # st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.")
373
- # return "Error: No content found in the document."
374
-
375
- # system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
376
-
377
- # messages = format_prompt(system_message, prompt, file_context)
378
- # input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
379
-
380
- # response = generate_response(input_text)
381
- # return post_process(response)
382
-
383
- # # πŸ”Ή Streamlit UI
384
- # def main():
385
- # st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
386
-
387
- # st.title("πŸ“œ AI-Powered Contract Analysis Tool")
388
- # st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
389
-
390
- # # πŸ”Ή Sidebar Settings
391
- # with st.sidebar:
392
- # st.header("βš™οΈ Settings")
393
- # max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
394
- # top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
395
- # temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
396
-
397
- # # πŸ”Ή File Upload Section
398
- # uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
- # if uploaded_file:
401
- # st.success(f"βœ… File uploaded successfully! File Name: {uploaded_file.name}")
402
- # st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB")
403
 
404
- # # Debugging: Show extracted text preview
405
- # pdf_text = read_files(uploaded_file)
406
- # if pdf_text:
407
- # st.write("**Extracted Sample Text:**")
408
- # st.code(pdf_text[:500]) # Show first 500 characters
409
- # else:
410
- # st.error("⚠️ No readable text found in the document.")
411
 
412
- # st.write("Click the button below to analyze the contract.")
 
 
 
 
 
413
 
414
- # # Force button to always render
415
- # st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
 
416
 
417
- # if st.button("πŸ” Analyze Document"):
418
- # with st.spinner("Analyzing contract document... ⏳"):
419
- # final_answer = granite_simple(
420
- # "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
421
- # uploaded_file
422
- # )
423
 
424
- # # πŸ”Ή Display Analysis Result
425
- # st.subheader("πŸ“‘ Analysis Result")
426
- # st.write(final_answer)
427
-
428
- # # πŸ”₯ Run Streamlit App
429
- # if __name__ == '__main__':
430
- # main()
431
-
432
- import streamlit as st
433
- from PyPDF2 import PdfReader
434
 
435
- st.title("πŸ“‚ PDF Upload Debugger")
436
 
437
- uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
438
 
439
- if uploaded_file:
440
- st.success(f"βœ… File uploaded: {uploaded_file.name}")
441
- st.write(f"πŸ“ File Size: {uploaded_file.size / 1024:.2f} KB")
442
 
443
- try:
444
- reader = PdfReader(uploaded_file)
445
- text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
446
 
447
- if text.strip():
448
- st.subheader("Extracted Text (First 500 characters)")
449
- st.code(text[:500]) # Show a preview of the text
450
- else:
451
- st.error("⚠️ No text found. The document might be scanned or encrypted.")
452
 
453
- except Exception as e:
454
- st.error(f"⚠️ Error reading PDF: {e}")
455
 
456
 
457
  # ###################################################################################
 
278
  # ###################################################################################
279
 
280
 
281
+ import streamlit as st
282
+ import os
283
+ import re
284
+ import torch
285
+ from transformers import AutoModelForCausalLM, AutoTokenizer
286
+ from PyPDF2 import PdfReader
287
+ from peft import get_peft_model, LoraConfig, TaskType
288
+
289
+ # βœ… Force CPU execution for Hugging Face Spaces
290
+ device = torch.device("cpu")
291
+
292
+ # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
293
+ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
294
+
295
+ model = AutoModelForCausalLM.from_pretrained(
296
+ MODEL_NAME,
297
+ device_map="cpu", # Force CPU execution
298
+ torch_dtype=torch.float32 # Use float32 since Hugging Face runs on CPU
299
+ )
300
+
301
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
302
+
303
+ # πŸ”Ή Apply LoRA Fine-Tuning Configuration
304
+ lora_config = LoraConfig(
305
+ r=8,
306
+ lora_alpha=32,
307
+ target_modules=["q_proj", "v_proj"],
308
+ lora_dropout=0.1,
309
+ bias="none",
310
+ task_type=TaskType.CAUSAL_LM
311
+ )
312
+ model = get_peft_model(model, lora_config)
313
+ model.eval()
314
+
315
+ # πŸ›  Function to Read & Extract Text from PDFs
316
  # def read_files(file):
317
  # file_context = ""
318
  # try:
 
327
 
328
  # return file_context.strip()
329
 
330
+ # πŸ›  Function to Read & Extract Text from PDFs
331
+ def read_files(file):
332
+ file_context = ""
333
+ reader = PdfReader(file)
334
+
335
+ for page in reader.pages:
336
+ text = page.extract_text()
337
+ if text:
338
+ file_context += text + "\n"
339
+
340
+ if not file_context.strip():
341
+ return "⚠️ No text found. The document might be scanned or encrypted."
342
+
343
+ return file_context.strip()
344
+
345
+
346
+ # πŸ›  Function to Format AI Prompts
347
+ def format_prompt(system_msg, user_msg, file_context=""):
348
+ if file_context:
349
+ system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
350
+ return [
351
+ {"role": "system", "content": system_msg},
352
+ {"role": "user", "content": user_msg}
353
+ ]
354
+
355
+ # πŸ›  Function to Generate AI Responses
356
+ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
357
+ st.write("πŸ” Generating response...") # Debugging message
358
+ model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
359
+
360
+ with torch.no_grad():
361
+ output = model.generate(
362
+ **model_inputs,
363
+ max_new_tokens=max_tokens,
364
+ do_sample=True,
365
+ top_p=top_p,
366
+ temperature=temperature,
367
+ num_return_sequences=1,
368
+ pad_token_id=tokenizer.eos_token_id
369
+ )
370
+
371
+ response = tokenizer.decode(output[0], skip_special_tokens=True)
372
+ st.write("βœ… Response Generated!") # Debugging message
373
+ return response
374
+
375
+ # πŸ›  Function to Clean AI Output
376
+ def post_process(text):
377
+ cleaned = re.sub(r'ζˆ₯+', '', text) # Remove unwanted symbols
378
+ lines = cleaned.splitlines()
379
+ unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
380
+ return "\n".join(unique_lines)
381
+
382
+ # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
383
+ def granite_simple(prompt, file):
384
+ file_context = read_files(file) if file else ""
385
+
386
+ # Debugging: Show extracted file content preview
387
+ if not file_context:
388
+ st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.")
389
+ return "Error: No content found in the document."
390
+
391
+ system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
392
+
393
+ messages = format_prompt(system_message, prompt, file_context)
394
+ input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
395
+
396
+ response = generate_response(input_text)
397
+ return post_process(response)
398
+
399
+ # πŸ”Ή Streamlit UI
400
+ def main():
401
+ st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
402
+
403
+ st.title("πŸ“œ AI-Powered Contract Analysis Tool")
404
+ st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
405
+
406
+ # πŸ”Ή Sidebar Settings
407
+ with st.sidebar:
408
+ st.header("βš™οΈ Settings")
409
+ max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
410
+ top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
411
+ temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
412
+
413
+ # πŸ”Ή File Upload Section
414
+ uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
415
+
416
+ if uploaded_file:
417
+ st.success(f"βœ… File uploaded successfully! File Name: {uploaded_file.name}")
418
+ st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB")
419
+
420
+ # Debugging: Show extracted text preview
421
+ pdf_text = read_files(uploaded_file)
422
+ if pdf_text:
423
+ st.write("**Extracted Sample Text:**")
424
+ st.code(pdf_text[:500]) # Show first 500 characters
425
+ else:
426
+ st.error("⚠️ No readable text found in the document.")
427
 
428
+ st.write("Click the button below to analyze the contract.")
 
 
429
 
430
+ # Force button to always render
431
+ st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
 
 
 
 
 
432
 
433
+ if st.button("πŸ” Analyze Document"):
434
+ with st.spinner("Analyzing contract document... ⏳"):
435
+ final_answer = granite_simple(
436
+ "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
437
+ uploaded_file
438
+ )
439
 
440
+ # πŸ”Ή Display Analysis Result
441
+ st.subheader("πŸ“‘ Analysis Result")
442
+ st.write(final_answer)
443
 
444
+ # πŸ”₯ Run Streamlit App
445
+ if __name__ == '__main__':
446
+ main()
 
 
 
447
 
448
+ # import streamlit as st
449
+ # from PyPDF2 import PdfReader
 
 
 
 
 
 
 
 
450
 
451
+ # st.title("πŸ“‚ PDF Upload Debugger")
452
 
453
+ # uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
454
 
455
+ # if uploaded_file:
456
+ # st.success(f"βœ… File uploaded: {uploaded_file.name}")
457
+ # st.write(f"πŸ“ File Size: {uploaded_file.size / 1024:.2f} KB")
458
 
459
+ # try:
460
+ # reader = PdfReader(uploaded_file)
461
+ # text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
462
 
463
+ # if text.strip():
464
+ # st.subheader("Extracted Text (First 500 characters)")
465
+ # st.code(text[:500]) # Show a preview of the text
466
+ # else:
467
+ # st.error("⚠️ No text found. The document might be scanned or encrypted.")
468
 
469
+ # except Exception as e:
470
+ # st.error(f"⚠️ Error reading PDF: {e}")
471
 
472
 
473
  # ###################################################################################