Update app.py
app.py CHANGED
@@ -445,8 +445,6 @@
 # if __name__ == '__main__':
 # main()
 
-
-
 import streamlit as st
 import os
 import re
@@ -455,7 +453,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
 
-# ✅ Force CPU execution for
+# ✅ Force CPU execution for Hugging Face Spaces
 device = torch.device("cpu")
 
 # 🔹 Load IBM Granite Model (CPU-Compatible)
@@ -463,8 +461,8 @@ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_NAME,
-    device_map="cpu",
-    torch_dtype=torch.float32
+    device_map="cpu",
+    torch_dtype=torch.float32
 )
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -481,17 +479,27 @@ lora_config = LoraConfig(
 model = get_peft_model(model, lora_config)
 model.eval()
 
-# π Function to Read & Extract Text from PDFs
+# π Function to Read & Extract Text from PDFs (With Debugging)
 def read_files(file):
+    st.write("π Processing uploaded file...")  # Debugging
     file_context = ""
-
-
-
-
-
-
+    try:
+        reader = PdfReader(file)
+        for page in reader.pages:
+            text = page.extract_text()
+            if text:
+                file_context += text + "\n"
+
+        if not file_context.strip():
+            st.error("⚠️ No text found. The document might be scanned or encrypted.")
+            return ""
+
+        st.write(f"✅ Extracted {len(file_context)} characters.")  # Debugging
+        return file_context.strip()
 
-
+    except Exception as e:
+        st.error(f"⚠️ Error reading PDF: {e}")
+        return ""
 
 # π Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
@@ -521,18 +529,25 @@ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
 
 # π Function to Clean AI Output
 def post_process(text):
-    cleaned = re.sub(r'日+', '', text)
+    cleaned = re.sub(r'日+', '', text)
     lines = cleaned.splitlines()
     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
    return "\n".join(unique_lines)
 
 # π Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
-
-
+    if not file:
+        st.error("⚠️ No file detected. Please upload a document.")
+        return ""
+
+    file_context = read_files(file)
+    if not file_context:
+        st.error("⚠️ No valid text extracted from the document.")
+        return ""
+
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-
     messages = format_prompt(system_message, prompt, file_context)
+
     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 
     response = generate_response(input_text)
@@ -540,7 +555,7 @@ def granite_simple(prompt, file):
 
 # 🔹 Streamlit UI
 def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="π"
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="π")
 
     st.title("π AI-Powered Contract Analysis Tool")
     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -555,26 +570,22 @@ def main():
     # 🔹 File Upload Section
     uploaded_file = st.file_uploader("π Upload a contract document (PDF)", type="pdf")
 
-    if uploaded_file
-
-
-        f.write(uploaded_file.getbuffer())
-
-        st.success("✅ File uploaded successfully!")
-
-    # 🔹 User Input for Analysis
-    user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
+    if uploaded_file:
+        st.success(f"✅ File uploaded: {uploaded_file.name}")
+        st.write(f"π File Size: {uploaded_file.size / 1024:.2f} KB")
 
     if st.button("π Analyze Document"):
         with st.spinner("Analyzing contract document... ⏳"):
-            final_answer = granite_simple(
-
-
-
-
-
-
+            final_answer = granite_simple(
+                "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+                uploaded_file
+            )
+
+            if final_answer:
+                st.subheader("π Analysis Result")
+                st.write(final_answer)
+            else:
+                st.error("⚠️ No response generated. Please check your input.")
 
 # 🔥 Run Streamlit App
 if __name__ == '__main__':
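
Note: the rewritten read_files above routes all feedback through Streamlit widgets, but the underlying extraction can be checked outside the app. Below is a minimal standalone sketch of the same PyPDF2 flow, assuming PyPDF2 >= 3.0; extract_pdf_text and contract.pdf are illustrative names, not part of this commit.

# Standalone sketch of the PDF-extraction path added in read_files.
from PyPDF2 import PdfReader

def extract_pdf_text(path):
    try:
        reader = PdfReader(path)
        text = ""
        for page in reader.pages:
            page_text = page.extract_text()  # may be empty for scanned/image-only pages
            if page_text:
                text += page_text + "\n"
        return text.strip()
    except Exception as exc:  # encrypted or corrupt PDFs raise here
        print(f"Error reading PDF: {exc}")
        return ""

if __name__ == "__main__":
    content = extract_pdf_text("contract.pdf")  # hypothetical sample file
    if content:
        print(f"Extracted {len(content)} characters.")
    else:
        print("No text found; the document may be scanned or encrypted.")

As in the committed code, an empty return value is the signal for "scanned, encrypted, or unreadable", which the Streamlit layer then surfaces via st.error.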
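
Two smaller fixes in this commit are easy to miss. First, the previous revision's st.set_page_config(...) call was missing its closing parenthesis, a SyntaxError that prevented the script from loading at all; Streamlit also requires set_page_config to be the first Streamlit command the script executes, which its placement at the top of main() satisfies. Second, post_process de-duplicates repeated output lines with an order-preserving dict.fromkeys pass. A sketch of just that step follows, with an invented sample input (the committed function additionally strips runs of a stray CJK character via re.sub before de-duplicating):

# Sketch of post_process's de-duplication step; dedupe_lines is an
# illustrative name and the sample input is invented.
def dedupe_lines(text):
    lines = text.splitlines()
    # dict.fromkeys keeps first occurrences in insertion order (Python 3.7+)
    unique_lines = list(dict.fromkeys(line.strip() for line in lines if line.strip()))
    return "\n".join(unique_lines)

print(dedupe_lines("Clause 1\nClause 1\n\nClause 2"))
# Clause 1
# Clause 2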