TabasumDev committed on
Commit
2952b2a
·
verified ·
1 Parent(s): a493d1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -35
app.py CHANGED
@@ -445,8 +445,6 @@
445
  # if __name__ == '__main__':
446
  # main()
447
 
448
-
449
-
450
  import streamlit as st
451
  import os
452
  import re
@@ -455,7 +453,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
455
  from PyPDF2 import PdfReader
456
  from peft import get_peft_model, LoraConfig, TaskType
457
 
458
- # βœ… Force CPU execution for Streamlit Cloud
459
  device = torch.device("cpu")
460
 
461
  # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
@@ -463,8 +461,8 @@ MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
463
 
464
  model = AutoModelForCausalLM.from_pretrained(
465
  MODEL_NAME,
466
- device_map="cpu", # Force CPU execution
467
- torch_dtype=torch.float32 # Use float32 since Streamlit runs on CPU
468
  )
469
 
470
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -481,17 +479,27 @@ lora_config = LoraConfig(
481
  model = get_peft_model(model, lora_config)
482
  model.eval()
483
 
484
- # πŸ›  Function to Read & Extract Text from PDFs
485
  def read_files(file):
 
486
  file_context = ""
487
- reader = PdfReader(file)
488
-
489
- for page in reader.pages:
490
- text = page.extract_text()
491
- if text:
492
- file_context += text + "\n"
 
 
 
 
 
 
 
493
 
494
- return file_context.strip()
 
 
495
 
496
  # πŸ›  Function to Format AI Prompts
497
  def format_prompt(system_msg, user_msg, file_context=""):
@@ -521,18 +529,25 @@ def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
521
 
522
  # πŸ›  Function to Clean AI Output
523
  def post_process(text):
524
- cleaned = re.sub(r'ζˆ₯+', '', text) # Remove unwanted symbols
525
  lines = cleaned.splitlines()
526
  unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
527
  return "\n".join(unique_lines)
528
 
529
  # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
530
  def granite_simple(prompt, file):
531
- file_context = read_files(file) if file else ""
532
-
 
 
 
 
 
 
 
533
  system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
534
-
535
  messages = format_prompt(system_message, prompt, file_context)
 
536
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
537
 
538
  response = generate_response(input_text)
@@ -540,7 +555,7 @@ def granite_simple(prompt, file):
540
 
541
  # πŸ”Ή Streamlit UI
542
  def main():
543
- st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ", layout="wide")
544
 
545
  st.title("πŸ“œ AI-Powered Contract Analysis Tool")
546
  st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
@@ -555,26 +570,22 @@ def main():
555
  # πŸ”Ή File Upload Section
556
  uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
557
 
558
- if uploaded_file is not None:
559
- temp_file_path = "temp_uploaded_contract.pdf"
560
- with open(temp_file_path, "wb") as f:
561
- f.write(uploaded_file.getbuffer())
562
-
563
- st.success("βœ… File uploaded successfully!")
564
-
565
- # πŸ”Ή User Input for Analysis
566
- user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
567
 
568
  if st.button("πŸ” Analyze Document"):
569
  with st.spinner("Analyzing contract document... ⏳"):
570
- final_answer = granite_simple(user_prompt, temp_file_path)
571
-
572
- # πŸ”Ή Display Analysis Result
573
- st.subheader("πŸ“‘ Analysis Result")
574
- st.write(final_answer)
575
-
576
- # πŸ”Ή Remove Temporary File
577
- os.remove(temp_file_path)
 
 
578
 
579
  # πŸ”₯ Run Streamlit App
580
  if __name__ == '__main__':
 
445
  # if __name__ == '__main__':
446
  # main()
447
 
 
 
448
  import streamlit as st
449
  import os
450
  import re
 
453
  from PyPDF2 import PdfReader
454
  from peft import get_peft_model, LoraConfig, TaskType
455
 
456
+ # βœ… Force CPU execution for Hugging Face Spaces
457
  device = torch.device("cpu")
458
 
459
  # πŸ”Ή Load IBM Granite Model (CPU-Compatible)
 
461
 
462
  model = AutoModelForCausalLM.from_pretrained(
463
  MODEL_NAME,
464
+ device_map="cpu",
465
+ torch_dtype=torch.float32
466
  )
467
 
468
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
479
  model = get_peft_model(model, lora_config)
480
  model.eval()
481
 
482
+ # πŸ›  Function to Read & Extract Text from PDFs (With Debugging)
483
  def read_files(file):
484
+ st.write("πŸ“‚ Processing uploaded file...") # Debugging
485
  file_context = ""
486
+ try:
487
+ reader = PdfReader(file)
488
+ for page in reader.pages:
489
+ text = page.extract_text()
490
+ if text:
491
+ file_context += text + "\n"
492
+
493
+ if not file_context.strip():
494
+ st.error("⚠️ No text found. The document might be scanned or encrypted.")
495
+ return ""
496
+
497
+ st.write(f"βœ… Extracted {len(file_context)} characters.") # Debugging
498
+ return file_context.strip()
499
 
500
+ except Exception as e:
501
+ st.error(f"⚠️ Error reading PDF: {e}")
502
+ return ""
503
 
504
  # πŸ›  Function to Format AI Prompts
505
  def format_prompt(system_msg, user_msg, file_context=""):
 
529
 
530
  # πŸ›  Function to Clean AI Output
531
  def post_process(text):
532
+ cleaned = re.sub(r'ζˆ₯+', '', text)
533
  lines = cleaned.splitlines()
534
  unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
535
  return "\n".join(unique_lines)
536
 
537
  # πŸ›  Function to Handle RAG with IBM Granite & Streamlit
538
  def granite_simple(prompt, file):
539
+ if not file:
540
+ st.error("⚠️ No file detected. Please upload a document.")
541
+ return ""
542
+
543
+ file_context = read_files(file)
544
+ if not file_context:
545
+ st.error("⚠️ No valid text extracted from the document.")
546
+ return ""
547
+
548
  system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
 
549
  messages = format_prompt(system_message, prompt, file_context)
550
+
551
  input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
552
 
553
  response = generate_response(input_text)
 
555
 
556
  # πŸ”Ή Streamlit UI
557
  def main():
558
+ st.set_page_config(page_title="Contract Analysis AI", page_icon="πŸ“œ")
559
 
560
  st.title("πŸ“œ AI-Powered Contract Analysis Tool")
561
  st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
 
570
  # πŸ”Ή File Upload Section
571
  uploaded_file = st.file_uploader("πŸ“‚ Upload a contract document (PDF)", type="pdf")
572
 
573
+ if uploaded_file:
574
+ st.success(f"βœ… File uploaded: {uploaded_file.name}")
575
+ st.write(f"πŸ“ File Size: {uploaded_file.size / 1024:.2f} KB")
 
 
 
 
 
 
576
 
577
  if st.button("πŸ” Analyze Document"):
578
  with st.spinner("Analyzing contract document... ⏳"):
579
+ final_answer = granite_simple(
580
+ "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
581
+ uploaded_file
582
+ )
583
+
584
+ if final_answer:
585
+ st.subheader("πŸ“‘ Analysis Result")
586
+ st.write(final_answer)
587
+ else:
588
+ st.error("⚠️ No response generated. Please check your input.")
589
 
590
  # πŸ”₯ Run Streamlit App
591
  if __name__ == '__main__':