TabasumDev committed
Commit 461646b · verified · 1 Parent(s): 6a34b4b

Update app.py

Files changed (1): app.py +52 -50
app.py CHANGED
@@ -445,29 +445,31 @@
 # if __name__ == '__main__':
 #     main()
 
-
-
 import streamlit as st
 import os
 import re
 import torch
-import pdfplumber
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from PyPDF2 import PdfReader
 from peft import get_peft_model, LoraConfig, TaskType
 
-# ✅ Force CPU execution
-device = torch.device("cpu")
+# ✅ Force CPU execution and disable bitsandbytes
+os.environ["BITSANDBYTES_NOWELCOME"] = "1"
+os.environ["USE_TORCH_CPP_BACKEND"] = "1"
 
 # 🔹 Load IBM Granite Model (CPU-Compatible)
 MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="cpu",  # Force CPU execution
-    torch_dtype=torch.float32
-)
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+try:
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="cpu",
+        torch_dtype=torch.float32
+    )
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+except Exception as e:
+    st.error(f"🚨 Model loading failed: {str(e)}")
+    st.stop()
 
 # 🔹 Apply LoRA Fine-Tuning Configuration
 lora_config = LoraConfig(
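The load path in the hunk above can be sanity-checked outside Streamlit. Below is a minimal smoke-test sketch, not part of app.py: it assumes torch, transformers, and accelerate (which device_map requires) are installed and that the checkpoint can be fetched from the Hugging Face Hub; the prompt and max_new_tokens are arbitrary choices.

    # Hypothetical standalone CPU smoke test for the checkpoint used above.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, device_map="cpu", torch_dtype=torch.float32
    )
    inputs = tokenizer("Hello", return_tensors="pt")
    with torch.no_grad():  # inference only, no gradients needed
        out = model.generate(**inputs, max_new_tokens=8)
    print(tokenizer.decode(out[0], skip_special_tokens=True))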
@@ -478,28 +480,34 @@ lora_config = LoraConfig(
     bias="none",
     task_type=TaskType.CAUSAL_LM
 )
-model = get_peft_model(model, lora_config)
-model.eval()
+
+try:
+    model = get_peft_model(model, lora_config)
+    model.eval()
+except Exception as e:
+    st.error(f"🚨 LoRA model initialization failed: {str(e)}")
+    st.stop()
 
 # 🛠 Function to Read & Extract Text from PDFs
-def read_files(uploaded_file):
+def read_files(file):
+    """Extracts text from uploaded PDF file."""
     file_context = ""
-
-    with pdfplumber.open(uploaded_file) as pdf:
-        for page in pdf.pages:
+    try:
+        reader = PdfReader(file)
+        for page in reader.pages:
             text = page.extract_text()
             if text:
                 file_context += text + "\n"
+    except Exception as e:
+        st.error(f"🚨 PDF reading failed: {str(e)}")
+        return ""
 
-    if not file_context.strip():
-        st.error("⚠️ No text extracted. This document may be scanned or encrypted.")
-
-    return file_context.strip()
+    return file_context.strip() if file_context else "No readable text found in the document."
 
 # 🛠 Function to Format AI Prompts
 def format_prompt(system_msg, user_msg, file_context=""):
     if file_context:
-        system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+        system_msg += " The user has provided a contract document. Analyze it and extract key insights, but do not summarize it."
     return [
         {"role": "system", "content": system_msg},
         {"role": "user", "content": user_msg}
@@ -507,20 +515,25 @@ def format_prompt(system_msg, user_msg, file_context=""):
507
 
508
  # πŸ›  Function to Generate AI Responses
509
  def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
510
- model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
511
-
512
- with torch.no_grad():
513
- output = model.generate(
514
- **model_inputs,
515
- max_new_tokens=max_tokens,
516
- do_sample=True,
517
- top_p=top_p,
518
- temperature=temperature,
519
- num_return_sequences=1,
520
- pad_token_id=tokenizer.eos_token_id
521
- )
 
 
 
 
522
 
523
- return tokenizer.decode(output[0], skip_special_tokens=True)
 
524
 
525
  # πŸ›  Function to Clean AI Output
526
  def post_process(text):
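generate_response takes a single input_text string, while format_prompt yields a message list, so some glue must render the messages into a string; that step presumably lives in granite_simple, whose full body is not shown in these hunks. A plausible sketch using the standard transformers chat-template API; treat it as an assumption, not the committed code:

    # Hypothetical glue between format_prompt and generate_response.
    messages = format_prompt(system_message, prompt, file_context)
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    raw_output = generate_response(input_text)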
@@ -531,6 +544,7 @@ def post_process(text):
 
 # 🛠 Function to Handle RAG with IBM Granite & Streamlit
 def granite_simple(prompt, file):
+    """Processes PDF and AI response."""
     file_context = read_files(file) if file else ""
 
     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
@@ -559,24 +573,12 @@ def main():
     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
 
     if uploaded_file:
-        # ✅ Debugging: Show file info
-        st.success(f"✅ File uploaded: {uploaded_file.name}, Size: {uploaded_file.size / 1024:.2f} KB")
-
-        # ✅ Extract and preview text
-        extracted_text = read_files(uploaded_file)
-        if extracted_text:
-            st.write("📜 Extracted Text Preview:")
-            st.text_area("Extracted Text", extracted_text[:2000], height=200)  # Show first 2000 chars
-
-        st.write("Click the button below to analyze the contract.")
-
-        # Force button to always render
-        st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
+        st.success("✅ File uploaded successfully! Click below to analyze.")
 
     if st.button("🔍 Analyze Document"):
         with st.spinner("Analyzing contract document... ⏳"):
             final_answer = granite_simple(
-                "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+                "Perform a detailed analysis of the contract, highlighting risks, legal pitfalls, compliance issues, and potential disputes.",
                 uploaded_file
             )
 
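For anyone reproducing this revision: the imports imply an environment with streamlit, torch, transformers, peft, and PyPDF2 installed (the commit pins no versions; device_map in from_pretrained typically requires accelerate as well), and a Streamlit app of this shape is launched with "streamlit run app.py".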