TabasumDev committed on
Commit 5ae9056 · verified · 1 Parent(s): b8ea32a

Update app.py

Files changed (1)
  1. app.py +164 -6
app.py CHANGED
@@ -265,15 +265,173 @@
 
 
 
-import streamlit as st
-
-st.title("File Upload Debugging")
-
-uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
-
-if uploaded_file:
-    st.success(f"File uploaded: {uploaded_file.name}")
-    st.write(f"File Size: {uploaded_file.size / 1024:.2f} KB")
+# import streamlit as st
+
+# st.title("File Upload Debugging")
+
+# uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
+
+# if uploaded_file:
+#     st.success(f"File uploaded: {uploaded_file.name}")
+#     st.write(f"File Size: {uploaded_file.size / 1024:.2f} KB")
+
+# ###################################################################################
+
+
+import streamlit as st
+import os
+import re
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from PyPDF2 import PdfReader
+from peft import get_peft_model, LoraConfig, TaskType
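+# Note: PyPDF2 is no longer maintained; its successor, pypdf, exposes the same
+# PdfReader API and should be a drop-in replacement here.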
+
+# ✅ Force CPU execution for Hugging Face Spaces
+device = torch.device("cpu")
+
+# 🔹 Load IBM Granite Model (CPU-Compatible)
+MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
+
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    device_map="cpu",           # Force CPU execution
+    torch_dtype=torch.float32   # Use float32 since Hugging Face runs on CPU
+)
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
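+# The Granite instruct checkpoints ship a chat template with their tokenizer,
+# which tokenizer.apply_chat_template() relies on further down.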
+
+# 🔹 Apply LoRA Fine-Tuning Configuration
+lora_config = LoraConfig(
+    r=8,
+    lora_alpha=32,
+    target_modules=["q_proj", "v_proj"],
+    lora_dropout=0.1,
+    bias="none",
+    task_type=TaskType.CAUSAL_LM
+)
+model = get_peft_model(model, lora_config)
+model.eval()
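+# Note: wrapping the model in a fresh LoRA config adds *untrained* adapters.
+# PEFT initialises the LoRA B matrices to zero, so before any training the
+# adapters are a no-op and generation matches the base model; trained adapter
+# weights would have to be loaded (e.g. via PeftModel.from_pretrained) for the
+# LoRA layers to change anything.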
+
+# 🛠 Function to Read & Extract Text from PDFs
+def read_files(file):
+    file_context = ""
+    try:
+        reader = PdfReader(file)
+        for page in reader.pages:
+            text = page.extract_text()
+            if text:
+                file_context += text + "\n"
+    except Exception as e:
+        st.error(f"⚠️ Error reading PDF file: {e}")
+        return ""
+
+    return file_context.strip()
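+# extract_text() only recovers text embedded in the PDF; scanned, image-only
+# pages come back empty, which the callers below surface as errors. Handling
+# such files would require an OCR pass instead.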
+
+# 🛠 Function to Format AI Prompts
+def format_prompt(system_msg, user_msg, file_context=""):
+    if file_context:
+        system_msg += " The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+    return [
+        {"role": "system", "content": system_msg},
+        {"role": "user", "content": user_msg}
+    ]
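+# Caveat: file_context only switches on the extra system-message sentence; the
+# extracted contract text itself never enters the prompt, so the model cannot
+# actually see the document. A minimal fix (sketch, not part of this commit)
+# would append it to the user turn before building the message list:
+#     user_msg = f"{user_msg}\n\nContract text:\n{file_context}"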
+
+# 🛠 Function to Generate AI Responses
+def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
+    st.write("🔍 Generating response...")  # Debugging message
+    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+
+    with torch.no_grad():
+        output = model.generate(
+            **model_inputs,
+            max_new_tokens=max_tokens,
+            do_sample=True,
+            top_p=top_p,
+            temperature=temperature,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id
+        )
+
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+    st.write("✅ Response Generated!")  # Debugging message
+    return response
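+# Note: output[0] still contains the prompt tokens, so the decoded string
+# echoes the full prompt before the completion. One way to keep only the new
+# text (a sketch, not part of this commit) is to slice before decoding:
+#     new_tokens = output[0][model_inputs["input_ids"].shape[1]:]
+#     response = tokenizer.decode(new_tokens, skip_special_tokens=True)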
+
+# 🛠 Function to Clean AI Output
+def post_process(text):
+    cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
+    lines = cleaned.splitlines()
+    unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+    return "\n".join(unique_lines)
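+# dict.fromkeys() keeps only the first occurrence of each stripped line and,
+# on Python 3.7+, preserves insertion order, so this deduplicates repeated
+# lines without reshuffling the output.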
+
+# 🛠 Function to Handle RAG with IBM Granite & Streamlit
+def granite_simple(prompt, file):
+    file_context = read_files(file) if file else ""
+
+    # Debugging: Show extracted file content preview
+    if not file_context:
+        st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.")
+        return "Error: No content found in the document."
+
+    system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+
+    messages = format_prompt(system_message, prompt, file_context)
+    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    response = generate_response(input_text)
+    return post_process(response)
+
+# 🔹 Streamlit UI
+def main():
+    st.set_page_config(page_title="Contract Analysis AI", page_icon="📜")
+
+    st.title("📜 AI-Powered Contract Analysis Tool")
+    st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
+
+    # 🔹 Sidebar Settings
+    with st.sidebar:
+        st.header("⚙️ Settings")
+        max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
+        top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
+        temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
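+    # Note: these slider values are never passed on, so generation always uses
+    # generate_response's defaults. Wiring them through would mean extending
+    # granite_simple to accept and forward them (hypothetical signature, not
+    # part of this commit):
+    #     granite_simple(prompt, uploaded_file, max_tokens, top_p, temperature)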
+
+    # 🔹 File Upload Section
+    uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
+
+    if uploaded_file:
+        st.success(f"✅ File uploaded successfully! File Name: {uploaded_file.name}")
+        st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB")
+
+        # Debugging: Show extracted text preview
+        pdf_text = read_files(uploaded_file)
+        if pdf_text:
+            st.write("**Extracted Sample Text:**")
+            st.code(pdf_text[:500])  # Show first 500 characters
+        else:
+            st.error("⚠️ No readable text found in the document.")
+
+        st.write("Click the button below to analyze the contract.")
+
+        # Force button to always render
+        st.markdown('<style>div.stButton > button {display: block; width: 100%;}</style>', unsafe_allow_html=True)
+
+        if st.button("🔍 Analyze Document"):
+            with st.spinner("Analyzing contract document... ⏳"):
+                final_answer = granite_simple(
+                    "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.",
+                    uploaded_file
+                )
+
+                # 🔹 Display Analysis Result
+                st.subheader("📑 Analysis Result")
+                st.write(final_answer)
+
+# 🔥 Run Streamlit App
+if __name__ == '__main__':
+    main()
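+# On a Streamlit-SDK Space this script is launched automatically; locally the
+# equivalent is `streamlit run app.py`.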
+
+
+
+# ###################################################################################
 
 # import streamlit as st
 # import os