# # import streamlit as st # # import os # # import re # # import torch # # from transformers import AutoModelForCausalLM, AutoTokenizer # # from PyPDF2 import PdfReader # # from peft import get_peft_model, LoraConfig, TaskType # # # ✅ Force CPU execution for Streamlit Cloud # # device = torch.device("cpu") # # # 🔹 Load IBM Granite Model (CPU-Compatible) # # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct" # # model = AutoModelForCausalLM.from_pretrained( # # MODEL_NAME, # # device_map="cpu", # Force CPU execution # # torch_dtype=torch.float32 # Use float32 since Streamlit runs on CPU # # ) # # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # # # 🔹 Apply LoRA Fine-Tuning Configuration # # lora_config = LoraConfig( # # r=8, # # lora_alpha=32, # # target_modules=["q_proj", "v_proj"], # # lora_dropout=0.1, # # bias="none", # # task_type=TaskType.CAUSAL_LM # # ) # # model = get_peft_model(model, lora_config) # # model.eval() # # # 🛠 Function to Read & Extract Text from PDFs # # def read_files(file): # # file_context = "" # # reader = PdfReader(file) # # for page in reader.pages: # # text = page.extract_text() # # if text: # # file_context += text + "\n" # # return file_context.strip() # # # 🛠 Function to Format AI Prompts # # def format_prompt(system_msg, user_msg, file_context=""): # # if file_context: # # system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself." # # return [ # # {"role": "system", "content": system_msg}, # # {"role": "user", "content": user_msg} # # ] # # # 🛠 Function to Generate AI Responses # # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7): # # model_inputs = tokenizer([input_text], return_tensors="pt").to(device) # # with torch.no_grad(): # # output = model.generate( # # **model_inputs, # # max_new_tokens=max_tokens, # # do_sample=True, # # top_p=top_p, # # temperature=temperature, # # num_return_sequences=1, # # pad_token_id=tokenizer.eos_token_id # # ) # # return tokenizer.decode(output[0], skip_special_tokens=True) # # # 🛠 Function to Clean AI Output # # def post_process(text): # # cleaned = re.sub(r'戥+', '', text) # Remove unwanted symbols # # lines = cleaned.splitlines() # # unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()])) # # return "\n".join(unique_lines) # # # 🛠 Function to Handle RAG with IBM Granite & Streamlit # # def granite_simple(prompt, file): # # file_context = read_files(file) if file else "" # # system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis." # # messages = format_prompt(system_message, prompt, file_context) # # input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) # # response = generate_response(input_text) # # return post_process(response) # # # 🔹 Streamlit UI # # def main(): # # st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide") # # st.title("📜 AI-Powered Contract Analysis Tool") # # st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.") # # # 🔹 Sidebar Settings # # with st.sidebar: # # st.header("⚙️ Settings") # # max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50) # # top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1) # # temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1) # # # 🔹 File Upload Section # # uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf") # # if uploaded_file is not None: # # temp_file_path = "temp_uploaded_contract.pdf" # # with open(temp_file_path, "wb") as f: # # f.write(uploaded_file.getbuffer()) # # st.success("✅ File uploaded successfully!") # # # 🔹 User Input for Analysis # # user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges." # # if st.button("🔍 Analyze Document"): # # with st.spinner("Analyzing contract document... ⏳"): # # final_answer = granite_simple(user_prompt, temp_file_path) # # # 🔹 Display Analysis Result # # st.subheader("📑 Analysis Result") # # st.write(final_answer) # # # 🔹 Remove Temporary File # # os.remove(temp_file_path) # # # 🔥 Run Streamlit App # # if __name__ == '__main__': # # main() # import streamlit as st # import os # import re # import torch # from transformers import AutoModelForCausalLM, AutoTokenizer # from PyPDF2 import PdfReader # from peft import get_peft_model, LoraConfig, TaskType # # ✅ Auto-detect GPU for Hugging Face Spaces # device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # # 🔹 Load IBM Granite Model (CPU/GPU Compatible) # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct" # model = AutoModelForCausalLM.from_pretrained( # MODEL_NAME, # device_map="auto", # Auto-detect GPU if available # torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32 # ) # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # # 🔹 Apply LoRA Fine-Tuning Configuration # lora_config = LoraConfig( # r=8, # lora_alpha=32, # target_modules=["q_proj", "v_proj"], # lora_dropout=0.1, # bias="none", # task_type=TaskType.CAUSAL_LM # ) # model = get_peft_model(model, lora_config) # model.eval() # # 🛠 Function to Read & Extract Text from PDFs (No Temp File Needed) # def read_files(file): # file_context = "" # reader = PdfReader(file) # for page in reader.pages: # text = page.extract_text() # if text: # file_context += text + "\n" # return file_context.strip() # # 🛠 Function to Format AI Prompts # def format_prompt(system_msg, user_msg, file_context=""): # if file_context: # system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself." # return [ # {"role": "system", "content": system_msg}, # {"role": "user", "content": user_msg} # ] # # 🛠 Function to Generate AI Responses # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7): # model_inputs = tokenizer([input_text], return_tensors="pt").to(device) # with torch.no_grad(): # output = model.generate( # **model_inputs, # max_new_tokens=max_tokens, # do_sample=True, # top_p=top_p, # temperature=temperature, # num_return_sequences=1, # pad_token_id=tokenizer.eos_token_id # ) # return tokenizer.decode(output[0], skip_special_tokens=True) # # 🛠 Function to Clean AI Output # def post_process(text): # cleaned = re.sub(r'戥+', '', text) # Remove unwanted symbols # lines = cleaned.splitlines() # unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()])) # return "\n".join(unique_lines) # # 🛠 Function to Handle AI Analysis (No Temp File) # def granite_simple(prompt, file_content): # system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis." # messages = format_prompt(system_message, prompt, file_content) # input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) # response = generate_response(input_text) # return post_process(response) # # 🔹 Streamlit UI # def main(): # st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide") # st.title("📜 AI-Powered Contract Analysis Tool") # st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.") # # 🔹 Sidebar Settings # with st.sidebar: # st.header("⚙️ Settings") # max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50) # top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1) # temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1) # # 🔹 File Upload Section (No Temp File) # uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf") # if uploaded_file is not None: # st.success("✅ File uploaded successfully!") # # 🔹 Read PDF Content (No Temp File) # file_content = read_files(uploaded_file) # # 🔹 User Input for Analysis # user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges." # if st.button("🔍 Analyze Document"): # with st.spinner("Analyzing contract document... ⏳"): # final_answer = granite_simple(user_prompt, file_content) # # 🔹 Display Analysis Result # st.subheader("📑 Analysis Result") # st.write(final_answer) # # 🔥 Run Streamlit App # if __name__ == '__main__': # main() # import streamlit as st # st.title("File Upload Debugging") # uploaded_file = st.file_uploader("Upload a PDF file", type="pdf") # if uploaded_file: # st.success(f"File uploaded: {uploaded_file.name}") # st.write(f"File Size: {uploaded_file.size / 1024:.2f} KB") # ################################################################################### # import streamlit as st # import os # import re # import torch # from transformers import AutoModelForCausalLM, AutoTokenizer # from PyPDF2 import PdfReader # from peft import get_peft_model, LoraConfig, TaskType # # ✅ Force CPU execution for Hugging Face Spaces # device = torch.device("cpu") # # 🔹 Load IBM Granite Model (CPU-Compatible) # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct" # model = AutoModelForCausalLM.from_pretrained( # MODEL_NAME, # device_map="cpu", # Force CPU execution # torch_dtype=torch.float32 # Use float32 since Hugging Face runs on CPU # ) # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # # 🔹 Apply LoRA Fine-Tuning Configuration # lora_config = LoraConfig( # r=8, # lora_alpha=32, # target_modules=["q_proj", "v_proj"], # lora_dropout=0.1, # bias="none", # task_type=TaskType.CAUSAL_LM # ) # model = get_peft_model(model, lora_config) # model.eval() # # 🛠 Function to Read & Extract Text from PDFs # # def read_files(file): # # file_context = "" # # try: # # reader = PdfReader(file) # # for page in reader.pages: # # text = page.extract_text() # # if text: # # file_context += text + "\n" # # except Exception as e: # # st.error(f"⚠️ Error reading PDF file: {e}") # # return "" # # return file_context.strip() # # 🛠 Function to Read & Extract Text from PDFs # def read_files(file): # file_context = "" # reader = PdfReader(file) # for page in reader.pages: # text = page.extract_text() # if text: # file_context += text + "\n" # if not file_context.strip(): # return "⚠️ No text found. The document might be scanned or encrypted." # return file_context.strip() # # 🛠 Function to Format AI Prompts # def format_prompt(system_msg, user_msg, file_context=""): # if file_context: # system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself." # return [ # {"role": "system", "content": system_msg}, # {"role": "user", "content": user_msg} # ] # # 🛠 Function to Generate AI Responses # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7): # st.write("🔍 Generating response...") # Debugging message # model_inputs = tokenizer([input_text], return_tensors="pt").to(device) # with torch.no_grad(): # output = model.generate( # **model_inputs, # max_new_tokens=max_tokens, # do_sample=True, # top_p=top_p, # temperature=temperature, # num_return_sequences=1, # pad_token_id=tokenizer.eos_token_id # ) # response = tokenizer.decode(output[0], skip_special_tokens=True) # st.write("✅ Response Generated!") # Debugging message # return response # # 🛠 Function to Clean AI Output # def post_process(text): # cleaned = re.sub(r'戥+', '', text) # Remove unwanted symbols # lines = cleaned.splitlines() # unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()])) # return "\n".join(unique_lines) # # 🛠 Function to Handle RAG with IBM Granite & Streamlit # def granite_simple(prompt, file): # file_context = read_files(file) if file else "" # # Debugging: Show extracted file content preview # if not file_context: # st.error("⚠️ No content extracted from the PDF. It might be a scanned image or encrypted.") # return "Error: No content found in the document." # system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis." # messages = format_prompt(system_message, prompt, file_context) # input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) # response = generate_response(input_text) # return post_process(response) # # 🔹 Streamlit UI # def main(): # st.set_page_config(page_title="Contract Analysis AI", page_icon="📜") # st.title("📜 AI-Powered Contract Analysis Tool") # st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.") # # 🔹 Sidebar Settings # with st.sidebar: # st.header("⚙️ Settings") # max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50) # top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1) # temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1) # # 🔹 File Upload Section # uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf") # if uploaded_file: # st.success(f"✅ File uploaded successfully! File Name: {uploaded_file.name}") # st.write(f"**File Size:** {uploaded_file.size / 1024:.2f} KB") # # Debugging: Show extracted text preview # pdf_text = read_files(uploaded_file) # if pdf_text: # st.write("**Extracted Sample Text:**") # st.code(pdf_text[:500]) # Show first 500 characters # else: # st.error("⚠️ No readable text found in the document.") # st.write("Click the button below to analyze the contract.") # # Force button to always render # st.markdown('', unsafe_allow_html=True) # if st.button("🔍 Analyze Document"): # with st.spinner("Analyzing contract document... ⏳"): # final_answer = granite_simple( # "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.", # uploaded_file # ) # # 🔹 Display Analysis Result # st.subheader("📑 Analysis Result") # st.write(final_answer) # 🔥 Run Streamlit App # if __name__ == '__main__': # main() import streamlit as st import os import re import torch import pdfplumber from transformers import AutoModelForCausalLM, AutoTokenizer from peft import get_peft_model, LoraConfig, TaskType # ✅ Force CPU execution for Streamlit Cloud device = torch.device("cpu") # 🔹 Load IBM Granite Model (No Shard Checkpoints) MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct" model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, device_map="cpu", torch_dtype=torch.float32, ignore_mismatched_sizes=True # 🚀 Fixes sharded checkpoint issues ) tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # 🔹 Apply LoRA Fine-Tuning Configuration lora_config = LoraConfig( r=8, lora_alpha=32, target_modules=["q_proj", "v_proj"], lora_dropout=0.1, bias="none", task_type=TaskType.CAUSAL_LM ) model = get_peft_model(model, lora_config) model.eval() # 🛠 Function to Read & Extract Text from PDFs (Using pdfplumber) def read_files(file): file_context = "" with pdfplumber.open(file) as reader: for page in reader.pages: text = page.extract_text() if text: file_context += text + "\n" return file_context.strip() # 🛠 Function to Format AI Prompts def format_prompt(system_msg, user_msg, file_context=""): if file_context: system_msg += " The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself." return [ {"role": "system", "content": system_msg}, {"role": "user", "content": user_msg} ] # 🛠 Function to Generate AI Responses def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7): model_inputs = tokenizer([input_text], return_tensors="pt").to(device) with torch.no_grad(): output = model.generate( **model_inputs, max_new_tokens=max_tokens, do_sample=True, top_p=top_p, temperature=temperature, num_return_sequences=1, pad_token_id=tokenizer.eos_token_id ) return tokenizer.decode(output[0], skip_special_tokens=True) # 🛠 Function to Clean AI Output def post_process(text): cleaned = re.sub(r'戥+', '', text) # Remove unwanted symbols lines = cleaned.splitlines() unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()])) return "\n".join(unique_lines) # 🛠 Function to Handle RAG with IBM Granite & Streamlit def granite_simple(prompt, file): file_context = read_files(file) if file else "" system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis." messages = format_prompt(system_message, prompt, file_context) input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) response = generate_response(input_text) return post_process(response) # 🔹 Streamlit UI def main(): st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide") st.title("📜 AI-Powered Contract Analysis Tool") st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.") # 🔹 Sidebar Settings with st.sidebar: st.header("⚙️ Settings") max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50) top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1) temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1) # 🔹 File Upload Section uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf") if uploaded_file is not None: temp_file_path = "temp_uploaded_contract.pdf" with open(temp_file_path, "wb") as f: f.write(uploaded_file.getbuffer()) st.success("✅ File uploaded successfully!") # 🔹 User Input for Analysis user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges." if st.button("🔍 Analyze Document"): with st.spinner("Analyzing contract document... ⏳"): final_answer = granite_simple(user_prompt, temp_file_path) # 🔹 Display Analysis Result st.subheader("📑 Analysis Result") st.write(final_answer) # 🔹 Remove Temporary File os.remove(temp_file_path) # 🔥 Run Streamlit App if __name__ == '__main__': main() # import streamlit as st # from PyPDF2 import PdfReader # st.title("📂 PDF Upload Debugger") # uploaded_file = st.file_uploader("Upload a PDF", type="pdf") # if uploaded_file: # st.success(f"✅ File uploaded: {uploaded_file.name}") # st.write(f"📏 File Size: {uploaded_file.size / 1024:.2f} KB") # try: # reader = PdfReader(uploaded_file) # text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()]) # if text.strip(): # st.subheader("Extracted Text (First 500 characters)") # st.code(text[:500]) # Show a preview of the text # else: # st.error("⚠️ No text found. The document might be scanned or encrypted.") # except Exception as e: # st.error(f"⚠️ Error reading PDF: {e}") # ################################################################################### # import streamlit as st # import os # import re # import torch # from transformers import AutoModelForCausalLM, AutoTokenizer # from PyPDF2 import PdfReader # from peft import get_peft_model, LoraConfig, TaskType # # ✅ Force CPU execution # device = torch.device("cpu") # # 🔹 Load IBM Granite Model (CPU-Compatible) # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct" # model = AutoModelForCausalLM.from_pretrained( # MODEL_NAME, # device_map="cpu", # Force CPU execution # torch_dtype=torch.float32 # Use float32 since Hugging Face runs on CPU # ) # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME) # # 🔹 Apply LoRA Fine-Tuning Configuration # lora_config = LoraConfig( # r=8, # lora_alpha=32, # target_modules=["q_proj", "v_proj"], # lora_dropout=0.1, # bias="none", # task_type=TaskType.CAUSAL_LM # ) # model = get_peft_model(model, lora_config) # model.eval() # # 🛠 Function to Read & Extract Text from PDFs # def read_files(file): # file_context = "" # reader = PdfReader(file) # for page in reader.pages: # text = page.extract_text() # if text: # file_context += text + "\n" # return file_context.strip() # # 🛠 Function to Format AI Prompts # def format_prompt(system_msg, user_msg, file_context=""): # if file_context: # system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself." # return [ # {"role": "system", "content": system_msg}, # {"role": "user", "content": user_msg} # ] # # 🛠 Function to Generate AI Responses # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7): # model_inputs = tokenizer([input_text], return_tensors="pt").to(device) # with torch.no_grad(): # output = model.generate( # **model_inputs, # max_new_tokens=max_tokens, # do_sample=True, # top_p=top_p, # temperature=temperature, # num_return_sequences=1, # pad_token_id=tokenizer.eos_token_id # ) # return tokenizer.decode(output[0], skip_special_tokens=True) # # 🛠 Function to Clean AI Output # def post_process(text): # cleaned = re.sub(r'戥+', '', text) # Remove unwanted symbols # lines = cleaned.splitlines() # unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()])) # return "\n".join(unique_lines) # # 🛠 Function to Handle RAG with IBM Granite & Streamlit # def granite_simple(prompt, file): # file_context = read_files(file) if file else "" # system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis." # messages = format_prompt(system_message, prompt, file_context) # input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) # response = generate_response(input_text) # return post_process(response) # # 🔹 Streamlit UI # def main(): # st.set_page_config(page_title="Contract Analysis AI", page_icon="📜") # st.title("📜 AI-Powered Contract Analysis Tool") # st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.") # # 🔹 Sidebar Settings # with st.sidebar: # st.header("⚙️ Settings") # max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50) # top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1) # temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1) # # 🔹 File Upload Section # uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf") # # ✅ Ensure file upload message is displayed # if uploaded_file is not None: # st.session_state["uploaded_file"] = uploaded_file # Persist file in session state # st.success("✅ File uploaded successfully!") # st.write("Click the button below to analyze the contract.") # # Force button to always render # st.markdown('', unsafe_allow_html=True) # if st.button("🔍 Analyze Document"): # with st.spinner("Analyzing contract document... ⏳"): # final_answer = granite_simple( # "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges.", # uploaded_file # ) # # 🔹 Display Analysis Result # st.subheader("📑 Analysis Result") # st.write(final_answer) # # 🔥 Run Streamlit App # if __name__ == '__main__': # main()