Spaces:

TabasumDev
/

GraniteByte

Sleeping

App Files Community

TabasumDev committed on Feb 22

Commit

297372e

verified ·

1 Parent(s): 1a19f21

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -151

app.py CHANGED Viewed

@@ -1,3 +1,140 @@
 # import streamlit as st
 # import os
 # import re
@@ -6,16 +143,16 @@
 # from PyPDF2 import PdfReader
 # from peft import get_peft_model, LoraConfig, TaskType
-# # ✅ Force CPU execution for Streamlit Cloud
-# device = torch.device("cpu")
-# # 🔹 Load IBM Granite Model (CPU-Compatible)
 # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 # model = AutoModelForCausalLM.from_pretrained(
 #     MODEL_NAME,
-#     device_map="cpu",  # Force CPU execution
-#     torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
 # )
 # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -32,7 +169,7 @@
 # model = get_peft_model(model, lora_config)
 # model.eval()
-# # 🛠 Function to Read & Extract Text from PDFs
 # def read_files(file):
 #     file_context = ""
 #     reader = PdfReader(file)
@@ -77,13 +214,11 @@
 #     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
 #     return "\n".join(unique_lines)
-# # 🛠 Function to Handle RAG with IBM Granite & Streamlit
-# def granite_simple(prompt, file):
-#     file_context = read_files(file) if file else ""
 #     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-#     messages = format_prompt(system_message, prompt, file_context)
 #     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 #     response = generate_response(input_text)
@@ -103,161 +238,26 @@
 #         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
 #         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
-#     # 🔹 File Upload Section
 #     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
 #     if uploaded_file is not None:
-#         temp_file_path = "temp_uploaded_contract.pdf"
-#         with open(temp_file_path, "wb") as f:
-#             f.write(uploaded_file.getbuffer())
 #         st.success("✅ File uploaded successfully!")
 #         # 🔹 User Input for Analysis
 #         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
 #         if st.button("🔍 Analyze Document"):
 #             with st.spinner("Analyzing contract document... ⏳"):
-#                 final_answer = granite_simple(user_prompt, temp_file_path)
 #             # 🔹 Display Analysis Result
 #             st.subheader("📑 Analysis Result")
 #             st.write(final_answer)
-#             # 🔹 Remove Temporary File
-#             os.remove(temp_file_path)
 # # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()
-import streamlit as st
-import os
-import re
-import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from PyPDF2 import PdfReader
-from peft import get_peft_model, LoraConfig, TaskType
-# ✅ Auto-detect GPU for Hugging Face Spaces
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# 🔹 Load IBM Granite Model (CPU/GPU Compatible)
-MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    device_map="auto",  # Auto-detect GPU if available
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
-)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-# 🔹 Apply LoRA Fine-Tuning Configuration
-lora_config = LoraConfig(
-    r=8,
-    lora_alpha=32,
-    target_modules=["q_proj", "v_proj"],
-    lora_dropout=0.1,
-    bias="none",
-    task_type=TaskType.CAUSAL_LM
-)
-model = get_peft_model(model, lora_config)
-model.eval()
-# 🛠 Function to Read & Extract Text from PDFs (No Temp File Needed)
-def read_files(file):
-    file_context = ""
-    reader = PdfReader(file)
-    for page in reader.pages:
-        text = page.extract_text()
-        if text:
-            file_context += text + "\n"
-    return file_context.strip()
-# 🛠 Function to Format AI Prompts
-def format_prompt(system_msg, user_msg, file_context=""):
-    if file_context:
-        system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
-    return [
-        {"role": "system", "content": system_msg},
-        {"role": "user", "content": user_msg}
-    ]
-# 🛠 Function to Generate AI Responses
-def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
-    with torch.no_grad():
-        output = model.generate(
-            **model_inputs,
-            max_new_tokens=max_tokens,
-            do_sample=True,
-            top_p=top_p,
-            temperature=temperature,
-            num_return_sequences=1,
-            pad_token_id=tokenizer.eos_token_id
-        )
-    return tokenizer.decode(output[0], skip_special_tokens=True)
-# 🛠 Function to Clean AI Output
-def post_process(text):
-    cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
-    lines = cleaned.splitlines()
-    unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
-    return "\n".join(unique_lines)
-# 🛠 Function to Handle AI Analysis (No Temp File)
-def granite_simple(prompt, file_content):
-    system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-    messages = format_prompt(system_message, prompt, file_content)
-    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    response = generate_response(input_text)
-    return post_process(response)
-# 🔹 Streamlit UI
-def main():
-    st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide")
-    st.title("📜 AI-Powered Contract Analysis Tool")
-    st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
-    # 🔹 Sidebar Settings
-    with st.sidebar:
-        st.header("⚙️ Settings")
-        max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
-        top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
-        temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
-    # 🔹 File Upload Section (No Temp File)
-    uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
-    if uploaded_file is not None:
-        st.success("✅ File uploaded successfully!")
-        # 🔹 Read PDF Content (No Temp File)
-        file_content = read_files(uploaded_file)
-        # 🔹 User Input for Analysis
-        user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
-        if st.button("🔍 Analyze Document"):
-            with st.spinner("Analyzing contract document... ⏳"):
-                final_answer = granite_simple(user_prompt, file_content)
-            # 🔹 Display Analysis Result
-            st.subheader("📑 Analysis Result")
-            st.write(final_answer)
-# 🔥 Run Streamlit App
-if __name__ == '__main__':
-    main()

+# # import streamlit as st
+# # import os
+# # import re
+# # import torch
+# # from transformers import AutoModelForCausalLM, AutoTokenizer
+# # from PyPDF2 import PdfReader
+# # from peft import get_peft_model, LoraConfig, TaskType
+# # # ✅ Force CPU execution for Streamlit Cloud
+# # device = torch.device("cpu")
+# # # 🔹 Load IBM Granite Model (CPU-Compatible)
+# # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
+# # model = AutoModelForCausalLM.from_pretrained(
+# #     MODEL_NAME,
+# #     device_map="cpu",  # Force CPU execution
+# #     torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
+# # )
+# # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# # # 🔹 Apply LoRA Fine-Tuning Configuration
+# # lora_config = LoraConfig(
+# #     r=8,
+# #     lora_alpha=32,
+# #     target_modules=["q_proj", "v_proj"],
+# #     lora_dropout=0.1,
+# #     bias="none",
+# #     task_type=TaskType.CAUSAL_LM
+# # )
+# # model = get_peft_model(model, lora_config)
+# # model.eval()
+# # # 🛠 Function to Read & Extract Text from PDFs
+# # def read_files(file):
+# #     file_context = ""
+# #     reader = PdfReader(file)
+# #     for page in reader.pages:
+# #         text = page.extract_text()
+# #         if text:
+# #             file_context += text + "\n"
+# #     return file_context.strip()
+# # # 🛠 Function to Format AI Prompts
+# # def format_prompt(system_msg, user_msg, file_context=""):
+# #     if file_context:
+# #         system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+# #     return [
+# #         {"role": "system", "content": system_msg},
+# #         {"role": "user", "content": user_msg}
+# #     ]
+# # # 🛠 Function to Generate AI Responses
+# # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
+# #     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+# #     with torch.no_grad():
+# #         output = model.generate(
+# #             **model_inputs,
+# #             max_new_tokens=max_tokens,
+# #             do_sample=True,
+# #             top_p=top_p,
+# #             temperature=temperature,
+# #             num_return_sequences=1,
+# #             pad_token_id=tokenizer.eos_token_id
+# #         )
+# #     return tokenizer.decode(output[0], skip_special_tokens=True)
+# # # 🛠 Function to Clean AI Output
+# # def post_process(text):
+# #     cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
+# #     lines = cleaned.splitlines()
+# #     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+# #     return "\n".join(unique_lines)
+# # # 🛠 Function to Handle RAG with IBM Granite & Streamlit
+# # def granite_simple(prompt, file):
+# #     file_context = read_files(file) if file else ""
+# #     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+# #     messages = format_prompt(system_message, prompt, file_context)
+# #     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+# #     response = generate_response(input_text)
+# #     return post_process(response)
+# # # 🔹 Streamlit UI
+# # def main():
+# #     st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide")
+# #     st.title("📜 AI-Powered Contract Analysis Tool")
+# #     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
+# #     # 🔹 Sidebar Settings
+# #     with st.sidebar:
+# #         st.header("⚙️ Settings")
+# #         max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
+# #         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
+# #         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
+# #     # 🔹 File Upload Section
+# #     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
+# #     if uploaded_file is not None:
+# #         temp_file_path = "temp_uploaded_contract.pdf"
+# #         with open(temp_file_path, "wb") as f:
+# #             f.write(uploaded_file.getbuffer())
+# #         st.success("✅ File uploaded successfully!")
+# #         # 🔹 User Input for Analysis
+# #         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
+# #         if st.button("🔍 Analyze Document"):
+# #             with st.spinner("Analyzing contract document... ⏳"):
+# #                 final_answer = granite_simple(user_prompt, temp_file_path)
+# #             # 🔹 Display Analysis Result
+# #             st.subheader("📑 Analysis Result")
+# #             st.write(final_answer)
+# #             # 🔹 Remove Temporary File
+# #             os.remove(temp_file_path)
+# # # 🔥 Run Streamlit App
+# # if __name__ == '__main__':
+# #     main()
 # import streamlit as st
 # import os
 # import re
 # from PyPDF2 import PdfReader
 # from peft import get_peft_model, LoraConfig, TaskType
+# # ✅ Auto-detect GPU for Hugging Face Spaces
+# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# # 🔹 Load IBM Granite Model (CPU/GPU Compatible)
 # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
 # model = AutoModelForCausalLM.from_pretrained(
 #     MODEL_NAME,
+#     device_map="auto",  # Auto-detect GPU if available
+#     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
 # )
 # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 # model = get_peft_model(model, lora_config)
 # model.eval()
+# # 🛠 Function to Read & Extract Text from PDFs (No Temp File Needed)
 # def read_files(file):
 #     file_context = ""
 #     reader = PdfReader(file)
 #     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
 #     return "\n".join(unique_lines)
+# # 🛠 Function to Handle AI Analysis (No Temp File)
+# def granite_simple(prompt, file_content):
 #     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+#     messages = format_prompt(system_message, prompt, file_content)
 #     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 #     response = generate_response(input_text)
 #         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
 #         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
+#     # 🔹 File Upload Section (No Temp File)
 #     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
 #     if uploaded_file is not None:
 #         st.success("✅ File uploaded successfully!")
+#         # 🔹 Read PDF Content (No Temp File)
+#         file_content = read_files(uploaded_file)
 #         # 🔹 User Input for Analysis
 #         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
 #         if st.button("🔍 Analyze Document"):
 #             with st.spinner("Analyzing contract document... ⏳"):
+#                 final_answer = granite_simple(user_prompt, file_content)
 #             # 🔹 Display Analysis Result
 #             st.subheader("📑 Analysis Result")
 #             st.write(final_answer)
 # # 🔥 Run Streamlit App
 # if __name__ == '__main__':
 #     main()