TabasumDev committed
Commit 297372e · verified · 1 Parent(s): 1a19f21

Update app.py

Files changed (1): app.py (+151 -151)
app.py CHANGED
@@ -1,3 +1,140 @@
+ # # import streamlit as st
+ # # import os
+ # # import re
+ # # import torch
+ # # from transformers import AutoModelForCausalLM, AutoTokenizer
+ # # from PyPDF2 import PdfReader
+ # # from peft import get_peft_model, LoraConfig, TaskType
+
+ # # # ✅ Force CPU execution for Streamlit Cloud
+ # # device = torch.device("cpu")
+
+ # # # 🔹 Load IBM Granite Model (CPU-Compatible)
+ # # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
+
+ # # model = AutoModelForCausalLM.from_pretrained(
+ # #     MODEL_NAME,
+ # #     device_map="cpu",  # Force CPU execution
+ # #     torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
+ # # )
+
+ # # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ # # # 🔹 Apply LoRA Fine-Tuning Configuration
+ # # lora_config = LoraConfig(
+ # #     r=8,
+ # #     lora_alpha=32,
+ # #     target_modules=["q_proj", "v_proj"],
+ # #     lora_dropout=0.1,
+ # #     bias="none",
+ # #     task_type=TaskType.CAUSAL_LM
+ # # )
+ # # model = get_peft_model(model, lora_config)
+ # # model.eval()
+
+ # # # 🛠 Function to Read & Extract Text from PDFs
+ # # def read_files(file):
+ # #     file_context = ""
+ # #     reader = PdfReader(file)
+
+ # #     for page in reader.pages:
+ # #         text = page.extract_text()
+ # #         if text:
+ # #             file_context += text + "\n"
+
+ # #     return file_context.strip()
+
+ # # # 🛠 Function to Format AI Prompts
+ # # def format_prompt(system_msg, user_msg, file_context=""):
+ # #     if file_context:
+ # #         system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
+ # #     return [
+ # #         {"role": "system", "content": system_msg},
+ # #         {"role": "user", "content": user_msg}
+ # #     ]
+
+ # # # 🛠 Function to Generate AI Responses
+ # # def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
+ # #     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
+
+ # #     with torch.no_grad():
+ # #         output = model.generate(
+ # #             **model_inputs,
+ # #             max_new_tokens=max_tokens,
+ # #             do_sample=True,
+ # #             top_p=top_p,
+ # #             temperature=temperature,
+ # #             num_return_sequences=1,
+ # #             pad_token_id=tokenizer.eos_token_id
+ # #         )
+
+ # #     return tokenizer.decode(output[0], skip_special_tokens=True)
+
+ # # # 🛠 Function to Clean AI Output
+ # # def post_process(text):
+ # #     cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
+ # #     lines = cleaned.splitlines()
+ # #     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
+ # #     return "\n".join(unique_lines)
+
+ # # # 🛠 Function to Handle RAG with IBM Granite & Streamlit
+ # # def granite_simple(prompt, file):
+ # #     file_context = read_files(file) if file else ""
+
+ # #     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
+
+ # #     messages = format_prompt(system_message, prompt, file_context)
+ # #     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+ # #     response = generate_response(input_text)
+ # #     return post_process(response)
+
+ # # # 🔹 Streamlit UI
+ # # def main():
+ # #     st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide")
+
+ # #     st.title("📜 AI-Powered Contract Analysis Tool")
+ # #     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
+
+ # #     # 🔹 Sidebar Settings
+ # #     with st.sidebar:
+ # #         st.header("⚙️ Settings")
+ # #         max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
+ # #         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
+ # #         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
+
+ # #     # 🔹 File Upload Section
+ # #     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
+
+ # #     if uploaded_file is not None:
+ # #         temp_file_path = "temp_uploaded_contract.pdf"
+ # #         with open(temp_file_path, "wb") as f:
+ # #             f.write(uploaded_file.getbuffer())
+
+ # #         st.success("✅ File uploaded successfully!")
+
+ # #         # 🔹 User Input for Analysis
+ # #         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
+
+ # #         if st.button("🔍 Analyze Document"):
+ # #             with st.spinner("Analyzing contract document... ⏳"):
+ # #                 final_answer = granite_simple(user_prompt, temp_file_path)
+
+ # #                 # 🔹 Display Analysis Result
+ # #                 st.subheader("📑 Analysis Result")
+ # #                 st.write(final_answer)
+
+ # #         # 🔹 Remove Temporary File
+ # #         os.remove(temp_file_path)
+
+ # # # 🔥 Run Streamlit App
+ # # if __name__ == '__main__':
+ # #     main()
+
+
+
+
+
  # import streamlit as st
  # import os
  # import re
@@ -6,16 +143,16 @@
  # from PyPDF2 import PdfReader
  # from peft import get_peft_model, LoraConfig, TaskType

- # # ✅ Force CPU execution for Streamlit Cloud
- # device = torch.device("cpu")
+ # # ✅ Auto-detect GPU for Hugging Face Spaces
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

- # # 🔹 Load IBM Granite Model (CPU-Compatible)
+ # # 🔹 Load IBM Granite Model (CPU/GPU Compatible)
  # MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"

  # model = AutoModelForCausalLM.from_pretrained(
  #     MODEL_NAME,
- #     device_map="cpu",  # Force CPU execution
- #     torch_dtype=torch.float32  # Use float32 since Streamlit runs on CPU
+ #     device_map="auto",  # Auto-detect GPU if available
+ #     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
  # )

  # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
@@ -32,7 +169,7 @@
  # model = get_peft_model(model, lora_config)
  # model.eval()

- # # 🛠 Function to Read & Extract Text from PDFs
+ # # 🛠 Function to Read & Extract Text from PDFs (No Temp File Needed)
  # def read_files(file):
  #     file_context = ""
  #     reader = PdfReader(file)
@@ -77,13 +214,11 @@
  #     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
  #     return "\n".join(unique_lines)

- # # 🛠 Function to Handle RAG with IBM Granite & Streamlit
- # def granite_simple(prompt, file):
- #     file_context = read_files(file) if file else ""
-
+ # # 🛠 Function to Handle AI Analysis (No Temp File)
+ # def granite_simple(prompt, file_content):
  #     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."

- #     messages = format_prompt(system_message, prompt, file_context)
+ #     messages = format_prompt(system_message, prompt, file_content)
  #     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

  #     response = generate_response(input_text)
@@ -103,161 +238,26 @@
  #         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
  #         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)

- #     # 🔹 File Upload Section
+ #     # 🔹 File Upload Section (No Temp File)
  #     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")

  #     if uploaded_file is not None:
- #         temp_file_path = "temp_uploaded_contract.pdf"
- #         with open(temp_file_path, "wb") as f:
- #             f.write(uploaded_file.getbuffer())
-
  #         st.success("✅ File uploaded successfully!")

+ #         # 🔹 Read PDF Content (No Temp File)
+ #         file_content = read_files(uploaded_file)
+
  #         # 🔹 User Input for Analysis
  #         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."

  #         if st.button("🔍 Analyze Document"):
  #             with st.spinner("Analyzing contract document... ⏳"):
- #                 final_answer = granite_simple(user_prompt, temp_file_path)
+ #                 final_answer = granite_simple(user_prompt, file_content)

  #                 # 🔹 Display Analysis Result
  #                 st.subheader("📑 Analysis Result")
  #                 st.write(final_answer)

- #         # 🔹 Remove Temporary File
- #         os.remove(temp_file_path)
-
  # # 🔥 Run Streamlit App
  # if __name__ == '__main__':
  #     main()
-
-
-
-
-
- import streamlit as st
- import os
- import re
- import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
- from PyPDF2 import PdfReader
- from peft import get_peft_model, LoraConfig, TaskType
-
- # ✅ Auto-detect GPU for Hugging Face Spaces
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # 🔹 Load IBM Granite Model (CPU/GPU Compatible)
- MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"
-
- model = AutoModelForCausalLM.from_pretrained(
-     MODEL_NAME,
-     device_map="auto",  # Auto-detect GPU if available
-     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
- )
-
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-
- # 🔹 Apply LoRA Fine-Tuning Configuration
- lora_config = LoraConfig(
-     r=8,
-     lora_alpha=32,
-     target_modules=["q_proj", "v_proj"],
-     lora_dropout=0.1,
-     bias="none",
-     task_type=TaskType.CAUSAL_LM
- )
- model = get_peft_model(model, lora_config)
- model.eval()
-
- # 🛠 Function to Read & Extract Text from PDFs (No Temp File Needed)
- def read_files(file):
-     file_context = ""
-     reader = PdfReader(file)
-
-     for page in reader.pages:
-         text = page.extract_text()
-         if text:
-             file_context += text + "\n"
-
-     return file_context.strip()
-
- # 🛠 Function to Format AI Prompts
- def format_prompt(system_msg, user_msg, file_context=""):
-     if file_context:
-         system_msg += f" The user has provided a contract document. Use its context to generate insights, but do not repeat or summarize the document itself."
-     return [
-         {"role": "system", "content": system_msg},
-         {"role": "user", "content": user_msg}
-     ]
-
- # 🛠 Function to Generate AI Responses
- def generate_response(input_text, max_tokens=1000, top_p=0.9, temperature=0.7):
-     model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
-
-     with torch.no_grad():
-         output = model.generate(
-             **model_inputs,
-             max_new_tokens=max_tokens,
-             do_sample=True,
-             top_p=top_p,
-             temperature=temperature,
-             num_return_sequences=1,
-             pad_token_id=tokenizer.eos_token_id
-         )
-
-     return tokenizer.decode(output[0], skip_special_tokens=True)
-
- # 🛠 Function to Clean AI Output
- def post_process(text):
-     cleaned = re.sub(r'戥+', '', text)  # Remove unwanted symbols
-     lines = cleaned.splitlines()
-     unique_lines = list(dict.fromkeys([line.strip() for line in lines if line.strip()]))
-     return "\n".join(unique_lines)
-
- # 🛠 Function to Handle AI Analysis (No Temp File)
- def granite_simple(prompt, file_content):
-     system_message = "You are IBM Granite, a legal AI assistant specializing in contract analysis."
-
-     messages = format_prompt(system_message, prompt, file_content)
-     input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-
-     response = generate_response(input_text)
-     return post_process(response)
-
- # 🔹 Streamlit UI
- def main():
-     st.set_page_config(page_title="Contract Analysis AI", page_icon="📜", layout="wide")
-
-     st.title("📜 AI-Powered Contract Analysis Tool")
-     st.write("Upload a contract document (PDF) for a detailed AI-driven legal and technical analysis.")
-
-     # 🔹 Sidebar Settings
-     with st.sidebar:
-         st.header("⚙️ Settings")
-         max_tokens = st.slider("Max Tokens", 50, 1000, 250, 50)
-         top_p = st.slider("Top P (sampling)", 0.1, 1.0, 0.9, 0.1)
-         temperature = st.slider("Temperature (creativity)", 0.1, 1.0, 0.7, 0.1)
-
-     # 🔹 File Upload Section (No Temp File)
-     uploaded_file = st.file_uploader("📂 Upload a contract document (PDF)", type="pdf")
-
-     if uploaded_file is not None:
-         st.success("✅ File uploaded successfully!")
-
-         # 🔹 Read PDF Content (No Temp File)
-         file_content = read_files(uploaded_file)
-
-         # 🔹 User Input for Analysis
-         user_prompt = "Perform a detailed technical analysis of the attached contract document, highlighting potential risks, legal pitfalls, compliance issues, and areas where contractual terms may lead to future disputes or operational challenges."
-
-         if st.button("🔍 Analyze Document"):
-             with st.spinner("Analyzing contract document... ⏳"):
-                 final_answer = granite_simple(user_prompt, file_content)
-
-                 # 🔹 Display Analysis Result
-                 st.subheader("📑 Analysis Result")
-                 st.write(final_answer)
-
- # 🔥 Run Streamlit App
- if __name__ == '__main__':
-     main()
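
The net effect of this revision is that every line of app.py is commented out: the old CPU-only block gains a second "#", and the previously active GPU/CPU auto-detect block gains its first, so the Space currently executes no code on startup. For reference, here is a minimal runnable sketch of the device-selection pattern used by the now-commented block. The model name is taken from the diff; the prompt string and generation settings are illustrative assumptions rather than the app's exact values, and device_map="auto" additionally requires the accelerate package.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "ibm-granite/granite-3.1-2b-instruct"

# Prefer the GPU when one is present; fall back to CPU.
# float16 halves GPU memory use; float32 is the safe dtype on CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # let accelerate place the weights on the detected device(s)
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Illustrative prompt (an assumption, not taken from the app).
inputs = tokenizer("Summarize the key risks in this contract.", return_tensors="pt").to(device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output[0], skip_special_tokens=True))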