Spaces:
Running
on
Zero
Running
on
Zero
Daryl Lim
committed on
Commit
·
d1ddeff
1
Parent(s):
6779930
Update app.py
Browse files
app.py
CHANGED
@@ -26,9 +26,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
26 |
|
27 |
# Initialize IBM Granite model and tokenizer
|
28 |
print("Loading Granite model and tokenizer...")
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
-
"ibm-granite/granite-3.
|
32 |
device_map="auto",
|
33 |
torch_dtype=torch.bfloat16
|
34 |
)
|
@@ -149,30 +149,30 @@ def download_file_from_url(url: str) -> Optional[str]:
|
|
149 |
return None
|
150 |
|
151 |
# Function to generate a summary using the IBM Granite model
|
152 |
-
def generate_summary(chunks: List[Document], model, tokenizer, summary_type="
|
153 |
"""Generate a summary from document chunks using the IBM Granite model"""
|
154 |
# Concatenate the retrieved chunks
|
155 |
combined_text = " ".join([chunk.page_content for chunk in chunks])
|
156 |
|
157 |
# Create a prompt based on the summary parameters
|
158 |
-
if summary_type == "
|
159 |
-
summary_instruction = "Extract the key sentences from the text to
|
160 |
-
else: #
|
161 |
-
summary_instruction = "
|
162 |
|
163 |
-
if detail_level == "
|
164 |
-
detail_instruction = "
|
165 |
-
elif detail_level == "
|
166 |
-
detail_instruction = "Balance
|
167 |
-
else: #
|
168 |
-
detail_instruction = "Focus on the main points and key
|
169 |
|
170 |
-
if length == "
|
171 |
-
length_instruction = "
|
172 |
-
elif length == "
|
173 |
-
length_instruction = "
|
174 |
-
else: #
|
175 |
-
length_instruction = "
|
176 |
|
177 |
# Construct the full prompt
|
178 |
prompt = f"""<instruction>
|
@@ -223,9 +223,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
223 |
batch,
|
224 |
model,
|
225 |
tokenizer,
|
226 |
-
summary_type=summary_params.get("summary_type", "
|
227 |
-
detail_level=summary_params.get("detail_level", "
|
228 |
-
length=summary_params.get("length", "
|
229 |
)
|
230 |
summaries.append(summary)
|
231 |
|
@@ -235,9 +235,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
235 |
[Document(page_content=s) for s in summaries],
|
236 |
model,
|
237 |
tokenizer,
|
238 |
-
summary_type=summary_params.get("summary_type", "
|
239 |
-
detail_level=summary_params.get("detail_level", "
|
240 |
-
length=summary_params.get("length", "
|
241 |
)
|
242 |
return final_summary
|
243 |
else:
|
@@ -248,9 +248,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
248 |
def process_document(
|
249 |
file_obj: Optional[Union[str, tempfile._TemporaryFileWrapper]] = None,
|
250 |
url: Optional[str] = None,
|
251 |
-
summary_type: str = "
|
252 |
-
detail_level: str = "
|
253 |
-
length: str = "
|
254 |
progress=gr.Progress()
|
255 |
):
|
256 |
"""Process a document file or URL and generate a summary"""
|
@@ -320,8 +320,8 @@ def process_document(
|
|
320 |
# Create Gradio interface
|
321 |
def create_gradio_interface():
|
322 |
"""Create and launch the Gradio interface"""
|
323 |
-
with gr.Blocks(title="
|
324 |
-
gr.Markdown("#
|
325 |
gr.Markdown("Upload a document or provide a URL to generate a summary.")
|
326 |
|
327 |
with gr.Row():
|
@@ -332,23 +332,23 @@ def create_gradio_interface():
|
|
332 |
with gr.Row():
|
333 |
with gr.Column():
|
334 |
summary_type = gr.Radio(
|
335 |
-
choices=["
|
336 |
-
value="
|
337 |
label="Summary Type"
|
338 |
)
|
339 |
|
340 |
with gr.Row():
|
341 |
with gr.Column():
|
342 |
detail_level = gr.Radio(
|
343 |
-
choices=["
|
344 |
-
value="
|
345 |
label="Level of Detail"
|
346 |
)
|
347 |
|
348 |
with gr.Column():
|
349 |
length = gr.Radio(
|
350 |
-
choices=["
|
351 |
-
value="
|
352 |
label="Summary Length"
|
353 |
)
|
354 |
|
|
|
26 |
|
27 |
# Initialize IBM Granite model and tokenizer
|
28 |
print("Loading Granite model and tokenizer...")
|
29 |
+
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.2-8b-instruct")
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
+
"ibm-granite/granite-3.2-8b-instruct",
|
32 |
device_map="auto",
|
33 |
torch_dtype=torch.bfloat16
|
34 |
)
|
|
|
149 |
return None
|
150 |
|
151 |
# Function to generate a summary using the IBM Granite model
|
152 |
+
def generate_summary(chunks: List[Document], model, tokenizer, summary_type="abstractive", detail_level="medium", length="medium"):
|
153 |
"""Generate a summary from document chunks using the IBM Granite model"""
|
154 |
# Concatenate the retrieved chunks
|
155 |
combined_text = " ".join([chunk.page_content for chunk in chunks])
|
156 |
|
157 |
# Create a prompt based on the summary parameters
|
158 |
+
if summary_type == "extractive":
|
159 |
+
summary_instruction = "Extract the key sentences from the text to create a summary."
|
160 |
+
else: # abstractive
|
161 |
+
summary_instruction = "Generate a comprehensive summary in your own words."
|
162 |
|
163 |
+
if detail_level == "high":
|
164 |
+
detail_instruction = "Include specific details and examples."
|
165 |
+
elif detail_level == "medium":
|
166 |
+
detail_instruction = "Balance key points with some supporting details."
|
167 |
+
else: # low
|
168 |
+
detail_instruction = "Focus only on the main points and key takeaways."
|
169 |
|
170 |
+
if length == "short":
|
171 |
+
length_instruction = "Keep the summary concise and brief."
|
172 |
+
elif length == "medium":
|
173 |
+
length_instruction = "Create a moderate-length summary."
|
174 |
+
else: # long
|
175 |
+
length_instruction = "Provide a comprehensive, detailed summary."
|
176 |
|
177 |
# Construct the full prompt
|
178 |
prompt = f"""<instruction>
|
|
|
223 |
batch,
|
224 |
model,
|
225 |
tokenizer,
|
226 |
+
summary_type=summary_params.get("summary_type", "abstractive"),
|
227 |
+
detail_level=summary_params.get("detail_level", "medium"),
|
228 |
+
length=summary_params.get("length", "medium")
|
229 |
)
|
230 |
summaries.append(summary)
|
231 |
|
|
|
235 |
[Document(page_content=s) for s in summaries],
|
236 |
model,
|
237 |
tokenizer,
|
238 |
+
summary_type=summary_params.get("summary_type", "abstractive"),
|
239 |
+
detail_level=summary_params.get("detail_level", "medium"),
|
240 |
+
length=summary_params.get("length", "medium")
|
241 |
)
|
242 |
return final_summary
|
243 |
else:
|
|
|
248 |
def process_document(
|
249 |
file_obj: Optional[Union[str, tempfile._TemporaryFileWrapper]] = None,
|
250 |
url: Optional[str] = None,
|
251 |
+
summary_type: str = "abstractive",
|
252 |
+
detail_level: str = "medium",
|
253 |
+
length: str = "medium",
|
254 |
progress=gr.Progress()
|
255 |
):
|
256 |
"""Process a document file or URL and generate a summary"""
|
|
|
320 |
# Create Gradio interface
|
321 |
def create_gradio_interface():
|
322 |
"""Create and launch the Gradio interface"""
|
323 |
+
with gr.Blocks(title="Document Summarizer") as app:
|
324 |
+
gr.Markdown("# Document Summarizer")
|
325 |
gr.Markdown("Upload a document or provide a URL to generate a summary.")
|
326 |
|
327 |
with gr.Row():
|
|
|
332 |
with gr.Row():
|
333 |
with gr.Column():
|
334 |
summary_type = gr.Radio(
|
335 |
+
choices=["extractive", "abstractive"],
|
336 |
+
value="abstractive",
|
337 |
label="Summary Type"
|
338 |
)
|
339 |
|
340 |
with gr.Row():
|
341 |
with gr.Column():
|
342 |
detail_level = gr.Radio(
|
343 |
+
choices=["low", "medium", "high"],
|
344 |
+
value="medium",
|
345 |
label="Level of Detail"
|
346 |
)
|
347 |
|
348 |
with gr.Column():
|
349 |
length = gr.Radio(
|
350 |
+
choices=["short", "medium", "long"],
|
351 |
+
value="medium",
|
352 |
label="Summary Length"
|
353 |
)
|
354 |
|