Spaces:
Running
on
Zero
Running
on
Zero
Daryl Lim
committed on
Commit
·
d1ddeff
1
Parent(s):
6779930
Update app.py
Browse files
app.py
CHANGED
@@ -26,9 +26,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
26 |
|
27 |
# Initialize IBM Granite model and tokenizer
|
28 |
print("Loading Granite model and tokenizer...")
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
-
"ibm-granite/granite-3.
|
32 |
device_map="auto",
|
33 |
torch_dtype=torch.bfloat16
|
34 |
)
|
@@ -149,30 +149,30 @@ def download_file_from_url(url: str) -> Optional[str]:
|
|
149 |
return None
|
150 |
|
151 |
# Function to generate a summary using the IBM Granite model
|
152 |
-
def generate_summary(chunks: List[Document], model, tokenizer, summary_type="
|
153 |
"""Generate a summary from document chunks using the IBM Granite model"""
|
154 |
# Concatenate the retrieved chunks
|
155 |
combined_text = " ".join([chunk.page_content for chunk in chunks])
|
156 |
|
157 |
# Create a prompt based on the summary parameters
|
158 |
-
if summary_type == "
|
159 |
-
summary_instruction = "Extract the key sentences from the text to
|
160 |
-
else: #
|
161 |
-
summary_instruction = "
|
162 |
|
163 |
-
if detail_level == "
|
164 |
-
detail_instruction = "
|
165 |
-
elif detail_level == "
|
166 |
-
detail_instruction = "Balance
|
167 |
-
else: #
|
168 |
-
detail_instruction = "Focus on the main points and key
|
169 |
|
170 |
-
if length == "
|
171 |
-
length_instruction = "
|
172 |
-
elif length == "
|
173 |
-
length_instruction = "
|
174 |
-
else: #
|
175 |
-
length_instruction = "
|
176 |
|
177 |
# Construct the full prompt
|
178 |
prompt = f"""<instruction>
|
@@ -223,9 +223,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
223 |
batch,
|
224 |
model,
|
225 |
tokenizer,
|
226 |
-
summary_type=summary_params.get("summary_type", "
|
227 |
-
detail_level=summary_params.get("detail_level", "
|
228 |
-
length=summary_params.get("length", "
|
229 |
)
|
230 |
summaries.append(summary)
|
231 |
|
@@ -235,9 +235,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
235 |
[Document(page_content=s) for s in summaries],
|
236 |
model,
|
237 |
tokenizer,
|
238 |
-
summary_type=summary_params.get("summary_type", "
|
239 |
-
detail_level=summary_params.get("detail_level", "
|
240 |
-
length=summary_params.get("length", "
|
241 |
)
|
242 |
return final_summary
|
243 |
else:
|
@@ -248,9 +248,9 @@ def summarize_full_document(retriever, model, tokenizer, summary_params, chunk_s
|
|
248 |
def process_document(
|
249 |
file_obj: Optional[Union[str, tempfile._TemporaryFileWrapper]] = None,
|
250 |
url: Optional[str] = None,
|
251 |
-
summary_type: str = "
|
252 |
-
detail_level: str = "
|
253 |
-
length: str = "
|
254 |
progress=gr.Progress()
|
255 |
):
|
256 |
"""Process a document file or URL and generate a summary"""
|
@@ -320,8 +320,8 @@ def process_document(
|
|
320 |
# Create Gradio interface
|
321 |
def create_gradio_interface():
|
322 |
"""Create and launch the Gradio interface"""
|
323 |
-
with gr.Blocks(title="
|
324 |
-
gr.Markdown("#
|
325 |
gr.Markdown("Upload a document or provide a URL to generate a summary.")
|
326 |
|
327 |
with gr.Row():
|
@@ -332,23 +332,23 @@ def create_gradio_interface():
|
|
332 |
with gr.Row():
|
333 |
with gr.Column():
|
334 |
summary_type = gr.Radio(
|
335 |
-
choices=["
|
336 |
-
value="
|
337 |
label="Summary Type"
|
338 |
)
|
339 |
|
340 |
with gr.Row():
|
341 |
with gr.Column():
|
342 |
detail_level = gr.Radio(
|
343 |
-
choices=["
|
344 |
-
value="
|
345 |
label="Level of Detail"
|
346 |
)
|
347 |
|
348 |
with gr.Column():
|
349 |
length = gr.Radio(
|
350 |
-
choices=["
|
351 |
-
value="
|
352 |
label="Summary Length"
|
353 |
)
|
354 |
|
|
|
26 |
|
27 |
# Initialize IBM Granite model and tokenizer
|
28 |
print("Loading Granite model and tokenizer...")
|
29 |
+
tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.2-8b-instruct")
|
30 |
model = AutoModelForCausalLM.from_pretrained(
|
31 |
+
"ibm-granite/granite-3.2-8b-instruct",
|
32 |
device_map="auto",
|
33 |
torch_dtype=torch.bfloat16
|
34 |
)
|
|
|
149 |
return None
|
150 |
|
151 |
# Function to generate a summary using the IBM Granite model
|
152 |
+
def generate_summary(chunks: List[Document], model, tokenizer, summary_type="abstractive", detail_level="medium", length="medium"):
|
153 |
"""Generate a summary from document chunks using the IBM Granite model"""
|
154 |
# Concatenate the retrieved chunks
|
155 |
combined_text = " ".join([chunk.page_content for chunk in chunks])
|
156 |
|
157 |
# Create a prompt based on the summary parameters
|
158 |
+
if summary_type == "extractive":
|
159 |
+
summary_instruction = "Extract the key sentences from the text to create a summary."
|
160 |
+
else: # abstractive
|
161 |
+
summary_instruction = "Generate a comprehensive summary in your own words."
|
162 |
|
163 |
+
if detail_level == "high":
|
164 |
+
detail_instruction = "Include specific details and examples."
|
165 |
+
elif detail_level == "medium":
|
166 |
+
detail_instruction = "Balance key points with some supporting details."
|
167 |
+
else: # low
|
168 |
+
detail_instruction = "Focus only on the main points and key takeaways."
|
169 |
|
170 |
+
if length == "short":
|
171 |
+
length_instruction = "Keep the summary concise and brief."
|
172 |
+
elif length == "medium":
|
173 |
+
length_instruction = "Create a moderate-length summary."
|
174 |
+
else: # long
|
175 |
+
length_instruction = "Provide a comprehensive, detailed summary."
|
176 |
|
177 |
# Construct the full prompt
|
178 |
prompt = f"""<instruction>
|
|
|
223 |
batch,
|
224 |
model,
|
225 |
tokenizer,
|
226 |
+
summary_type=summary_params.get("summary_type", "abstractive"),
|
227 |
+
detail_level=summary_params.get("detail_level", "medium"),
|
228 |
+
length=summary_params.get("length", "medium")
|
229 |
)
|
230 |
summaries.append(summary)
|
231 |
|
|
|
235 |
[Document(page_content=s) for s in summaries],
|
236 |
model,
|
237 |
tokenizer,
|
238 |
+
summary_type=summary_params.get("summary_type", "abstractive"),
|
239 |
+
detail_level=summary_params.get("detail_level", "medium"),
|
240 |
+
length=summary_params.get("length", "medium")
|
241 |
)
|
242 |
return final_summary
|
243 |
else:
|
|
|
248 |
def process_document(
|
249 |
file_obj: Optional[Union[str, tempfile._TemporaryFileWrapper]] = None,
|
250 |
url: Optional[str] = None,
|
251 |
+
summary_type: str = "abstractive",
|
252 |
+
detail_level: str = "medium",
|
253 |
+
length: str = "medium",
|
254 |
progress=gr.Progress()
|
255 |
):
|
256 |
"""Process a document file or URL and generate a summary"""
|
|
|
320 |
# Create Gradio interface
|
321 |
def create_gradio_interface():
|
322 |
"""Create and launch the Gradio interface"""
|
323 |
+
with gr.Blocks(title="Document Summarizer") as app:
|
324 |
+
gr.Markdown("# Document Summarizer")
|
325 |
gr.Markdown("Upload a document or provide a URL to generate a summary.")
|
326 |
|
327 |
with gr.Row():
|
|
|
332 |
with gr.Row():
|
333 |
with gr.Column():
|
334 |
summary_type = gr.Radio(
|
335 |
+
choices=["extractive", "abstractive"],
|
336 |
+
value="abstractive",
|
337 |
label="Summary Type"
|
338 |
)
|
339 |
|
340 |
with gr.Row():
|
341 |
with gr.Column():
|
342 |
detail_level = gr.Radio(
|
343 |
+
choices=["low", "medium", "high"],
|
344 |
+
value="medium",
|
345 |
label="Level of Detail"
|
346 |
)
|
347 |
|
348 |
with gr.Column():
|
349 |
length = gr.Radio(
|
350 |
+
choices=["short", "medium", "long"],
|
351 |
+
value="medium",
|
352 |
label="Summary Length"
|
353 |
)
|
354 |
|