ar08 committed on
Commit
d712281
·
verified ·
1 Parent(s): 0dec546

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -13
app.py CHANGED
@@ -1,30 +1,36 @@
1
- from transformers import pipeline
2
  import gradio as gr
3
 
4
- # Use the pipeline with optimized settings (no sampling, smaller batch)
 
 
 
5
  summarizer = pipeline(
6
  "summarization",
7
  model="sshleifer/distilbart-cnn-6-6",
8
- # force CPU (if not using GPU)
9
  )
10
 
11
- # Function with higher max length and lower min length for longer summaries
 
 
 
 
 
12
  def summarize_article(text):
 
13
  summary = summarizer(
14
- text,
15
- max_length=250, # ✨ allow longer output
16
- min_length=100, # 🚨 ensure decent length
17
- do_sample=False, # ⚡ makes it deterministic and faster
18
  )
19
  return summary[0]['summary_text']
20
 
21
- # Sample input
22
- default_article = """New York (CNN)When Liana Barrientos was 23 years old, she got married...""" # [TRIMMED for brevity]
23
-
24
- # Generate summary once to display as default
25
  default_summary = summarize_article(default_article)
26
 
27
- # Gradio interface (read-only)
28
  iface = gr.Interface(
29
  fn=summarize_article,
30
  inputs=gr.Textbox(lines=20, label="Article (Read Only)", value=default_article, interactive=False),
 
1
+ from transformers import pipeline, AutoTokenizer
2
  import gradio as gr
3
 
4
+ # Load tokenizer for truncation
5
+ tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-6-6")
6
+
7
+ # Load summarization pipeline
8
  summarizer = pipeline(
9
  "summarization",
10
  model="sshleifer/distilbart-cnn-6-6",
 
11
  )
12
 
13
+ # Truncation helper 🫶
14
+ def safe_truncate(text, max_tokens=1024):
15
+ tokens = tokenizer.encode(text, truncation=True, max_length=max_tokens)
16
+ return tokenizer.decode(tokens, skip_special_tokens=True)
17
+
18
+ # Summary function with truncation applied
19
  def summarize_article(text):
20
+ short_text = safe_truncate(text) # prevent model from breaking!
21
  summary = summarizer(
22
+ short_text,
23
+ max_length=250,
24
+ min_length=100,
25
+ do_sample=False,
26
  )
27
  return summary[0]['summary_text']
28
 
29
+ # Default example
30
+ default_article = """New York (CNN)When Liana Barrientos was 23 years old, she got married...""" # [shortened for demo]
 
 
31
  default_summary = summarize_article(default_article)
32
 
33
+ # Gradio Interface
34
  iface = gr.Interface(
35
  fn=summarize_article,
36
  inputs=gr.Textbox(lines=20, label="Article (Read Only)", value=default_article, interactive=False),