Spaces:

sairamn
/

text-summarization

Sleeping

App Files Community

sairamn committed on Aug 13, 2024

Commit

71eb4da

1 Parent(s): 4ab0d01

Updated version 2.0

Browse files

Files changed (1) hide show

app.py +17 -18

app.py CHANGED Viewed

@@ -2,10 +2,7 @@ import os
 import streamlit as st
 from transformers import BartTokenizer, TFBartForConditionalGeneration
-# Suppress TensorFlow logging for errors only
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
-# Load the model and tokenizer
 model_name = 'facebook-bart-large-cnn'
 tokenizer = BartTokenizer.from_pretrained(model_name)
 model = TFBartForConditionalGeneration.from_pretrained(model_name)
@@ -13,19 +10,18 @@ model = TFBartForConditionalGeneration.from_pretrained(model_name)
 def summarize(text, style):
     input_length = len(tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)[0])
-    # Calculate max_length based on the chosen style
-    if style == 'Accurate':
-        max_length = int(input_length * 0.3)  # Less than one-third
-        min_length = int(input_length * 0.2)
-        length_penalty = 1.0
     elif style == 'Precise':
-        max_length = int(input_length * 0.33)  # One-third
-        min_length = int(input_length * 0.25)
         length_penalty = 1.2
-    else:  # Normal
-        max_length = int(input_length * 0.5)  # Half the length
-        min_length = int(input_length * 0.4)
-        length_penalty = 1.5
     inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
     summary_ids = model.generate(
@@ -34,19 +30,20 @@ def summarize(text, style):
         min_length=min_length,
         length_penalty=length_penalty,
         num_beams=4,
         early_stopping=True
     )
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
     return summary
-# Streamlit app
 st.title('Text Summarizer')
 user_input = st.text_area("Enter text to summarize:", "")
-# Dropdown menu for summarization style
 summary_style = st.selectbox(
     'Choose summarization style:',
-    ('Accurate', 'Precise', 'Normal')
 )
 if st.button('Summarize'):
@@ -56,3 +53,5 @@ if st.button('Summarize'):
         st.write(summary)
     else:
         st.write("Please enter some text to summarize.")

 import streamlit as st
 from transformers import BartTokenizer, TFBartForConditionalGeneration
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 model_name = 'facebook-bart-large-cnn'
 tokenizer = BartTokenizer.from_pretrained(model_name)
 model = TFBartForConditionalGeneration.from_pretrained(model_name)
 def summarize(text, style):
     input_length = len(tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)[0])
+    if style == 'Normal':
+        max_length = int(input_length * 0.6)
+        min_length = int(input_length * 0.5)
+        length_penalty = 1.5
     elif style == 'Precise':
+        max_length = int(input_length * 0.45)
+        min_length = int(input_length * 0.35)
         length_penalty = 1.2
+    else:
+        max_length = int(input_length * 0.4)
+        min_length = int(input_length * 0.3)
+        length_penalty = 1.0
     inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
     summary_ids = model.generate(
         min_length=min_length,
         length_penalty=length_penalty,
         num_beams=4,
+        no_repeat_ngram_size=3,
         early_stopping=True
     )
     summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    if not summary.endswith(('.', '!', '?')):
+        summary += '.'
     return summary
 st.title('Text Summarizer')
 user_input = st.text_area("Enter text to summarize:", "")
 summary_style = st.selectbox(
     'Choose summarization style:',
+    ('Normal', 'Precise', 'Accurate')
 )
 if st.button('Summarize'):
         st.write(summary)
     else:
         st.write("Please enter some text to summarize.")
+# End of program 2.0