Spaces:

sairamn
/

text-summarization

Sleeping

App Files Files Community

sairamn committed on Aug 13, 2024

Commit

34f0c1d

1 Parent(s): c18799d

Updated version 1.0

Browse files

Files changed (1) hide show

app.py +42 -5

app.py CHANGED Viewed

@@ -1,21 +1,58 @@
 import streamlit as st
 from transformers import BartTokenizer, TFBartForConditionalGeneration
-model_name = 'facebook/bart-large-cnn'
 tokenizer = BartTokenizer.from_pretrained(model_name)
 model = TFBartForConditionalGeneration.from_pretrained(model_name)
-def summarize(text):
     inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
-    summary_ids = model.generate(inputs, max_length=150, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
-    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
     return summary
 st.title('Text Summarizer')
 user_input = st.text_area("Enter text to summarize:", "")
 if st.button('Summarize'):
     if user_input:
-        summary = summarize(user_input)
         st.write(summary)
     else:
         st.write("Please enter some text to summarize.")

+import os
+# Hide TensorFlow's native INFO and WARNING output (level '2' leaves errors).
+# This must be set before the transformers import below pulls TensorFlow in;
+# set any later, the import-time log lines have already been printed.
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 import streamlit as st
 from transformers import BartTokenizer, TFBartForConditionalGeneration
+# Load the model and tokenizer.
+# Hub checkpoint ids take the form '<namespace>/<name>'.
+model_name = 'facebook/bart-large-cnn'
 tokenizer = BartTokenizer.from_pretrained(model_name)
 model = TFBartForConditionalGeneration.from_pretrained(model_name)
+def summarize(text, style):
+    """Summarize *text* with the loaded BART model, sized by *style*.
+
+    Args:
+        text: Source text. It is tokenized with truncation at 1024 tokens.
+        style: 'Accurate', 'Precise', or any other value (treated as
+            'Normal'). Selects the length ratios and length penalty.
+
+    Returns:
+        The decoded summary string, with special tokens skipped.
+    """
+    # (max ratio, min ratio, length penalty), ratios relative to input tokens.
+    style_settings = {
+        'Accurate': (0.3, 0.2, 1.0),    # Less than one-third
+        'Precise': (0.33, 0.25, 1.2),   # One-third
+    }
+    normal_settings = (0.5, 0.4, 1.5)   # Half the length
+    max_ratio, min_ratio, length_penalty = style_settings.get(style, normal_settings)
+    # Tokenize once: the same tensor gives the length and feeds generate().
     inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
+    input_length = len(inputs[0])
+    # For very short inputs the ratios truncate to 0, which leaves generate()
+    # no room to emit anything. Keep a small floor (8 tokens is a judgement
+    # call, not a library requirement) and keep min_length below max_length.
+    max_length = max(int(input_length * max_ratio), 8)
+    min_length = min(int(input_length * min_ratio), max_length - 1)
+    summary_ids = model.generate(
+        inputs,
+        max_length=max_length,
+        min_length=min_length,
+        length_penalty=length_penalty,
+        num_beams=4,
+        early_stopping=True
+    )
+    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
     return summary
+# Streamlit app: title, input box, style picker, then the action button.
 st.title('Text Summarizer')
 user_input = st.text_area("Enter text to summarize:", "")
+# Dropdown menu for summarization style; the chosen label is passed to
+# summarize() unchanged.
+summary_style = st.selectbox(
+    'Choose summarization style:',
+    ('Accurate', 'Precise', 'Normal')
+)
 if st.button('Summarize'):
+    # Treat whitespace-only input as empty so blank text never reaches the
+    # model; the text itself is still passed through unmodified.
+    if user_input.strip():
+        summary = summarize(user_input, summary_style)
+        st.write("Summary:")
         st.write(summary)
     else:
         st.write("Please enter some text to summarize.")