sairamn committed on
Commit
71eb4da
·
1 Parent(s): 4ab0d01

Updated version 2.0

Browse files
Files changed (1) hide show
  1. app.py +17 -18
app.py CHANGED
@@ -2,10 +2,7 @@ import os
2
  import streamlit as st
3
  from transformers import BartTokenizer, TFBartForConditionalGeneration
4
 
5
- # Suppress TensorFlow logging for errors only
6
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
7
-
8
- # Load the model and tokenizer
9
  model_name = 'facebook-bart-large-cnn'
10
  tokenizer = BartTokenizer.from_pretrained(model_name)
11
  model = TFBartForConditionalGeneration.from_pretrained(model_name)
@@ -13,19 +10,18 @@ model = TFBartForConditionalGeneration.from_pretrained(model_name)
13
  def summarize(text, style):
14
  input_length = len(tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)[0])
15
 
16
- # Calculate max_length based on the chosen style
17
- if style == 'Accurate':
18
- max_length = int(input_length * 0.3) # Less than one-third
19
- min_length = int(input_length * 0.2)
20
- length_penalty = 1.0
21
  elif style == 'Precise':
22
- max_length = int(input_length * 0.33) # One-third
23
- min_length = int(input_length * 0.25)
24
  length_penalty = 1.2
25
- else: # Normal
26
- max_length = int(input_length * 0.5) # Half the length
27
- min_length = int(input_length * 0.4)
28
- length_penalty = 1.5
29
 
30
  inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
31
  summary_ids = model.generate(
@@ -34,19 +30,20 @@ def summarize(text, style):
34
  min_length=min_length,
35
  length_penalty=length_penalty,
36
  num_beams=4,
 
37
  early_stopping=True
38
  )
39
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
 
 
 
40
  return summary
41
 
42
- # Streamlit app
43
  st.title('Text Summarizer')
44
  user_input = st.text_area("Enter text to summarize:", "")
45
-
46
- # Dropdown menu for summarization style
47
  summary_style = st.selectbox(
48
  'Choose summarization style:',
49
- ('Accurate', 'Precise', 'Normal')
50
  )
51
 
52
  if st.button('Summarize'):
@@ -56,3 +53,5 @@ if st.button('Summarize'):
56
  st.write(summary)
57
  else:
58
  st.write("Please enter some text to summarize.")
 
 
 
2
  import streamlit as st
3
  from transformers import BartTokenizer, TFBartForConditionalGeneration
4
 
 
5
  os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
 
6
  model_name = 'facebook-bart-large-cnn'
7
  tokenizer = BartTokenizer.from_pretrained(model_name)
8
  model = TFBartForConditionalGeneration.from_pretrained(model_name)
 
10
  def summarize(text, style):
11
  input_length = len(tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)[0])
12
 
13
+ if style == 'Normal':
14
+ max_length = int(input_length * 0.6)
15
+ min_length = int(input_length * 0.5)
16
+ length_penalty = 1.5
 
17
  elif style == 'Precise':
18
+ max_length = int(input_length * 0.45)
19
+ min_length = int(input_length * 0.35)
20
  length_penalty = 1.2
21
+ else:
22
+ max_length = int(input_length * 0.4)
23
+ min_length = int(input_length * 0.3)
24
+ length_penalty = 1.0
25
 
26
  inputs = tokenizer.encode(text, return_tensors='tf', max_length=1024, truncation=True)
27
  summary_ids = model.generate(
 
30
  min_length=min_length,
31
  length_penalty=length_penalty,
32
  num_beams=4,
33
+ no_repeat_ngram_size=3,
34
  early_stopping=True
35
  )
36
  summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
37
+
38
+ if not summary.endswith(('.', '!', '?')):
39
+ summary += '.'
40
  return summary
41
 
 
42
  st.title('Text Summarizer')
43
  user_input = st.text_area("Enter text to summarize:", "")
 
 
44
  summary_style = st.selectbox(
45
  'Choose summarization style:',
46
+ ('Normal', 'Precise', 'Accurate')
47
  )
48
 
49
  if st.button('Summarize'):
 
53
  st.write(summary)
54
  else:
55
  st.write("Please enter some text to summarize.")
56
+
57
+ # End of program 2.0