Surbhi committed on
Commit
e1baa80
·
1 Parent(s): 21de301

Using BERT and GPT-2

Browse files
Files changed (2) hide show
  1. app.py +55 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: KeyBERT key phrases plus BART and GPT-2 summaries.

The page takes free text, extracts key phrases with KeyBERT, produces one
summary with the ``facebook/bart-large-cnn`` summarization pipeline and one
with plain GPT-2 prompted by ``"Summarize: "``, then plots the word count of
both summaries as a bar chart.
"""

import streamlit as st
import torch
from transformers import pipeline, GPT2LMHeadModel, GPT2Tokenizer
from keybert import KeyBERT
import matplotlib.pyplot as plt

# The GPT-2 prompt is truncated to this many tokens before generation.
GPT2_MAX_PROMPT_TOKENS = 512
# Budget for the generated continuation only (the prompt is not counted).
GPT2_MAX_NEW_TOKENS = 150


@st.cache_resource
def load_models():
    """Load every model once and reuse it across Streamlit reruns.

    Streamlit re-executes this script on each widget interaction; without
    caching, all four objects would be rebuilt on every button click.

    Returns:
        Tuple ``(kw_model, summarizer, gpt2_model, gpt2_tokenizer)``.
    """
    keyword_model = KeyBERT("sentence-transformers/paraphrase-MiniLM-L6-v2")
    bart_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
    lm = GPT2LMHeadModel.from_pretrained("gpt2")
    lm.eval()  # inference only
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    return keyword_model, bart_pipeline, lm, tokenizer


# Load models
kw_model, summarizer, gpt2_model, gpt2_tokenizer = load_models()


def _extract_key_points(text):
    """Return the top five KeyBERT key phrases of *text*, comma-separated."""
    key_points = kw_model.extract_keywords(
        text,
        keyphrase_ngram_range=(1, 2),
        stop_words="english",
        top_n=5,
    )
    # Each entry is indexed as (phrase, score); only the phrase is shown.
    return ", ".join(kp[0] for kp in key_points)


def _summarize_with_bart(text):
    """Return the BART summary of *text*.

    ``truncation=True`` asks the pipeline's tokenizer to cut over-long input
    instead of passing it through untruncated.
    """
    result = summarizer(
        text,
        max_length=150,
        min_length=50,
        do_sample=False,
        truncation=True,
    )
    return result[0]["summary_text"]


def _summarize_with_gpt2(text):
    """Return GPT-2's continuation of the prompt ``"Summarize: " + text``.

    Only the newly generated tokens are decoded, so the returned string does
    not repeat the prompt.
    """
    encoded = gpt2_tokenizer(
        "Summarize: " + text,
        return_tensors="pt",
        max_length=GPT2_MAX_PROMPT_TOKENS,
        truncation=True,
    )
    prompt_ids = encoded["input_ids"]
    with torch.no_grad():
        output_ids = gpt2_model.generate(
            prompt_ids,
            attention_mask=encoded["attention_mask"],
            # max_new_tokens, not max_length: max_length also counts the
            # prompt, which alone may be longer than 150 tokens.
            max_new_tokens=GPT2_MAX_NEW_TOKENS,
            num_return_sequences=1,
            no_repeat_ngram_size=2,
            # GPT-2 defines no pad token; reuse EOS for padding.
            pad_token_id=gpt2_tokenizer.eos_token_id,
        )
    # Drop the echoed prompt tokens from the front of the output.
    generated_ids = output_ids[0][prompt_ids.shape[1]:]
    return gpt2_tokenizer.decode(generated_ids, skip_special_tokens=True).strip()


def _plot_summary_lengths(bart_length, gpt2_length):
    """Render a bar chart comparing the two summary word counts."""
    fig, ax = plt.subplots()
    ax.bar(["BERT (BART)", "GPT-2"], [bart_length, gpt2_length], color=["blue", "red"])
    ax.set_ylabel("Word Count")
    ax.set_title("Comparison of Summary Lengths")

    st.pyplot(fig)
    # Close the figure so figures do not accumulate across reruns.
    plt.close(fig)


st.title("🔍 AI Summarizer: BERT + GPT-2")
st.write("Extract key points with **KeyBERT**, summarize with **BERT (BART)** and **GPT-2**, and compare their accuracy.")

# User input
text = st.text_area("Enter text to summarize:")

if st.button("Summarize"):
    if not text.strip():
        st.warning("Please enter some text!")
    else:
        extracted_points = _extract_key_points(text)
        bart_summary = _summarize_with_bart(text)
        gpt2_summary = _summarize_with_gpt2(text)

        # Display results
        st.subheader("🔑 Key Points")
        st.write(extracted_points)

        st.subheader("📖 Summary (BERT - BART)")
        st.write(bart_summary)

        st.subheader("🤖 Summary (GPT-2)")
        st.write(gpt2_summary)

        # Performance comparison (whitespace-separated word count)
        _plot_summary_lengths(len(bart_summary.split()), len(gpt2_summary.split()))
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ keybert
5
+ sentence-transformers
6
+ matplotlib