solewarrior committed on
Commit
c279525
·
verified ·
1 Parent(s): f567e4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -69
app.py CHANGED
@@ -2,9 +2,8 @@ import streamlit as st
2
  from transformers import pipeline
3
  import PyPDF2
4
  import docx
5
- import textwrap
6
 
7
- # Streamlit Page Config
8
  st.set_page_config(
9
  page_title="TextSphere",
10
  page_icon="🤖",
@@ -12,7 +11,6 @@ st.set_page_config(
12
  initial_sidebar_state="expanded"
13
  )
14
 
15
- # Footer
16
  st.markdown("""
17
  <style>
18
  .footer {
@@ -30,106 +28,98 @@ st.markdown("""
30
  </div>
31
  """, unsafe_allow_html=True)
32
 
33
- # Load Model
34
  @st.cache_resource
35
  def load_models():
36
  try:
37
- summarization_model = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
38
  except Exception as e:
39
- raise RuntimeError(f"Failed to load model: {str(e)}")
40
- return summarization_model
41
 
42
- summarization_model = load_models()
43
 
44
- # Function to Extract Text from PDF
45
- def extract_text_from_pdf(uploaded_pdf):
46
  try:
47
- pdf_reader = PyPDF2.PdfReader(uploaded_pdf)
48
- pdf_text = ""
49
  for page in pdf_reader.pages:
50
- text = page.extract_text()
51
- if text:
52
- pdf_text += text + "\n"
53
- if not pdf_text.strip():
54
- st.error("No text found in the PDF.")
55
- return None
56
- return pdf_text
57
  except Exception as e:
58
  st.error(f"Error reading the PDF: {e}")
59
  return None
60
 
61
- # Function to Extract Text from TXT
62
- def extract_text_from_txt(uploaded_txt):
63
  try:
64
- return uploaded_txt.read().decode("utf-8").strip()
 
65
  except Exception as e:
66
- st.error(f"Error reading the TXT file: {e}")
67
  return None
68
 
69
- # Function to Extract Text from DOCX
70
- def extract_text_from_docx(uploaded_docx):
71
  try:
72
- doc = docx.Document(uploaded_docx)
73
- return "\n".join([para.text for para in doc.paragraphs]).strip()
74
  except Exception as e:
75
- st.error(f"Error reading the DOCX file: {e}")
76
  return None
77
 
78
- # Function to Split Text into 1024-Token Chunks
79
- def chunk_text(text, max_tokens=1024):
80
- return textwrap.wrap(text, width=max_tokens)
 
 
 
 
 
 
 
 
 
 
81
 
82
- # Sidebar for Task Selection (Default: Text Summarization)
83
  st.sidebar.title("AI Solutions")
84
  option = st.sidebar.selectbox(
85
  "Choose a task",
86
  ["Text Summarization", "Question Answering", "Text Classification", "Language Translation"],
87
- index=0 # Default to "Text Summarization"
88
  )
89
 
90
- # Text Summarization Task
91
  if option == "Text Summarization":
92
- st.title("📄 Text Summarization")
93
- st.markdown("<h4 style='font-size: 20px;'>- because who needs to read the whole document? 🥵</h4>", unsafe_allow_html=True)
94
-
95
- uploaded_file = st.file_uploader(
96
- "Upload a document (PDF, TXT, DOCX) - *Note: Processes only 1024 tokens per chunk*",
97
- type=["pdf", "txt", "docx"]
98
- )
99
-
100
- text_to_summarize = ""
101
 
102
  if uploaded_file:
103
  file_type = uploaded_file.name.split(".")[-1].lower()
104
-
105
- if file_type == "pdf":
106
- text_to_summarize = extract_text_from_pdf(uploaded_file)
107
- elif file_type == "txt":
108
- text_to_summarize = extract_text_from_txt(uploaded_file)
109
- elif file_type == "docx":
110
- text_to_summarize = extract_text_from_docx(uploaded_file)
111
- else:
112
- st.error("Unsupported file format.")
113
 
114
  if st.button("Summarize"):
115
- with st.spinner('Summarizing...'):
116
  try:
117
  if text_to_summarize:
118
- chunks = chunk_text(text_to_summarize, max_tokens=1024)
119
- summaries = []
120
-
121
- for chunk in chunks:
122
- input_length = len(chunk.split()) # Count words in the chunk
123
- max_summary_length = max(50, input_length // 2) # Dynamically adjust max_length
124
-
125
- summary = summarization_model(chunk, max_length=max_summary_length, min_length=50, do_sample=False)
126
- summaries.append(summary[0]['summary_text'])
127
-
128
- final_summary = " ".join(summaries) # Combine all chunk summaries
129
-
130
- st.write("### Summary:")
131
- st.write(final_summary)
132
  else:
133
- st.error("Please upload a document first.")
134
  except Exception as e:
135
- st.error(f"Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import pipeline
3
  import PyPDF2
4
  import docx
5
+ from io import BytesIO
6
 
 
7
  st.set_page_config(
8
  page_title="TextSphere",
9
  page_icon="🤖",
 
11
  initial_sidebar_state="expanded"
12
  )
13
 
 
14
  st.markdown("""
15
  <style>
16
  .footer {
 
28
  </div>
29
  """, unsafe_allow_html=True)
30
 
 
31
  @st.cache_resource
32
  def load_models():
33
  try:
34
+ summarization_model = pipeline(
35
+ "summarization",
36
+ model="facebook/bart-large-cnn"
37
+ )
38
  except Exception as e:
39
+ raise RuntimeError(f"Failed to load models: {str(e)}")
 
40
 
41
+ return summarization_model
42
 
43
+ def extract_text_from_pdf(uploaded_file):
 
44
  try:
45
+ pdf_reader = PyPDF2.PdfReader(uploaded_file)
46
+ text = ""
47
  for page in pdf_reader.pages:
48
+ text += page.extract_text() or "" # Ensure we avoid NoneType issues
49
+ return text.strip()
 
 
 
 
 
50
  except Exception as e:
51
  st.error(f"Error reading the PDF: {e}")
52
  return None
53
 
54
+ def extract_text_from_docx(uploaded_file):
 
55
  try:
56
+ doc = docx.Document(uploaded_file)
57
+ return "\n".join([para.text for para in doc.paragraphs])
58
  except Exception as e:
59
+ st.error(f"Error reading the DOCX: {e}")
60
  return None
61
 
62
+ def extract_text_from_txt(uploaded_file):
 
63
  try:
64
+ return uploaded_file.read().decode("utf-8")
 
65
  except Exception as e:
66
+ st.error(f"Error reading the TXT file: {e}")
67
  return None
68
 
69
+ def extract_text_from_file(uploaded_file, file_type):
70
+ if file_type == "pdf":
71
+ return extract_text_from_pdf(uploaded_file)
72
+ elif file_type == "docx":
73
+ return extract_text_from_docx(uploaded_file)
74
+ elif file_type == "txt":
75
+ return extract_text_from_txt(uploaded_file)
76
+ return None
77
+
78
+ try:
79
+ summarization_model = load_models()
80
+ except Exception as e:
81
+ st.error(f"An error occurred while loading models: {e}")
82
 
 
83
  st.sidebar.title("AI Solutions")
84
  option = st.sidebar.selectbox(
85
  "Choose a task",
86
  ["Text Summarization", "Question Answering", "Text Classification", "Language Translation"],
87
+ index=0 # Makes Text Summarization the default
88
  )
89
 
 
90
  if option == "Text Summarization":
91
+ st.title("Text Summarization")
92
+ st.markdown("<h4 style='font-size: 20px;'>- because who needs to read the whole document, anyway? 🥵</h4>", unsafe_allow_html=True)
93
+
94
+ uploaded_file = st.file_uploader("Upload a document (PDF, DOCX, TXT) [Limit: 1024 Tokens]", type=["pdf", "docx", "txt"])
95
+
96
+ text_to_summarize = st.text_area("Enter text to summarize (or leave empty if uploading a file):")
 
 
 
97
 
98
  if uploaded_file:
99
  file_type = uploaded_file.name.split(".")[-1].lower()
100
+ text_to_summarize = extract_text_from_file(uploaded_file, file_type)
 
 
 
 
 
 
 
 
101
 
102
  if st.button("Summarize"):
103
+ with st.spinner('Summarizing text...'):
104
  try:
105
  if text_to_summarize:
106
+ summary = summarization_model(text_to_summarize[:1024], max_length=300, min_length=50, do_sample=False)
107
+ st.write("Summary:", summary[0]['summary_text'])
108
+ st.balloons()
 
 
 
 
 
 
 
 
 
 
 
109
  else:
110
+ st.error("Please enter text or upload a document for summarization.")
111
  except Exception as e:
112
+ st.error(f"An error occurred: {e}")
113
+
114
+ elif option == "Question Answering":
115
+ st.title("Question Answering")
116
+ st.write("Coming soon... 🚀")
117
+
118
+ elif option == "Text Classification":
119
+ st.title("Text Classification")
120
+ st.write("Coming soon... 🚀")
121
+
122
+ elif option == "Language Translation":
123
+ st.title("Language Translation")
124
+ st.write("Coming soon... 🚀")
125
+