Spaces:
Sleeping
Sleeping
Effah Kofi Boakye Yiadom
committed on
Upload 3 files
Browse files- requirements.txt +2 -0
- streamlit.py +20 -0
- summarize.py +72 -0
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
openai
|
2 |
+
streamlit
|
streamlit.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from summarize import load_text, summarize_text
|
3 |
+
|
4 |
+
# Streamlit app: asks for a URL, fetches its content with load_text() and
# shows the summary produced by summarize_text().
st.title("Text Summarizer with Hugging Face")

# User input for URL. Surrounding whitespace (common when pasting) is removed
# so that a blank-looking entry is treated as empty instead of being fetched.
url = st.text_input("Enter the URL of the article or blog post:").strip()

if st.button("Summarize"):
    if not url:
        st.error("Please enter a valid URL.")
    else:
        text = load_text(url)
        if text:
            # summarize_text() always returns a dict with an "output_text" key,
            # including on failure, so the lookup below is safe.
            summary = summarize_text(text)
            st.subheader("Summary:")
            st.write(summary["output_text"])
        else:
            st.error("Failed to load text from the URL.")
|
summarize.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import time
|
3 |
+
import streamlit as st
|
4 |
+
import together
|
5 |
+
import requests
|
6 |
+
|
7 |
+
# Read the Together AI API key from the environment instead of embedding it in
# source control. The previous code compared os.getenv(...) to a literal with
# `==` and discarded the result, so the environment variable was never used.
# SECURITY: a literal key used to be committed on these lines; any key that
# was published this way should be treated as leaked and rotated.
together.api_key = os.getenv("TOGETHERAI_API_KEY")
|
12 |
+
|
13 |
+
# ✅ Function to load text from a URL
|
14 |
+
def load_text(url, timeout=10, max_chars=20000):
    """Fetch the body of ``url`` and return at most ``max_chars`` characters.

    The response body is returned as received (for an HTML page this includes
    the markup); no text extraction is performed here.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout the request could block indefinitely on a stalled server.
        max_chars: Maximum number of characters of the body to return.

    Returns:
        The (possibly truncated) response text, or ``None`` when the request
        fails. On failure the error is also displayed with ``st.error``.
    """
    # The User-Agent can be overridden through the USER_AGENT environment
    # variable; otherwise a generic browser-like value is sent.
    headers = {"User-Agent": os.getenv("USER_AGENT", "Mozilla/5.0")}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions.
        return response.text[:max_chars]
    except Exception as e:  # UI boundary: report any failure instead of crashing.
        st.error(f"Error loading URL: {e}")
        return None
|
24 |
+
|
25 |
+
# ✅ Function to summarize text with richer, more detailed output
|
26 |
+
def _extract_summary_text(response):
    """Return the stripped completion text found in a Together AI response.

    Three shapes are accepted:
      * ``{"output": "<text>"}``
      * ``{"output": {"choices": [{"text": "<text>"}]}}``
        (NOTE(review): believed to be what the legacy ``together.Complete``
        client returns -- confirm against the installed SDK version)
      * ``{"choices": [{"text": "<text>"}]}``

    Raises:
        KeyError: If ``response`` matches none of the shapes above.
    """
    if not isinstance(response, dict):
        raise KeyError("Unexpected API response format.")

    # Look inside "output" first, then fall back to the top-level mapping.
    for container in (response.get("output"), response):
        if isinstance(container, str):
            return container.strip()
        if isinstance(container, dict) and container.get("choices"):
            return container["choices"][0]["text"].strip()

    raise KeyError("Unexpected API response format.")


def summarize_text(text, max_retries=3, retry_delay=5):
    """Summarize ``text`` with the Together AI completion API.

    Args:
        text: Text to summarize. Only the first 20,000 characters are sent.
        max_retries: Maximum number of API attempts.
        retry_delay: Seconds to sleep between attempts after a rate-limit
            error.

    Returns:
        A dict with a single ``"output_text"`` key. It holds the summary on
        success, or a human-readable failure message otherwise; this function
        does not raise for API errors.
    """
    # Nothing to summarize: avoid slicing None and avoid a pointless API call.
    if not text or not text.strip():
        return {"output_text": "No text provided to summarize."}

    text = text[:20000]  # Input limit of 20,000 characters.

    # Prompt asking for a long, detailed summary of the supplied text.
    prompt = f"""
You are an expert summarizer tasked with creating a highly detailed, comprehensive, and well-structured summary of the text provided.
Provide a summary in 20-35 sentences that thoroughly captures the main points, key details, significant insights, and important examples or arguments presented in the text.
Ensure the summary is informative, coherent, and rich in content, avoiding vague or overly simplistic statements.
Include context where relevant and aim to give a complete picture of the text’s purpose and findings.

TEXT: {text}

SUMMARY:
"""

    for attempt in range(max_retries):
        try:
            response = together.Complete.create(
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                prompt=prompt,
                max_tokens=600,  # Room for a longer, detailed summary.
                temperature=0.3,  # Low temperature for focus and coherence.
            )
            return {"output_text": _extract_summary_text(response)}
        except Exception as e:
            # Rate limiting is detected from the error message; only that
            # case is retried, and only while attempts remain.
            rate_limited = "rate_limit_exceeded" in str(e)
            if rate_limited and attempt < max_retries - 1:
                st.warning(f"Attempt {attempt + 1}/{max_retries}: Rate limit exceeded. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                st.error(f"❌ Error during summarization: {e}")
                return {"output_text": "Summarization failed. Try again later."}

    # Reached only when the loop body never ran (max_retries <= 0).
    return {"output_text": "Summarization failed after multiple attempts."}
|