Effah Kofi Boakye Yiadom committed on
Commit
83f6764
·
verified ·
1 Parent(s): 6ebbc78

Upload 3 files

Browse files
Files changed (3) hide show
  1. requirements.txt +2 -0
  2. streamlit.py +20 -0
  3. summarize.py +72 -0
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
openai
streamlit
# Imported directly by summarize.py; without these the app fails at import time.
# NOTE(review): summarize.py uses the legacy `together.Complete.create` /
# `together.api_key` interface - confirm which `together` release still provides
# it and pin that version here.
together
requests
streamlit.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from summarize import load_text, summarize_text
3
+
4
# Page header for the summarizer UI.
st.title("Text Summarizer with Hugging Face")

# Ask the user for the page to summarize.
url = st.text_input("Enter the URL of the article or blog post:")

# Streamlit re-runs this script on every interaction; the button returns a
# truthy value only on the run triggered by the click.
summarize_clicked = st.button("Summarize")

if summarize_clicked and not url:
    st.error("Please enter a valid URL.")
elif summarize_clicked:
    article = load_text(url)
    if not article:
        # load_text returned None or an empty string.
        st.error("Failed to load text from the URL.")
    else:
        result = summarize_text(article)
        st.subheader("Summary:")
        st.write(result["output_text"])
summarize.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import streamlit as st
4
+ import together
5
+ import requests
6
+
7
+ # ✅ Load Together API key from environment variable
8
# SECURITY: a literal API key used to be committed on this line. Anything that
# has been pushed to a repository must be treated as leaked - revoke/rotate that
# key and supply the replacement through the environment only.
#
# TOGETHERAI_API_KEY is the variable name this file has always referred to;
# TOGETHER_API_KEY is accepted as a fallback.
# NOTE(review): TOGETHER_API_KEY is believed to be the variable the `together`
# client reads by default - confirm against its documentation.
together.api_key = os.getenv("TOGETHERAI_API_KEY") or os.getenv("TOGETHER_API_KEY")
12
+
13
+ # ✅ Function to load text from a URL
14
def _html_to_text(markup):
    """Return the visible text of an HTML document as one whitespace-normalized string."""
    # Local import keeps this block self-contained (no file-level import change needed).
    from html.parser import HTMLParser

    class _VisibleText(HTMLParser):
        # Elements whose character data is never rendered to the reader.
        _SKIPPED = {"script", "style", "noscript", "template"}

        def __init__(self):
            super().__init__(convert_charrefs=True)
            self.parts = []
            self._skip_depth = 0

        def handle_starttag(self, tag, attrs):
            if tag in self._SKIPPED:
                self._skip_depth += 1

        def handle_endtag(self, tag):
            if tag in self._SKIPPED and self._skip_depth:
                self._skip_depth -= 1

        def handle_data(self, data):
            if not self._skip_depth:
                self.parts.append(data)

    parser = _VisibleText()
    parser.feed(markup)
    parser.close()
    # Collapse every run of whitespace (including newlines) to a single space.
    return " ".join(" ".join(parser.parts).split())


def load_text(url, timeout=15):
    """Fetch a URL and return up to 20,000 characters of its text.

    HTML responses are reduced to their visible text (tags, scripts and styles
    removed) so the character budget is spent on article content rather than
    markup. Other responses are returned as-is, truncated.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The extracted text (possibly empty), or ``None`` if the download or
        parsing failed; in that case the error is shown via ``st.error``.
    """
    try:
        headers = {"User-Agent": os.getenv("USER_AGENT", "Mozilla/5.0")}
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()

        body = response.text
        content_type = response.headers.get("Content-Type", "").lower()
        # Some servers omit or mislabel Content-Type, so also sniff the payload.
        if "html" in content_type or "<html" in body[:1000].lower():
            body = _html_to_text(body)
        return body[:20000]
    except Exception as e:
        # UI boundary: report any failure to the user instead of crashing the app.
        st.error(f"Error loading URL: {e}")
        return None
24
+
25
+ # ✅ Function to summarize text with richer, more detailed output
26
def _extract_completion_text(response):
    """Return the generated text from a Together completion response dict.

    Accepts the text directly under ``"output"``, a ``"choices"`` list nested
    under ``"output"``, or a top-level ``"choices"`` list.

    Raises:
        KeyError: If none of the known layouts matches.
    """
    if isinstance(response, dict):
        for candidate in (response.get("output"), response):
            if isinstance(candidate, str):
                return candidate
            if isinstance(candidate, dict) and candidate.get("choices"):
                return candidate["choices"][0]["text"]
    raise KeyError("Unexpected API response format.")


def _is_rate_limit_error(error):
    """Return True if the exception message looks like a rate-limit rejection."""
    message = str(error).lower()
    return any(
        marker in message
        for marker in ("rate_limit", "rate limit", "too many requests")
    )


def summarize_text(text, max_retries=3, retry_delay=5):
    """Summarize text with the Together AI completion API, retrying on rate limits.

    Args:
        text: The text to summarize; only the first 20,000 characters are sent.
        max_retries: Maximum number of API attempts.
        retry_delay: Seconds to sleep between attempts after a rate-limit error.

    Returns:
        A dict with a single key ``"output_text"`` holding either the summary
        or a human-readable failure message. Errors are reported through
        ``st.warning`` / ``st.error`` and are never raised to the caller.
    """
    # Nothing to summarize: answer immediately instead of spending an API call.
    if not text or not text.strip():
        return {"output_text": "No text provided to summarize."}

    text = text[:20000]  # Cap the prompt size.

    prompt = (
        "\n"
        "You are an expert summarizer tasked with creating a highly detailed, comprehensive, and well-structured summary of the text provided.\n"
        "Provide a summary in 20-35 sentences that thoroughly captures the main points, key details, significant insights, and important examples or arguments presented in the text.\n"
        "Ensure the summary is informative, coherent, and rich in content, avoiding vague or overly simplistic statements.\n"
        "Include context where relevant and aim to give a complete picture of the text’s purpose and findings.\n"
        "\n"
        f"TEXT: {text}\n"
        "\n"
        "SUMMARY:\n"
    )

    for attempt in range(max_retries):
        try:
            response = together.Complete.create(
                model="mistralai/Mixtral-8x7B-Instruct-v0.1",
                prompt=prompt,
                max_tokens=600,  # Upper bound on the length of the generated summary.
                temperature=0.3,  # Low temperature for focused, coherent output.
            )
            return {"output_text": _extract_completion_text(response).strip()}
        except Exception as e:
            # UI boundary: every failure is surfaced to the user, never raised.
            if _is_rate_limit_error(e) and attempt < max_retries - 1:
                st.warning(f"Attempt {attempt + 1}/{max_retries}: Rate limit exceeded. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
            else:
                st.error(f"❌ Error during summarization: {e}")
                return {"output_text": "Summarization failed. Try again later."}

    # Reached only when no attempt was made (max_retries <= 0).
    return {"output_text": "Summarization failed after multiple attempts."}