Update app.py
app.py
CHANGED
@@ -7,17 +7,11 @@ from newspaper import Article
 import transformers
 from transformers import GPT2Tokenizer

-#
+# Silence transformers backend warnings
 transformers.logging.set_verbosity_error()

 def text_prompt(request: str, page_url: str, api_key: str, temp: float):
-    """
-    Fetches the article at page_url, extracts text with newspaper3k,
-    trims it to ~1800 GPT-2 tokens, and sends it along with 'request'
-    to OpenAI's Completion API.
-    Returns: (full_input_text, ai_response, total_tokens_used_or_error)
-    """
-    # 1) Fetch and parse the page
+    # Fetch & parse
     try:
         headers = {'User-Agent': 'Chrome/83.0.4103.106'}
         resp = requests.get(page_url, headers=headers, timeout=10)
@@ -26,87 +20,63 @@ def text_prompt(request: str, page_url: str, api_key: str, temp: float):
         page.set_html(resp.text)
         page.parse()
     except Exception as e:
-        return "", f"
+        return "", f"Error fetching URL: {e}", ""

-    #
+    # Tokenize & truncate to ~1800 GPT-2 tokens
     tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
     sentences = page.text.split('.')
-    tokens_accum = []
-
-    for sentence in sentences:
-        toks = tokenizer.tokenize(sentence + ".")
+    tokens_accum, truncated_text = [], ""
+    for sent in sentences:
+        toks = tokenizer.tokenize(sent + ".")
     if len(tokens_accum) + len(toks) > 1800:
             break
-        tokens_accum
-        truncated_text +=
-    truncated_text = truncated_text.strip()
+        tokens_accum += toks
+        truncated_text += sent + ". "
     num_input_tokens = len(tokens_accum)

-    # 3) If there's enough content, call OpenAI
     if num_input_tokens < 10:
-        return page.text, f"
+        return page.text, f"Not enough text ({num_input_tokens} tokens)", num_input_tokens

+    # Call GPT-4o mini via ChatCompletion
     openai.api_key = api_key
     try:
-
-
-
-
+        chat_resp = openai.ChatCompletion.create(
+            model="gpt-4o-mini",
+            messages=[
+                {"role": "system", "content": "You are a helpful assistant."},
+                {"role": "user", "content": request + "\n\n>>\n" + truncated_text + "\n<<"}
+            ],
             temperature=temp,
+            max_tokens=2048,
             top_p=0.9,
         )
-        ai_text
-        total_tokens =
-        # Collapse whitespace
-        ai_text = re.sub(r'\s+', ' ', ai_text)
+        ai_text = re.sub(r'\s+', ' ', chat_resp.choices[0].message.content).strip()
+        total_tokens = chat_resp.usage.total_tokens
         return page.text, ai_text, total_tokens
-    except Exception as e:
-        return page.text, f"--- OpenAI API error: {e} ---", num_input_tokens

+    except Exception as e:
+        return page.text, f"OpenAI API error: {e}", num_input_tokens

 if __name__ == "__main__":
-    # Build the Gradio interface
     iface = gr.Interface(
         fn=text_prompt,
         inputs=[
-            gr.Textbox(
-            gr.Textbox(
-            gr.Textbox(
-            gr.Slider(0.0,
+            gr.Textbox(label="Prompt:"),
+            gr.Textbox(label="URL to parse:"),
+            gr.Textbox(label="API-Key:", type="password"),
+            gr.Slider(0.0,1.0,value=0.3, label="Temperature:")
         ],
         outputs=[
             gr.Textbox(label="Input Text:"),
             gr.Textbox(label="AI Output:"),
             gr.Textbox(label="Total Tokens:")
         ],
-        examples=[
-            [
-                "Summarize the following text as a list:",
-                "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/",
-                "", 0.3
-            ],
-            [
-                "Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:",
-                "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html",
-                "", 0.7
-            ],
-            [
-                "Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):",
-                "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/",
-                "", 0.3
-            ]
-        ],
-        title="ChatGPT / GPT-3 Info Extraction from URL",
-        description=(
-            "Fetches text from a URL using newspaper3k, trims it to ~1800 GPT-2 tokens, "
-            "then queries OpenAI's text-davinci-003. Enter your prompt, URL, API key, and temperature."
-        )
+        title="GPT-4o-mini URL Summarizer",
+        description="Uses GPT-4o-mini via ChatCompletion to summarize webpage text."
     )

-    # Launch Gradio with queuing (default concurrency)
     try:
         iface.queue()
         iface.launch()
     except Exception as e:
-
-        print("Failed to launch Gradio interface:", e)
+        print("Failed to launch:", e)
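Note: the updated handler uses openai.ChatCompletion.create, which is the interface of the pre-1.0 openai Python SDK; in openai>=1.0 that method was removed in favor of client.chat.completions.create. As a minimal sketch (not part of this commit, and assuming the Space pins openai<1.0), the same gpt-4o-mini request with the 1.x client would look roughly like the code below; the chat_once helper name is illustrative only.

    from openai import OpenAI

    def chat_once(api_key: str, request: str, truncated_text: str, temp: float):
        # Same prompt framing as in the diff above, sent through the openai>=1.0 client.
        client = OpenAI(api_key=api_key)
        resp = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": request + "\n\n>>\n" + truncated_text + "\n<<"},
            ],
            temperature=temp,
            max_tokens=2048,
            top_p=0.9,
        )
        # The 1.x response object exposes the same fields used in app.py.
        return resp.choices[0].message.content, resp.usage.total_tokens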