Spaces:
Sleeping
Sleeping
File size: 2,699 Bytes
5363aef 3eaa349 5363aef 3eaa349 5363aef 608e720 14df869 3eaa349 5363aef 3eaa349 14df869 d310922 608e720 3eaa349 608e720 3eaa349 d310922 14df869 3eaa349 14df869 608e720 5394b7a 14df869 3eaa349 608e720 14df869 3eaa349 608e720 3eaa349 14df869 5363aef 14df869 3eaa349 14df869 3eaa349 14df869 3eaa349 14df869 3eaa349 608e720 14df869 5363aef 3eaa349 14df869 3eaa349 14df869 3eaa349 e16e8bc 3eaa349 14df869 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import gradio as gr
import os
import openai
import requests
import re
from newspaper import Article
import transformers
from transformers import GPT2Tokenizer
# Silence transformers backend warnings (framework-availability notices etc.)
# so they don't clutter the Space logs at startup.
transformers.logging.set_verbosity_error()
def text_prompt(request: str, page_url: str, api_key: str, temp: float):
    """Fetch a web page, truncate its text to ~1800 GPT-2 tokens, and run the
    user's prompt against it with GPT-4o-mini.

    Args:
        request: Instruction/prompt to prepend to the extracted page text.
        page_url: URL of the page to download and parse.
        api_key: OpenAI API key supplied by the user.
        temp: Sampling temperature forwarded to the ChatCompletion call.

    Returns:
        Tuple of (page_text, ai_text_or_error_message, token_count).
        On success token_count is the API-reported total; on an API error it
        is the locally counted input tokens; on a fetch failure it is "".
    """
    # --- Fetch & parse -------------------------------------------------
    try:
        headers = {'User-Agent': 'Chrome/83.0.4103.106'}
        resp = requests.get(page_url, headers=headers, timeout=10)
        resp.raise_for_status()
        page = Article('')
        page.set_html(resp.text)
        page.parse()
    except Exception as e:
        return "", f"Error fetching URL: {e}", ""

    # --- Tokenize & truncate to ~1800 GPT-2 tokens ---------------------
    # Fix: cache the tokenizer on the function object. The original called
    # GPT2Tokenizer.from_pretrained("gpt2") on EVERY request, reloading the
    # vocab files each time — by far the slowest step per call.
    tokenizer = getattr(text_prompt, "_tokenizer", None)
    if tokenizer is None:
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        text_prompt._tokenizer = tokenizer

    # Accumulate whole "sentences" (naive '.'-split) until the token budget
    # would be exceeded, so the truncation never cuts mid-sentence.
    tokens_accum, truncated_text = [], ""
    for sent in page.text.split('.'):
        toks = tokenizer.tokenize(sent + ".")
        if len(tokens_accum) + len(toks) > 1800:
            break
        tokens_accum += toks
        truncated_text += sent + ". "
    num_input_tokens = len(tokens_accum)
    if num_input_tokens < 10:
        return page.text, f"Not enough text ({num_input_tokens} tokens)", num_input_tokens

    # --- Call GPT-4o mini via ChatCompletion ---------------------------
    openai.api_key = api_key
    try:
        chat_resp = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": request + "\n\n>>\n" + truncated_text + "\n<<"}
            ],
            temperature=temp,
            max_tokens=2048,
            top_p=0.9,
        )
        # Collapse all whitespace runs so multi-line completions render as
        # one clean paragraph in the output textbox.
        ai_text = re.sub(r'\s+', ' ', chat_resp.choices[0].message.content).strip()
        return page.text, ai_text, chat_resp.usage.total_tokens
    except Exception as e:
        return page.text, f"OpenAI API error: {e}", num_input_tokens
if __name__ == "__main__":
    # Wire up the Gradio UI: prompt, URL, API key, and temperature in;
    # the raw page text, the model's answer, and the token count out.
    demo = gr.Interface(
        fn=text_prompt,
        inputs=[
            gr.Textbox(label="Prompt:"),
            gr.Textbox(label="URL to parse:"),
            gr.Textbox(label="API-Key:", type="password"),
            gr.Slider(0.0, 1.0, value=0.3, label="Temperature:"),
        ],
        outputs=[
            gr.Textbox(label="Input Text:"),
            gr.Textbox(label="AI Output:"),
            gr.Textbox(label="Total Tokens:"),
        ],
        title="GPT-4o-mini URL Summarizer",
        description="Uses GPT-4o-mini via ChatCompletion to summarize webpage text.",
    )
    # Launch inside try/except so a port/host failure prints instead of
    # crashing the Space with a bare traceback.
    try:
        demo.queue()
        demo.launch()
    except Exception as e:
        print("Failed to launch:", e)