dromerosm committed on
Commit
14df869
·
verified ·
1 Parent(s): 3eaa349

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -59
app.py CHANGED
@@ -7,17 +7,11 @@ from newspaper import Article
7
  import transformers
8
  from transformers import GPT2Tokenizer
9
 
10
- # --- Silence Transformers backend warnings (since you only need the tokenizer) ---
11
  transformers.logging.set_verbosity_error()
12
 
13
  def text_prompt(request: str, page_url: str, api_key: str, temp: float):
14
- """
15
- Fetches the article at page_url, extracts text with newspaper3k,
16
- trims it to ~1800 GPT-2 tokens, and sends it along with 'request'
17
- to OpenAI's Completion API.
18
- Returns: (full_input_text, ai_response, total_tokens_used_or_error)
19
- """
20
- # 1) Fetch and parse the page
21
  try:
22
  headers = {'User-Agent': 'Chrome/83.0.4103.106'}
23
  resp = requests.get(page_url, headers=headers, timeout=10)
@@ -26,87 +20,63 @@ def text_prompt(request: str, page_url: str, api_key: str, temp: float):
26
  page.set_html(resp.text)
27
  page.parse()
28
  except Exception as e:
29
- return "", f"--- Error fetching/parsing URL: {e} ---", ""
30
 
31
- # 2) Tokenize & truncate to ~1800 tokens
32
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
33
  sentences = page.text.split('.')
34
- tokens_accum = []
35
- truncated_text = ""
36
- for sentence in sentences:
37
- toks = tokenizer.tokenize(sentence + ".")
38
  if len(tokens_accum) + len(toks) > 1800:
39
  break
40
- tokens_accum.extend(toks)
41
- truncated_text += sentence + ". "
42
- truncated_text = truncated_text.strip()
43
  num_input_tokens = len(tokens_accum)
44
 
45
- # 3) If there's enough content, call OpenAI
46
  if num_input_tokens < 10:
47
- return page.text, f"--- Not enough text to summarize ({num_input_tokens} tokens) ---", num_input_tokens
48
 
 
49
  openai.api_key = api_key
50
  try:
51
- completion = openai.Completion.create(
52
- engine="text-davinci-003",
53
- prompt=request + "\n\n>>\n" + truncated_text + "\n<<",
54
- max_tokens=2048,
 
 
55
  temperature=temp,
 
56
  top_p=0.9,
57
  )
58
- ai_text = completion.choices[0].text.strip()
59
- total_tokens = completion.usage.total_tokens
60
- # Collapse whitespace
61
- ai_text = re.sub(r'\s+', ' ', ai_text)
62
  return page.text, ai_text, total_tokens
63
- except Exception as e:
64
- return page.text, f"--- OpenAI API error: {e} ---", num_input_tokens
65
 
 
 
66
 
67
  if __name__ == "__main__":
68
- # Build the Gradio interface
69
  iface = gr.Interface(
70
  fn=text_prompt,
71
  inputs=[
72
- gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:"),
73
- gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:"),
74
- gr.Textbox(lines=1, placeholder="Enter your API key here...", label="API-Key:", type="password"),
75
- gr.Slider(0.0, 1.0, value=0.3, label="Temperature:")
76
  ],
77
  outputs=[
78
  gr.Textbox(label="Input Text:"),
79
  gr.Textbox(label="AI Output:"),
80
  gr.Textbox(label="Total Tokens:")
81
  ],
82
- examples=[
83
- [
84
- "Summarize the following text as a list:",
85
- "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/",
86
- "", 0.3
87
- ],
88
- [
89
- "Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:",
90
- "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html",
91
- "", 0.7
92
- ],
93
- [
94
- "Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):",
95
- "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/",
96
- "", 0.3
97
- ]
98
- ],
99
- title="ChatGPT / GPT-3 Info Extraction from URL",
100
- description=(
101
- "Fetches text from a URL using newspaper3k, trims it to ~1800 GPT-2 tokens, "
102
- "then queries OpenAI's text-davinci-003. Enter your prompt, URL, API key, and temperature."
103
- )
104
  )
105
 
106
- # Launch Gradio with queuing (default concurrency)
107
  try:
108
  iface.queue()
109
  iface.launch()
110
  except Exception as e:
111
- # Print the error so it shows up in your logs/terminal
112
- print("Failed to launch Gradio interface:", e)
 
7
  import transformers
8
  from transformers import GPT2Tokenizer
9
 
10
+ # Silence transformers backend warnings
11
  transformers.logging.set_verbosity_error()
12
 
13
  def text_prompt(request: str, page_url: str, api_key: str, temp: float):
14
+ # Fetch & parse
 
 
 
 
 
 
15
  try:
16
  headers = {'User-Agent': 'Chrome/83.0.4103.106'}
17
  resp = requests.get(page_url, headers=headers, timeout=10)
 
20
  page.set_html(resp.text)
21
  page.parse()
22
  except Exception as e:
23
+ return "", f"Error fetching URL: {e}", ""
24
 
25
+ # Tokenize & truncate to ~1800 GPT-2 tokens
26
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
27
  sentences = page.text.split('.')
28
+ tokens_accum, truncated_text = [], ""
29
+ for sent in sentences:
30
+ toks = tokenizer.tokenize(sent + ".")
 
31
  if len(tokens_accum) + len(toks) > 1800:
32
  break
33
+ tokens_accum += toks
34
+ truncated_text += sent + ". "
 
35
  num_input_tokens = len(tokens_accum)
36
 
 
37
  if num_input_tokens < 10:
38
+ return page.text, f"Not enough text ({num_input_tokens} tokens)", num_input_tokens
39
 
40
+ # Call GPT-4o mini via ChatCompletion
41
  openai.api_key = api_key
42
  try:
43
+ chat_resp = openai.ChatCompletion.create(
44
+ model="gpt-4o-mini",
45
+ messages=[
46
+ {"role": "system", "content": "You are a helpful assistant."},
47
+ {"role": "user", "content": request + "\n\n>>\n" + truncated_text + "\n<<"}
48
+ ],
49
  temperature=temp,
50
+ max_tokens=2048,
51
  top_p=0.9,
52
  )
53
+ ai_text = re.sub(r'\s+', ' ', chat_resp.choices[0].message.content).strip()
54
+ total_tokens = chat_resp.usage.total_tokens
 
 
55
  return page.text, ai_text, total_tokens
 
 
56
 
57
+ except Exception as e:
58
+ return page.text, f"OpenAI API error: {e}", num_input_tokens
59
 
60
  if __name__ == "__main__":
 
61
  iface = gr.Interface(
62
  fn=text_prompt,
63
  inputs=[
64
+ gr.Textbox(label="Prompt:"),
65
+ gr.Textbox(label="URL to parse:"),
66
+ gr.Textbox(label="API-Key:", type="password"),
67
+ gr.Slider(0.0,1.0,value=0.3, label="Temperature:")
68
  ],
69
  outputs=[
70
  gr.Textbox(label="Input Text:"),
71
  gr.Textbox(label="AI Output:"),
72
  gr.Textbox(label="Total Tokens:")
73
  ],
74
+ title="GPT-4o-mini URL Summarizer",
75
+ description="Uses GPT-4o-mini via ChatCompletion to summarize webpage text."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  )
77
 
 
78
  try:
79
  iface.queue()
80
  iface.launch()
81
  except Exception as e:
82
+ print("Failed to launch:", e)