dromerosm committed on
Commit
3eaa349
·
verified ·
1 Parent(s): 5fe0442

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -71
app.py CHANGED
@@ -1,93 +1,112 @@
1
  import gradio as gr
2
  import os
3
  import openai
4
- from newspaper import Article
5
- import json
6
  import re
 
 
7
  from transformers import GPT2Tokenizer
8
- import requests
9
 
 
 
10
 
11
- # define the text summarizer function
12
- def text_prompt(request, page_url, contraseña, temp):
 
 
 
 
 
 
13
  try:
14
  headers = {'User-Agent': 'Chrome/83.0.4103.106'}
15
- response = requests.get(page_url, headers=headers)
16
- html = response.text
17
-
18
  page = Article('')
19
- page.set_html(html)
20
  page.parse()
21
-
22
  except Exception as e:
23
- return "", f"--- An error occurred while processing the URL: {e} ---", ""
24
-
 
25
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
26
  sentences = page.text.split('.')
27
-
28
- tokens = []
29
- page_text = ""
30
-
31
  for sentence in sentences:
32
- tokens.extend(tokenizer.tokenize(sentence))
33
-
34
- # Trim text to a maximum of 1800 tokens
35
- if len(tokens) > 1800:
36
  break
37
- page_text += sentence + ". "
38
-
39
- # Delete the last space
40
- page_text = page_text.strip()
41
 
42
- num_tokens = len(tokens)
 
 
43
 
44
- if num_tokens > 10:
45
- openai.api_key = contraseña
46
- # get the response from openai API
47
- try:
48
- response = openai.Completion.create(
49
- engine="text-davinci-003",
50
- prompt=request + "\n\n" + ">>\n" + page_text + "\n<<",
51
- max_tokens=2048,
52
- temperature=temp,
53
- top_p=0.9,
54
- )
55
- # get the response text
56
- response_text = response.choices[0].text
57
- total_tokens = response["usage"]["total_tokens"]
 
 
58
 
59
- # clean the response text
60
- response_text = re.sub(r'\s+', ' ', response_text)
61
- response_text = response_text.strip()
62
- return page.text, response_text, total_tokens
63
- except Exception as e:
64
- return page.text, f"--- An error occurred while processing the request: {e} ---", num_tokens
65
- return page.text, "--- Min number of tokens:", num_tokens
66
 
67
- # define the gradio interface
68
- iface = gr.Interface(
69
- fn=text_prompt,
70
- inputs=[gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:", type="text"),
71
- gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:", type="text"),
72
- gr.Textbox(lines=1, placeholder="Enter your API-key here...", label="API-Key:", type="password"),
73
- gr.Slider(0.0,1.0, value=0.3, label="Temperature:")
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  ],
75
- outputs=[gr.Textbox(label="Input:"), gr.Textbox(label="Output:"), gr.Textbox(label="Total Tokens:")],
76
- examples=[["Summarize the following text as a list:","https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/","",0.3],
77
- ["Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:", "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html","",0.7],
78
- ["Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):","https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/","",0.3]
79
- ],
80
- title="ChatGPT / GPT-3 info extraction from URL",
81
- description="This tool allows querying the text retrieved from the URL with newspaper3k lib and using OpenAI's [text-davinci-003] engine.\nThe URL text can be referenced in the prompt as \"following text\".\nA GPT2 tokenizer is included to ensure that the 1.800 token limit for OpenAI queries is not exceeded. Provide a prompt with your request, the url for text retrieval, your api-key and temperature to process the text."
82
- )
83
-
84
- # error capturing in integration as a component
85
-
86
- error_message = ""
 
 
 
 
 
87
 
88
- try:
89
- iface.queue(concurrency_count=20)
90
- iface.launch()
91
- except Exception as e:
92
- error_message = "An error occurred: " + str(e)
93
- iface.outputs[1].value = error_message
 
 
1
import os
import re
from functools import lru_cache

import gradio as gr
import openai
import requests
import transformers
from newspaper import Article
from transformers import GPT2Tokenizer
# Only the GPT-2 tokenizer is used here, so silence the Transformers
# backend/framework warnings that would otherwise clutter the logs.
transformers.logging.set_verbosity_error()
13
+ def text_prompt(request: str, page_url: str, api_key: str, temp: float):
14
+ """
15
+ Fetches the article at page_url, extracts text with newspaper3k,
16
+ trims it to ~1800 GPT-2 tokens, and sends it along with 'request'
17
+ to OpenAI's Completion API.
18
+ Returns: (full_input_text, ai_response, total_tokens_used_or_error)
19
+ """
20
+ # 1) Fetch and parse the page
21
  try:
22
  headers = {'User-Agent': 'Chrome/83.0.4103.106'}
23
+ resp = requests.get(page_url, headers=headers, timeout=10)
24
+ resp.raise_for_status()
 
25
  page = Article('')
26
+ page.set_html(resp.text)
27
  page.parse()
 
28
  except Exception as e:
29
+ return "", f"--- Error fetching/parsing URL: {e} ---", ""
30
+
31
+ # 2) Tokenize & truncate to ~1800 tokens
32
  tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
33
  sentences = page.text.split('.')
34
+ tokens_accum = []
35
+ truncated_text = ""
 
 
36
  for sentence in sentences:
37
+ toks = tokenizer.tokenize(sentence + ".")
38
+ if len(tokens_accum) + len(toks) > 1800:
 
 
39
  break
40
+ tokens_accum.extend(toks)
41
+ truncated_text += sentence + ". "
42
+ truncated_text = truncated_text.strip()
43
+ num_input_tokens = len(tokens_accum)
44
 
45
+ # 3) If there's enough content, call OpenAI
46
+ if num_input_tokens < 10:
47
+ return page.text, f"--- Not enough text to summarize ({num_input_tokens} tokens) ---", num_input_tokens
48
 
49
+ openai.api_key = api_key
50
+ try:
51
+ completion = openai.Completion.create(
52
+ engine="text-davinci-003",
53
+ prompt=request + "\n\n>>\n" + truncated_text + "\n<<",
54
+ max_tokens=2048,
55
+ temperature=temp,
56
+ top_p=0.9,
57
+ )
58
+ ai_text = completion.choices[0].text.strip()
59
+ total_tokens = completion.usage.total_tokens
60
+ # Collapse whitespace
61
+ ai_text = re.sub(r'\s+', ' ', ai_text)
62
+ return page.text, ai_text, total_tokens
63
+ except Exception as e:
64
+ return page.text, f"--- OpenAI API error: {e} ---", num_input_tokens
65
 
 
 
 
 
 
 
 
66
 
67
if __name__ == "__main__":
    def _build_interface():
        """Assemble the Gradio UI that wraps text_prompt."""
        prompt_box = gr.Textbox(lines=1, placeholder="Enter your prompt here...", label="Prompt:")
        url_box = gr.Textbox(lines=1, placeholder="Enter the URL here...", label="URL to parse:")
        key_box = gr.Textbox(lines=1, placeholder="Enter your API key here...", label="API-Key:", type="password")
        temp_slider = gr.Slider(0.0, 1.0, value=0.3, label="Temperature:")

        sample_queries = [
            [
                "Summarize the following text as a list:",
                "https://blog.google/outreach-initiatives/google-org/our-commitment-on-using-ai-to-accelerate-progress-on-global-development-goals/",
                "", 0.3,
            ],
            [
                "Generate a summary of the following text. Give me an overview of main business impact from the text following this template:\n- Summary:\n- Business Impact:\n- Companies:",
                "https://ai.googleblog.com/2019/10/quantum-supremacy-using-programmable.html",
                "", 0.7,
            ],
            [
                "Generate the next insights based on the following text. Indicates N/A if the information is not available in the text.\n- Summary:\n- Acquisition Price:\n- Why is this important for the acquirer:\n- Business Line for the acquirer:\n- Tech Focus for the acquired (list):",
                "https://techcrunch.com/2022/09/28/eqt-acquires-billtrust-a-company-automating-the-invoice-to-cash-process-for-1-7b/",
                "", 0.3,
            ],
        ]

        return gr.Interface(
            fn=text_prompt,
            inputs=[prompt_box, url_box, key_box, temp_slider],
            outputs=[
                gr.Textbox(label="Input Text:"),
                gr.Textbox(label="AI Output:"),
                gr.Textbox(label="Total Tokens:"),
            ],
            examples=sample_queries,
            title="ChatGPT / GPT-3 Info Extraction from URL",
            description=(
                "Fetches text from a URL using newspaper3k, trims it to ~1800 GPT-2 tokens, "
                "then queries OpenAI's text-davinci-003. Enter your prompt, URL, API key, and temperature."
            ),
        )

    demo = _build_interface()
    try:
        # Enable request queuing (default concurrency) before launching.
        demo.queue()
        demo.launch()
    except Exception as e:
        # Surface launch failures in logs/terminal instead of dying silently.
        print("Failed to launch Gradio interface:", e)