Shreyas94 committed
Commit 0b843b7 · verified · 1 Parent(s): 11b62a8

Update app.py

Files changed (1):
  1. app.py +119 -47
app.py CHANGED
@@ -1,7 +1,6 @@
 import os
 import urllib
 import requests
-import feedparser
 from bs4 import BeautifulSoup
 import torch
 import gradio as gr
@@ -38,7 +37,68 @@ def fetch_news(term, num_results=2):
         results.append({"link": entry.link, "text": entry.title})
     logger.debug(f"Fetched news results: {results}")
     return results
-
+
+# Function to perform a Google search and return the results
+def search(term, num_results=2, lang="en", timeout=5, safe="active", ssl_verify=None):
+    logger.debug(f"Starting search for term: {term}")
+    escaped_term = urllib.parse.quote_plus(term)
+    start = 0
+    all_results = []
+    max_chars_per_page = 8000
+
+    with requests.Session() as session:
+        while start < num_results:
+            try:
+                resp = session.get(
+                    url="https://www.google.com/search",
+                    headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"},
+                    params={
+                        "q": term,
+                        "num": num_results - start,
+                        "hl": lang,
+                        "start": start,
+                        "safe": safe,
+                    },
+                    timeout=timeout,
+                    verify=ssl_verify,
+                )
+                resp.raise_for_status()
+                soup = BeautifulSoup(resp.text, "html.parser")
+                result_block = soup.find_all("div", attrs={"class": "g"})
+                if not result_block:
+                    start += 1
+                    continue
+                for result in result_block:
+                    link = result.find("a", href=True)
+                    if link:
+                        link = link["href"]
+                        try:
+                            webpage = session.get(link, headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0"})
+                            webpage.raise_for_status()
+                            visible_text = extract_text_from_webpage(webpage.text)
+                            if len(visible_text) > max_chars_per_page:
+                                visible_text = visible_text[:max_chars_per_page] + "..."
+                            all_results.append({"link": link, "text": visible_text})
+                        except requests.exceptions.RequestException as e:
+                            logger.error(f"Error fetching or processing {link}: {e}")
+                            all_results.append({"link": link, "text": None})
+                    else:
+                        all_results.append({"link": None, "text": None})
+                start += len(result_block)
+            except Exception as e:
+                logger.error(f"Error during search: {e}")
+                break
+    logger.debug(f"Search results: {all_results}")
+    return all_results
+
+# Function to extract visible text from HTML content
+def extract_text_from_webpage(html_content):
+    soup = BeautifulSoup(html_content, "html.parser")
+    for tag in soup(["script", "style", "header", "footer", "nav"]):
+        tag.extract()
+    visible_text = soup.get_text(strip=True)
+    return visible_text
+
 # Function to format the prompt for the language model
 def format_prompt(user_prompt, chat_history):
     logger.debug(f"Formatting prompt with user prompt: {user_prompt} and chat history: {chat_history}")
@@ -72,26 +132,40 @@ def model_inference(
         if web_search:
             logger.debug("Performing news search")
             news_results = fetch_news(user_prompt["text"])
-            news_text = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in news_results])
-            formatted_prompt = format_prompt(f"{user_prompt['text']} [NEWS] {news_text}", chat_history)
+            news2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in news_results])
+            formatted_prompt = format_prompt(f"{user_prompt['text']} [NEWS] {news2}", chat_history)
+            inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
+            if model:
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    repetition_penalty=repetition_penalty,
+                    do_sample=True,
+                    temperature=temperature,
+                    top_p=top_p
+                )
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            else:
+                response = "Model is not available. Please try again later."
+            logger.debug(f"Model response: {response}")
+            return response
         else:
             formatted_prompt = format_prompt(user_prompt["text"], chat_history)
-
-        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
-        if model:
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                repetition_penalty=repetition_penalty,
-                do_sample=True,
-                temperature=temperature,
-                top_p=top_p
-            )
-            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        else:
-            response = "Model is not available. Please try again later."
-        logger.debug(f"Model response: {response}")
-        return response
+            inputs = tokenizer(formatted_prompt, return_tensors="pt").to(DEVICE)
+            if model:
+                outputs = model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    repetition_penalty=repetition_penalty,
+                    do_sample=True,
+                    temperature=temperature,
+                    top_p=top_p
+                )
+                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+            else:
+                response = "Model is not available. Please try again later."
+            logger.debug(f"Model response: {response}")
+            return response
     else:
         return "Image input not supported in this implementation."
@@ -154,44 +228,42 @@ chatbot = gr.Chatbot(
 
 # Define Gradio interface
 def chat_interface(user_input, history, web_search, decoding_strategy, temperature, max_new_tokens, repetition_penalty, top_p):
-    logger.debug(f"Chat interface called with user_input: {user_input}")
-    if isinstance(user_input, str):
-        user_input = {"text": user_input, "files": []}
+    # Ensure the tokenizer is accessible within the function scope
+    global tokenizer
+
+    # Perform model inference
     response = model_inference(
-        user_input,
-        history,
-        web_search,
-        temperature,
-        max_new_tokens,
-        repetition_penalty,
-        top_p,
-        tokenizer=tokenizer # Pass tokenizer to model_inference
+        user_prompt=user_input,
+        chat_history=history,
+        web_search=web_search,
+        temperature=temperature,
+        max_new_tokens=max_new_tokens,
+        repetition_penalty=repetition_penalty,
+        top_p=top_p,
+        tokenizer=tokenizer  # Pass tokenizer to the model_inference function
     )
-    history.append((user_input["text"], response))
-    logger.debug(f"Updated chat history: {history}")
-    return history, history
+
+    # Return the response
+    return response
 
-# Create Gradio interface
+# Define the Gradio interface components
interface = gr.Interface(
     fn=chat_interface,
     inputs=[
-        gr.Textbox(label="User Input"),
-        gr.State([]),
-        gr.Checkbox(label="Fetch News", value=True),
+        gr.Textbox(label="User Input", placeholder="Type your message here..."),
+        gr.Textbox(label="Chat History", placeholder="Chat history will appear here..."),
+        gr.Checkbox(label="Perform Web Search", default=False),
         decoding_strategy,
         temperature,
         max_new_tokens,
         repetition_penalty,
         top_p
     ],
-    outputs=[
-        chatbot,
-        gr.State([])
-    ],
-    title="OpenGPT-4o-Chatty",
-    description="An AI assistant capable of insightful conversations and news fetching."
+    outputs=gr.Textbox(label="Assistant Response"),
+    live=True,
+    layout="vertical",
+    theme="compact"
 )
 
-if __name__ == "__main__":
-    logger.debug("Launching Gradio interface")
-    interface.launch()
+# Launch the Gradio interface
+interface.launch()
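
For reference, the two helpers introduced by this commit can be exercised without launching the Gradio UI. A minimal smoke-test sketch, assuming app.py is importable as `app` (an assumption; note that importing it will also execute the module-level model loading and the new top-level interface.launch() call, so copying the two functions into a REPL may be more practical):

# Minimal smoke test for the helpers added in this commit.
# Assumes app.py is importable as `app` (hypothetical import path);
# importing it also runs the module-level model/interface code.
from app import search, extract_text_from_webpage

# extract_text_from_webpage removes script/style/header/footer/nav tags
# and returns the remaining visible text.
html = "<html><body><nav>menu</nav><p>Hello, world.</p></body></html>"
print(extract_text_from_webpage(html))  # -> "Hello, world."

# search scrapes Google result pages and truncates each fetched page
# to 8000 characters; entries may carry text=None if a fetch fails.
for res in search("open source LLM news", num_results=2):
    print(res["link"])
    print((res["text"] or "")[:120])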