|
import json
import os
from datetime import datetime, timedelta, timezone

import gradio as gr
import requests
from huggingface_hub import InferenceClient
|
|
|
|
|
# Credentials are read from the environment at import time; either may be
# None if unset, in which case the corresponding API calls fail at runtime.
API_KEY = os.getenv("SERPHOUSE_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")
|
|
|
# Countries offered in the UI dropdown; each value is sent verbatim as the
# SERPHouse `loc` (location) request parameter.
MAJOR_COUNTRIES = [
    "United States", "United Kingdom", "Canada", "Australia", "Germany",
    "France", "Japan", "South Korea", "China", "India",
    "Brazil", "Mexico", "Russia", "Italy", "Spain",
    "Netherlands", "Sweden", "Switzerland", "Norway", "Denmark",
    "Finland", "Belgium", "Austria", "New Zealand", "Ireland",
    "Singapore", "Hong Kong", "Israel", "United Arab Emirates", "Saudi Arabia",
    "South Africa", "Turkey", "Egypt", "Poland", "Czech Republic",
    "Hungary", "Greece", "Portugal", "Argentina", "Chile",
    "Colombia", "Peru", "Venezuela", "Thailand", "Malaysia",
    "Indonesia", "Philippines", "Vietnam", "Pakistan", "Bangladesh"
]
|
|
|
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live SERP API for Google News results.

    Args:
        query: Search keywords.
        country: Location name, passed as the SERPHouse ``loc`` parameter.
        page: 1-based result page number.
        num_result: Maximum number of results to request.

    Returns:
        The decoded JSON response on success, or ``{"error": "..."}`` on any
        request failure (connection error, timeout, or non-2xx status).
    """
    url = "https://api.serphouse.com/serp/live"

    # Restrict results to the last 24 hours (UTC, date granularity).
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    payload = {
        "data": {
            "q": query,
            "domain": "google.com",
            "loc": country,
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "verbatim": "1",
            "num": str(num_result),
            "date_range": date_range,
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    # Pre-bind so the except block can safely inspect it. Previously, if
    # requests.post itself raised (e.g. connection refused), `response` was
    # unbound and `hasattr(response, 'text')` raised UnboundLocalError,
    # masking the real error.
    response = None
    try:
        # Timeout so a dead endpoint cannot hang the UI thread forever.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        if response is not None and response.text:
            error_msg += f"\nResponse content: {response.text}"
        return {"error": error_msg}
|
|
|
def format_results_from_raw(results):
    """Normalize a raw SERPHouse response into ``(error_message, articles)``.

    On success the error message is ``""`` and ``articles`` is a list of
    dicts with keys index/title/link/snippet/channel/time/image_url.
    On any failure a non-empty error string and an empty list are returned.
    """
    try:
        # An error dict produced by search_serphouse short-circuits here.
        if isinstance(results, dict) and "error" in results:
            return "Error: " + results["error"], []

        if not isinstance(results, dict):
            raise ValueError("๊ฒฐ๊ณผ๊ฐ ์ฌ์ ํ์์ด ์๋๋๋ค.")

        # Drill into the (sometimes doubly nested) "results" container.
        news_results = []
        if 'results' in results:
            container = results['results']
            if 'results' in container:
                container = container['results']
                if 'news' in container:
                    news_results = container['news']

        if not news_results:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        # Map each raw news item onto the UI-facing article schema,
        # tolerating the alternate key names the API sometimes uses.
        articles = [
            {
                "index": position,
                "title": item.get("title", "์ ๋ชฉ ์์"),
                "link": item.get("url", item.get("link", "#")),
                "snippet": item.get("snippet", "๋ด์ฉ ์์"),
                "channel": item.get("channel", item.get("source", "์ ์ ์์")),
                "time": item.get("time", item.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": item.get("img", item.get("thumbnail", "")),
            }
            for position, item in enumerate(news_results, 1)
        ]
        return "", articles

    except Exception as e:
        error_message = f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
        return "Error: " + error_message, []
|
|
|
def serphouse_search(query, country):
    """Fetch the first page (up to 10 items) of news and format it."""
    raw = search_serphouse(query, country, page=1, num_result=10)
    return format_results_from_raw(raw)
|
|
|
|
|
# Module-level HF Inference client shared by summarize_article; constructed
# at import time (token may be None if HF_TOKEN is unset).
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
|
|
|
def summarize_article(title, snippet):
    """Ask the HF model for a three-sentence Korean summary of one article.

    Returns the generated text, or a Korean error message if the remote
    call (or prompt construction) fails.
    """
    try:
        prompt = (
            "๋ค์ ๋ด์ค ์ ๋ชฉ๊ณผ ์์ฝ์ ๋ฐํ์ผ๋ก ํ๊ตญ์ด๋ก 3๋ฌธ์ฅ์ผ๋ก ์์ฝํ์ธ์:\n"
            f"์ ๋ชฉ: {title}\n์์ฝ: {snippet}"
        )
        return hf_client.text_generation(prompt, max_new_tokens=500)
    except Exception as e:
        return f"์์ฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
|
|
# Hide the default Gradio footer in the rendered page.
css = """
footer {
    visibility: hidden;
}
"""
|
|
|
|
|
# Build the Gradio UI: search controls plus 10 pre-created, initially
# hidden article "card" groups that search results are rendered into.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์๋น์ค") as iface:
    gr.Markdown("๊ฒ์์ด๋ฅผ ์๋ ฅํ๊ณ ์ํ๋ ๊ตญ๊ฐ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 10๊ฐ ์ถ๋ ฅํฉ๋๋ค.")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ์์ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ", value="South Korea")
        search_button = gr.Button("๊ฒ์")

    # One reusable card per possible result (max 10); each dict holds the
    # components the search callback must update for that slot.
    article_components = []
    for i in range(10):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()
            analyze_button = gr.Button("๋ถ์")
            summary_output = gr.Markdown(visible=False)

        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'analyze_button': analyze_button,
            'summary_output': summary_output,
        })
|
def search_and_display(query, country): |
|
error_message, articles = serphouse_search(query, country) |
|
outputs = [] |
|
if error_message: |
|
outputs.append(gr.update(value=error_message, visible=True)) |
|
|
|
for comp in article_components: |
|
outputs.extend([ |
|
gr.update(visible=False), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(visible=False), |
|
]) |
|
return outputs |
|
else: |
|
|
|
for idx, comp in enumerate(article_components): |
|
if idx < len(articles): |
|
article = articles[idx] |
|
comp['group'].visible = True |
|
comp['title'].value = f"### [{article['title']}]({article['link']})" |
|
if article['image_url'] and not article['image_url'].startswith("data:image"): |
|
comp['image'].value = article['image_url'] |
|
comp['image'].visible = True |
|
else: |
|
comp['image'].visible = False |
|
comp['snippet'].value = f"**์์ฝ:** {article['snippet']}" |
|
comp['info'].value = f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}" |
|
comp['summary_output'].visible = False |
|
|
|
|
|
def create_analyze_function(article_title, article_snippet): |
|
def analyze_article(): |
|
summary = summarize_article(article_title, article_snippet) |
|
return gr.update(value=summary, visible=True) |
|
return analyze_article |
|
|
|
comp['analyze_button'].click( |
|
create_analyze_function(article['title'], article['snippet']), |
|
inputs=[], |
|
outputs=comp['summary_output'] |
|
) |
|
|
|
outputs.extend([ |
|
gr.update(visible=True), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(visible=False), |
|
]) |
|
else: |
|
|
|
comp['group'].visible = False |
|
outputs.extend([ |
|
gr.update(visible=False), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(), |
|
gr.update(visible=False), |
|
]) |
|
return outputs |
|
|
|
|
|
    # Output components for search_and_display, in the exact order the
    # callback emits updates: the error Markdown first, then seven
    # components per article card.
    search_outputs = []
    # Hidden Markdown used to surface search errors to the user.
    search_outputs.append(gr.Markdown(visible=False))
    for comp in article_components:
        search_outputs.append(comp['group'])
        search_outputs.append(comp['title'])
        search_outputs.append(comp['image'])
        search_outputs.append(comp['snippet'])
        search_outputs.append(comp['info'])
        search_outputs.append(comp['analyze_button'])
        search_outputs.append(comp['summary_output'])

    # Wire the search button to the callback.
    search_button.click(
        search_and_display,
        inputs=[query, country],
        outputs=search_outputs
    )
|
|
|
# NOTE(review): basic-auth credentials are hard-coded here; move them to
# environment variables before deploying publicly.
iface.launch(auth=("gini", "pick"))
|
|