|
import gradio as gr |
|
import requests |
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from huggingface_hub import InferenceClient |
|
|
|
|
|
# SERPHouse search API key and Hugging Face inference token, read from the
# environment (each is None when the variable is unset).
API_KEY = os.getenv("SERPHOUSE_API_KEY")

HF_TOKEN = os.getenv("HF_TOKEN")


# Country display name -> SERPHouse/Google location id sent in the search
# payload's "loc" field.
COUNTRY_CODE_MAPPING = {

    "United States": "2840",

    "South Korea": "2458",

}

# Dropdown choices for the UI, derived from the mapping keys above.
MAJOR_COUNTRIES = list(COUNTRY_CODE_MAPPING.keys())
|
|
|
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live SERP API for news results.

    Args:
        query: Search term.
        country: Country display name; mapped to a SERPHouse location id
            via COUNTRY_CODE_MAPPING (falls back to "2840", United States).
        page: Result page number (sent to the API as a string).
        num_result: Number of results to request (sent as a string).

    Returns:
        The decoded JSON response dict on success, or a dict of the form
        ``{"error": "..."}`` describing the failure.
    """
    url = "https://api.serphouse.com/serp/live"

    # Restrict results to the last 24 hours (UTC date window).
    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    loc_id = COUNTRY_CODE_MAPPING.get(country, "2840")

    payload = {
        "data": {
            "q": query,
            "domain": "google.com",
            "loc": loc_id,
            "lang": "en",
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "verbatim": "1",
            "num": str(num_result),
            "date_range": date_range
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    # Pre-initialize so the except-branch cannot raise UnboundLocalError
    # when requests.post() itself fails (e.g. connection error) before
    # `response` is ever assigned — the original code crashed here.
    response = None
    try:
        # Bounded timeout so a stalled API cannot hang the UI forever.
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        if response is not None and hasattr(response, 'text'):
            error_msg += f"\nResponse content: {response.text}"
        return {"error": error_msg}
|
|
|
def format_results_from_raw(results):
    """Normalize a raw SERPHouse response into ``(error_message, articles)``.

    On success the error message is "" and ``articles`` is a list of dicts
    with keys index/title/link/snippet/channel/time/image_url. On failure
    the message describes the problem and the list is empty.
    """
    try:
        # API-level error reported by search_serphouse().
        if isinstance(results, dict) and "error" in results:
            return "Error: " + results["error"], []

        if not isinstance(results, dict):
            raise ValueError("๊ฒฐ๊ณผ๊ฐ ์ฌ์ ํ์์ด ์๋๋๋ค.")

        # The news list is nested two levels deep: results -> results -> news.
        # Any missing level leaves the default empty list in place.
        news_results = []
        if 'results' in results:
            layer = results['results']
            if 'results' in layer:
                layer = layer['results']
                if 'news' in layer:
                    news_results = layer['news']

        if not news_results:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        # Each raw entry may use either of two key spellings per field;
        # fall back to a placeholder when both are absent.
        articles = [
            {
                "index": position,
                "title": entry.get("title", "์ ๋ชฉ ์์"),
                "link": entry.get("url", entry.get("link", "#")),
                "snippet": entry.get("snippet", "๋ด์ฉ ์์"),
                "channel": entry.get("channel", entry.get("source", "์ ์ ์์")),
                "time": entry.get("time", entry.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": entry.get("img", entry.get("thumbnail", "")),
            }
            for position, entry in enumerate(news_results, 1)
        ]

        return "", articles

    except Exception as e:
        error_message = f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
        return "Error: " + error_message, []
|
|
|
def serphouse_search(query, country):
    """Run a first-page news search (10 results) and return
    ``(error_message, articles)`` ready for the UI."""
    raw = search_serphouse(query, country, 1, 10)
    return format_results_from_raw(raw)
|
|
|
|
|
# Hugging Face inference client used by summarize_article() to produce
# Korean summaries. NOTE(review): assumes HF_TOKEN grants access to this
# model — verify against the deployment's environment.
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=HF_TOKEN)
|
|
|
def summarize_article(title, snippet):
    """Ask the HF model for a 3-sentence Korean summary of one article.

    Any failure (network, auth, model) is returned as an error string
    rather than raised, so the UI can display it directly.
    """
    try:
        prompt = (
            "๋ค์ ๋ด์ค ์ ๋ชฉ๊ณผ ์์ฝ์ ๋ฐํ์ผ๋ก ํ๊ตญ์ด๋ก 3๋ฌธ์ฅ์ผ๋ก ์์ฝํ์ธ์:\n"
            f"์ ๋ชฉ: {title}\n์์ฝ: {snippet}"
        )
        return hf_client.text_generation(prompt, max_new_tokens=500)
    except Exception as e:
        return f"์์ฝ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}"
|
|
|
# Custom CSS injected into the Gradio page: hides the default footer.
css = """
footer {
visibility: hidden;
}
"""
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: a search form, up to 10 pre-built article cards, and a
# per-article "analyze" button that produces an LLM summary on demand.
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, title="NewsAI ์๋น์ค") as iface:
    gr.Markdown("๊ฒ์์ด๋ฅผ ์
๋ ฅํ๊ณ ์ํ๋ ๊ตญ๊ฐ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 10๊ฐ ์ถ๋ ฅํฉ๋๋ค.")

    with gr.Column():
        with gr.Row():
            # Search inputs: free-text query plus a country dropdown.
            query = gr.Textbox(label="๊ฒ์์ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ", value="South Korea")
            search_button = gr.Button("๊ฒ์")

    # Holds the article dicts from the last search so the per-article
    # "analyze" callbacks can look an article up by its card index.
    articles_state = gr.State([])

    # Pre-build 10 hidden card groups; search results only toggle their
    # visibility and contents instead of creating components dynamically.
    article_components = []
    for i in range(10):
        with gr.Group(visible=False) as article_group:
            title = gr.Markdown()
            image = gr.Image(width=200, height=150)
            snippet = gr.Markdown()
            info = gr.Markdown()
            analyze_button = gr.Button("๋ถ์")
            summary_output = gr.Markdown(visible=False)

        article_components.append({
            'group': article_group,
            'title': title,
            'image': image,
            'snippet': snippet,
            'info': info,
            'analyze_button': analyze_button,
            'summary_output': summary_output,
            'index': i,
        })

    def search_and_display(query, country, articles_state):
        """Run the search and return one gr.update per output component.

        The return order must exactly match `search_outputs` below:
        error panel, then 6 updates per article card, then the new state.
        """
        # NOTE(review): gr.Progress is normally injected as a default
        # parameter (progress=gr.Progress()) rather than used as a context
        # manager — confirm this works on the deployed Gradio version.
        with gr.Progress() as progress:
            progress(0, desc="์ฒ๋ฆฌ์ค์
๋๋ค. ์ ์๋ง ๊ธฐ๋ค๋ฆฌ์ธ์.")

        error_message, articles = serphouse_search(query, country)
        outputs = []
        if error_message:
            # Show the error banner and hide every article card.
            outputs.append(gr.update(value=error_message, visible=True))
            for comp in article_components:
                outputs.extend([
                    gr.update(visible=False),
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    gr.update(),
                    gr.update(visible=False),
                ])
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            for idx, comp in enumerate(article_components):
                if idx < len(articles):
                    article = articles[idx]

                    # Skip inline data: URIs; the image slot expects a
                    # fetchable URL and is hidden otherwise.
                    image_url = article['image_url']
                    if image_url and not image_url.startswith('data:image'):
                        image_update = gr.update(value=image_url, visible=True)
                    else:
                        image_update = gr.update(value=None, visible=False)

                    outputs.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**์์ฝ:** {article['snippet']}"),
                        gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}"),
                        gr.update(visible=False),
                    ])
                else:
                    # Fewer than 10 results: hide the remaining cards.
                    outputs.extend([
                        gr.update(visible=False),
                        gr.update(),
                        gr.update(),
                        gr.update(),
                        gr.update(),
                        gr.update(visible=False),
                    ])
            articles_state = articles
        outputs.append(articles_state)

        return outputs

    # Flat output list mirroring exactly the order search_and_display emits.
    search_outputs = []
    error_output = gr.Markdown(visible=False)
    search_outputs.append(error_output)
    for comp in article_components:
        search_outputs.append(comp['group'])
        search_outputs.append(comp['title'])
        search_outputs.append(comp['image'])
        search_outputs.append(comp['snippet'])
        search_outputs.append(comp['info'])
        search_outputs.append(comp['summary_output'])
    search_outputs.append(articles_state)

    search_button.click(
        search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs
    )

    # Wire one "analyze" handler per card. The factory binds the card
    # index as a default argument, avoiding the late-binding closure trap.
    for idx, comp in enumerate(article_components):
        def create_analyze_function(index=idx):
            def analyze_article(articles):
                # NOTE(review): same gr.Progress context-manager pattern
                # as above — confirm against the Gradio version in use.
                with gr.Progress() as progress:
                    progress(0, desc="์ฒ๋ฆฌ์ค์
๋๋ค. ์ ์๋ง ๊ธฐ๋ค๋ฆฌ์ธ์.")
                if articles and index < len(articles):
                    article = articles[index]
                    summary = summarize_article(article['title'], article['snippet'])
                    return gr.update(value=summary, visible=True)
                else:
                    return gr.update(value="๊ธฐ์ฌ ์ ๋ณด๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.", visible=True)
            return analyze_article

        comp['analyze_button'].click(
            create_analyze_function(),
            inputs=[articles_state],
            outputs=comp['summary_output']
        )

# Launch with shared basic-auth credentials for the demo deployment.
iface.launch(auth=("gini", "pick"))
|
|