ainews-db / app.py
seawolf2357's picture
Update app.py
1d7d055 verified
raw
history blame
12.3 kB
import gradio as gr
import requests
import json
import os
from datetime import datetime, timedelta
from huggingface_hub import InferenceClient
API_KEY = os.getenv("SERPHOUSE_API_KEY")
hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))
# Human-readable country name -> ISO 3166-1 alpha-2 code.
# The keys double as the UI dropdown choices (see MAJOR_COUNTRIES below).
COUNTRY_CODES = {
    "United States": "US",
    "United Kingdom": "GB",
    "Canada": "CA",
    "Australia": "AU",
    "Germany": "DE",
    "France": "FR",
    "Japan": "JP",
    "South Korea": "KR",
    "China": "CN",
    "Taiwan": "TW",  # Taiwan added
    "India": "IN",
    "Brazil": "BR",
    "Mexico": "MX",
    "Russia": "RU",
    "Italy": "IT",
    "Spain": "ES",
    "Netherlands": "NL",
    "Singapore": "SG",
    "Hong Kong": "HK",
    "Indonesia": "ID",
    "Malaysia": "MY",
    "Philippines": "PH",
    "Thailand": "TH",
    "Vietnam": "VN",
    "Belgium": "BE",
    "Denmark": "DK",
    "Finland": "FI",
    "Ireland": "IE",
    "Norway": "NO",
    "Poland": "PL",
    "Sweden": "SE",
    "Switzerland": "CH",
    "Austria": "AT",
    "Czech Republic": "CZ",
    "Greece": "GR",
    "Hungary": "HU",
    "Portugal": "PT",
    "Romania": "RO",
    "Turkey": "TR",
    "Israel": "IL",
    "Saudi Arabia": "SA",
    "United Arab Emirates": "AE",
    "South Africa": "ZA",
    "Argentina": "AR",
    "Chile": "CL",
    "Colombia": "CO",
    "Peru": "PE",
    "Venezuela": "VE",
    "New Zealand": "NZ",
    "Bangladesh": "BD",
    "Pakistan": "PK",
    "Egypt": "EG",
    "Morocco": "MA",
    "Nigeria": "NG",
    "Kenya": "KE",
    "Ukraine": "UA",
    "Croatia": "HR",
    "Slovakia": "SK",
    "Bulgaria": "BG",
    "Serbia": "RS",
    "Estonia": "EE",
    "Latvia": "LV",
    "Lithuania": "LT",
    "Slovenia": "SI",
    "Luxembourg": "LU",
    "Malta": "MT",
    "Cyprus": "CY",
    "Iceland": "IS"
}
def is_english(text):
    """Return True if *text* consists solely of ASCII characters.

    Used to decide whether a search query needs translation. Note that an
    empty string, digits, and punctuation all count as "English" here.
    """
    # str.isascii() checks every code point is < 128 — exactly the original
    # all(ord(c) < 128) scan. Stripping spaces first was redundant, since
    # a space (0x20) is itself ASCII.
    return text.isascii()
# Country name -> target language code used for query translation and the
# SERPHouse "lang" field. Countries not listed fall back to English ("en").
COUNTRY_LANGUAGES = {
    "South Korea": "ko",
    "Japan": "ja",
    "China": "zh",
    "Taiwan": "zh-tw",  # Taiwanese (Traditional Chinese) added
    "Russia": "ru",
    "France": "fr",
    "Germany": "de",
    "Spain": "es",
    "Italy": "it",
    "Netherlands": "nl",
    "Portugal": "pt",
    "Thailand": "th",
    "Vietnam": "vi",
    "Indonesia": "id",
    "Malaysia": "ms",
    "Saudi Arabia": "ar",
    "United Arab Emirates": "ar",
    "Egypt": "ar",
    "Morocco": "ar",
    "Greece": "el",
    "Poland": "pl",
    "Czech Republic": "cs",
    "Hungary": "hu",
    "Turkey": "tr",
    "Romania": "ro",
    "Bulgaria": "bg",
    "Croatia": "hr",
    "Serbia": "sr",
    "Slovakia": "sk",
    "Slovenia": "sl",
    "Estonia": "et",
    "Latvia": "lv",
    "Lithuania": "lt",
    "Ukraine": "uk",
    "Israel": "he",
    "Bangladesh": "bn",
    "Pakistan": "ur",
    "Finland": "fi",
    "Denmark": "da",
    "Norway": "no",
    "Sweden": "sv",
    "Iceland": "is",
    "Philippines": "fil",
    "Brazil": "pt-br",
    "Argentina": "es-ar",
    "Chile": "es-cl",
    "Colombia": "es-co",
    "Peru": "es-pe",
    "Venezuela": "es-ve"
}
# Query translation helper.
def translate_query(query, country):
    """Translate *query* into the language spoken in *country*.

    English input and queries targeting South Korea are passed through
    unchanged; countries without a language mapping also keep the original
    query. Any translation failure is swallowed and the (length-capped)
    original query is returned as a best-effort fallback.
    """
    try:
        # English needs no translation regardless of the target market.
        if is_english(query):
            print(f"English query detected, using original: {query}")
            return query[:255]

        # Korean input is used as-is for the South Korean market.
        if country == "South Korea":
            return query[:255]

        target_lang = COUNTRY_LANGUAGES.get(country)
        if target_lang is None:
            # No language mapping for this country: keep the original text.
            return query[:255]

        query = query[:100]  # keep the LLM prompt short
        prompt = f"""Translate this text to {target_lang} language.
For Japanese, use Kanji and Kana.
For Chinese (China), use Simplified Chinese.
For Chinese (Taiwan), use Traditional Chinese.
For Korean, use Hangul.
Only output the translated text without any explanation.
Text to translate: {query}"""

        raw_output = hf_client.text_generation(
            prompt,
            max_new_tokens=50,
            temperature=0.1
        )
        translated = raw_output.strip()[:255]
        print(f"Original query: {query}")
        print(f"Translated query: {translated}")
        return translated
    except Exception as e:
        print(f"Translation error: {str(e)}")
        return query[:255]
# Dropdown choices for the UI: every country we have an ISO code for
# (uses COUNTRY_CODES keys rather than a separate COUNTRY_LOCATIONS table).
MAJOR_COUNTRIES = list(COUNTRY_CODES.keys())
# SERPHouse live-search wrapper.
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live SERP API for *query* localized to *country*.

    The query is first translated into the country's language via
    ``translate_query``. Returns a dict with either
    ``{"results": <raw json>, "translated_query": <str>}`` on success or
    ``{"error": <message>, "translated_query": <str>}`` on failure.

    ``page`` and ``num_result`` select the result page and page size.
    (BUGFIX: both parameters were previously accepted but silently
    ignored in favor of hard-coded "1" / "10".)
    """
    url = "https://api.serphouse.com/serp/live"

    # Translate the query into the target country's language first.
    translated_query = translate_query(query, country)
    print(f"Original query: {query}")
    print(f"Translated query: {translated_query}")

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": country,  # SERPHouse accepts the plain country name here
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "web",
            "page": str(page),              # was hard-coded "1"
            "verbatim": "0",
            "gfilter": "0",
            "num_result": str(num_result),  # was hard-coded "10"
        }
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    # BUGFIX: bind before the try block — if requests.post itself raises
    # (e.g. connection error), `response` used to be an unbound local in the
    # except handler, turning the network error into a NameError.
    response = None
    try:
        response = requests.post(url, json=payload, headers=headers)
        print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
        print("Response status:", response.status_code)
        print("Response content:", response.text[:500])  # peek at body for debugging
        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except requests.RequestException as e:
        error_msg = f"Error: {str(e)}"
        if response is not None:
            error_msg += f"\nResponse content: {response.text}"
        # Report the query that was actually sent, matching the key's name.
        return {"error": error_msg, "translated_query": translated_query}
def format_results_from_raw(response_data):
    """Convert a raw search response into ``(error_message, articles)``.

    ``articles`` is a list of dicts (index, title, link, snippet, channel,
    time, image_url, translated_query). On any error the message is
    non-empty and the article list is empty.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        payload = response_data["results"]
        translated_query = response_data["translated_query"]

        # The news hits sit two levels deep in the SERPHouse payload.
        news_items = payload.get('results', {}).get('results', {}).get('news', [])
        if not news_items:
            return "๊ฒ€์ƒ‰ ๊ฒฐ๊ณผ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.", []

        # Normalize each hit, tolerating the API's alternate field names.
        articles = [
            {
                "index": position,
                "title": item.get("title", "์ œ๋ชฉ ์—†์Œ"),
                "link": item.get("url", item.get("link", "#")),
                "snippet": item.get("snippet", "๋‚ด์šฉ ์—†์Œ"),
                "channel": item.get("channel", item.get("source", "์•Œ ์ˆ˜ ์—†์Œ")),
                "time": item.get("time", item.get("date", "์•Œ ์ˆ˜ ์—†๋Š” ์‹œ๊ฐ„")),
                "image_url": item.get("img", item.get("thumbnail", "")),
                "translated_query": translated_query,
            }
            for position, item in enumerate(news_items, 1)
        ]
        return "", articles
    except Exception as e:
        return f"๊ฒฐ๊ณผ ์ฒ˜๋ฆฌ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}", []
def serphouse_search(query, country):
    """Search news for *query* in *country*; return ``(error_message, articles)``."""
    # Fetch from the API, then normalize the raw payload in one step.
    return format_results_from_raw(search_serphouse(query, country))
# Hide the Gradio footer branding.
css = """
footer {visibility: hidden;}
"""

# --- UI layout and event wiring -------------------------------------------
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI ์„œ๋น„์Šค") as iface:
    gr.Markdown("๊ฒ€์ƒ‰์–ด๋ฅผ ์ž…๋ ฅํ•˜๊ณ  ์›ํ•˜๋Š” ๊ตญ๊ฐ€๋ฅผ ์„ ํƒํ•˜๋ฉด, ํ•ด๋‹น ๊ตญ๊ฐ€์˜ ์–ธ์–ด๋กœ ๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด๋กœ ๋‰ด์Šค๋ฅผ ๊ฒ€์ƒ‰ํ•ฉ๋‹ˆ๋‹ค.")
    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="๊ฒ€์ƒ‰์–ด")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="๊ตญ๊ฐ€", value="South Korea")
        # Component that shows the (possibly translated) query actually searched.
        translated_display = gr.Markdown(visible=True)
        search_button = gr.Button("๊ฒ€์ƒ‰", variant="primary")
        progress = gr.Progress()
        status_message = gr.Markdown(visible=False)
        articles_state = gr.State([])
        # Pre-create a fixed pool of 100 hidden article "slots"; a search
        # toggles their visibility instead of creating components dynamically.
        article_components = []
        for i in range(100):
            with gr.Group(visible=False) as article_group:
                title = gr.Markdown()
                image = gr.Image(width=200, height=150)
                snippet = gr.Markdown()
                info = gr.Markdown()
            article_components.append({
                'group': article_group,
                'title': title,
                'image': image,
                'snippet': snippet,
                'info': info,
                'index': i,
            })

    def search_and_display(query, country, articles_state, progress=gr.Progress()):
        """Run a search and return the flat list of component updates.

        The output order MUST match `search_outputs` below:
        [translated_display, status markdown, 100 x (group, title, image,
        snippet, info), articles_state, status_message].
        NOTE(review): the `progress=gr.Progress()` default is the usual
        Gradio injection idiom — confirm it is intended, since the local
        `progress` component above is shadowed here.
        """
        progress(0, desc="๊ฒ€์ƒ‰ ์‹œ์ž‘...")
        # Translate and build the display text for the query banner.
        translated_query = translate_query(query, country)
        if is_english(query):
            translated_display_text = f"์˜์–ด ๊ฒ€์ƒ‰์–ด: {query}"
        elif country == "South Korea":
            translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
        elif translated_query != query:
            translated_display_text = f"์›๋ณธ ๊ฒ€์ƒ‰์–ด: {query}\n๋ฒˆ์—ญ๋œ ๊ฒ€์ƒ‰์–ด: {translated_query}"
        else:
            translated_display_text = f"๊ฒ€์ƒ‰์–ด: {query}"
        progress(0.2, desc="๊ฒ€์ƒ‰ ์ค‘...")
        # NOTE(review): search_serphouse translates the query again — the
        # query is sent to the translation model twice per search.
        response_data = search_serphouse(query, country)
        error_message, articles = format_results_from_raw(response_data)
        outputs = [gr.update(value=translated_display_text, visible=True)]
        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            # Hide every article slot on error.
            for comp in article_components:
                outputs.extend([
                    gr.update(visible=False), gr.update(), gr.update(),
                    gr.update(), gr.update()
                ])
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            total_articles = len(articles)
            for idx, comp in enumerate(article_components):
                progress((idx + 1) / total_articles, desc=f"๊ฒฐ๊ณผ ํ‘œ์‹œ ์ค‘... {idx + 1}/{total_articles}")
                if idx < len(articles):
                    article = articles[idx]
                    image_url = article['image_url']
                    # Skip inline data: URIs — only real image URLs are shown.
                    image_update = gr.update(value=image_url, visible=True) if image_url and not image_url.startswith('data:image') else gr.update(value=None, visible=False)
                    outputs.extend([
                        gr.update(visible=True),
                        gr.update(value=f"### [{article['title']}]({article['link']})"),
                        image_update,
                        gr.update(value=f"**์š”์•ฝ:** {article['snippet']}"),
                        gr.update(value=f"**์ถœ์ฒ˜:** {article['channel']} | **์‹œ๊ฐ„:** {article['time']}")
                    ])
                else:
                    # Unused slot: keep it hidden.
                    outputs.extend([
                        gr.update(visible=False), gr.update(), gr.update(),
                        gr.update(), gr.update()
                    ])
            articles_state = articles
        progress(1.0, desc="์™„๋ฃŒ!")
        outputs.append(articles_state)
        outputs.append(gr.update(visible=False))
        return outputs

    # Output components updated when the search button is clicked.
    # NOTE(review): the second element is a brand-new Markdown created right
    # here — it (not `status_message`) receives the error text, while
    # `status_message` only ever receives the final hide-update. Verify
    # errors are actually visible where intended.
    search_outputs = [translated_display, gr.Markdown(visible=False)]  # translated-query display component
    for comp in article_components:
        search_outputs.extend([
            comp['group'],
            comp['title'],
            comp['image'],
            comp['snippet'],
            comp['info']
        ])
    search_outputs.extend([articles_state, status_message])

    # Wire up the search button click event.
    search_button.click(
        fn=search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )

iface.launch()