|
import gradio as gr |
|
import requests |
|
import json |
|
import os |
|
from datetime import datetime, timedelta |
|
from concurrent.futures import ThreadPoolExecutor |
|
from functools import lru_cache |
|
from requests.adapters import HTTPAdapter |
|
from requests.packages.urllib3.util.retry import Retry |
|
from openai import OpenAI |
|
from bs4 import BeautifulSoup |
|
import re |
|
import pathlib |
|
import sqlite3 |
|
import pytz |
|
|
|
|
|
# Company names used as search keywords and as the `keyword` column when
# results are stored in / loaded from the SQLite cache. Order is preserved
# because the UI lays the companies out in list order.
KOREAN_COMPANIES = [
    "SAMSUNG", "HYNIX", "HYUNDAI", "KIA",
    "LG", "HANWHA", "SKT", "Lotte",
    "KOGAS", "KEPCO", "SK", "POSCO",
    "DOOSAN", "WOORI", "KAKAO", "Celltrion",
]
|
|
|
def convert_to_seoul_time(timestamp_str):
    """Label a stored 'YYYY-MM-DD HH:MM:SS' timestamp as Seoul time.

    The naive timestamp is interpreted as already being Asia/Seoul wall-clock
    time (no offset arithmetic is applied) and re-rendered with a ' KST'
    suffix. On any failure the error is printed and the input is returned
    unchanged.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    try:
        naive = datetime.strptime(timestamp_str, stamp_format)
        localized = pytz.timezone('Asia/Seoul').localize(naive)
        return localized.strftime(stamp_format + ' KST')
    except Exception as e:
        print(f"์๊ฐ ๋ณํ ์ค๋ฅ: {str(e)}")
        return timestamp_str
|
|
|
|
|
|
|
def analyze_sentiment_batch(articles, client):
    """Ask the chat model for one overall sentiment report on `articles`.

    Each article contributes its 'title' and 'snippet' (missing keys become
    empty strings). Returns the model's text, or a failure message string if
    anything raises; this function never propagates exceptions.
    """
    try:
        article_blocks = (
            f"์ ๋ชฉ: {article.get('title', '')}\n๋ด์ฉ: {article.get('snippet', '')}"
            for article in articles
        )
        combined_text = "\n\n".join(article_blocks)

        prompt = f"""๋ค์ ๋ด์ค ๋ชจ์์ ๋ํด ์ ๋ฐ์ ์ธ ๊ฐ์ฑ ๋ถ์์ ์ํํ์ธ์:

๋ด์ค ๋ด์ฉ:
{combined_text}

๋ค์ ํ์์ผ๋ก ๋ถ์ํด์ฃผ์ธ์:
1. ์ ๋ฐ์ ๊ฐ์ฑ: [๊ธ์ /๋ถ์ /์ค๋ฆฝ]
2. ์ฃผ์ ๊ธ์ ์ ์์:
- [ํญ๋ชฉ1]
- [ํญ๋ชฉ2]
3. ์ฃผ์ ๋ถ์ ์ ์์:
- [ํญ๋ชฉ1]
- [ํญ๋ชฉ2]
4. ์ขํฉ ํ๊ฐ: [์์ธ ์ค๋ช]
"""

        chat_messages = [{"role": "user", "content": prompt}]
        completion = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            messages=chat_messages,
            temperature=0.3,
            max_tokens=1000
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as e:
        return f"๊ฐ์ฑ ๋ถ์ ์คํจ: {str(e)}"
|
|
|
|
|
def init_db():
    """Create the `searches` table in search_results.db if it does not exist.

    Safe to call repeatedly. The connection is closed even when the DDL
    statement fails, so a failed start-up does not leak a file handle.
    """
    db_path = pathlib.Path("search_results.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.execute('''CREATE TABLE IF NOT EXISTS searches
                     (id INTEGER PRIMARY KEY AUTOINCREMENT,
                      keyword TEXT,
                      country TEXT,
                      results TEXT,
                      timestamp DATETIME DEFAULT CURRENT_TIMESTAMP)''')
        conn.commit()
    finally:
        conn.close()
|
|
|
def save_to_db(keyword, country, results):
    """Insert one search result set into the `searches` table.

    `results` is serialised with json.dumps. The row timestamp is the current
    Asia/Seoul wall-clock time formatted 'YYYY-MM-DD HH:MM:SS' (no offset is
    stored). The connection is always closed, including when the insert fails.
    """
    # Serialise and timestamp before opening the connection so a failure
    # here cannot leave a connection open.
    payload = json.dumps(results)
    now = datetime.now(pytz.timezone('Asia/Seoul'))
    timestamp = now.strftime('%Y-%m-%d %H:%M:%S')

    conn = sqlite3.connect("search_results.db")
    try:
        conn.execute(
            """INSERT INTO searches
               (keyword, country, results, timestamp)
               VALUES (?, ?, ?, ?)""",
            (keyword, country, payload, timestamp),
        )
        conn.commit()
    finally:
        conn.close()
|
|
|
|
|
def load_from_db(keyword, country):
    """Return the newest stored results for (keyword, country).

    Returns:
        (results, timestamp_label): the decoded JSON results and the row
        timestamp passed through convert_to_seoul_time, or (None, None) when
        no matching row exists.

    The connection is closed even if the query raises.
    """
    conn = sqlite3.connect("search_results.db")
    try:
        row = conn.execute(
            "SELECT results, timestamp FROM searches WHERE keyword=? AND country=? ORDER BY timestamp DESC LIMIT 1",
            (keyword, country),
        ).fetchone()
    finally:
        conn.close()

    if row is None:
        return None, None
    results_json, stored_at = row
    return json.loads(results_json), convert_to_seoul_time(stored_at)
|
|
|
|
|
def display_results(articles):
    """Render articles as a Markdown string, one numbered section each.

    Every article must provide 'title', 'channel', 'time', 'link' and
    'snippet'. An empty list renders as an empty string.
    """
    sections = [
        f"### {position}. {item['title']}\n"
        f"์ถ์ฒ: {item['channel']}\n"
        f"์๊ฐ: {item['time']}\n"
        f"๋งํฌ: {item['link']}\n"
        f"์์ฝ: {item['snippet']}\n\n"
        for position, item in enumerate(articles, 1)
    ]
    return "".join(sections)
|
|
|
|
|
def search_company(company):
    """Search US news for `company`, cache the hits, and return Markdown.

    When the search reports an error or finds nothing, a "no results"
    message is returned and nothing is written to the database.
    """
    country = "United States"
    error_message, articles = serphouse_search(company, country)
    if error_message or not articles:
        return f"{company}์ ๋ํ ๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
    save_to_db(company, country, articles)
    return display_results(articles)
|
|
|
|
|
def load_company(company):
    """Return the cached US news for `company` as Markdown.

    The output starts with a header carrying the stored timestamp; if no
    cached results exist, a "nothing stored" message is returned instead.
    """
    results, timestamp = load_from_db(company, "United States")
    if not results:
        return f"{company}์ ๋ํ ์ ์ฅ๋ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค."
    header = f"### {company} ๊ฒ์ ๊ฒฐ๊ณผ\n์ ์ฅ ์๊ฐ: {timestamp}\n\n"
    return header + display_results(results)
|
|
|
|
|
def show_stats():
    """Build a Markdown report with per-company cache stats and sentiment.

    For every company in KOREAN_COMPANIES that has at least one stored
    search (any country), the newest row is reported: timestamp, article
    count, and a sentiment analysis from analyze_sentiment_batch.

    All rows are read first and the connection is closed before the model
    calls, so the database is neither held open during slow network requests
    nor leaked if one of them fails.
    """
    latest_rows = []
    conn = sqlite3.connect("search_results.db")
    try:
        cursor = conn.cursor()
        for company in KOREAN_COMPANIES:
            cursor.execute("""
                SELECT results, timestamp
                FROM searches
                WHERE keyword = ?
                ORDER BY timestamp DESC
                LIMIT 1
            """, (company,))
            row = cursor.fetchone()
            if row:
                latest_rows.append((company, row))
    finally:
        conn.close()

    parts = ["## ํ๊ตญ ๊ธฐ์ ๋ด์ค ๋ถ์ ๋ฆฌํฌํธ\n\n"]
    for company, (results_json, timestamp) in latest_rows:
        articles = json.loads(results_json)
        seoul_time = convert_to_seoul_time(timestamp)

        parts.append(f"### {company}\n")
        parts.append(f"- ๋ง์ง๋ง ์๋ฐ์ดํธ: {seoul_time}\n")
        parts.append(f"- ์ ์ฅ๋ ๊ธฐ์ฌ ์: {len(articles)}๊ฑด\n\n")

        if articles:
            # `client` is the module-level OpenAI-compatible client.
            sentiment_analysis = analyze_sentiment_batch(articles, client)
            parts.append("#### ๋ด์ค ๊ฐ์ฑ ๋ถ์\n")
            parts.append(f"{sentiment_analysis}\n\n")

        parts.append("---\n\n")

    return "".join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face token for the inference endpoint; the app refuses to start
# without it (a missing or empty value raises at import time).
ACCESS_TOKEN = os.environ.get("HF_TOKEN")
if not ACCESS_TOKEN:
    raise ValueError("HF_TOKEN environment variable is not set")

# OpenAI-compatible client pointed at the Hugging Face inference API.
client = OpenAI(
    api_key=ACCESS_TOKEN,
    base_url="https://api-inference.huggingface.co/v1/",
)
|
|
|
# Upper bound on results for a single-country search.
MAX_COUNTRY_RESULTS = 100
# Upper bound on merged, de-duplicated results kept for a regional search.
MAX_GLOBAL_RESULTS = 10 * MAX_COUNTRY_RESULTS
|
|
|
def create_article_components(max_results):
    """Pre-build `max_results` hidden article slots for the Gradio layout.

    Each slot is an initially invisible gr.Group containing a title Markdown,
    a 200x150 image, a snippet Markdown and an info Markdown. Returns a list
    of dicts with keys 'group', 'title', 'image', 'snippet', 'info', 'index'.
    Must be called inside an active gr.Blocks context.
    """
    slots = []
    for slot_index in range(max_results):
        with gr.Group(visible=False) as group:
            title_md = gr.Markdown()
            thumbnail = gr.Image(width=200, height=150)
            snippet_md = gr.Markdown()
            info_md = gr.Markdown()

        slots.append(dict(
            group=group,
            title=title_md,
            image=thumbnail,
            snippet=snippet_md,
            info=info_md,
            index=slot_index,
        ))
    return slots
|
|
|
# SerpHouse API key sent as the Bearer token by search_serphouse.
# NOTE(review): unlike HF_TOKEN this is not validated at start-up; when the
# variable is unset the value is None.
API_KEY = os.environ.get("SERPHOUSE_API_KEY")
|
|
|
|
|
# Display country name -> language code used for query translation and as
# the `lang` field of the search request. Insertion order is preserved.
COUNTRY_LANGUAGES = {
    "United States": "en", "KOREA": "ko", "United Kingdom": "en",
    "Taiwan": "zh-TW", "Canada": "en", "Australia": "en",
    "Germany": "de", "France": "fr", "Japan": "ja",
    "China": "zh", "India": "hi", "Brazil": "pt",
    "Mexico": "es", "Russia": "ru", "Italy": "it",
    "Spain": "es", "Netherlands": "nl", "Singapore": "en",
    "Hong Kong": "zh-HK", "Indonesia": "id", "Malaysia": "ms",
    "Philippines": "tl", "Thailand": "th", "Vietnam": "vi",
    "Belgium": "nl", "Denmark": "da", "Finland": "fi",
    "Ireland": "en", "Norway": "no", "Poland": "pl",
    "Sweden": "sv", "Switzerland": "de", "Austria": "de",
    "Czech Republic": "cs", "Greece": "el", "Hungary": "hu",
    "Portugal": "pt", "Romania": "ro", "Turkey": "tr",
    "Israel": "he", "Saudi Arabia": "ar", "United Arab Emirates": "ar",
    "South Africa": "en", "Argentina": "es", "Chile": "es",
    "Colombia": "es", "Peru": "es", "Venezuela": "es",
    "New Zealand": "en", "Bangladesh": "bn", "Pakistan": "ur",
    "Egypt": "ar", "Morocco": "ar", "Nigeria": "en",
    "Kenya": "sw", "Ukraine": "uk", "Croatia": "hr",
    "Slovakia": "sk", "Bulgaria": "bg", "Serbia": "sr",
    "Estonia": "et", "Latvia": "lv", "Lithuania": "lt",
    "Slovenia": "sl", "Luxembourg": "fr", "Malta": "mt",
    "Cyprus": "el", "Iceland": "is",
}

# Display country name -> `loc` value of the search request. Every country
# maps to its own name except "KOREA", which maps to "kr".
COUNTRY_LOCATIONS = {
    name: ("kr" if name == "KOREA" else name)
    for name in COUNTRY_LANGUAGES
}
|
|
|
|
|
|
|
# East Asia: country -> language code.
COUNTRY_LANGUAGES_EAST_ASIA = {
    "KOREA": "ko", "Taiwan": "zh-TW", "Japan": "ja",
    "China": "zh", "Hong Kong": "zh-HK",
}

# East Asia: country -> location label (each country maps to its own name).
COUNTRY_LOCATIONS_EAST_ASIA = {name: name for name in COUNTRY_LANGUAGES_EAST_ASIA}
|
|
|
|
|
# South-East Asia / Oceania: country -> language code.
COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA = {
    "Indonesia": "id", "Malaysia": "ms", "Philippines": "tl",
    "Thailand": "th", "Vietnam": "vi", "Singapore": "en",
    "Papua New Guinea": "en", "Australia": "en", "New Zealand": "en",
}

# South-East Asia / Oceania: country -> location label (identity mapping).
COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA = {
    name: name for name in COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA
}
|
|
|
|
|
# Eastern Europe: country -> language code.
COUNTRY_LANGUAGES_EAST_EUROPE = {
    "Poland": "pl", "Czech Republic": "cs", "Greece": "el",
    "Hungary": "hu", "Romania": "ro", "Ukraine": "uk",
    "Croatia": "hr", "Slovakia": "sk", "Bulgaria": "bg",
    "Serbia": "sr", "Estonia": "et", "Latvia": "lv",
    "Lithuania": "lt", "Slovenia": "sl", "Malta": "mt",
    "Cyprus": "el", "Iceland": "is", "Russia": "ru",
}

# Eastern Europe: country -> location label (identity mapping).
COUNTRY_LOCATIONS_EAST_EUROPE = {name: name for name in COUNTRY_LANGUAGES_EAST_EUROPE}
|
|
|
|
|
# Western Europe: country -> language code.
COUNTRY_LANGUAGES_WEST_EUROPE = {
    "Germany": "de", "France": "fr", "Italy": "it",
    "Spain": "es", "Netherlands": "nl", "Belgium": "nl",
    "Ireland": "en", "Sweden": "sv", "Switzerland": "de",
    "Austria": "de", "Portugal": "pt", "Luxembourg": "fr",
    "United Kingdom": "en",
}

# Western Europe: country -> location label (identity mapping).
COUNTRY_LOCATIONS_WEST_EUROPE = {name: name for name in COUNTRY_LANGUAGES_WEST_EUROPE}
|
|
|
|
|
# Middle East / Africa: country -> language code.
COUNTRY_LANGUAGES_ARAB_AFRICA = {
    "South Africa": "en", "Nigeria": "en", "Kenya": "sw",
    "Egypt": "ar", "Morocco": "ar", "Saudi Arabia": "ar",
    "United Arab Emirates": "ar", "Israel": "he",
}

# Middle East / Africa: country -> location label (identity mapping).
COUNTRY_LOCATIONS_ARAB_AFRICA = {name: name for name in COUNTRY_LANGUAGES_ARAB_AFRICA}
|
|
|
|
|
# Americas: country -> language code.
COUNTRY_LANGUAGES_AMERICA = {
    "United States": "en", "Canada": "en", "Mexico": "es",
    "Brazil": "pt", "Argentina": "es", "Chile": "es",
    "Colombia": "es", "Peru": "es", "Venezuela": "es",
}

# Americas: country -> location label (identity mapping).
COUNTRY_LOCATIONS_AMERICA = {name: name for name in COUNTRY_LANGUAGES_AMERICA}
|
|
|
|
|
# Region labels, in the same order as the branches of get_region_countries:
# East Asia, South-East Asia/Oceania, Eastern Europe, Western Europe,
# Middle East/Africa, Americas.
REGIONS = [
    "๋์์์", "๋๋จ์์์/์ค์ธ์๋์", "๋์ ๋ฝ",
    "์์ ๋ฝ", "์ค๋/์ํ๋ฆฌ์นด", "์๋ฉ๋ฆฌ์นด",
]
|
|
|
|
|
@lru_cache(maxsize=100)
def translate_query(query, country):
    """Translate a search query into the language configured for `country`.

    The query is returned unchanged when it is ASCII-only (see is_english),
    when `country` has no entry in COUNTRY_LANGUAGES, when the country is
    Korea, or when translation fails for any reason (the error is printed).

    Changes from the previous version:
    * The Korea short-circuit compared against "South Korea", but this file's
      country key is "KOREA", so the branch could never run. Both spellings
      are accepted now.
    * All translated segments are joined instead of keeping only the first.
    * The HTTP session is closed after use.

    NOTE: results, including fallbacks after a failure, are memoised by
    lru_cache for the life of the process.
    """
    try:
        if is_english(query):
            return query
        if country not in COUNTRY_LANGUAGES:
            return query
        if country in ("KOREA", "South Korea"):
            return query

        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": COUNTRY_LANGUAGES[country],
            "dt": "t",
            "q": query
        }

        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(
                "https://translate.googleapis.com/translate_a/single",
                params=params,
                timeout=(5, 10),
            )

        # presumably payload[0] is a list of [translated, original, ...]
        # segments, one per sentence - confirm against the endpoint.
        segments = response.json()[0]
        translated = "".join(seg[0] for seg in segments if seg and seg[0])
        return translated or query

    except Exception as e:
        print(f"๋ฒ์ญ ์ค๋ฅ: {str(e)}")
        return query
|
|
|
|
|
@lru_cache(maxsize=200)
def translate_to_korean(text):
    """Translate `text` to Korean, returning `text` unchanged on failure.

    Changes from the previous version:
    * Every translated segment is joined. Previously only the first segment
      ([0][0][0]) was returned, so multi-sentence snippets were cut off after
      the first sentence.
    * Empty input returns immediately without a network request.
    * The HTTP session is closed after use.

    NOTE: results, including fallbacks after a failure, are memoised by
    lru_cache for the life of the process.
    """
    if not text:
        return text

    try:
        params = {
            "client": "gtx",
            "sl": "auto",
            "tl": "ko",
            "dt": "t",
            "q": text
        }

        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(
                "https://translate.googleapis.com/translate_a/single",
                params=params,
                timeout=(5, 10),
            )

        # presumably payload[0] is a list of [translated, original, ...]
        # segments, one per sentence - confirm against the endpoint.
        segments = response.json()[0]
        translated = "".join(seg[0] for seg in segments if seg and seg[0])
        return translated or text
    except Exception as e:
        print(f"ํ๊ธ ๋ฒ์ญ ์ค๋ฅ: {str(e)}")
        return text
|
|
|
def is_english(text):
    """Return True when every character of `text` is ASCII (code point < 128).

    Despite the name this is an ASCII check, not a language check. The empty
    string counts as "English". Spaces, hyphens and underscores are ASCII
    themselves, so they need no special treatment.
    """
    for ch in text:
        if ord(ch) >= 128:
            return False
    return True
|
|
|
def is_korean(text):
    """Return True when `text` has at least one Hangul syllable (U+AC00..U+D7A3)."""
    first_syllable, last_syllable = '\uAC00', '\uD7A3'
    for ch in text:
        if first_syllable <= ch <= last_syllable:
            return True
    return False
|
|
|
def search_serphouse(query, country, page=1, num_result=10):
    """Run a live SerpHouse news search for the last day.

    Args:
        query: user query; translated with translate_query before sending.
        country: key into COUNTRY_LOCATIONS / COUNTRY_LANGUAGES; unknown
            countries fall back to "United States" / "en".
        page: result page to request. It was previously ignored (always "1");
            honouring it is backward-compatible because the default is 1.
        num_result: accepted for interface compatibility but NOT used. The
            request always asks for 100 results, and using the default of 10
            would change what existing callers receive.

    Returns:
        {"results": <decoded JSON>, "translated_query": <str>} on success, or
        {"error": <message>, "translated_query": <original query>} on a
        timeout, request failure or unexpected error.

    The HTTP session is closed after the request.
    """
    url = "https://api.serphouse.com/serp/live"

    now = datetime.utcnow()
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            "page": str(page),
            "num": "100",
            "date_range": date_range,
            "sort_by": "date"
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}"
    }

    try:
        # Retry transient server errors and rate limiting; POST must be
        # allowed explicitly.
        retries = Retry(
            total=5,
            backoff_factor=1,
            status_forcelist=[500, 502, 503, 504, 429],
            allowed_methods=["POST"]
        )
        adapter = HTTPAdapter(max_retries=retries)

        with requests.Session() as session:
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            response = session.post(
                url,
                json=payload,
                headers=headers,
                timeout=(30, 30)
            )
            response.raise_for_status()
            return {"results": response.json(), "translated_query": translated_query}

    except requests.exceptions.Timeout:
        return {
            "error": "๊ฒ์ ์๊ฐ์ด ์ด๊ณผ๋์์ต๋๋ค. ์ ์ ํ ๋ค์ ์๋ํด์ฃผ์ธ์.",
            "translated_query": query
        }
    except requests.exceptions.RequestException as e:
        return {
            "error": f"๊ฒ์ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": query
        }
    except Exception as e:
        return {
            "error": f"์๊ธฐ์น ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}",
            "translated_query": query
        }
|
|
|
def format_results_from_raw(response_data):
    """Turn a search_serphouse response into (error_message, articles).

    Articles that look Korean are dropped: a Korean marker in the URL or the
    channel name, or a Korean keyword in the title. The remaining articles
    are returned as dicts with keys 'index', 'title', 'link', 'snippet',
    'channel', 'time', 'image_url' and 'translated_query'. 'index' is the
    1-based position in the raw result list, so it may have gaps.

    Returns ("", articles) on success, or (message, []) when the response
    carries an error, has no news results, or cannot be processed.

    Changes from the previous version:
    * 'link' keeps the URL's original case. It used to be the lower-cased
      copy made for matching, which breaks case-sensitive paths and query
      strings.
    * A None url/title/channel no longer aborts the whole batch.
    """
    if "error" in response_data:
        return "Error: " + response_data["error"], []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "๊ฒ์ ๊ฒฐ๊ณผ๊ฐ ์์ต๋๋ค.", []

        korean_domains = ('.kr', 'korea', 'korean', 'yonhap', 'hankyung', 'chosun',
                          'donga', 'joins', 'hani', 'koreatimes', 'koreaherald')
        korean_keywords = ('korea', 'korean', 'seoul', 'busan', 'incheon', 'daegu',
                           'gwangju', 'daejeon', 'ulsan', 'sejong')

        filtered_articles = []
        for idx, result in enumerate(news_results, 1):
            link = result.get("url", result.get("link", "")) or ""

            # Lower-cased copies are for matching only.
            url_lc = link.lower()
            title_lc = (result.get("title", "") or "").lower()
            channel_lc = (result.get("channel", result.get("source", "")) or "").lower()

            is_korean_content = (
                any(marker in url_lc or marker in channel_lc for marker in korean_domains)
                or any(keyword in title_lc for keyword in korean_keywords)
            )
            if is_korean_content:
                continue

            filtered_articles.append({
                "index": idx,
                "title": result.get("title", "์ ๋ชฉ ์์"),
                "link": link,
                "snippet": result.get("snippet", "๋ด์ฉ ์์"),
                "channel": result.get("channel", result.get("source", "์ ์ ์์")),
                "time": result.get("time", result.get("date", "์ ์ ์๋ ์๊ฐ")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query
            })

        return "", filtered_articles
    except Exception as e:
        return f"๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค ์ค๋ฅ ๋ฐ์: {str(e)}", []
|
|
|
def serphouse_search(query, country):
    """Search `country` news for `query`; return (error_message, articles).

    Convenience wrapper: runs search_serphouse and passes the raw response
    through format_results_from_raw.
    """
    return format_results_from_raw(search_serphouse(query, country))
|
|
|
|
|
def search_and_display(query, country, articles_state, progress=gr.Progress()):
    """Gradio handler: search one country and fill the article slots.

    Returns a flat list of updates in this order: status message, translated
    query display, error message, then five updates per entry of the
    module-level `article_components` (group, title, image, snippet, info),
    and finally the new articles state.

    Changes from the previous version:
    * Progress is reported only for slots that hold an article. The fraction
      used to be (slot index + 1) / article count for every slot, which went
      above 1.0 whenever there were more slots than articles.
    * A successful search whose articles were all filtered out (no error,
      empty list) now hides every slot instead of dividing by zero or
      returning too few outputs.
    """
    def _hidden_slot():
        # One hidden slot: group, title, image, snippet, info.
        return [
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update()
        ]

    with ThreadPoolExecutor(max_workers=3) as executor:
        progress(0, desc="๊ฒ์์ด ๋ฒ์ญ ์ค...")
        translated_query = executor.submit(translate_query, query, country).result()
        if translated_query != query:
            translated_display = f"**์๋ณธ ๊ฒ์์ด:** {query}\n**๋ฒ์ญ๋ ๊ฒ์์ด:** {translated_query}"
        else:
            translated_display = f"**๊ฒ์์ด:** {query}"

        progress(0.3, desc="๊ฒ์ ์ค...")
        response_data = search_serphouse(query, country)

        progress(0.6, desc="๊ฒฐ๊ณผ ์ฒ๋ฆฌ ์ค...")
        error_message, articles = format_results_from_raw(response_data)

        outputs = [
            gr.update(value="๊ฒ์์ ์งํ์ค์๋๋ค...", visible=True),
            gr.update(value=translated_display, visible=True),
        ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _ in article_components:
                outputs.extend(_hidden_slot())
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))

            if articles:
                # Translate all snippets concurrently, then collect in order.
                futures = [
                    (article, executor.submit(translate_to_korean, article['snippet']))
                    for article in articles
                ]
                progress(0.8, desc="๋ฒ์ญ ์ฒ๋ฆฌ ์ค...")
                for article, future in futures:
                    article['korean_summary'] = future.result()

            total_articles = len(articles)
            for idx, _ in enumerate(article_components):
                if idx >= total_articles:
                    outputs.extend(_hidden_slot())
                    continue

                progress((idx + 1) / total_articles, desc=f"๊ฒฐ๊ณผ ํ์ ์ค... {idx + 1}/{total_articles}")
                article = articles[idx]
                image_url = article['image_url']
                # Inline data: URIs are not shown as images.
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)

                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**์์ฝ:** {article['snippet']}\n\n**ํ๊ธ ์์ฝ:** {article['korean_summary']}"),
                    gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **์๊ฐ:** {article['time']}")
                ])
            articles_state = articles

        progress(1.0, desc="์๋ฃ!")
        outputs.append(articles_state)
        # Hide the "search in progress" status now that the work is done.
        outputs[0] = gr.update(value="", visible=False)

        return outputs
|
|
|
def get_region_countries(region):
    """Return (locations, languages) for the selected region label.

    The mapping is keyed on the module-level REGIONS list rather than on a
    second copy of the label literals, so the labels shown to the user and
    the labels matched here cannot drift apart. The tuple below must stay in
    the same order as REGIONS: East Asia, South-East Asia/Oceania, Eastern
    Europe, Western Europe, Middle East/Africa, Americas.

    An unknown region yields two empty dicts.
    """
    region_tables = (
        (COUNTRY_LOCATIONS_EAST_ASIA, COUNTRY_LANGUAGES_EAST_ASIA),
        (COUNTRY_LOCATIONS_SOUTHEAST_ASIA_OCEANIA, COUNTRY_LANGUAGES_SOUTHEAST_ASIA_OCEANIA),
        (COUNTRY_LOCATIONS_EAST_EUROPE, COUNTRY_LANGUAGES_EAST_EUROPE),
        (COUNTRY_LOCATIONS_WEST_EUROPE, COUNTRY_LANGUAGES_WEST_EUROPE),
        (COUNTRY_LOCATIONS_ARAB_AFRICA, COUNTRY_LANGUAGES_ARAB_AFRICA),
        (COUNTRY_LOCATIONS_AMERICA, COUNTRY_LANGUAGES_AMERICA),
    )
    for label, tables in zip(REGIONS, region_tables):
        if region == label:
            return tables
    return {}, {}
|
|
|
def search_global(query, region, articles_state_global):
    """Gradio generator: search each country of `region`, streaming updates.

    Each yielded list holds: status message, query display, five updates per
    entry of the module-level `global_article_components` (group, title,
    image, snippet, info), and the current list of unique articles.

    After each country that returns articles, all results so far are sorted
    by their 'time' string (descending), de-duplicated by link, capped at
    MAX_GLOBAL_RESULTS and re-rendered. A failure in one country is printed
    and the loop moves on.

    Changes from the previous version:
    * `unique_results` is initialised up front. It was only assigned after a
      successful country search, so an unknown region or a region with no
      hits raised UnboundLocalError when building the final status.
    * The inner slot loop no longer reuses the country counter's name.
    """
    def _hidden_slot():
        # One hidden slot: group, title, image, snippet, info.
        return [
            gr.update(visible=False), gr.update(), gr.update(),
            gr.update(), gr.update()
        ]

    status_msg = f"{region} ์ง์ญ ๊ฒ์์ ์์ํฉ๋๋ค..."
    all_results = []
    unique_results = []

    outputs = [
        gr.update(value=status_msg, visible=True),
        gr.update(value=f"**๊ฒ์์ด:** {query}", visible=True),
    ]
    for _ in global_article_components:
        outputs.extend(_hidden_slot())
    outputs.append([])

    yield outputs

    locations, _languages = get_region_countries(region)
    total_countries = len(locations)

    for country_no, country in enumerate(locations, 1):
        try:
            status_msg = f"{region} - {country} ๊ฒ์ ์ค... ({country_no}/{total_countries} ๊ตญ๊ฐ)"
            outputs[0] = gr.update(value=status_msg, visible=True)
            yield outputs

            error_message, articles = serphouse_search(query, country)
            if error_message or not articles:
                continue

            for article in articles:
                article['source_country'] = country
                article['region'] = region
            all_results.extend(articles)

            # NOTE(review): 'time' is compared as a string - confirm that
            # this gives a meaningful order for the API's time format.
            sorted_results = sorted(all_results, key=lambda x: x.get('time', ''), reverse=True)

            seen_urls = set()
            unique_results = []
            for article in sorted_results:
                link = article.get('link', '')
                if link not in seen_urls:
                    seen_urls.add(link)
                    unique_results.append(article)
            unique_results = unique_results[:MAX_GLOBAL_RESULTS]

            outputs = [
                gr.update(value=f"{region} - {country_no}/{total_countries} ๊ตญ๊ฐ ๊ฒ์ ์๋ฃ\nํ์ฌ๊น์ง ๋ฐ๊ฒฌ๋ ๋ด์ค: {len(unique_results)}๊ฑด", visible=True),
                gr.update(value=f"**๊ฒ์์ด:** {query} | **์ง์ญ:** {region}", visible=True),
            ]

            for slot_no, _ in enumerate(global_article_components):
                if slot_no >= len(unique_results):
                    outputs.extend(_hidden_slot())
                    continue

                article = unique_results[slot_no]
                image_url = article.get('image_url', '')
                # Inline data: URIs are not shown as images.
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)

                korean_summary = translate_to_korean(article['snippet'])

                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**์์ฝ:** {article['snippet']}\n\n**ํ๊ธ ์์ฝ:** {korean_summary}"),
                    gr.update(value=f"**์ถ์ฒ:** {article['channel']} | **๊ตญ๊ฐ:** {article['source_country']} | **์ง์ญ:** {article['region']} | **์๊ฐ:** {article['time']}")
                ])

            outputs.append(unique_results)
            yield outputs

        except Exception as e:
            print(f"Error searching {country}: {str(e)}")
            continue

    final_status = f"{region} ๊ฒ์ ์๋ฃ! ์ด {len(unique_results)}๊ฐ์ ๋ด์ค๊ฐ ๋ฐ๊ฒฌ๋์์ต๋๋ค."
    outputs[0] = gr.update(value=final_status, visible=True)
    yield outputs
|
|
|
# Custom stylesheet passed to gr.Blocks(css=...). It hides the footer and
# styles the status/results areas, tabs, article groups, buttons, text
# inputs, the fixed progress bar and the examples table.
# NOTE(review): `.group` is declared twice; the later rule sets opacity 0
# unless the element also has the `visible` class - confirm that class is
# actually applied.
css = """
/* ์ ์ญ ์คํ์ผ */
footer {visibility: hidden;}

/* ๋ ์ด์์ ์ปจํ์ด๋ */
#status_area {
    background: rgba(255, 255, 255, 0.9);
    padding: 15px;
    border-bottom: 1px solid #ddd;
    margin-bottom: 20px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

#results_area {
    padding: 10px;
    margin-top: 10px;
}

/* ํญ ์คํ์ผ */
.tabs {
    border-bottom: 2px solid #ddd !important;
    margin-bottom: 20px !important;
}

.tab-nav {
    border-bottom: none !important;
    margin-bottom: 0 !important;
}

.tab-nav button {
    font-weight: bold !important;
    padding: 10px 20px !important;
}

.tab-nav button.selected {
    border-bottom: 2px solid #1f77b4 !important;
    color: #1f77b4 !important;
}

/* ์ํ ๋ฉ์์ง */
#status_area .markdown-text {
    font-size: 1.1em;
    color: #2c3e50;
    padding: 10px 0;
}

/* ๊ธฐ๋ณธ ์ปจํ์ด๋ */
.group {
    border: 1px solid #eee;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    background: white;
}

/* ๋ฒํผ ์คํ์ผ */
.primary-btn {
    background: #1f77b4 !important;
    border: none !important;
}

/* ์๋ ฅ ํ๋ */
.textbox {
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
}

/* ํ๋ก๊ทธ๋ ์ค๋ฐ ์ปจํ์ด๋ */
.progress-container {
    position: fixed;
    top: 0;
    left: 0;
    width: 100%;
    height: 6px;
    background: #e0e0e0;
    z-index: 1000;
}

/* ํ๋ก๊ทธ๋ ์ค๋ฐ */
.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, #2196F3, #00BCD4);
    box-shadow: 0 0 10px rgba(33, 150, 243, 0.5);
    transition: width 0.3s ease;
    animation: progress-glow 1.5s ease-in-out infinite;
}

/* ํ๋ก๊ทธ๋ ์ค ํ์คํธ */
.progress-text {
    position: fixed;
    top: 8px;
    left: 50%;
    transform: translateX(-50%);
    background: #333;
    color: white;
    padding: 4px 12px;
    border-radius: 15px;
    font-size: 14px;
    z-index: 1001;
    box-shadow: 0 2px 5px rgba(0,0,0,0.2);
}

/* ํ๋ก๊ทธ๋ ์ค๋ฐ ์ ๋๋ฉ์ด์ */
@keyframes progress-glow {
    0% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
    50% {
        box-shadow: 0 0 20px rgba(33, 150, 243, 0.8);
    }
    100% {
        box-shadow: 0 0 5px rgba(33, 150, 243, 0.5);
    }
}

/* ๋ฐ์ํ ๋์์ธ */
@media (max-width: 768px) {
    .group {
        padding: 10px;
        margin-bottom: 15px;
    }

    .progress-text {
        font-size: 12px;
        padding: 3px 10px;
    }
}

/* ๋ก๋ฉ ์ํ ํ์ ๊ฐ์ */
.loading {
    opacity: 0.7;
    pointer-events: none;
    transition: opacity 0.3s ease;
}

/* ๊ฒฐ๊ณผ ์ปจํ์ด๋ ์ ๋๋ฉ์ด์ */
.group {
    transition: all 0.3s ease;
    opacity: 0;
    transform: translateY(20px);
}

.group.visible {
    opacity: 1;
    transform: translateY(0);
}

/* Examples ์คํ์ผ๋ง */
.examples-table {
    margin-top: 10px !important;
    margin-bottom: 20px !important;
}

.examples-table button {
    background-color: #f0f0f0 !important;
    border: 1px solid #ddd !important;
    border-radius: 4px !important;
    padding: 5px 10px !important;
    margin: 2px !important;
    transition: all 0.3s ease !important;
}

.examples-table button:hover {
    background-color: #e0e0e0 !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1) !important;
}

.examples-table .label {
    font-weight: bold !important;
    color: #444 !important;
    margin-bottom: 5px !important;
}
"""
|
|
|
|
|
def get_article_content(url):
    """Download an article page and return its title, description and body.

    The result has the form
    "Title: ...\\n\\nDescription: ...\\n\\nContent:\\n<paragraphs>".
    The body comes from the first known article container that has
    paragraph or heading tags; failing that, from every <p> longer than 50
    characters. On any failure the error is printed and a string starting
    with "Error crawling content: " is returned; no exception escapes.

    Changes from the previous version:
    * The <title> fallback works. The old code called .get('content') on the
      <title> tag as well, which always produced an empty title when no
      og:title meta tag was present.
    * Whitespace normalisation keeps paragraph breaks. The old r'\\s+'
      collapse turned every newline into a space and made the following
      blank-line regex dead code.
    * The HTTP session is closed after use.
    """
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        with requests.Session() as session:
            retries = Retry(total=3, backoff_factor=0.5)
            session.mount('https://', HTTPAdapter(max_retries=retries))

            response = session.get(url, headers=headers, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')

        # Title: prefer the Open Graph meta tag, fall back to <title>.
        og_title = soup.find('meta', property='og:title')
        if og_title is not None:
            title = og_title.get('content', '')
        else:
            title_tag = soup.find('title')
            title = title_tag.get_text() if title_tag is not None else ''

        description = soup.find('meta', property='og:description') or soup.find('meta', {'name': 'description'})
        description = description.get('content', '') if description else ''

        article_content = ''

        content_selectors = [
            'article', '.article-body', '.article-content', '#article-body',
            '.story-body', '.post-content', '.entry-content', '.content-body',
            '[itemprop="articleBody"]', '.story-content'
        ]

        for selector in content_selectors:
            content = soup.select_one(selector)
            if not content:
                continue

            # Remove non-article markup before extracting text.
            for tag in content.find_all(['script', 'style', 'nav', 'header', 'footer', 'aside']):
                tag.decompose()

            paragraphs = content.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
            if paragraphs:
                texts = (p.get_text().strip() for p in paragraphs)
                article_content = '\n\n'.join(text for text in texts if text)
                break

        if not article_content:
            # Last resort: every reasonably long paragraph on the page.
            texts = (p.get_text().strip() for p in soup.find_all('p'))
            article_content = '\n\n'.join(text for text in texts if len(text) > 50)

        full_content = f"Title: {title}\n\nDescription: {description}\n\nContent:\n{article_content}"

        # Collapse horizontal whitespace, trim spaces around newlines, then
        # squeeze runs of blank lines so paragraph boundaries survive.
        full_content = re.sub(r'[^\S\n]+', ' ', full_content)
        full_content = re.sub(r' ?\n ?', '\n', full_content)
        full_content = re.sub(r'\n{3,}', '\n\n', full_content)

        return full_content.strip()

    except Exception as e:
        print(f"Crawling error details: {str(e)}")
        return f"Error crawling content: {str(e)}"
|
|
|
def respond(url, history, system_message, max_tokens, temperature, top_p):
    """Gradio generator: fetch an article, then stream translation + rewrite.

    `history` is a list of (user, assistant) tuples; it is mutated in place
    and yielded after every update so the chat view refreshes while the
    model streams.

    Changes from the previous version:
    * This function is a generator, so the old `return history` for an
      invalid URL ended it without emitting anything and the "enter a valid
      URL" message never reached the UI. The history is now yielded first.
    * The "starting" placeholder is yielded immediately instead of staying
      invisible until the first token arrives.
    * Stream chunks with no choices are skipped instead of raising.
    """
    if not url.startswith('http'):
        history.append((url, "์ฌ๋ฐ๋ฅธ URL์ ์๋ ฅํด์ฃผ์ธ์."))
        yield history
        return

    try:
        article_content = get_article_content(url)

        translation_prompt = f"""๋ค์ ์๋ฌธ ๊ธฐ์ฌ๋ฅผ ํ๊ตญ์ด๋ก ๋ฒ์ญํ๊ณ ๊ธฐ์ฌ๋ฅผ ์์ฑํด์ฃผ์ธ์.

1๋จ๊ณ: ์ ๋ฌธ ๋ฒ์ญ
===๋ฒ์ญ ์์===
{article_content}
===๋ฒ์ญ ๋===

2๋จ๊ณ: ๊ธฐ์ฌ ์์ฑ ๊ฐ์ด๋๋ผ์ธ
๋ค์ ์๊ตฌ์ฌํญ์ ๋ฐ๋ผ ํ๊ตญ์ด ๊ธฐ์ฌ๋ฅผ ์์ฑํ์ธ์:

1. ๊ตฌ์กฐ
- ํค๋๋ผ์ธ: ํต์ฌ ๋ด์ฉ์ ๋ด์ ๊ฐ๋ ฅํ ์ ๋ชฉ
- ๋ถ์ ๋ชฉ: ํค๋๋ผ์ธ ๋ณด์ ์ค๋ช
- ๋ฆฌ๋๋ฌธ: ๊ธฐ์ฌ์ ํต์ฌ์ ์์ฝํ ์ฒซ ๋ฌธ๋จ
- ๋ณธ๋ฌธ: ์์ธ ๋ด์ฉ ์ ๊ฐ

2. ์์ฑ ๊ท์น
- ๊ฐ๊ด์ ์ด๊ณ ์ ํํ ์ฌ์ค ์ ๋ฌ
- ๋ฌธ์ฅ์ '๋ค.'๋ก ์ข๊ฒฐ
- ๋จ๋ฝ ๊ฐ ์์ฐ์ค๋ฌ์ด ํ๋ฆ
- ์ธ์ฉ๊ตฌ๋ ๋ฐ์ดํ ์ฒ๋ฆฌ
- ํต์ฌ ์ ๋ณด๋ฅผ ์๋ถ๋ถ์ ๋ฐฐ์น
- ์ ๋ฌธ ์ฉ์ด๋ ์ ์ ํ ์ค๋ช ์ถ๊ฐ

3. ํ์
- ์ ์ ํ ๋จ๋ฝ ๊ตฌ๋ถ
- ์ฝ๊ธฐ ์ฌ์ด ๋ฌธ์ฅ ๊ธธ์ด
- ๋ผ๋ฆฌ์ ์ธ ์ ๋ณด ๊ตฌ์ฑ

๊ฐ ๋จ๊ณ๋ '===๋ฒ์ญ===', '===๊ธฐ์ฌ==='๋ก ๋ชํํ ๊ตฌ๋ถํ์ฌ ์ถ๋ ฅํ์ธ์.
"""

        messages = [
            {
                "role": "system",
                "content": system_message
            },
            {"role": "user", "content": translation_prompt}
        ]

        history.append((url, "๋ฒ์ญ ๋ฐ ๊ธฐ์ฌ ์์ฑ์ ์์ํฉ๋๋ค..."))
        yield history

        full_response = ""
        stream = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for message in stream:
            if not message.choices:
                continue
            token = getattr(message.choices[0].delta, 'content', None)
            if token:
                full_response += token
                # Replace the placeholder / partial answer in place.
                history[-1] = (url, full_response)
                yield history

    except Exception as e:
        error_message = f"์ฒ๋ฆฌ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {str(e)}"
        history.append((url, error_message))
        yield history
|
|
|
|
|
def continue_writing(history, system_message, max_tokens, temperature, top_p):
    """Ask the model to continue the last answer and stream the continuation.

    This is a generator: every ``yield`` produces a new history list made of
    the incoming entries plus one extra ``(label, text)`` pair holding the
    continuation generated so far. The incoming ``history`` is never mutated.

    Args:
        history: List of ``(user, bot)`` pairs; the bot text of the last pair
            is what gets continued. May be empty or ``None``.
        system_message: System prompt sent ahead of the request.
        max_tokens: Completion token limit forwarded to the model.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling cutoff forwarded to the model.

    Yields:
        The extended history. With nothing to continue, the (empty) history is
        yielded once unchanged. Failures are reported as a chat entry.
    """
    if not history:
        # ``return value`` in a generator is never delivered to the consumer;
        # yield the unchanged (empty) history so the output still gets a value.
        yield list(history or [])
        return

    base_history = list(history)
    # The last bot message may be None (e.g. a pending turn); send "" instead
    # of the literal text "None".
    last_response = base_history[-1][1] or ""

    continue_prompt = f"""이전 내용을 이어서 계속 작성해주세요.
마지막 응답: {last_response}

추가 지침:
1. 이전 내용의 맥락을 유지하며 자연스럽게 이어서 작성
2. 새로운 정보나 상세 설명을 추가
3. 필요한 경우 보충 설명이나 분석 제공
4. 기사 형식과 스타일 유지
5. 필요한 경우 추가적인 이미지 프롬프트 생성
"""

    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": continue_prompt},
    ]

    try:
        full_response = ""
        stream = client.chat.completions.create(
            model="CohereForAI/c4ai-command-r-plus-08-2024",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            # Some stream chunks carry no choices; skip them instead of
            # failing on choices[0].
            if not chunk.choices:
                continue
            token = getattr(chunk.choices[0].delta, 'content', None)
            if token:
                full_response += token
                # Emit partial text as it arrives, as respond() does.
                yield base_history + [("계속 작성", full_response)]

        # Always finish with the complete answer, even if no token arrived.
        yield base_history + [("계속 작성", full_response)]

    except Exception as e:
        # Top-level UI boundary: surface any failure as a chat message.
        error_message = f"계속 작성 중 오류가 발생했습니다: {str(e)}"
        yield base_history + [("오류", error_message)]
|
|
|
with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css, title="NewsAI ์๋น์ค") as iface: |
|
init_db() |
|
|
|
with gr.Tabs(): |
|
|
|
with gr.Tab("DB ๊ฒ์"): |
|
gr.Markdown("## ํ๊ตญ ์ฃผ์ ๊ธฐ์
๋ฏธ๊ตญ ๋ด์ค DB") |
|
gr.Markdown("๊ฐ ๊ธฐ์
์ ๋ฏธ๊ตญ ๋ด์ค๋ฅผ ๊ฒ์ํ์ฌ DB์ ์ ์ฅํ๊ณ ๋ถ๋ฌ์ฌ ์ ์์ต๋๋ค.") |
|
|
|
with gr.Column(): |
|
for i in range(0, len(KOREAN_COMPANIES), 2): |
|
with gr.Row(): |
|
|
|
with gr.Column(): |
|
company = KOREAN_COMPANIES[i] |
|
with gr.Group(): |
|
gr.Markdown(f"### {company}") |
|
with gr.Row(): |
|
search_btn = gr.Button(f"๊ฒ์", variant="primary") |
|
load_btn = gr.Button(f"์ถ๋ ฅ", variant="secondary") |
|
result_display = gr.Markdown() |
|
|
|
search_btn.click( |
|
fn=lambda c=company: search_company(c), |
|
outputs=result_display |
|
) |
|
load_btn.click( |
|
fn=lambda c=company: load_company(c), |
|
outputs=result_display |
|
) |
|
|
|
|
|
if i + 1 < len(KOREAN_COMPANIES): |
|
with gr.Column(): |
|
company = KOREAN_COMPANIES[i + 1] |
|
with gr.Group(): |
|
gr.Markdown(f"### {company}") |
|
with gr.Row(): |
|
search_btn = gr.Button(f"๊ฒ์", variant="primary") |
|
load_btn = gr.Button(f"์ถ๋ ฅ", variant="secondary") |
|
result_display = gr.Markdown() |
|
|
|
search_btn.click( |
|
fn=lambda c=company: search_company(c), |
|
outputs=result_display |
|
) |
|
load_btn.click( |
|
fn=lambda c=company: load_company(c), |
|
outputs=result_display |
|
) |
|
|
|
|
|
with gr.Row(): |
|
stats_btn = gr.Button("์ ์ฒด ๊ฒ์ ํต๊ณ ๋ณด๊ธฐ", variant="secondary") |
|
stats_display = gr.Markdown() |
|
|
|
stats_btn.click( |
|
fn=show_stats, |
|
outputs=stats_display |
|
) |
|
|
|
|
|
with gr.Tab("๊ตญ๊ฐ๋ณ"): |
|
gr.Markdown("๊ฒ์์ด๋ฅผ ์
๋ ฅํ๊ณ ์ํ๋ ๊ตญ๊ฐ(ํ๊ตญ ์ ์ธ)๋ฅผ๋ฅผ ์ ํํ๋ฉด, ๊ฒ์์ด์ ์ผ์นํ๋ 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ์ต๋ 100๊ฐ ์ถ๋ ฅํฉ๋๋ค.") |
|
gr.Markdown("๊ตญ๊ฐ ์ ํํ ๊ฒ์์ด์ 'ํ๊ธ'์ ์
๋ ฅํ๋ฉด ํ์ง ์ธ์ด๋ก ๋ฒ์ญ๋์ด ๊ฒ์ํฉ๋๋ค. ์: 'Taiwan' ๊ตญ๊ฐ ์ ํํ '์ผ์ฑ' ์
๋ ฅ์ 'ไธๆ'์ผ๋ก ์๋ ๊ฒ์") |
|
|
|
with gr.Column(): |
|
with gr.Row(): |
|
query = gr.Textbox(label="๊ฒ์์ด") |
|
country = gr.Dropdown( |
|
choices=sorted(list(COUNTRY_LOCATIONS.keys())), |
|
label="๊ตญ๊ฐ", |
|
value="United States" |
|
) |
|
|
|
|
|
gr.Examples( |
|
examples=[ |
|
"artificial intelligence", |
|
"NVIDIA", |
|
"OPENAI", |
|
"META LLAMA", |
|
"black forest labs", |
|
"GOOGLE gemini", |
|
"anthropic Claude", |
|
"X.AI", |
|
"HUGGINGFACE", |
|
"HYNIX", |
|
"Large Language model", |
|
"CHATGPT", |
|
"StabilityAI", |
|
"MISTRALAI", |
|
"QWEN", |
|
"MIDJOURNEY", |
|
"GPU" |
|
], |
|
inputs=query, |
|
label="์์ฃผ ์ฌ์ฉ๋๋ ๊ฒ์์ด" |
|
) |
|
|
|
status_message = gr.Markdown("", visible=True) |
|
translated_query_display = gr.Markdown(visible=False) |
|
search_button = gr.Button("๊ฒ์", variant="primary") |
|
|
|
progress = gr.Progress() |
|
articles_state = gr.State([]) |
|
|
|
article_components = [] |
|
for i in range(100): |
|
with gr.Group(visible=False) as article_group: |
|
title = gr.Markdown() |
|
image = gr.Image(width=200, height=150) |
|
snippet = gr.Markdown() |
|
info = gr.Markdown() |
|
|
|
article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'image': image, |
|
'snippet': snippet, |
|
'info': info, |
|
'index': i, |
|
}) |
|
|
|
|
|
with gr.Tab("์ ์ธ๊ณ"): |
|
gr.Markdown("๋๋ฅ๋ณ๋ก 24์๊ฐ ์ด๋ด ๋ด์ค๋ฅผ ๊ฒ์ํฉ๋๋ค.") |
|
|
|
with gr.Column(): |
|
with gr.Column(elem_id="status_area"): |
|
with gr.Row(): |
|
query_global = gr.Textbox(label="๊ฒ์์ด") |
|
region_select = gr.Dropdown( |
|
choices=REGIONS, |
|
label="์ง์ญ ์ ํ", |
|
value="๋์์์" |
|
) |
|
search_button_global = gr.Button("๊ฒ์", variant="primary") |
|
|
|
status_message_global = gr.Markdown("") |
|
translated_query_display_global = gr.Markdown("") |
|
|
|
with gr.Column(elem_id="results_area"): |
|
articles_state_global = gr.State([]) |
|
global_article_components = [] |
|
for i in range(MAX_GLOBAL_RESULTS): |
|
with gr.Group(visible=False) as article_group: |
|
title = gr.Markdown() |
|
image = gr.Image(width=200, height=150) |
|
snippet = gr.Markdown() |
|
info = gr.Markdown() |
|
|
|
global_article_components.append({ |
|
'group': article_group, |
|
'title': title, |
|
'image': image, |
|
'snippet': snippet, |
|
'info': info, |
|
'index': i, |
|
}) |
|
|
|
|
|
with gr.Tab("AI ๊ธฐ์ฌ ์์ฑ"): |
|
gr.Markdown("๋ด์ค URL์ ์
๋ ฅํ๋ฉด AI๊ฐ ํ๊ตญ์ด๋ก ๋ฒ์ญํ์ฌ ๊ธฐ์ฌ ํ์์ผ๋ก ์์ฑํฉ๋๋ค.") |
|
gr.Markdown("์ด๋ฏธ์ง ์์ฑ: https://huggingface.co/spaces/ginipick/FLUXllama ") |
|
|
|
with gr.Column(): |
|
chatbot = gr.Chatbot(height=600) |
|
|
|
with gr.Row(): |
|
url_input = gr.Textbox( |
|
label="๋ด์ค URL", |
|
placeholder="https://..." |
|
) |
|
|
|
with gr.Row(): |
|
translate_button = gr.Button("๊ธฐ์ฌ ์์ฑ", variant="primary") |
|
continue_button = gr.Button("๊ณ์ ์ด์ด์ ์์ฑ", variant="secondary") |
|
|
|
with gr.Accordion("๊ณ ๊ธ ์ค์ ", open=False): |
|
system_message = gr.Textbox( |
|
value="""You are a professional translator and journalist. Follow these steps strictly: |
|
1. TRANSLATION |
|
- Start with ===๋ฒ์ญ=== marker |
|
- Provide accurate Korean translation |
|
- Maintain original meaning and context |
|
2. ARTICLE WRITING |
|
- Start with ===๊ธฐ์ฌ=== marker |
|
- Write a new Korean news article based on the translation |
|
- Follow newspaper article format |
|
- Use formal news writing style |
|
- End sentences with '๋ค.' |
|
- Include headline and subheadline |
|
- Organize paragraphs clearly |
|
- Put key information first |
|
- Use quotes appropriately |
|
|
|
3. IMAGE PROMPT GENERATION |
|
- Start with ===์ด๋ฏธ์ง ํ๋กฌํํธ=== marker |
|
- Create detailed Korean prompts for image generation |
|
- Prompts should reflect the article's main theme and content |
|
- Include key visual elements mentioned in the article |
|
- Specify style, mood, and composition |
|
- Format: "์ด๋ฏธ์ง ์ค๋ช
: [์์ธ ์ค๋ช
]" |
|
- Add style keywords: "์คํ์ผ: [๊ด๋ จ ํค์๋๋ค]" |
|
- Add mood keywords: "๋ถ์๊ธฐ: [๊ด๋ จ ํค์๋๋ค]" |
|
IMPORTANT: |
|
- Must complete all three steps in order |
|
- Clearly separate each section with markers |
|
- Never skip or combine steps |
|
- Ensure image prompts align with article content""", |
|
label="System message" |
|
) |
|
|
|
max_tokens = gr.Slider( |
|
minimum=1, |
|
maximum=7800, |
|
value=7624, |
|
step=1, |
|
label="Max new tokens" |
|
) |
|
temperature = gr.Slider( |
|
minimum=0.1, |
|
maximum=4.0, |
|
value=0.7, |
|
step=0.1, |
|
label="Temperature" |
|
) |
|
top_p = gr.Slider( |
|
minimum=0.1, |
|
maximum=1.0, |
|
value=0.95, |
|
step=0.05, |
|
label="Top-P" |
|
) |
|
|
|
|
|
|
|
search_outputs = [status_message, translated_query_display, gr.Markdown(visible=False)] |
|
for comp in article_components: |
|
search_outputs.extend([ |
|
comp['group'], comp['title'], comp['image'], |
|
comp['snippet'], comp['info'] |
|
]) |
|
search_outputs.append(articles_state) |
|
|
|
search_button.click( |
|
fn=search_and_display, |
|
inputs=[query, country, articles_state], |
|
outputs=search_outputs, |
|
show_progress=True |
|
) |
|
|
|
|
|
global_search_outputs = [status_message_global, translated_query_display_global] |
|
for comp in global_article_components: |
|
global_search_outputs.extend([ |
|
comp['group'], comp['title'], comp['image'], |
|
comp['snippet'], comp['info'] |
|
]) |
|
global_search_outputs.append(articles_state_global) |
|
|
|
search_button_global.click( |
|
fn=search_global, |
|
inputs=[query_global, region_select, articles_state_global], |
|
outputs=global_search_outputs, |
|
show_progress=True |
|
) |
|
|
|
|
|
translate_button.click( |
|
fn=respond, |
|
inputs=[ |
|
url_input, |
|
chatbot, |
|
system_message, |
|
max_tokens, |
|
temperature, |
|
top_p, |
|
], |
|
outputs=chatbot |
|
) |
|
|
|
|
|
continue_button.click( |
|
fn=continue_writing, |
|
inputs=[ |
|
chatbot, |
|
system_message, |
|
max_tokens, |
|
temperature, |
|
top_p, |
|
], |
|
outputs=chatbot |
|
) |
|
|
|
# Start the web server. Binds to all interfaces on port 7860 and also requests
# a public share link, so the login below is the only access control.
iface.launch(
    server_name="0.0.0.0",
    server_port=7860,
    share=True,
    # Credentials can be supplied through the environment so they do not have
    # to live in source control.
    # NOTE(review): the literal fallbacks are kept only for backward
    # compatibility with existing deployments; set NEWSAI_AUTH_USER and
    # NEWSAI_AUTH_PASSWORD in any real deployment and rotate the old values.
    auth=(
        os.environ.get("NEWSAI_AUTH_USER", "gini"),
        os.environ.get("NEWSAI_AUTH_PASSWORD", "pick"),
    ),
    # NOTE(review): certificate validation is disabled here; confirm this is
    # still intended.
    ssl_verify=False,
    show_error=True,
)