ginipick committed on
Commit
d41bc49
·
verified ·
1 Parent(s): c526ac9

Delete app-backup1.py

Browse files
Files changed (1) hide show
  1. app-backup1.py +0 -415
app-backup1.py DELETED
@@ -1,415 +0,0 @@
1
- import gradio as gr
2
- import requests
3
- import json
4
- import os
5
- from datetime import datetime, timedelta
6
- from huggingface_hub import InferenceClient
7
-
8
-
9
-
10
# SERPHouse API token, read from the environment (None when the variable is unset).
API_KEY = os.getenv("SERPHOUSE_API_KEY")
# hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus-08-2024", token=os.getenv("HF_TOKEN"))

# Ordered (country name, language code) pairs. The order is significant: it is
# the order in which countries are listed in MAJOR_COUNTRIES.
_COUNTRY_TABLE = (
    ("United States", "en"),
    ("United Kingdom", "en"),
    ("Taiwan", "zh-TW"),  # Traditional Chinese as used in Taiwan
    ("Canada", "en"),
    ("Australia", "en"),
    ("Germany", "de"),
    ("France", "fr"),
    ("Japan", "ja"),
    ("South Korea", "ko"),
    ("China", "zh"),
    ("India", "hi"),
    ("Brazil", "pt"),
    ("Mexico", "es"),
    ("Russia", "ru"),
    ("Italy", "it"),
    ("Spain", "es"),
    ("Netherlands", "nl"),
    ("Singapore", "en"),
    ("Hong Kong", "zh-HK"),
    ("Indonesia", "id"),
    ("Malaysia", "ms"),
    ("Philippines", "tl"),
    ("Thailand", "th"),
    ("Vietnam", "vi"),
    ("Belgium", "nl"),
    ("Denmark", "da"),
    ("Finland", "fi"),
    ("Ireland", "en"),
    ("Norway", "no"),
    ("Poland", "pl"),
    ("Sweden", "sv"),
    ("Switzerland", "de"),
    ("Austria", "de"),
    ("Czech Republic", "cs"),
    ("Greece", "el"),
    ("Hungary", "hu"),
    ("Portugal", "pt"),
    ("Romania", "ro"),
    ("Turkey", "tr"),
    ("Israel", "he"),
    ("Saudi Arabia", "ar"),
    ("United Arab Emirates", "ar"),
    ("South Africa", "en"),
    ("Argentina", "es"),
    ("Chile", "es"),
    ("Colombia", "es"),
    ("Peru", "es"),
    ("Venezuela", "es"),
    ("New Zealand", "en"),
    ("Bangladesh", "bn"),
    ("Pakistan", "ur"),
    ("Egypt", "ar"),
    ("Morocco", "ar"),
    ("Nigeria", "en"),
    ("Kenya", "sw"),
    ("Ukraine", "uk"),
    ("Croatia", "hr"),
    ("Slovakia", "sk"),
    ("Bulgaria", "bg"),
    ("Serbia", "sr"),
    ("Estonia", "et"),
    ("Latvia", "lv"),
    ("Lithuania", "lt"),
    ("Slovenia", "sl"),
    ("Luxembourg", "fr"),
    ("Malta", "mt"),
    ("Cyprus", "el"),
    ("Iceland", "is"),
)

# Language code per country.
COUNTRY_LANGUAGES = dict(_COUNTRY_TABLE)

# Location value per country; every country maps to its own name.
COUNTRY_LOCATIONS = {name: name for name, _lang in _COUNTRY_TABLE}

# Country names in table order.
MAJOR_COUNTRIES = list(COUNTRY_LOCATIONS)
157
-
158
_TRANSLATE_URL = "https://translate.googleapis.com/translate_a/single"
_TRANSLATE_TIMEOUT_SECONDS = 10


def _extract_translation(payload):
    """Return the full translated text from a translate endpoint response.

    ``payload[0]`` is read as a list of segments whose first element is the
    translated text. All segments are joined; reading only the first segment
    truncates inputs that come back split into several pieces.

    Raises:
        TypeError, IndexError: if ``payload`` does not have the nested-list shape.
        ValueError: if no segment carries any text.
    """
    translated = "".join(
        segment[0] for segment in payload[0] if segment and segment[0]
    )
    if not translated:
        raise ValueError("translation response contained no text")
    return translated


def _google_translate(text, target_lang):
    """Translate ``text`` into ``target_lang`` with source-language auto-detection.

    Raises:
        requests.RequestException: on network failure, timeout or HTTP error status.
        ValueError, TypeError, IndexError: if the response cannot be parsed.
    """
    params = {
        "client": "gtx",
        "sl": "auto",
        "tl": target_lang,
        "dt": "t",
        "q": text,
    }
    # A timeout keeps a stalled request from blocking the UI handler forever.
    response = requests.get(
        _TRANSLATE_URL, params=params, timeout=_TRANSLATE_TIMEOUT_SECONDS
    )
    response.raise_for_status()
    return _extract_translation(response.json())


def translate_query(query, country):
    """Translate a search query into the language mapped to ``country``.

    The query is returned unchanged when it is ASCII-only, when ``country`` is
    "South Korea", when ``country`` has no entry in ``COUNTRY_LANGUAGES``, or
    when anything goes wrong while translating.

    Args:
        query: Search text entered by the user.
        country: Country name used to look up the target language.

    Returns:
        The translated query, or ``query`` itself as a fallback.
    """
    try:
        # ASCII-only input is treated as English and searched as typed.
        if is_english(query):
            print(f"영어 검색어 감지 - 원본 사용: {query}")
            return query

        if country not in COUNTRY_LANGUAGES:
            return query

        # With South Korea selected the input is used as-is.
        if country == "South Korea":
            print(f"한국 선택 - 원본 사용: {query}")
            return query

        target_lang = COUNTRY_LANGUAGES[country]
        print(f"번역 시도: {query} -> {country}({target_lang})")

        translated_text = _google_translate(query, target_lang)
        print(f"번역 완료: {query} -> {translated_text} ({country})")
        return translated_text

    except Exception as e:
        # Deliberately best-effort: a failed translation must not break the search.
        print(f"번역 오류: {str(e)}")
        return query


def translate_to_korean(text):
    """Translate ``text`` into Korean, returning ``text`` unchanged on any failure."""
    try:
        return _google_translate(text, "ko")
    except Exception as e:
        # Deliberately best-effort: fall back to the untranslated text.
        print(f"한글 번역 오류: {str(e)}")
        return text


def is_english(text):
    """Return True when ``text`` contains only ASCII characters (True for "")."""
    # Spaces, '-' and '_' are ASCII themselves, so removing them first (as the
    # previous implementation did) cannot change the outcome.
    return text.isascii()


def is_korean(text):
    """Return True when ``text`` contains at least one Hangul syllable (U+AC00-U+D7A3)."""
    return any('\uAC00' <= char <= '\uD7A3' for char in text)
218
-
219
def search_serphouse(query, country, page=1, num_result=10):
    """Query the SERPHouse live endpoint for news from the last day.

    The query is first passed through ``translate_query`` for ``country``; the
    request is restricted to a date range from yesterday to today (UTC) and
    sorted by date.

    Args:
        query: Search text entered by the user.
        country: Country name; selects location and language, falling back to
            "United States" / "en" when the country is unknown.
        page: Result page to request (sent as a string; default 1).
        num_result: Number of results to request (sent as a string; default 10).

    Returns:
        ``{"results": <decoded JSON>, "translated_query": ...}`` on success, or
        ``{"error": "Error: ...", "translated_query": ...}`` when the request
        fails or the response is not valid JSON.
    """
    # Local import: the file-level import only brings in datetime and timedelta.
    from datetime import timezone

    url = "https://api.serphouse.com/serp/live"
    request_timeout_seconds = 60

    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.now(timezone.utc)
    yesterday = now - timedelta(days=1)
    date_range = f"{yesterday.strftime('%Y-%m-%d')},{now.strftime('%Y-%m-%d')}"

    translated_query = translate_query(query, country)
    print(f"Original query: {query}")
    print(f"Translated query: {translated_query}")

    payload = {
        "data": {
            "q": translated_query,
            "domain": "google.com",
            "loc": COUNTRY_LOCATIONS.get(country, "United States"),
            "lang": COUNTRY_LANGUAGES.get(country, "en"),
            "device": "desktop",
            "serp_type": "news",
            # Previously hard-coded to "1" and "10", which silently ignored the
            # page / num_result arguments. Defaults yield the same payload.
            "page": str(page),
            "num": str(num_result),
            "date_range": date_range,
            "sort_by": "date",
        }
    }

    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": f"Bearer {API_KEY}",
    }

    try:
        response = requests.post(
            url, json=payload, headers=headers, timeout=request_timeout_seconds
        )
        print("Request payload:", json.dumps(payload, indent=2, ensure_ascii=False))
        print("Response status:", response.status_code)

        response.raise_for_status()
        return {"results": response.json(), "translated_query": translated_query}
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON. The key now carries
        # the translated query on this path too, matching the success path.
        return {"error": f"Error: {str(e)}", "translated_query": translated_query}
260
-
261
def format_results_from_raw(response_data):
    """Convert a raw search response into ``(error_message, articles)``.

    Args:
        response_data: Dict carrying either an ``"error"`` entry, or
            ``"results"`` (decoded API response) plus ``"translated_query"``.

    Returns:
        A tuple ``(message, articles)``. ``message`` is ``""`` on success and a
        human-readable message otherwise, in which case ``articles`` is ``[]``.
        Each article is a dict with the keys ``index`` (1-based), ``title``,
        ``link``, ``snippet``, ``channel``, ``time``, ``image_url`` and
        ``translated_query``; missing fields get placeholder values.
    """
    if "error" in response_data:
        error_text = str(response_data["error"])
        # The producer already prefixes its message with "Error: "; adding the
        # prefix unconditionally rendered "Error: Error: ...".
        if not error_text.startswith("Error: "):
            error_text = "Error: " + error_text
        return error_text, []

    try:
        results = response_data["results"]
        translated_query = response_data["translated_query"]

        # News items are nested three levels deep in the decoded response.
        news_results = results.get('results', {}).get('results', {}).get('news', [])
        if not news_results:
            return "검색 결과가 없습니다.", []

        articles = [
            {
                "index": idx,
                "title": result.get("title", "제목 없음"),
                "link": result.get("url", result.get("link", "#")),
                "snippet": result.get("snippet", "내용 없음"),
                "channel": result.get("channel", result.get("source", "알 수 없음")),
                "time": result.get("time", result.get("date", "알 수 없는 시간")),
                "image_url": result.get("img", result.get("thumbnail", "")),
                "translated_query": translated_query,
            }
            for idx, result in enumerate(news_results, 1)
        ]
        return "", articles
    except Exception as e:
        # Best-effort boundary: an unexpected response shape becomes a message
        # for the UI instead of an exception.
        return f"결과 처리 중 오류 발생: {str(e)}", []
288
-
289
def serphouse_search(query, country):
    """Search news for ``query`` in ``country`` and return the formatted result.

    Returns whatever ``format_results_from_raw`` produces for the raw response
    of ``search_serphouse``: an ``(error_message, articles)`` pair.
    """
    return format_results_from_raw(search_serphouse(query, country))
292
-
293
# Hide the default Gradio footer.
css = """
footer {visibility: hidden;}
"""

# NOTE(review): the nesting of the widgets inside the Column below is assumed —
# confirm against the running app's layout.
with gr.Blocks(theme="Nymbo/Nymbo_Theme", css=css, title="NewsAI 서비스") as iface:
    gr.Markdown("검색어를 입력하고 원하는 국가(67개국)를 선택하면, 검색어와 일치하는 24시간 이내 뉴스를 최대 100개 출력합니다.")
    gr.Markdown("국가 선택후 검색어에 '한글'을 입력하면 현지 언어로 번역되어 검색합니다. 예: 'Taiwan' 국가 선택후 '삼성' 입력시 '三星'으로 자동 검색")

    with gr.Column():
        with gr.Row():
            query = gr.Textbox(label="검색어")
            country = gr.Dropdown(MAJOR_COUNTRIES, label="국가", value="South Korea")

        # Search status message.
        status_message = gr.Markdown("", visible=True)

        # Shows the original and translated query.
        translated_query_display = gr.Markdown(visible=False)

        search_button = gr.Button("검색", variant="primary")

        progress = gr.Progress()
        articles_state = gr.State([])

        # Pre-build 100 hidden article slots; a search reveals as many as it needs.
        article_components = []
        for i in range(100):
            with gr.Group(visible=False) as article_group:
                title = gr.Markdown()
                image = gr.Image(width=200, height=150)
                snippet = gr.Markdown()
                info = gr.Markdown()

            article_components.append({
                'group': article_group,
                'title': title,
                'image': image,
                'snippet': snippet,
                'info': info,
                'index': i,
            })

    def search_and_display(query, country, articles_state, progress=gr.Progress()):
        """Run a search and build one update per output component.

        Args:
            query: Text from the search box.
            country: Selected country name.
            articles_state: Previous article list (replaced by this call).
            progress: Gradio progress tracker.

        Returns:
            A list ordered like ``search_outputs``: status message, translated
            query display, error display, five updates (group, title, image,
            snippet, info) per article slot, and finally the new article list.
        """

        def hidden_slot():
            # Updates for one unused slot: hide the group, leave its children untouched.
            return [
                gr.update(visible=False), gr.update(), gr.update(),
                gr.update(), gr.update(),
            ]

        progress(0, desc="검색어 번역 중...")

        # Translate only to show the user which query will be searched.
        translated_query = translate_query(query, country)
        if translated_query != query:
            translated_display = f"**원본 검색어:** {query}\n**번역된 검색어:** {translated_query}"
        else:
            translated_display = f"**검색어:** {query}"

        progress(0.2, desc="검색 시작...")
        error_message, articles = serphouse_search(query, country)
        progress(0.5, desc="결과 처리 중...")

        # The handler returns a single batch of updates, so an interim
        # "searching..." status could never be displayed; the status message is
        # emitted directly in its final, hidden state.
        outputs = [
            gr.update(value="", visible=False),
            gr.update(value=translated_display, visible=True),
        ]

        if error_message:
            outputs.append(gr.update(value=error_message, visible=True))
            for _slot in article_components:
                outputs.extend(hidden_slot())
            articles_state = []
        else:
            outputs.append(gr.update(value="", visible=False))
            total_articles = len(articles)
            shown_articles = articles[:len(article_components)]

            # Report progress only for slots that receive an article. Reporting
            # for every slot pushed the fraction above 1.0 and produced labels
            # such as "50/10".
            for position, article in enumerate(shown_articles, 1):
                progress(position / total_articles, desc=f"결과 표시 중... {position}/{total_articles}")

                image_url = article['image_url']
                # Inline data: URIs are not shown; only real image URLs are.
                if image_url and not image_url.startswith('data:image'):
                    image_update = gr.update(value=image_url, visible=True)
                else:
                    image_update = gr.update(value=None, visible=False)

                # Translate the snippet into Korean.
                korean_summary = translate_to_korean(article['snippet'])

                outputs.extend([
                    gr.update(visible=True),
                    gr.update(value=f"### [{article['title']}]({article['link']})"),
                    image_update,
                    gr.update(value=f"**요약:** {article['snippet']}\n\n**한글 요약:** {korean_summary}"),
                    gr.update(value=f"**출처:** {article['channel']} | **시간:** {article['time']}"),
                ])

            for _ in range(len(article_components) - len(shown_articles)):
                outputs.extend(hidden_slot())
            articles_state = articles

        progress(1.0, desc="완료!")
        outputs.append(articles_state)

        return outputs

    # Component that receives the error text; hidden until a search fails.
    error_display = gr.Markdown(visible=False)

    search_outputs = [
        status_message,
        translated_query_display,
        error_display,
    ]

    for comp in article_components:
        search_outputs.extend([
            comp['group'], comp['title'], comp['image'],
            comp['snippet'], comp['info']
        ])
    search_outputs.append(articles_state)

    search_button.click(
        search_and_display,
        inputs=[query, country, articles_state],
        outputs=search_outputs,
        show_progress=True
    )
414
-
415
# NOTE(review): these login credentials were hard-coded in source control —
# treat them as leaked and rotate them. They remain only as fallbacks so an
# existing deployment keeps working until NEWSAI_AUTH_USER and
# NEWSAI_AUTH_PASSWORD are set in the environment.
iface.launch(
    auth=(
        os.getenv("NEWSAI_AUTH_USER", "it1"),
        os.getenv("NEWSAI_AUTH_PASSWORD", "chosun1"),
    )
)