ginipick committed on
Commit
8b8afd7
·
verified ·
1 Parent(s): 73cef2e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (33).py +889 -0
  2. requirements (14).txt +16 -0
app (33).py ADDED
@@ -0,0 +1,889 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ──────────────────────────────── Imports ────────────────────────────────
2
+ import os, json, re, logging, requests, markdown, time, io
3
+ from datetime import datetime
4
+
5
+ import streamlit as st
6
+ from openai import OpenAI # OpenAI 라이브러리
7
+
8
+ from gradio_client import Client
9
+ import pandas as pd
10
+ import PyPDF2 # For handling PDF files
11
+
12
+ # ──────────────────────────────── Environment Variables / Constants ─────────────────────────
13
# Credentials are read from the environment; an empty string means "not configured".
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
# The Brave key is read from the SERPHOUSE_API_KEY variable.
BRAVE_KEY = os.getenv("SERPHOUSE_API_KEY", "")  # Keep this name
BRAVE_ENDPOINT = "https://api.search.brave.com/res/v1/web/search"
# Image-generation backend. Can be overridden with the IMAGE_API_URL environment
# variable; an unset or empty variable falls back to the previous fixed host.
IMAGE_API_URL = os.getenv("IMAGE_API_URL") or "http://211.233.58.201:7896"
# Upper bound passed as max_tokens to the chat-completion call.
MAX_TOKENS = 7999

# Blog template and style definitions (key -> label shown in the sidebar)
BLOG_TEMPLATES = {
    "ginigen": "Recommended style by Ginigen",
    "standard": "Standard 8-step framework blog",
    "tutorial": "Step-by-step tutorial format",
    "review": "Product/service review format",
    "storytelling": "Storytelling format",
    "seo_optimized": "SEO-optimized blog",
}

# Tone definitions (key -> label shown in the sidebar)
BLOG_TONES = {
    "professional": "Professional and formal tone",
    "casual": "Friendly and conversational tone",
    "humorous": "Humorous approach",
    "storytelling": "Story-driven approach",
}

# Example blog topics wired to the sidebar example buttons
EXAMPLE_TOPICS = {
    "example1": "Changes to the real estate tax system in 2025: Impact on average households and tax-saving strategies",
    "example2": "Summer festivals in 2025: A comprehensive guide to major regional events and hidden attractions",
    "example3": "Emerging industries to watch in 2025: An investment guide focused on AI opportunities",
}

# ---------------------------------- Logging ----------------------------------
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
46
+
47
+ # ──────────────────────────────── OpenAI Client ──────────────────────────
48
+
49
# OpenAI client configured with a request timeout and automatic retries.
@st.cache_resource
def get_openai_client():
    """Create an OpenAI client with timeout and retry settings.

    The function is wrapped in ``st.cache_resource``, so Streamlit hands back
    the same client object on later calls instead of rebuilding it.

    Returns:
        An ``OpenAI`` client using ``OPENAI_API_KEY``.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is empty.
    """
    if not OPENAI_API_KEY:
        # Message (Korean): "The OPENAI_API_KEY environment variable is not set."
        raise RuntimeError("⚠️ OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
    return OpenAI(
        api_key=OPENAI_API_KEY,
        timeout=60.0,   # request timeout: 60 seconds
        max_retries=3,  # retry failed requests up to 3 times
    )
60
+
61
+ # ──────────────────────────────── Blog Creation System Prompt ─────────────
62
# Ginigen recommended style prompt (used when template == "ginigen").
_GINIGEN_PROMPT = """
## 🌟 Professional Blogger System Prompt

### ✅ Official 8-step Prompt

Follow these 8 steps exactly in order to write the blog post:

### 1. Start with a greeting and empathy
- Open with a friendly tone that draws the reader in
- Ask questions or present scenarios that resonate with the reader's real-life concerns

### 2. Clearly present the problem
- Pinpoint the exact and realistic problem the reader is facing
- Emphasize the seriousness or urgency of this problem to maintain interest

### 3. Analyze the cause of the problem to build credibility
- Explain the causes of the problem logically
- Clearly and specifically present your analysis so the reader can understand it easily
- Include data, examples, or references if necessary

### 4. Offer a concrete solution
- Provide specific, actionable steps to solve the problem
- Give tips, strategies, and guidelines so the reader can implement them right away

### 5. Provide social proof
- Include real success stories, reviews, user experiences, or data
- Keep details factual and believable so the reader can trust the content

### 6. Call to action (CTA)
- Encourage the reader to take specific actions immediately
- Use urgent language such as "right now," "from today," or "immediately" to drive action

### 7. Add constraints or warnings to increase authenticity
- Acknowledge that the solution might not work for everyone
- Show sincerity and scarcity, which boosts trust

### 8. Express gratitude and guide them to further connection
- Thank the reader for their time
- Provide a natural lead-in to the next post, or ask for comments/subscriptions

---

### 🚩 Writing Style Tips
- Maintain a friendly and human-like tone
- Frequently use questions and a conversational style to engage the reader
- Use clear headings, horizontal lines, bullet points, or numbered lists for readability
- Include real-life examples and specific data where possible
"""

# Standard 8-step framework (used for every template other than "ginigen").
_BASE_PROMPT = """
You are an expert in writing professional blog posts. For every blog writing request, strictly follow this 8-step framework to produce a coherent, engaging post:

Reader Connection Phase
1.1. Friendly greeting to build rapport
1.2. Reflect actual reader concerns through introductory questions
1.3. Stimulate immediate interest in the topic

Problem Definition Phase
2.1. Define the reader's pain points in detail
2.2. Analyze the urgency and impact of the problem
2.3. Build a consensus on why it needs to be solved

Establish Expertise Phase
3.1. Analyze based on objective data
3.2. Cite expert views and research findings
3.3. Use real-life examples to further clarify the issue

Solution Phase
4.1. Provide step-by-step guidance
4.2. Suggest practical tips that can be applied immediately
4.3. Mention potential obstacles and how to overcome them

Build Trust Phase
5.1. Present actual success stories
5.2. Quote real user feedback
5.3. Use objective data to prove effectiveness

Action Phase
6.1. Suggest the first clear step the reader can take
6.2. Urge timely action by emphasizing urgency
6.3. Motivate by highlighting incentives or benefits

Authenticity Phase
7.1. Transparently disclose any limits of the solution
7.2. Admit that individual experiences may vary
7.3. Mention prerequisites or cautionary points

Relationship Continuation Phase
8.1. Conclude with sincere gratitude
8.2. Preview upcoming content to build anticipation
8.3. Provide channels for further communication
"""

# Additional guidelines appended for specific (non-ginigen) templates.
_TEMPLATE_GUIDES = {
    "tutorial": """
This blog should be in a tutorial style:
- Clearly state the goal and the final outcome first
- Provide step-by-step explanations with clear separations
- Indicate where images could be inserted for each step
- Mention approximate time requirements and difficulty level
- List necessary tools or prerequisite knowledge
- Give troubleshooting tips and common mistakes to avoid
- Conclude with suggestions for next steps or advanced applications
""",
    "review": """
This blog should be in a review style:
- Separate objective facts from subjective opinions
- Clearly list your evaluation criteria
- Discuss both pros and cons in a balanced way
- Compare with similar products/services
- Specify the target audience for whom it is suitable
- Provide concrete use cases and outcomes
- Conclude with a final recommendation or alternatives
""",
    "storytelling": """
This blog should be in a storytelling style:
- Start with a real or hypothetical person or case
- Emphasize emotional connection with the problem scenario
- Follow a narrative structure centered on conflict and resolution
- Include meaningful insights or lessons learned
- Maintain an emotional thread the reader can relate to
- Balance storytelling with useful information
- Encourage the reader to reflect on their own story
""",
    "seo_optimized": """
This blog should be SEO-optimized:
- Include the main keyword in the title, headings, and first paragraph
- Spread related keywords naturally throughout the text
- Keep paragraphs around 300-500 characters
- Use question-based subheadings
- Make use of lists, tables, and bold text to diversify formatting
- Indicate where internal links could be inserted
- Provide sufficient content of at least 2000-3000 characters
""",
}

# One-sentence voice instructions, keyed by tone.
_TONE_GUIDES = {
    "professional": "Use a professional, authoritative voice. Clearly explain any technical terms and present data or research to maintain a logical flow.",
    "casual": "Use a relaxed, conversational style. Employ personal experiences, relatable examples, and a friendly voice (e.g., 'It's super useful!').",
    "humorous": "Use humor and witty expressions. Add funny analogies or jokes while preserving accuracy and usefulness.",
    "storytelling": "Write as if telling a story, with emotional depth and narrative flow. Incorporate characters, settings, conflicts, and resolutions.",
}

# Appended when web search results accompany the request.
_SEARCH_GUIDE = """
Guidelines for Using Search Results:
- Accurately incorporate key information from the search results into the blog
- Include recent data, statistics, and case studies from the search results
- When quoting, specify the source within the text (e.g., "According to XYZ website...")
- At the end of the blog, add a "References" section and list major sources with links
- If there are conflicting pieces of information, present multiple perspectives
- Make sure to reflect the latest trends and data from the search results
"""

# Appended when uploaded file content accompanies the request.
_UPLOAD_GUIDE = """
Guidelines for Using Uploaded Files (Highest Priority):
- The uploaded files must be a main source of information for the blog
- Carefully examine the data, statistics, or examples in the file and integrate them
- Directly quote and thoroughly explain any key figures or claims from the file
- Highlight the file content as a crucial aspect of the blog
- Mention the source clearly, e.g., "According to the uploaded data..."
- For CSV files, detail important stats or numerical data in the blog
- For PDF files, quote crucial segments or statements
- For text files, integrate relevant content effectively
- Even if the file content seems tangential, do your best to connect it to the blog topic
- Keep consistency throughout and ensure the file's data is appropriately reflected
"""


def get_system_prompt(template="ginigen", tone="professional", word_count=1750, include_search_results=False, include_uploaded_files=False) -> str:
    """Assemble the system prompt for a blog-writing request.

    Args:
        template: Template key. ``"ginigen"`` selects the Ginigen prompt; any
            other value selects the standard 8-step framework, plus the
            matching template guide when one exists for that key.
        tone: Tone key; unknown keys add no tone guidance.
        word_count: Target length in words. The prompt asks for a range of
            +/- 250 words around it (the lower bound never drops below 0).
        include_search_results: Append the search-result usage guidelines.
        include_uploaded_files: Append the uploaded-file usage guidelines.

    Returns:
        The complete system prompt text.
    """
    sections = [_GINIGEN_PROMPT if template == "ginigen" else _BASE_PROMPT]

    # Template-specific guidance only applies on top of the standard framework.
    if template != "ginigen" and template in _TEMPLATE_GUIDES:
        sections.append("\n" + _TEMPLATE_GUIDES[template])

    if tone in _TONE_GUIDES:
        sections.append(f"\n\nTone and Manner: {_TONE_GUIDES[tone]}")

    if include_search_results:
        sections.append(f"\n\n{_SEARCH_GUIDE}")

    if include_uploaded_files:
        sections.append(f"\n\n{_UPLOAD_GUIDE}")

    # The length target is expressed in words (matching the parameter name and
    # the "word count" slider); clamp so small targets never yield a negative bound.
    lower = max(word_count - 250, 0)
    upper = word_count + 250
    sections.append(
        f"\n\nWriting Requirements:\n"
        f"9.1. Word Count: around {lower}-{upper} words\n"
        f"9.2. Paragraph Length: 3-4 sentences each\n"
        f"9.3. Visual Cues: Use subheadings, separators, and bullet/numbered lists\n"
        f"9.4. Data: Cite all sources\n"
        f"9.5. Readability: Use clear paragraph breaks and highlights where necessary"
    )

    return "".join(sections)
276
+
277
+ # ──────────────────────────────── Brave Search API ────────────────────────
278
def _parse_brave_results(data, count):
    """Normalize a decoded Brave response into the result dicts used by the app.

    Raises:
        ValueError: If the response carries no results.
    """
    raw = data.get("web", {}).get("results") or data.get("results", [])
    if not raw:
        logging.warning(f"No Brave search results found. Response: {data}")
        raise ValueError("No search results found.")

    arts = []
    for i, res in enumerate(raw[:count], 1):
        url = res.get("url", res.get("link", ""))
        # Strip the scheme and an optional "www." to get a short display host.
        host = re.sub(r"https?://(www\.)?", "", url).split("/")[0]
        arts.append({
            "index": i,
            "title": res.get("title", "No title"),
            "link": url,
            "snippet": res.get("description", res.get("text", "No snippet")),
            "displayed_link": host,
        })
    return arts


@st.cache_data(ttl=3600)
def _brave_search_cached(query: str, count: int):
    """Perform one Brave request and return the parsed results.

    Failures are raised rather than returned, so only successful result lists
    end up as this function's cached return value.
    """
    headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip",
        "X-Subscription-Token": BRAVE_KEY,
    }
    params = {"q": query, "count": str(count)}

    r = requests.get(BRAVE_ENDPOINT, headers=headers, params=params, timeout=15)
    r.raise_for_status()
    data = r.json()

    logging.info(f"Brave search result data structure: {list(data.keys())}")
    return _parse_brave_results(data, count)


def brave_search(query: str, count: int = 20):
    """
    Call the Brave Web Search API → list[dict]
    Returns fields: index, title, link, snippet, displayed_link

    Makes up to 3 attempts, sleeping 2 seconds between them, and returns an
    empty list when every attempt fails. Successful results are cached for
    one hour; the empty failure result is deliberately NOT cached, so a
    transient outage does not suppress results for later identical queries.

    Raises:
        RuntimeError: If the Brave API key (SERPHOUSE_API_KEY) is empty.
    """
    if not BRAVE_KEY:
        raise RuntimeError("⚠️ SERPHOUSE_API_KEY (Brave API Key) environment variable is empty.")

    for attempt in range(3):
        try:
            arts = _brave_search_cached(query, count)
            logging.info(f"Brave search success: {len(arts)} results")
            return arts
        except Exception as e:
            logging.error(f"Brave search failure (attempt {attempt+1}/3): {e}")
            if attempt < 2:
                time.sleep(2)

    return []
328
+
329
def mock_results(query: str) -> str:
    """Build the placeholder "search results" text used when the search API fails.

    The text carries a generation timestamp and asks for the blog to be
    written from existing knowledge about ``query``.
    """
    generated_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    pieces = [
        f"# Fallback Search Content (Generated: {generated_at})\n\n",
        f"The search API request failed. Please generate the blog based on any pre-existing knowledge about '{query}'.\n\n",
        "You may consider the following points:\n\n",
        f"- Basic concepts and importance of {query}\n",
        "- Commonly known related statistics or trends\n",
        "- Typical expert opinions on this subject\n",
        "- Questions that readers might have\n\n",
        "Note: This is fallback guidance, not real-time data.\n\n",
    ]
    return "".join(pieces)
340
+
341
def do_web_search(query: str) -> str:
    """Run a web search for ``query`` and render the hits as a Markdown block.

    Falls back to ``mock_results(query)`` when the search yields nothing or
    when any step raises.
    """
    try:
        hits = brave_search(query, 20)
        if not hits:
            logging.warning("No search results, using fallback content")
            return mock_results(query)

        header = "# Web Search Results\nUse the information below to enhance the reliability of your blog. When you quote, please cite the source, and add a References section at the end of the blog.\n\n"
        entries = []
        for hit in hits:
            entries.append(
                f"### Result {hit['index']}: {hit['title']}\n\n{hit['snippet']}\n\n"
                f"**Source**: [{hit['displayed_link']}]({hit['link']})\n\n---\n"
            )
        return header + "\n".join(entries)
    except Exception as err:
        logging.error(f"Web search process failed: {str(err)}")
        return mock_results(query)
359
+
360
+ # ──────────────────────────────── File Upload Handling ─────────────────────
361
def process_text_file(file):
    """Render an uploaded text file as a Markdown section.

    Args:
        file: Binary file-like object with ``read``, ``seek`` and ``name``.

    Returns:
        ``"## Text File: <name>"`` followed by the UTF-8 decoded content
        (undecodable bytes are dropped; content longer than 10000 characters
        is cut to 9700 plus a truncation marker), or an error message string
        if processing fails.
    """
    try:
        # Rewind first: an earlier reader (e.g. a preview) may have left the
        # stream position past the start, which would silently drop content.
        file.seek(0)
        content = file.read()
        file.seek(0)  # leave the stream reusable for later readers

        text = content.decode('utf-8', errors='ignore')
        if len(text) > 10000:
            text = text[:9700] + "...(truncated)..."

        return f"## Text File: {file.name}\n\n" + text
    except Exception as e:
        logging.error(f"Error processing text file: {str(e)}")
        return f"Error processing text file: {str(e)}"
377
+
378
def process_csv_file(file):
    """Render an uploaded CSV file as a Markdown section.

    The section lists row/column counts and column names, a preview of the
    first 10 rows, and ``describe()`` statistics for numeric columns.

    Args:
        file: Binary file-like object with ``read``, ``seek`` and ``name``.

    Returns:
        The Markdown text, or an error message string if the CSV cannot be
        processed.
    """
    try:
        # Rewind first: if an earlier reader consumed part of the stream, the
        # header row would otherwise be lost and data rows misread as headers.
        file.seek(0)
        content = file.read()
        file.seek(0)  # leave the stream reusable for later readers

        df = pd.read_csv(io.BytesIO(content))
        result = f"## CSV File: {file.name}\n\n"
        result += f"- Rows: {len(df)}\n"
        result += f"- Columns: {len(df.columns)}\n"
        result += f"- Column Names: {', '.join(df.columns.tolist())}\n\n"

        result += "### Data Preview\n\n"
        preview_df = df.head(10)
        try:
            markdown_table = preview_df.to_markdown(index=False)
            if markdown_table:
                result += markdown_table + "\n\n"
            else:
                result += "Unable to display CSV data.\n\n"
        except Exception as e:
            # to_markdown can fail (e.g. optional dependency missing); fall
            # back to pandas' plain-text rendering.
            logging.error(f"Markdown table conversion error: {e}")
            result += "Displaying data as text:\n\n"
            result += str(preview_df) + "\n\n"

        num_cols = df.select_dtypes(include=['number']).columns
        if len(num_cols) > 0:
            result += "### Basic Statistical Information\n\n"
            try:
                stats_df = df[num_cols].describe().round(2)
                stats_markdown = stats_df.to_markdown()
                if stats_markdown:
                    result += stats_markdown + "\n\n"
                else:
                    result += "Unable to display statistical information.\n\n"
            except Exception as e:
                logging.error(f"Statistical info conversion error: {e}")
                result += "Unable to generate statistical information.\n\n"

        return result
    except Exception as e:
        logging.error(f"CSV file processing error: {str(e)}")
        return f"Error processing CSV file: {str(e)}"
421
+
422
def process_pdf_file(file):
    """Render an uploaded PDF file as a Markdown section.

    Extracts text from at most the first 5 pages, capping each page at 1500
    characters and stopping once the accumulated text exceeds 8000 characters.

    Args:
        file: Binary file-like object with ``read``, ``seek`` and ``name``.

    Returns:
        The Markdown text, or a Markdown error section if the PDF cannot be
        opened.
    """
    try:
        # Rewind first: a partially consumed stream would hand the parser a
        # truncated PDF.
        file.seek(0)
        file_bytes = file.read()
        file.seek(0)  # leave the stream reusable for later readers

        # Parse from an in-memory copy with PyPDF2 (non-strict mode).
        pdf_file = io.BytesIO(file_bytes)
        reader = PyPDF2.PdfReader(pdf_file, strict=False)
        total_pages = len(reader.pages)

        # Basic info
        result = f"## PDF File: {file.name}\n\n"
        result += f"- Total pages: {total_pages}\n\n"

        # Extract text by page (limit to first 5 pages)
        max_pages = min(5, total_pages)
        all_text = ""

        for i in range(max_pages):
            try:
                page_text = reader.pages[i].extract_text()

                current_page_text = f"### Page {i+1}\n\n"
                if page_text and len(page_text.strip()) > 0:
                    # Limit to 1500 characters per page
                    if len(page_text) > 1500:
                        current_page_text += page_text[:1500] + "...(truncated)...\n\n"
                    else:
                        current_page_text += page_text + "\n\n"
                else:
                    current_page_text += "(No text could be extracted from this page)\n\n"

                all_text += current_page_text

                # If total text is too long, stop reading further pages
                if len(all_text) > 8000:
                    all_text += "...(truncating remaining pages; PDF is too large)...\n\n"
                    break

            except Exception as page_err:
                # A single bad page should not abort the whole document.
                logging.error(f"Error processing PDF page {i+1}: {str(page_err)}")
                all_text += f"### Page {i+1}\n\n(Error extracting content: {str(page_err)})\n\n"

        if total_pages > max_pages:
            all_text += f"\nNote: Only the first {max_pages} pages are shown out of {total_pages} total.\n\n"

        result += "### PDF Content\n\n" + all_text
        return result

    except Exception as e:
        logging.error(f"PDF file processing error: {str(e)}")
        return f"## PDF File: {file.name}\n\nError occurred: {str(e)}\n\nThis PDF file cannot be processed."
476
+
477
def process_uploaded_files(files):
    """Merge every uploaded file's rendered content into one Markdown string.

    Returns ``None`` when ``files`` is empty or ``None``. Files are dispatched
    on their lower-cased extension (txt / csv / pdf); anything else is listed
    as unsupported, and a failure on one file is reported inline without
    stopping the remaining files.
    """
    if not files:
        return None

    divider = "\n\n---\n\n"
    sections = [
        "# Uploaded File Contents\n\n",
        "Below is the content from the files provided by the user. Integrate this data as a main source of information for the blog.\n\n",
    ]

    for upload in files:
        try:
            suffix = upload.name.rsplit('.', 1)[-1].lower()
            if suffix == 'txt':
                sections.append(process_text_file(upload) + divider)
            elif suffix == 'csv':
                sections.append(process_csv_file(upload) + divider)
            elif suffix == 'pdf':
                sections.append(process_pdf_file(upload) + divider)
            else:
                sections.append(f"### Unsupported File: {upload.name}\n\n---\n\n")
        except Exception as err:
            logging.error(f"File processing error {upload.name}: {err}")
            sections.append(f"### File processing error: {upload.name}\n\nError: {err}\n\n---\n\n")

    return "".join(sections)
501
+
502
+ # ──────────────────────────────── Image & Utility ─────────────────────────
503
def generate_image(prompt, w=768, h=768, g=3.5, steps=30, seed=3):
    """Request an image from the service at ``IMAGE_API_URL``.

    Returns a ``(image, message)`` pair: on success the first element of the
    service response plus a ``"Seed: ..."`` caption; ``(None, reason)`` when
    the prompt is empty or the call fails.
    """
    if not prompt:
        return None, "Insufficient prompt"

    request_args = dict(
        prompt=prompt,
        width=w,
        height=h,
        guidance=g,
        inference_steps=steps,
        seed=seed,
        do_img2img=False,
        init_image=None,
        image2image_strength=0.8,
        resize_img=True,
        api_name="/generate_image",
    )
    try:
        outcome = Client(IMAGE_API_URL).predict(**request_args)
        return outcome[0], f"Seed: {outcome[1]}"
    except Exception as err:
        logging.error(err)
        return None, str(err)
519
+
520
def extract_image_prompt(blog_text: str, topic: str):
    """Ask the chat model for a one-line English image prompt for the blog.

    If the completion call (or reading its result) fails, a generic prompt
    built from ``topic`` is returned instead.
    """
    client = get_openai_client()

    instruction = "Generate a single-line English image prompt from the following text. Return only the prompt text, nothing else."
    conversation = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": f"Topic: {topic}\n\n---\n{blog_text}\n\n---"},
    ]

    try:
        completion = client.chat.completions.create(
            model="gpt-4.1-mini",  # set to a generally available model
            messages=conversation,
            temperature=1,
            max_tokens=80,
            top_p=1,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content.strip()
    except Exception as err:
        logging.error(f"OpenAI image prompt generation error: {err}")
        return f"A professional photo related to {topic}, high quality"
542
+
543
def md_to_html(md: str, title="Ginigen Blog"):
    """Convert Markdown to a standalone HTML document.

    Args:
        md: Markdown source, rendered with ``markdown.markdown``.
        title: Text for the ``<title>`` element. It is HTML-escaped because it
            may be taken from generated blog headings, which can contain
            characters such as ``&`` or ``<``.

    Returns:
        The HTML document as a single string.
    """
    from html import escape  # local import: keeps the block self-contained

    safe_title = escape(str(title))
    return f"<!DOCTYPE html><html><head><title>{safe_title}</title><meta charset='utf-8'></head><body>{markdown.markdown(md)}</body></html>"
546
+
547
def keywords(text: str, top=5):
    """Simple keyword extraction.

    Removes every character that is not a Hangul syllable (U+AC00-U+D7A3),
    an ASCII letter or digit, or whitespace, then returns the first ``top``
    whitespace-separated tokens joined by single spaces.
    """
    # The Hangul range is written with \u escapes so the pattern survives
    # any re-encoding of the source file.
    cleaned = re.sub(r"[^\uAC00-\uD7A3a-zA-Z0-9\s]", "", text)
    return " ".join(cleaned.split()[:top])
551
+
552
+ # ──────────────────────────────── Streamlit UI ────────────────────────────
553
def _init_session_defaults():
    """Seed st.session_state with a default for every key that is not set yet."""
    defaults = {
        "ai_model": "gpt-4.1-mini",   # fixed model
        "messages": [],
        "auto_save": True,
        "generate_image": False,
        "web_search_enabled": True,
        "blog_template": "ginigen",   # Ginigen recommended style by default
        "blog_tone": "professional",
        "word_count": 1750,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _load_history_once(sb, up):
    """Load an uploaded JSON conversation into session state, once per upload."""
    # The uploader keeps returning the file on every script rerun; without
    # this guard each rerun would reset the conversation to the file content.
    # NOTE(review): file_id is assumed to exist on newer Streamlit
    # UploadedFile objects; name+size is the fallback identity.
    upload_id = getattr(up, "file_id", None) or f"{up.name}:{up.size}"
    if st.session_state.get("loaded_history_id") == upload_id:
        return
    try:
        up.seek(0)
        loaded = json.load(up)
        # The message loop below indexes m["role"] and m["content"].
        is_history = isinstance(loaded, list) and all(
            isinstance(m, dict) and "role" in m and "content" in m for m in loaded
        )
        if not is_history:
            raise ValueError("expected a list of messages with 'role' and 'content'")
        st.session_state.messages = loaded
        st.session_state.loaded_history_id = upload_id
        sb.success("Conversation history loaded successfully")
    except Exception as e:
        sb.error(f"Failed to load: {e}")


def _render_upload_preview(idx, file):
    """Show a short preview of one uploaded txt/csv/pdf file."""
    st.write(f"**File Name:** {file.name}")
    ext = file.name.split('.')[-1].lower()

    if ext == 'txt':
        file.seek(0)
        preview = file.read(1000).decode('utf-8', errors='ignore')
        file.seek(0)
        st.text_area(
            f"Preview of {file.name}",
            preview + ("..." if len(preview) >= 1000 else ""),
            height=150,
            key=f"txt_preview_{idx}",  # explicit key: same-named files must not collide
        )
    elif ext == 'csv':
        try:
            file.seek(0)
            df = pd.read_csv(file)
            st.write("CSV Preview (up to 5 rows)")
            st.dataframe(df.head(5))
        except Exception as e:
            st.error(f"CSV preview failed: {e}")
        finally:
            # Always rewind, even when parsing failed, so later processing
            # reads the file from the start.
            file.seek(0)
    elif ext == 'pdf':
        try:
            file.seek(0)
            file_bytes = file.read()
            file.seek(0)

            reader = PyPDF2.PdfReader(io.BytesIO(file_bytes), strict=False)

            pc = len(reader.pages)
            st.write(f"PDF File: {pc} pages")

            if pc > 0:
                try:
                    page_text = reader.pages[0].extract_text()
                    preview = page_text[:500] if page_text else "(No text extracted)"
                    st.text_area(
                        "Preview of the first page",
                        preview + "...",
                        height=150,
                        key=f"pdf_preview_{idx}",  # label is identical for every PDF
                    )
                except Exception:
                    st.warning("Failed to extract text from the first page")
        except Exception as e:
            st.error(f"PDF preview failed: {e}")


def ginigen_app():
    """Render the Ginigen Blog Streamlit page.

    Draws the sidebar (style settings, example topics, toggles, download and
    history import/export), the file-upload area with previews, and the chat
    history, then hands any new chat input to ``process_input``.
    """
    st.title("Ginigen Blog")

    # Set default session state
    _init_session_defaults()

    # Sidebar UI
    sb = st.sidebar
    sb.title("Blog Settings")

    # No model selector: a fixed model is used.

    sb.subheader("Blog Style Settings")
    sb.selectbox(
        "Blog Template",
        options=list(BLOG_TEMPLATES.keys()),
        format_func=lambda x: BLOG_TEMPLATES[x],
        key="blog_template"
    )

    sb.selectbox(
        "Blog Tone",
        options=list(BLOG_TONES.keys()),
        format_func=lambda x: BLOG_TONES[x],
        key="blog_tone"
    )

    sb.slider("Blog Length (word count)", 800, 3000, key="word_count")

    # Example topics
    sb.subheader("Example Topics")
    c1, c2, c3 = sb.columns(3)
    if c1.button("Real Estate Tax", key="ex1"):
        process_example(EXAMPLE_TOPICS["example1"])
    if c2.button("Summer Festivals", key="ex2"):
        process_example(EXAMPLE_TOPICS["example2"])
    if c3.button("Investment Guide", key="ex3"):
        process_example(EXAMPLE_TOPICS["example3"])

    sb.subheader("Other Settings")
    sb.toggle("Auto Save", key="auto_save")
    sb.toggle("Auto Image Generation", key="generate_image")

    web_search_enabled = sb.toggle("Use Web Search", value=st.session_state.web_search_enabled)
    st.session_state.web_search_enabled = web_search_enabled

    if web_search_enabled:
        st.sidebar.info("✅ Web search results will be integrated into the blog.")

    # Download the latest blog (markdown/HTML): newest non-empty assistant message
    latest_blog = next(
        (m["content"] for m in reversed(st.session_state.messages)
         if m["role"] == "assistant" and m["content"].strip()),
        None
    )
    if latest_blog:
        title_match = re.search(r"# (.*?)(\n|$)", latest_blog)
        title = title_match.group(1).strip() if title_match else "blog"
        sb.subheader("Download Latest Blog")
        d1, d2 = sb.columns(2)
        d1.download_button("Download as Markdown", latest_blog,
                           file_name=f"{title}.md", mime="text/markdown")
        d2.download_button("Download as HTML", md_to_html(latest_blog, title),
                           file_name=f"{title}.html", mime="text/html")

    # JSON conversation record upload
    up = sb.file_uploader("Load Conversation History (.json)", type=["json"], key="json_uploader")
    if up:
        _load_history_once(sb, up)
    elif "loaded_history_id" in st.session_state:
        # Uploader was cleared: allow the same file to be loaded again later.
        del st.session_state["loaded_history_id"]

    # JSON conversation record download
    # NOTE(review): the download button only exists on the rerun triggered by
    # the outer button; kept as-is to avoid serializing messages on every rerun.
    if sb.button("Download Conversation as JSON"):
        sb.download_button(
            "Save",
            data=json.dumps(st.session_state.messages, ensure_ascii=False, indent=2),
            file_name="chat_history.json",
            mime="application/json"
        )

    # File Upload
    st.subheader("File Upload")
    uploaded_files = st.file_uploader(
        "Upload files to be referenced in your blog (txt, csv, pdf)",
        type=["txt", "csv", "pdf"],
        accept_multiple_files=True,
        key="file_uploader"
    )

    if uploaded_files:
        file_count = len(uploaded_files)
        st.success(f"{file_count} files uploaded. They will be referenced in the blog.")

        with st.expander("Preview Uploaded Files", expanded=False):
            for idx, file in enumerate(uploaded_files):
                _render_upload_preview(idx, file)
                if idx < file_count - 1:
                    st.divider()

    # Display existing messages
    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"])
            if "image" in m:
                st.image(m["image"], caption=m.get("image_caption", ""))

    # User input
    prompt = st.chat_input("Enter a blog topic or keywords.")
    if prompt:
        process_input(prompt, uploaded_files)
721
+
722
def process_example(topic):
    """Run a sidebar example topic through the normal input flow, with no uploads."""
    no_uploads = []
    process_input(topic, no_uploads)
725
+
726
def _download_basename(prompt: str, max_len: int = 30) -> str:
    """Return a file-name-safe base name built from the first *max_len* chars of *prompt*."""
    import re  # local import: the rest of the file's import block is not visible here

    # Replace path separators, characters reserved on common filesystems, and
    # control characters so the suggested download name is always valid.
    cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]+', "_", prompt[:max_len]).strip(" .")
    return cleaned or "blog"


def _json_safe_messages(messages):
    """Return copies of the message dicts keeping only JSON-serialisable fields."""
    safe_messages = []
    for message in messages:
        entry = {}
        for key, value in message.items():
            try:
                json.dumps(value)
            except (TypeError, ValueError):
                # e.g. an in-memory image object stored under "image"
                continue
            entry[key] = value
        safe_messages.append(entry)
    return safe_messages


def process_input(prompt: str, uploaded_files):
    """Generate a blog post for *prompt* and render it in the chat UI.

    Steps, in order: record and echo the user message, optionally run a web
    search and process uploaded files, build the system prompt from the
    session settings, stream the chat completion into the page, optionally
    generate an image, store the assistant message in the session history,
    offer Markdown/HTML downloads, and optionally auto-save the history to a
    JSON file.

    Args:
        prompt: Topic or keywords entered by the user.
        uploaded_files: Uploaded file objects; may be empty or ``None``.

    Returns:
        None. Any exception is caught, shown in the UI, logged, and recorded
        as an assistant message in the session history.
    """
    # Record the user's message only once, even if the same prompt is replayed.
    already_recorded = any(
        m["role"] == "user" and m["content"] == prompt
        for m in st.session_state.messages
    )
    if not already_recorded:
        st.session_state.messages.append({"role": "user", "content": prompt})

    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        placeholder = st.empty()
        message_placeholder = st.empty()
        full_response = ""

        use_web_search = st.session_state.web_search_enabled
        has_uploaded_files = bool(uploaded_files)

        # Tracked outside the try so the error handler can close the status
        # widget no matter which step failed.
        status = None
        status_marked_failed = False

        try:
            # Status component used to report progress
            status = st.status("Preparing to generate blog...")
            status.update(label="Initializing client...")

            client = get_openai_client()

            # Web search
            search_content = None
            if use_web_search:
                status.update(label="Performing web search...")
                with st.spinner("Searching the web..."):
                    search_content = do_web_search(keywords(prompt, top=5))

            # Process uploaded files -> content
            file_content = None
            if has_uploaded_files:
                status.update(label="Processing uploaded files...")
                with st.spinner("Analyzing files..."):
                    file_content = process_uploaded_files(uploaded_files)

            # Build system prompt
            status.update(label="Preparing blog draft...")
            sys_prompt = get_system_prompt(
                template=st.session_state.blog_template,
                tone=st.session_state.blog_tone,
                word_count=st.session_state.word_count,
                include_search_results=use_web_search,
                include_uploaded_files=has_uploaded_files,
            )

            # Prepare the OpenAI API call
            status.update(label="Writing blog content...")

            # Append search results and file content (when present) to the
            # user prompt.
            user_content = prompt
            if search_content:
                user_content += "\n\n" + search_content
            if file_content:
                user_content += "\n\n" + file_content

            api_messages = [
                {"role": "system", "content": sys_prompt},
                {"role": "user", "content": user_content},
            ]

            # Streaming OpenAI API call using the fixed model "gpt-4.1-mini"
            try:
                stream = client.chat.completions.create(
                    model="gpt-4.1-mini",  # fixed model
                    messages=api_messages,
                    temperature=1,
                    max_tokens=MAX_TOKENS,
                    top_p=1,
                    stream=True,  # enable streaming
                )

                # Consume the stream, re-rendering the text received so far
                # with a trailing cursor glyph.
                for chunk in stream:
                    if not chunk.choices:
                        continue
                    content_delta = chunk.choices[0].delta.content
                    if content_delta is None:
                        continue
                    full_response += content_delta
                    message_placeholder.markdown(full_response + "▌")

                # Show the final response (cursor removed)
                message_placeholder.markdown(full_response)
                status.update(label="Blog completed!", state="complete")

            except Exception as api_error:
                error_message = str(api_error)
                logging.error("API error: %s", error_message)
                status.update(label=f"Error: {error_message}", state="error")
                status_marked_failed = True
                # Chain the cause so the original traceback is not lost.
                raise Exception(f"Blog generation error: {error_message}") from api_error

            # Image generation
            answer_entry_saved = False
            if st.session_state.generate_image and full_response:
                with st.spinner("Generating image..."):
                    try:
                        ip = extract_image_prompt(full_response, prompt)
                        img, cap = generate_image(ip)
                        if img:
                            st.image(img, caption=cap)
                            st.session_state.messages.append({
                                "role": "assistant",
                                "content": full_response,
                                "image": img,
                                "image_caption": cap,
                            })
                            answer_entry_saved = True
                    except Exception as img_error:
                        logging.error("Image generation error: %s", img_error)
                        st.warning(
                            "Image generation failed. Only the blog content will be saved."
                        )

            # Save the answer if not saved above
            if not answer_entry_saved and full_response:
                st.session_state.messages.append(
                    {"role": "assistant", "content": full_response}
                )

            # Download buttons
            if full_response:
                st.subheader("Download This Blog")
                base_name = _download_basename(prompt)
                c1, c2 = st.columns(2)
                c1.download_button(
                    "Markdown",
                    data=full_response,
                    file_name=f"{base_name}.md",
                    mime="text/markdown",
                )
                c2.download_button(
                    "HTML",
                    data=md_to_html(full_response, prompt[:30]),
                    file_name=f"{base_name}.html",
                    mime="text/html",
                )

            # Auto save
            if st.session_state.auto_save and st.session_state.messages:
                try:
                    # Serialise before opening the file so a serialisation
                    # failure cannot leave a truncated JSON file behind.
                    payload = json.dumps(
                        _json_safe_messages(st.session_state.messages),
                        ensure_ascii=False,
                        indent=2,
                    )
                    fn = f"chat_history_auto_{datetime.now():%Y%m%d_%H%M%S}.json"
                    with open(fn, "w", encoding="utf-8") as fp:
                        fp.write(payload)
                except Exception as e:
                    logging.error("Auto-save failed: %s", e)

        except Exception as e:
            error_message = str(e)
            # Close the progress widget for failures that happened outside the
            # API call (client init, web search, file processing, ...).
            if status is not None and not status_marked_failed:
                status.update(label=f"Error: {error_message}", state="error")
            placeholder.error(f"An error occurred: {error_message}")
            logging.error("Process input error: %s", error_message)
            ans = f"An error occurred while processing your request: {error_message}"
            st.session_state.messages.append({"role": "assistant", "content": ans})
882
+
883
+
884
+ # ──────────────────────────────── main ────────────────────────────────────
885
def main():
    """Script entry point; delegates to ginigen_app()."""
    ginigen_app()


# Only start the app when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
requirements (14).txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ anthropic
3
+ gradio
4
+ gradio-client
5
+ httpx
6
+ requests
7
+ python-dotenv
8
+ markdown
9
+ weasyprint
10
+ beautifulsoup4>=4.12.0
11
+ lxml>=4.9.0
12
+ pillow
13
+ pandas
14
+ PyPDF2
15
+ openai
16
+ tabulate