File size: 16,651 Bytes
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c583c
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5164922
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c583c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c583c
e373289
c8c583c
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c583c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c583c
 
 
e373289
c8c583c
e373289
 
c8c583c
e373289
 
 
c8c583c
 
 
 
 
 
 
 
e373289
 
 
 
c8c583c
 
 
e373289
 
 
 
 
 
 
 
 
 
c8c583c
 
 
 
 
 
 
 
 
 
 
e373289
 
c8c583c
 
e373289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
import json
import os
import time
import uuid
import tempfile
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import base64
import mimetypes

from google import genai
from google.genai import types

def save_binary_file(file_name, data):
    """Write ``data`` to ``file_name`` in binary mode, replacing any existing content."""
    with open(file_name, mode="wb") as sink:
        sink.write(data)

def generate(text, file_name, api_key, model="gemini-2.0-flash-exp"):
    """Upload a file to Gemini, send it with a prompt, and collect the reply.

    Args:
        text: Prompt text sent alongside the uploaded file.
        file_name: Path of the local file to upload.
        api_key: Gemini API key. When empty or blank, the ``GEMINI_API_KEY``
            environment variable is used instead.
        model: Name of the Gemini model to call.

    Returns:
        A tuple ``(image_path, text_response)``.

        * ``image_path`` is the path of a temporary file (``.png`` suffix)
          holding the first inline image found in the response stream, or
          ``None`` when no image was received. The caller owns that file and
          is responsible for deleting it.
        * ``text_response`` is the text received before the image (if any),
          one part per line.

        If the upload or request setup fails, ``(None, "Error: <message>")``
        is returned instead. Errors raised while streaming are logged and the
        data collected so far is returned.
    """
    # Prefer the explicit key; fall back to the environment when it is blank.
    explicit_key = api_key.strip() if api_key else ""
    client = genai.Client(api_key=explicit_key or os.environ.get("GEMINI_API_KEY"))

    uploaded = None
    temp_path = None
    image_path = None
    text_response = ""

    try:
        print("Uploading file to Gemini API...")
        uploaded = client.files.upload(file=file_name)

        contents = [
            types.Content(
                role="user",
                parts=[
                    types.Part.from_uri(
                        file_uri=uploaded.uri,
                        mime_type=uploaded.mime_type,
                    ),
                    types.Part.from_text(text=text),
                ],
            ),
        ]
        generate_content_config = types.GenerateContentConfig(
            temperature=0,  # Lower temperature for more consistent, conservative results
            top_p=0.92,
            max_output_tokens=8192,
            response_modalities=["image", "text"],
            response_mime_type="text/plain",
            # Additional parameters to encourage subtle, natural results
            safety_settings=[
                {
                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                    "threshold": "BLOCK_MEDIUM_AND_ABOVE"
                }
            ]
        )

        # Reserve a path for the image and close the handle right away: the
        # file is reopened by save_binary_file, which is not possible on every
        # platform while the NamedTemporaryFile handle is still open.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            temp_path = tmp.name

        print("Sending request to Gemini API...")
        # The deadline is only checked between chunks, so it bounds the
        # streaming loop rather than a single blocking read.
        start_time = time.time()
        max_wait_time = 60  # Maximum wait time in seconds

        try:
            stream = client.models.generate_content_stream(
                model=model,
                contents=contents,
                config=generate_content_config,
            )

            for chunk in stream:
                if time.time() - start_time > max_wait_time:
                    print("Gemini API request timed out after", max_wait_time, "seconds")
                    break

                if not chunk.candidates or not chunk.candidates[0].content or not chunk.candidates[0].content.parts:
                    continue

                # Inspect every part: the image is not guaranteed to be the
                # first part of a chunk, and a part may carry no text at all.
                for part in chunk.candidates[0].content.parts:
                    if part.inline_data:
                        save_binary_file(temp_path, part.inline_data.data)
                        print(f"Smile enhancement image generated: {temp_path}")
                        image_path = temp_path
                        break
                    if part.text:
                        text_response += part.text + "\n"
                        print("Received text response from Gemini API")

                # If an image is found, we assume that is the desired output.
                if image_path:
                    break
        except Exception as e:
            print(f"Error during content generation: {str(e)}")
            # Fall through and return whatever was collected so far.
    except Exception as e:
        print(f"Error in Gemini API setup: {str(e)}")
        return None, f"Error: {str(e)}"
    finally:
        # Do not leave an empty/partial temp file behind when no image was produced.
        if temp_path and image_path is None:
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Could not remove temporary file {temp_path}: {cleanup_error}")
        # Best-effort removal of the uploaded copy from the Gemini file store.
        if uploaded is not None:
            try:
                client.files.delete(name=uploaded.name)
            except Exception as cleanup_error:
                print(f"Could not delete uploaded file: {cleanup_error}")

    return image_path, text_response

def assess_image_quality(original_image, enhanced_image):
    """
    Run basic sanity checks on an enhanced image against its original.

    Both arguments are expected to expose ``size`` (width, height) and the
    enhanced image additionally ``mode``. The checks, in order, are: an image
    was produced, both sides are at least 100 pixels, width and height are
    within 20 pixels of the original, and the colour mode is ``'RGB'``.

    Returns a tuple of (is_acceptable, feedback_message). Any exception raised
    while inspecting the images is logged and reported as a rejection.
    """
    min_side = 100
    max_drift = 20

    try:
        if enhanced_image is None:
            return False, "No enhanced image generated"

        # Reject thumbnails or otherwise degenerate outputs.
        if not (enhanced_image.size[0] >= min_side and enhanced_image.size[1] >= min_side):
            return False, "Enhanced image appears to be too small or improperly sized"

        # A change in canvas size hints that facial proportions were altered.
        drift_w = abs(original_image.size[0] - enhanced_image.size[0])
        drift_h = abs(original_image.size[1] - enhanced_image.size[1])
        if max(drift_w, drift_h) > max_drift:
            return False, "Enhanced image dimensions differ significantly from original, suggesting facial proportions may have changed"

        # Only plain RGB output is accepted.
        if enhanced_image.mode != 'RGB':
            return False, "Enhanced image does not have the correct color mode"
    except Exception as e:
        print(f"Error in quality assessment: {str(e)}")
        # A failed assessment counts as a rejected image.
        return False, f"Assessment error: {str(e)}"

    # Only structural checks are performed; content is trusted to follow the prompt.
    return True, "Image passes quality assessment criteria"

def compare_image_results(results_list):
    """
    Pick one image out of the generated candidates.

    ``None`` entries are ignored. Returns ``None`` when nothing usable is
    left; otherwise returns the most recent (last) candidate.
    """
    candidates = [entry for entry in (results_list or []) if entry is not None]

    if not candidates:
        return None

    # No real comparison is performed yet: the latest attempt wins. A more
    # advanced implementation could score facial features and smile quality.
    if len(candidates) > 1:
        print(f"Comparing {len(candidates)} valid results and selecting best one")

    return candidates[-1]

def _remove_temp_file(path):
    """Delete a temporary file, logging (not raising) if that fails."""
    try:
        os.remove(path)
    except OSError as cleanup_error:
        print(f"Could not remove temporary file {path}: {cleanup_error}")


def process_smile_enhancement(input_image, max_attempts=3):
    """Generate a smile-enhanced version of an image using several attempts.

    The image is written to a temporary file and sent to ``generate`` up to
    ``max_attempts`` times (bounded by an overall 150 second budget). Every
    image that comes back is kept as a candidate; feedback from earlier
    attempts is appended to the prompt of later ones. The final image is
    chosen with ``compare_image_results``.

    Args:
        input_image: Image object supporting ``save(path)`` and the attributes
            read by ``assess_image_quality``, or ``None``.
        max_attempts: Maximum number of generation attempts.

    Returns:
        A tuple ``(images, text, status)``:

        * ``(None, "", "")`` when ``input_image`` is ``None``.
        * ``([input_image], "", "API key not configured")`` when the
          ``GEMINI_API_KEY`` environment variable is missing or blank.
        * ``([best_result], "", <success message>)`` when a candidate exists.
        * ``([input_image], "", <message>)`` as a fallback; the message is
          empty when an unexpected error occurred.
    """
    input_path = None
    try:
        if input_image is None:
            return None, "", ""

        # SECURITY: the key is read from the environment only. Credentials must
        # never be committed to source; any key that was previously embedded
        # in this file should be treated as compromised and revoked.
        gemini_api_key = os.environ.get("GEMINI_API_KEY", "").strip()
        if not gemini_api_key:
            print("Error: GEMINI_API_KEY not found in environment variables")
            return [input_image], "", "API key not configured"

        # Reserve a temp path, then save after the handle is closed so the
        # file can be reopened for writing on every platform.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            input_path = tmp.name
        input_image.save(input_path)
        print(f"Input image saved to {input_path}")

        result_images = []  # Store all generated images for comparison
        feedback_history = []
        max_processing_time = 150  # Maximum time in seconds for overall processing
        start_processing_time = time.time()

        # The base prompt never changes between attempts, so build it once.
        base_prompt = """
            Create a naturally enhanced smile that focuses primarily on the overall facial expression rather than perfect teeth. Make the following personalized improvements:

- Focus on enhancing the OVERALL SMILE EXPRESSION with natural eye crinkles, cheeks, and subtle facial changes
- Create authentic "Duchenne smile" characteristics with proper eye corner crinkles (crow's feet) appropriate for this person's age
- Enhance the natural lifting of cheeks that occurs in genuine smiles WITHOUT widening the face
- Add the characteristic slight narrowing of the eyes that happens in genuine smiles
- Create subtle dimples ONLY if they already exist in the original image
- Boost the overall joyful expression while maintaining the person's unique facial structure
- Maintain natural-looking nasolabial folds (smile lines) consistent with the smile intensity
- Subtly complement existing teeth - they should remain natural looking with their original character

IMPORTANT GUIDELINES:
- FOCUS ON THE SMILE AS A COMPLETE FACIAL EXPRESSION - not just teeth
- The most important aspects are eye crinkles, cheek raising, and natural facial expressions
- Teeth should be subtly complemented but NOT the main focus of the enhancement
- PRESERVE THE PERSON'S NATURAL DENTAL CHARACTERISTICS - teeth should look like THEIR teeth
- Keep teeth coloration natural and appropriate for the person - avoid any artificial whitening
- Maintain all natural imperfections in tooth alignment that give character to the smile
- Create a genuine, authentic-looking smile that affects the entire face naturally
- ABSOLUTELY CRITICAL: DO NOT widen the face or change face width/shape at all
- Preserve the person's identity completely (extremely important)
- Preserve exact facial proportions of the original image
- Maintain natural-looking results appropriate for the person's age and face structure
- Keep the original background, lighting, and image quality intact
- Ensure the enhanced smile looks natural, genuine, and believable
- Create a smile that looks like a moment of true happiness for THIS specific person
            """

        for current_attempt in range(1, max_attempts + 1):
            # Stop early once the overall time budget is used up.
            if time.time() - start_processing_time > max_processing_time:
                print(f"Overall processing time exceeded {max_processing_time} seconds")
                break

            print(f"Starting processing attempt {current_attempt}/{max_attempts}...")

            prompt = base_prompt
            # If not the first attempt, add previous feedback to the prompt
            if current_attempt > 1 and feedback_history:
                prompt += """
                
                IMPORTANT FEEDBACK FROM PREVIOUS ATTEMPT:
                """ + " ".join(feedback_history) + """
                Please address these issues in this new attempt.
                """

            print(f"Processing attempt {current_attempt}/{max_attempts}...")

            # Soft deadline: it is evaluated after the call returns, so it
            # cannot interrupt the request itself.
            api_call_timeout = time.time() + 45
            image_path = None

            try:
                image_path, text_response = generate(text=prompt, file_name=input_path, api_key=gemini_api_key)

                # Only treat a slow call as a failure when it produced no
                # image; a late but valid image is still worth keeping.
                if time.time() > api_call_timeout and not image_path:
                    print("API call timeout occurred")
                    feedback_history.append("API call timed out, trying again with simplified request.")
                    continue

                print(f"API response received: Image path: {image_path is not None}, Text length: {len(text_response)}")

                if not image_path:
                    print("No image was generated, only text response")
                    feedback_history.append("No image was generated in the previous attempt.")
                    continue

                try:
                    # Fully load the pixels inside the context manager so the
                    # temp file can be closed and deleted afterwards.
                    with Image.open(image_path) as generated:
                        if generated.mode == "RGBA":
                            current_result = generated.convert("RGB")
                        else:
                            current_result = generated.copy()

                    print("Successfully loaded generated image for attempt " + str(current_attempt))

                    is_acceptable, assessment_feedback = assess_image_quality(input_image, current_result)
                    print(f"Image quality assessment: {is_acceptable}, {assessment_feedback}")

                    # Every loaded image is kept as a candidate; rejected ones
                    # serve as a fallback if nothing better turns up.
                    result_images.append(current_result)

                    if is_acceptable:
                        print(f"Added acceptable result from attempt {current_attempt} to results list")
                        # Keep trying: later attempts may improve further.
                        if current_attempt < max_attempts:
                            feedback_history.append("Previous attempt successful, trying to further improve...")
                    else:
                        feedback_history.append(assessment_feedback)
                except Exception as img_error:
                    print(f"Error processing the generated image: {str(img_error)}")
                    feedback_history.append(f"Error with image: {str(img_error)}")
            except Exception as gen_error:
                print(f"Error during generation attempt {current_attempt}: {str(gen_error)}")
                feedback_history.append(f"Error during processing: {str(gen_error)}")
            finally:
                # The generated image now lives in memory (or failed to load).
                if image_path:
                    _remove_temp_file(image_path)

        print(f"All attempts completed. Comparing {len(result_images)} results")

        if result_images:
            best_result = compare_image_results(result_images)
            if best_result is not None:
                print("Returning best result from multiple attempts")
                success_message = "Enhancement completed after multiple attempts to find the best result"
                return [best_result], "", success_message

        print("Returning original image as fallback - no valid results generated")
        return [input_image], "", "No satisfactory enhancements could be generated"
    except Exception as e:
        # Return the original image silently on error
        print(f"Overall error in process_smile_enhancement: {str(e)}")
        return [input_image], "", ""
    finally:
        # The uploaded copy of the input is no longer needed.
        if input_path:
            _remove_temp_file(input_path)

# Build the Gradio UI: a two-column layout with the upload + button on the
# left and the result gallery + status text on the right. The custom CSS
# hides the page footer and removes the container's minimum height.
with gr.Blocks(title="Smile Enhancement", css="footer {visibility: hidden} .gradio-container {min-height: 0 !important}") as demo:
    with gr.Row():
        # Left column: image upload and the trigger button.
        with gr.Column():
            # type="pil" so the handler receives an image object (it calls
            # .save() and reads .size on it).
            image_input = gr.Image(
                type="pil",
                label=None,
                image_mode="RGB",
                elem_classes="upload-box"
            )
            
            submit_btn = gr.Button("Enhance Smile with Natural Expressions", elem_classes="generate-btn")
        
        # Right column: outputs.
        with gr.Column():
            output_gallery = gr.Gallery(label=None)
            
            # Status message shown to the user (third value returned by the handler)
            feedback_text = gr.Textbox(label=None, visible=True, elem_classes="status-box")
            
            # Hidden textbox that receives the handler's second return value
            # (always an empty string in process_smile_enhancement).
            output_text = gr.Textbox(visible=False)
    
    # The handler returns (images, text, status); the outputs list must keep
    # that order. max_attempts is not wired to the UI, so its default is used.
    submit_btn.click(
        fn=process_smile_enhancement,
        inputs=[image_input],
        outputs=[output_gallery, output_text, feedback_text]
    )

# Start the app at import/run time with request queuing enabled and no public
# share link. (The footer is hidden by the CSS passed to gr.Blocks, not by
# these launch arguments.)
# NOTE(review): max_size=50 presumably caps the number of queued requests and
# show_api=False presumably hides the API docs link -- confirm against the
# installed Gradio version.
demo.queue(max_size=50).launch(
    show_api=False,
    share=False, 
    show_error=True,
    # Listen on all network interfaces rather than only localhost.
    server_name="0.0.0.0", 
)