MINEOGO committed on
Commit
155c47a
·
verified ·
1 Parent(s): de1581d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -95
app.py CHANGED
@@ -3,9 +3,17 @@ from huggingface_hub import InferenceClient
3
  import os
4
  import re # For post-processing fallback
5
 
 
 
 
 
 
 
6
  # --- Configuration ---
7
  API_TOKEN = os.getenv("HF_TOKEN", None)
8
- MODEL = "HuggingFaceH4/zephyr-7b-beta" # Or choose another suitable model
 
 
9
 
10
  # --- Initialize Inference Client ---
11
  try:
@@ -19,9 +27,25 @@ try:
19
  print("Inference Client initialized successfully.")
20
  except Exception as e:
21
  print(f"Error initializing Inference Client: {e}")
22
- raise gr.Error(f"Failed to initialize the AI model client for '{MODEL}'. Check model name, network, and HF_TOKEN secret if applicable. Error: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
- # --- Core Code Generation Function ---
 
25
  def generate_code(
26
  prompt: str,
27
  backend_choice: str,
@@ -33,16 +57,23 @@ def generate_code(
33
  """
34
  Generates website code based on user prompt and choices.
35
  Aims for richer CSS, emphasizes completeness, and strictly outputs ONLY raw code.
36
- Yields the code token by token for live updates.
 
37
  """
 
 
 
 
 
 
 
 
38
  print(f"--- Generating Code ---")
39
  print(f"Prompt: {prompt[:100]}...")
40
  print(f"Backend Context: {backend_choice}")
41
  print(f"File Structure: {file_structure}")
42
- # Log the max_tokens value being used for this request
43
  print(f"Settings: Max Tokens={max_tokens}, Temp={temperature}, Top-P={top_p}")
44
 
45
- # --- Dynamically Build System Message ---
46
  if file_structure == "Single File":
47
  file_structure_instruction = (
48
  "- **File Structure is 'Single File':** Generate ONLY a single, complete `index.html` file. "
@@ -58,15 +89,14 @@ def generate_code(
58
  "- Inside `index.html`, link `style.css` in the `<head>` and include `script.js` before `</body>` if generated."
59
  )
60
 
61
- # Assemble the full system message - Emphasizing completeness and NO premature stopping
62
  system_message = (
63
  "You are an expert frontend web developer AI. Your primary goal is to generate **complete, visually appealing, modern, and well-styled** frontend code (HTML, CSS, client-side JS) based *only* on the user's description and selected options. "
64
  "Follow ALL these rules with EXTREME STRICTNESS:\n"
65
  "1. **STYLE & DETAIL:** Generate rich, detailed code. Use **plenty of CSS** for layout, spacing, typography, colors, and effects. Aim for a high-quality visual result.\n"
66
- "2. **COMPLETENESS:** Generate the *entire* requested code structure. Ensure all files/sections are fully generated and properly closed (e.g., closing HTML tags `</html>`, CSS braces `}`, script tags `</script>`). **DO NOT STOP GENERATING PREMATURELY.** Finish the whole task.\n"
67
  "3. **RAW CODE ONLY:** Your *entire* response MUST consist *only* of the requested source code. NO extra text, NO explanations, NO apologies, NO introductions, NO summaries, NO comments about the code (except standard code comments), NO MARKDOWN formatting (like ```html), and ***ABSOLUTELY NO CONVERSATIONAL TEXT OR TAGS*** like `<|user|>` or `<|assistant|>`.\n"
68
  "4. **IMMEDIATE CODE START:** The response MUST begin *directly* with the first character of the code (`<!DOCTYPE html>` or `<!-- index.html -->`).\n"
69
- "5. **IMMEDIATE CODE END:** The response MUST end *immediately* after the very last character of the generated code (`</html>`, `}`, `;`, etc.). DO NOT add *any* text, spaces, or newlines after the code concludes.\n"
70
  "6. **MANDATORY `index.html`:** Always generate the content for `index.html`.\n"
71
  f"7. **FILE STRUCTURE ({file_structure}):** Strictly follow ONLY the instructions for the *selected* file structure:\n"
72
  f" {file_structure_instruction}\n"
@@ -76,21 +106,21 @@ def generate_code(
76
  "REMEMBER: Create COMPLETE, visually appealing code. Output ONLY raw code. START immediately with code. FINISH the entire code generation. END immediately with code. NO extra text/tags."
77
  )
78
 
79
- # --- Construct the messages for the API ---
80
  messages = [
81
  {"role": "system", "content": system_message},
82
  {"role": "user", "content": f"Generate the complete website frontend code for: {prompt}"}
83
  ]
84
 
85
- # --- Stream the response from the API ---
86
  response_stream = ""
87
  full_response_for_cleaning = ""
88
- token_count = 0 # Add a simple counter for debugging
 
 
89
  try:
90
  print("Sending request to Hugging Face Inference API...")
91
  stream = client.chat_completion(
92
  messages=messages,
93
- max_tokens=max_tokens, # Use the value from the slider
94
  stream=True,
95
  temperature=temperature,
96
  top_p=top_p,
@@ -98,52 +128,42 @@ def generate_code(
98
  for message in stream:
99
  token = message.choices[0].delta.content
100
  if isinstance(token, str):
101
- token_count += 1 # Crude approximation of tokens received
102
  response_stream += token
103
  full_response_for_cleaning += token
104
- # Log progress occasionally for debugging if needed
105
- # if token_count % 100 == 0:
106
- # print(f"Stream progress: Received ~{token_count} tokens...")
107
- yield response_stream # Yield cumulative response for live update
108
 
109
  print(f"API stream finished. Received ~{token_count} tokens. Raw length: {len(full_response_for_cleaning)}")
110
- # Check if received tokens are close to max_tokens, indicating potential cutoff
111
- if token_count >= max_tokens - 10: # Check if close to the limit (allowing for slight variations)
112
  print(f"WARNING: Generation might have been cut short due to reaching max_tokens limit ({max_tokens}).")
113
- # Optionally, append a warning to the output itself, though it violates the "code only" rule
114
- # full_response_for_cleaning += "\n\n<!-- WARNING: Output may be incomplete due to max_tokens limit. -->"
115
-
116
 
117
- # --- Post-Processing (Fallback Safety Net) ---
118
  cleaned_response = full_response_for_cleaning.strip()
119
  cleaned_response = re.sub(r"^\s*```[a-z]*\s*\n?", "", cleaned_response)
120
  cleaned_response = re.sub(r"\n?\s*```\s*$", "", cleaned_response)
121
  cleaned_response = re.sub(r"<\s*\|?\s*(user|assistant)\s*\|?\s*>", "", cleaned_response, flags=re.IGNORECASE)
122
- common_phrases = [
123
- "Here is the code:", "Okay, here is the code:", "Here's the code:",
124
- "Sure, here is the code you requested:", "Let me know if you need anything else."
125
- ]
126
- # Simple check, might need more robust cleaning if issues persist
127
- for phrase in common_phrases:
128
- # Check start
129
  if cleaned_response.lower().startswith(phrase.lower()):
130
  cleaned_response = cleaned_response[len(phrase):].lstrip()
131
- # Check end - be careful not to remove parts of valid code
132
- # This end check is risky, might remove valid closing comments or similar.
133
- # Consider removing if it causes issues.
134
- # if cleaned_response.lower().endswith(phrase.lower()):
135
- # cleaned_response = cleaned_response[:-len(phrase)].rstrip()
136
 
137
-
138
- yield cleaned_response.strip() # Yield final cleaned response
139
 
140
  except Exception as e:
141
  error_message = f"An error occurred during the API call: {e}"
142
  print(error_message)
143
- yield f"## Error\n\nFailed to generate code.\n**Reason:** {e}\n\nPlease check the model status, your connection, and API token (if applicable)."
 
 
 
 
144
 
145
 
146
  # --- Build Gradio Interface using Blocks ---
 
147
  with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
148
  gr.Markdown("# ✨ Website Code Generator ✨")
149
  gr.Markdown(
@@ -151,82 +171,115 @@ with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
151
  "The code appears live below.\n"
152
  "**Important:**\n"
153
  "1. This generator creates code based *only* on your initial description. To refine, modify your description and generate again.\n"
154
- "2. **If the code output stops abruptly**, it likely hit the 'Max New Tokens' limit. **Increase the slider value below** and try again!" # Added explanation
 
155
  )
156
 
157
  with gr.Row():
158
  with gr.Column(scale=2):
159
- prompt_input = gr.Textbox(
160
- label="Website Description",
161
- placeholder="e.g., A modern portfolio landing page with smooth scroll nav, stylish hero, project cards with hover effects, contact form.",
162
- lines=6,
163
- )
164
- backend_radio = gr.Radio(
165
- ["Static", "Flask", "Node.js"], label="Backend Context Hint", value="Static",
166
- info="Hint for AI (e.g., {{var}}) - generates ONLY frontend code."
167
- )
168
- file_structure_radio = gr.Radio(
169
- ["Multiple Files", "Single File"], label="Output File Structure", value="Multiple Files",
170
- info="Choose 'Single File' (all in index.html) or 'Multiple Files' (separate css/js)."
171
- )
172
  generate_button = gr.Button("🎨 Generate Stylish Website Code", variant="primary")
173
 
174
  with gr.Column(scale=3):
175
- code_output = gr.Code(
176
- label="Generated Code (Raw Output - Aiming for Style!)",
177
- language="html",
178
- lines=30,
179
- interactive=False,
180
- )
181
 
182
  with gr.Accordion("Advanced Generation Settings", open=False):
183
- # INCREASED max_tokens range and default value
184
- max_tokens_slider = gr.Slider(
185
- minimum=512,
186
- maximum=4096, # Set maximum to model's limit (Zephyr 7B can handle this)
187
- value=3072, # Increased default significantly
188
- step=256, # Larger steps might be practical
189
- label="Max New Tokens",
190
- info="Max length of generated code. Increase if output is cut off!" # Updated info
191
- )
192
- temperature_slider = gr.Slider(
193
- minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature",
194
- info="Controls randomness. Lower=focused, Higher=creative."
195
- )
196
- top_p_slider = gr.Slider(
197
- minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P",
198
- info="Alternative randomness control."
199
- )
200
-
201
- # --- Connect Inputs/Outputs ---
202
  generate_button.click(
203
  fn=generate_code,
204
- inputs=[
205
- prompt_input,
206
- backend_radio,
207
- file_structure_radio,
208
- max_tokens_slider,
209
- temperature_slider,
210
- top_p_slider,
211
- ],
212
  outputs=code_output,
213
  )
214
 
215
- # --- Examples ---
216
  gr.Examples(
217
  examples=[
218
- ["A simple counter page with a number display, an increment button, and a decrement button. Style the buttons nicely and center everything.", "Static", "Single File"],
219
- ["A responsive product grid for an e-commerce site. Each card needs an image, title, price, and 'Add to Cart' button with a hover effect. Use modern CSS.", "Static", "Multiple Files"],
220
- ["A personal blog homepage featuring a clean header with navigation, a main content area for post summaries (placeholders ok), and a simple footer. Use a nice font.", "Flask", "Multiple Files"],
221
- ["A 'Coming Soon' page with a large countdown timer (use JS), a background image, and an email signup form. Make it look sleek.", "Static", "Multiple Files"]
222
  ],
223
  inputs=[prompt_input, backend_radio, file_structure_radio],
224
  label="Example Prompts (Aiming for Style)"
225
  )
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  # --- Launch ---
 
228
  if __name__ == "__main__":
229
- print("Starting Gradio app...")
230
- # Ensure queue is enabled for Spaces
231
- demo.queue(max_size=10).launch()
232
- print("Gradio app launched.")
 
 
 
3
  import os
4
  import re # For post-processing fallback
5
 
6
+ # --- FastAPI & Pydantic Imports ---
7
+ from fastapi import FastAPI, Request, HTTPException, Depends, Body
8
+ from fastapi.responses import JSONResponse
9
+ from pydantic import BaseModel, Field
10
+ from typing import Literal, Optional
11
+
12
# --- Configuration ---
# Hugging Face API token; optional for public models, required for gated ones.
API_TOKEN = os.getenv("HF_TOKEN", None)
# Chat model served through the HF Inference API.
MODEL = "HuggingFaceH4/zephyr-7b-beta"
# --- Define the Secret Key for the API ---
# Prefer an environment variable so the secret is not committed to source;
# the hard-coded value remains only as a backward-compatible fallback and
# must not be relied on in production.
API_SECRET_KEY = os.getenv("API_SECRET_KEY", "onlyfordearygt")
17
 
18
  # --- Initialize Inference Client ---
19
  try:
 
27
  print("Inference Client initialized successfully.")
28
  except Exception as e:
29
  print(f"Error initializing Inference Client: {e}")
30
+ # We still want the Gradio app to potentially load, but maybe show an error
31
+ # raise gr.Error(f"Failed to initialize the AI model client for '{MODEL}'. Check model name, network, and HF_TOKEN secret if applicable. Error: {e}")
32
+ client = None # Set client to None so we can check later
33
+ print("WARNING: AI Client initialization failed. API/Generation will not work.")
34
+
35
+
36
# --- Pydantic Model for API Request Body ---
class GenerateRequest(BaseModel):
    """Validated payload for ``POST /api/generate``.

    Mirrors the Gradio UI inputs plus a shared-secret key used for
    lightweight authentication of API callers. `Field` constraints give
    the same bounds as the UI sliders — TODO confirm against the sliders.
    """

    prompt: str
    backend_choice: Literal["Static", "Flask", "Node.js"] = "Static"
    file_structure: Literal["Single File", "Multiple Files"] = "Multiple Files"
    # Generation settings; Field adds range validation (rejects out-of-range values).
    max_tokens: Optional[int] = Field(default=3072, gt=128, le=4096)
    temperature: Optional[float] = Field(default=0.7, gt=0.0, le=2.0)
    top_p: Optional[float] = Field(default=0.9, gt=0.0, le=1.0)
    # Required shared secret for authentication.
    secret_key: str
45
+
46
 
47
+ # --- Core Code Generation Function (Mostly Unchanged) ---
48
+ # Note: This function is now also used by the API
49
  def generate_code(
50
  prompt: str,
51
  backend_choice: str,
 
57
  """
58
  Generates website code based on user prompt and choices.
59
  Aims for richer CSS, emphasizes completeness, and strictly outputs ONLY raw code.
60
+ Yields the code token by token for live updates (for Gradio UI).
61
+ The *final* yielded value is the complete, cleaned code (for API).
62
  """
63
+ # Check if client initialized properly
64
+ if client is None:
65
+ final_error_message = "## Error\n\nAI Model Client not initialized. Generation is unavailable."
66
+ print(final_error_message)
67
+ # Yield the error for Gradio, return it for API callers later
68
+ yield final_error_message
69
+ return # Stop execution for this generator
70
+
71
  print(f"--- Generating Code ---")
72
  print(f"Prompt: {prompt[:100]}...")
73
  print(f"Backend Context: {backend_choice}")
74
  print(f"File Structure: {file_structure}")
 
75
  print(f"Settings: Max Tokens={max_tokens}, Temp={temperature}, Top-P={top_p}")
76
 
 
77
  if file_structure == "Single File":
78
  file_structure_instruction = (
79
  "- **File Structure is 'Single File':** Generate ONLY a single, complete `index.html` file. "
 
89
  "- Inside `index.html`, link `style.css` in the `<head>` and include `script.js` before `</body>` if generated."
90
  )
91
 
 
92
  system_message = (
93
  "You are an expert frontend web developer AI. Your primary goal is to generate **complete, visually appealing, modern, and well-styled** frontend code (HTML, CSS, client-side JS) based *only* on the user's description and selected options. "
94
  "Follow ALL these rules with EXTREME STRICTNESS:\n"
95
  "1. **STYLE & DETAIL:** Generate rich, detailed code. Use **plenty of CSS** for layout, spacing, typography, colors, and effects. Aim for a high-quality visual result.\n"
96
+ "2. **COMPLETENESS:** Generate the *entire* requested code structure. Ensure all files/sections are fully generated and properly closed. **DO NOT STOP GENERATING PREMATURELY.** Finish the whole task.\n"
97
  "3. **RAW CODE ONLY:** Your *entire* response MUST consist *only* of the requested source code. NO extra text, NO explanations, NO apologies, NO introductions, NO summaries, NO comments about the code (except standard code comments), NO MARKDOWN formatting (like ```html), and ***ABSOLUTELY NO CONVERSATIONAL TEXT OR TAGS*** like `<|user|>` or `<|assistant|>`.\n"
98
  "4. **IMMEDIATE CODE START:** The response MUST begin *directly* with the first character of the code (`<!DOCTYPE html>` or `<!-- index.html -->`).\n"
99
+ "5. **IMMEDIATE CODE END:** The response MUST end *immediately* after the very last character of the generated code. DO NOT add *any* text, spaces, or newlines after the code concludes.\n"
100
  "6. **MANDATORY `index.html`:** Always generate the content for `index.html`.\n"
101
  f"7. **FILE STRUCTURE ({file_structure}):** Strictly follow ONLY the instructions for the *selected* file structure:\n"
102
  f" {file_structure_instruction}\n"
 
106
  "REMEMBER: Create COMPLETE, visually appealing code. Output ONLY raw code. START immediately with code. FINISH the entire code generation. END immediately with code. NO extra text/tags."
107
  )
108
 
 
109
  messages = [
110
  {"role": "system", "content": system_message},
111
  {"role": "user", "content": f"Generate the complete website frontend code for: {prompt}"}
112
  ]
113
 
 
114
  response_stream = ""
115
  full_response_for_cleaning = ""
116
+ token_count = 0
117
+ last_yielded_value = "" # Store the last value for the API
118
+
119
  try:
120
  print("Sending request to Hugging Face Inference API...")
121
  stream = client.chat_completion(
122
  messages=messages,
123
+ max_tokens=max_tokens,
124
  stream=True,
125
  temperature=temperature,
126
  top_p=top_p,
 
128
  for message in stream:
129
  token = message.choices[0].delta.content
130
  if isinstance(token, str):
131
+ token_count += 1
132
  response_stream += token
133
  full_response_for_cleaning += token
134
+ last_yielded_value = response_stream # Keep updating last value during stream
135
+ yield response_stream # Yield cumulative response for live UI update
 
 
136
 
137
  print(f"API stream finished. Received ~{token_count} tokens. Raw length: {len(full_response_for_cleaning)}")
138
+ if token_count >= max_tokens - 10:
 
139
  print(f"WARNING: Generation might have been cut short due to reaching max_tokens limit ({max_tokens}).")
 
 
 
140
 
141
+ # --- Post-Processing ---
142
  cleaned_response = full_response_for_cleaning.strip()
143
  cleaned_response = re.sub(r"^\s*```[a-z]*\s*\n?", "", cleaned_response)
144
  cleaned_response = re.sub(r"\n?\s*```\s*$", "", cleaned_response)
145
  cleaned_response = re.sub(r"<\s*\|?\s*(user|assistant)\s*\|?\s*>", "", cleaned_response, flags=re.IGNORECASE)
146
+ # Remove common phrases only if they are clearly at the start/end and unlikely to be code
147
+ common_phrases_start = ["Here is the code:", "Okay, here is the code:", "Here's the code:", "Sure, here is the code you requested:"]
148
+ for phrase in common_phrases_start:
 
 
 
 
149
  if cleaned_response.lower().startswith(phrase.lower()):
150
  cleaned_response = cleaned_response[len(phrase):].lstrip()
 
 
 
 
 
151
 
152
+ last_yielded_value = cleaned_response.strip() # Final cleaned value
153
+ yield last_yielded_value # Yield final cleaned response for Gradio UI
154
 
155
  except Exception as e:
156
  error_message = f"An error occurred during the API call: {e}"
157
  print(error_message)
158
+ final_error_message = f"## Error\n\nFailed to generate code.\n**Reason:** {e}"
159
+ # Yield error for Gradio UI
160
+ yield final_error_message
161
+ # Ensure the generator stops, API will handle the exception based on this
162
+ # For the API, we will raise an exception in the route handler if needed
163
 
164
 
165
  # --- Build Gradio Interface using Blocks ---
166
+ # Define this *before* creating the FastAPI app that might mount it
167
  with gr.Blocks(css=".gradio-container { max-width: 90% !important; }") as demo:
168
  gr.Markdown("# ✨ Website Code Generator ✨")
169
  gr.Markdown(
 
171
  "The code appears live below.\n"
172
  "**Important:**\n"
173
  "1. This generator creates code based *only* on your initial description. To refine, modify your description and generate again.\n"
174
+ "2. **If the code output stops abruptly**, it likely hit the 'Max New Tokens' limit. **Increase the slider value below** and try again!\n"
175
+ "3. An API endpoint is available at `/api/generate` (POST request, requires secret key)." # Notify about API
176
  )
177
 
178
  with gr.Row():
179
  with gr.Column(scale=2):
180
+ prompt_input = gr.Textbox(label="Website Description", placeholder="e.g., A modern portfolio...", lines=6,)
181
+ backend_radio = gr.Radio(["Static", "Flask", "Node.js"], label="Backend Context Hint", value="Static", info="Hint for AI - generates ONLY frontend code.")
182
+ file_structure_radio = gr.Radio(["Multiple Files", "Single File"], label="Output File Structure", value="Multiple Files", info="Choose 'Single File' or 'Multiple Files'.")
 
 
 
 
 
 
 
 
 
 
183
  generate_button = gr.Button("🎨 Generate Stylish Website Code", variant="primary")
184
 
185
  with gr.Column(scale=3):
186
+ code_output = gr.Code(label="Generated Code (Raw Output - Aiming for Style!)", language="html", lines=30, interactive=False,)
 
 
 
 
 
187
 
188
  with gr.Accordion("Advanced Generation Settings", open=False):
189
+ max_tokens_slider = gr.Slider(minimum=512, maximum=4096, value=3072, step=256, label="Max New Tokens", info="Max length. Increase if output is cut off!")
190
+ temperature_slider = gr.Slider(minimum=0.1, maximum=1.2, value=0.7, step=0.1, label="Temperature", info="Controls randomness.")
191
+ top_p_slider = gr.Slider( minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-P", info="Alternative randomness control.")
192
+
193
+ # --- Connect Gradio Inputs/Outputs ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  generate_button.click(
195
  fn=generate_code,
196
+ inputs=[prompt_input, backend_radio, file_structure_radio, max_tokens_slider, temperature_slider, top_p_slider,],
 
 
 
 
 
 
 
197
  outputs=code_output,
198
  )
199
 
200
+ # --- Gradio Examples ---
201
  gr.Examples(
202
  examples=[
203
+ ["A simple counter page...", "Static", "Single File"],
204
+ ["A responsive product grid...", "Static", "Multiple Files"],
205
+ ["A personal blog homepage...", "Flask", "Multiple Files"],
206
+ ["A 'Coming Soon' page...", "Static", "Multiple Files"]
207
  ],
208
  inputs=[prompt_input, backend_radio, file_structure_radio],
209
  label="Example Prompts (Aiming for Style)"
210
  )
211
 
212
+
213
+ # --- Create FastAPI App and Mount Gradio ---
214
+ # This approach uses FastAPI as the main server and mounts Gradio onto it
215
+ app = FastAPI()
216
+
217
# --- Define the API Endpoint ---
@app.post("/api/generate")
async def api_generate_code(payload: GenerateRequest):
    """
    API endpoint to generate website code.

    Authenticates the caller with a shared secret, drives the
    ``generate_code`` generator to completion, and returns the final
    (cleaned) code as JSON.

    Raises:
        HTTPException 403: invalid secret key.
        HTTPException 503: AI model client not initialized.
        HTTPException 500: generation failed or unexpected error.
    """
    import secrets  # stdlib; used only for constant-time secret comparison

    print(f"API Request received for prompt: {payload.prompt[:50]}...")

    # --- Authentication ---
    # compare_digest avoids leaking information about the key via timing
    # differences, unlike a plain `!=` on attacker-controlled input.
    if not secrets.compare_digest(payload.secret_key, API_SECRET_KEY):
        print("API Authentication failed: Invalid secret key.")
        raise HTTPException(status_code=403, detail="Invalid secret key")

    # Check if HF client is available (it is set to None on init failure).
    if client is None:
        print("API Error: AI Client not initialized.")
        raise HTTPException(status_code=503, detail="AI Model Client not initialized. Service unavailable.")

    print("API Authentication successful.")

    # --- Call the generator function and consume it ---
    final_code = ""
    try:
        # Consume the generator fully; its last yielded value is the
        # complete, cleaned code (earlier yields are streaming partials).
        code_generator = generate_code(
            prompt=payload.prompt,
            backend_choice=payload.backend_choice,
            file_structure=payload.file_structure,
            max_tokens=payload.max_tokens,
            temperature=payload.temperature,
            top_p=payload.top_p,
        )
        for code_chunk in code_generator:
            final_code = code_chunk  # keep overwriting until the last one

        # generate_code signals failure by yielding a markdown error block.
        if final_code.strip().startswith("## Error"):
            print(f"API Error during generation: {final_code}")
            # Extract the reason if present, otherwise return a generic error.
            reason = (
                final_code.split("Reason:**")[-1].strip()
                if "Reason:**" in final_code
                else "Generation failed internally."
            )
            raise HTTPException(status_code=500, detail=f"Code generation failed: {reason}")

        print(f"API generated code length: {len(final_code)}")
        # --- Return the final code ---
        return JSONResponse(content={"generated_code": final_code})

    except HTTPException as http_exc:
        # Re-raise HTTPExceptions unchanged (auth failure, 5xx raised above).
        raise http_exc
    except Exception as e:
        # Catch any other unexpected errors during generation/consumption.
        print(f"API - Unexpected Error during generation: {e}")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
271
+
272
+
273
# --- Mount the Gradio app onto the FastAPI app ---
# The Gradio UI is served at the root path "/" of the FastAPI server.
app = gr.mount_gradio_app(app, demo, path="/")

# --- Launch ---
# Use Uvicorn to run the FastAPI app (which now includes Gradio).
if __name__ == "__main__":
    import uvicorn

    print("Starting FastAPI server with Gradio mounted...")
    # host/port follow Hugging Face Spaces conventions; adjust the port
    # locally if 7860 is already taken.
    uvicorn.run(app, host="0.0.0.0", port=7860)
    # Note: demo.launch() is no longer used; uvicorn serves the combined app.