import gradio as gr
from huggingface_hub import InferenceClient
import os
# --- Configuration ---
# Use environment variable for token, fallback to default if not set
# For Spaces, set the HF_TOKEN secret
API_TOKEN = os.getenv("HF_TOKEN", None)
MODEL = "HuggingFaceH4/zephyr-7b-beta"
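# Note: zephyr-7b-beta is only the default; other chat-capable models hosted on the
# Inference API should also work here, subject to availability.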
# --- Initialize Inference Client ---
try:
    if API_TOKEN:
        print("Using HF Token.")
        client = InferenceClient(model=MODEL, token=API_TOKEN)
    else:
        print("HF Token not found. Running without token (may lead to rate limits).")
        client = InferenceClient(model=MODEL)
except Exception as e:
    print(f"Error initializing Inference Client: {e}")
    # Optionally, raise the error or handle it gracefully
    raise gr.Error(f"Failed to initialize the AI model client. Please check the model name and token. Error: {e}")
# --- Core Code Generation Function ---
def generate_code(
    prompt: str,
    backend_choice: str,
    file_structure: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    """
    Generates website code based on the user's prompt and choices.
    Yields the cumulative code token by token for live updates.
    """
    print(f"Received prompt: {prompt}")
    print(f"Backend choice: {backend_choice}")
    print(f"File structure: {file_structure}")
    print(f"Max tokens: {max_tokens}, Temp: {temperature}, Top-p: {top_p}")
    # --- System Message (Internal) ---
    # This guides the AI's behavior. It's not user-editable in the UI.
    system_message = (
        "You are an expert frontend web developer AI. Your task is to generate HTML, CSS, and JavaScript code "
        "for a website based on the user's description. \n"
        "**Constraints:**\n"
        "- ALWAYS generate a complete `index.html` file.\n"
        "- ONLY output the raw code for the requested files.\n"
        "- Do NOT include any explanations, comments about the code, or introductory sentences.\n"
        "- Start the response *directly* with the code.\n"
        "- If 'Multiple Files' is selected, structure the output clearly:\n"
        "  - Start the HTML section with a clear marker like `<!-- index.html -->`\n"
        "  - Start the CSS section with a clear marker like `/* style.css */`\n"
        "  - Start the JavaScript section (if needed) with `// script.js`\n"
        "  - Ensure each file's code follows its respective marker.\n"
        "- If 'Single File' is selected, embed CSS within `<style>` tags in the `<head>` and JavaScript within `<script>` tags "
        "at the end of the `<body>` of the `index.html` file.\n"
        f"- Consider the user's choice of backend context ('{backend_choice}') when generating placeholders or structure, but focus on the frontend code.\n"
        f"- Generate based on the file structure preference: '{file_structure}'."
    )
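    # For reference, a "Multiple Files" response is expected to look roughly like this
    # (illustrative only; the markers come from the constraints above):
    #
    #   <!-- index.html -->
    #   <!DOCTYPE html> ...
    #   /* style.css */
    #   body { ... }
    #   // script.js
    #   document.addEventListener("DOMContentLoaded", ...);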
    # --- Construct the messages for the API ---
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": prompt},
    ]
    # --- Stream the response from the API ---
    response_stream = ""
    try:
        print("Sending request to API...")
        for message in client.chat_completion(
            messages=messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            stop=["<|endoftext|>", "<|im_end|>", "</s>"],  # common stop tokens; chat_completion takes `stop`, not `stop_sequences`
        ):
            token = message.choices[0].delta.content
            # delta.content can be None on some chunks, so only append strings
            if isinstance(token, str):
                response_stream += token
                yield response_stream  # Yield the cumulative response for live update
            # Optional safety break if generation seems stuck or runs too long:
            # if len(response_stream) > max_tokens * 5:  # heuristic character limit
            #     print("Warning: Exceeded heuristic length limit, stopping generation.")
            #     break
        print("API stream finished.")
    except Exception as e:
        print(f"Error during API call: {e}")
        yield f"An error occurred while generating the code: {e}"
# --- Build Gradio Interface using Blocks ---
with gr.Blocks() as demo:
    gr.Markdown("# Website Code Generator")
    gr.Markdown(
        "Describe the website you want, choose your options, and the AI will generate the frontend code (HTML, CSS, JS). "
        "The code will appear live in the text editor below."
    )
    with gr.Row():
        with gr.Column(scale=2):
            prompt_input = gr.Textbox(
                label="Website Description",
                placeholder="e.g., A simple portfolio website with a header, an 'About Me' section, a project grid, and a contact form.",
                lines=4,
            )
            backend_radio = gr.Radio(
                ["Static", "Flask", "Node.js"],
                label="Backend Context",
                value="Static",
                info="How should the AI structure the frontend (e.g., template placeholders for Flask)?",
            )
            file_structure_radio = gr.Radio(
                ["Single File", "Multiple Files"],
                label="Output File Structure",
                value="Multiple Files",
                info="Generate everything in index.html or separate CSS/JS files?",
            )
            generate_button = gr.Button("Generate Website Code", variant="primary")
        with gr.Column(scale=3):
            code_output = gr.Code(
                label="Generated Code",
                language="html",  # Start with html, might contain css/js too
                lines=25,
                interactive=False,  # Read-only display
            )
    with gr.Accordion("Advanced Generation Settings", open=False):
        max_tokens_slider = gr.Slider(
            minimum=256, maximum=4096, value=1500, step=64, label="Max New Tokens"
        )
        temperature_slider = gr.Slider(
            minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature"
        )
        top_p_slider = gr.Slider(
            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P (Nucleus Sampling)"
        )
    # --- Connect Inputs/Outputs to the Function ---
    generate_button.click(
        fn=generate_code,
        inputs=[
            prompt_input,
            backend_radio,
            file_structure_radio,
            max_tokens_slider,
            temperature_slider,
            top_p_slider,
        ],
        outputs=code_output,
        api_name="generate_website_code",  # Optional: exposes a named endpoint for API usage
    )
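
# Because api_name is set above, the endpoint can also be called programmatically.
# A minimal sketch with gradio_client (the URL and input values are placeholders,
# not part of this app):
#
#   from gradio_client import Client
#   api = Client("http://127.0.0.1:7860")
#   code = api.predict(
#       "A landing page for a bakery",  # prompt
#       "Static",                       # backend context
#       "Single File",                  # file structure
#       1500, 0.7, 0.95,                # max tokens, temperature, top-p
#       api_name="/generate_website_code",
#   )  # predict() returns the final (complete) streamed output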
# --- Launch the App ---
if __name__ == "__main__":
    demo.queue()  # Enable queuing for handling multiple users
    demo.launch(debug=True)  # Set debug=False for production/Spaces
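
# To run locally (a sketch; substitute your own token value):
#   export HF_TOKEN=hf_...   # optional, but avoids anonymous rate limits
#   python app.py
# On Spaces, add HF_TOKEN as a repository secret instead of exporting it.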