File size: 10,396 Bytes
dfc5e93
 
5f4f3c1
58a5e73
cd13883
 
 
58a5e73
cd13883
 
 
58a5e73
 
cd13883
58a5e73
cd13883
 
 
 
58a5e73
83207ef
cd13883
 
 
 
 
 
58a5e73
cd13883
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58a5e73
cd13883
 
 
58a5e73
 
 
cd13883
 
 
 
 
 
 
58a5e73
 
 
 
 
 
cd13883
58a5e73
 
 
 
 
 
 
 
83207ef
cd13883
83207ef
cd13883
 
83207ef
853d569
cd13883
58a5e73
5f4f3c1
853d569
58a5e73
853d569
58a5e73
cd13883
58a5e73
dfc5e93
b752712
58a5e73
b752712
 
dfc5e93
58a5e73
 
 
 
 
 
 
 
 
 
 
 
cd13883
58a5e73
 
 
 
cd13883
58a5e73
cd13883
58a5e73
 
 
 
 
 
 
 
 
 
 
cd13883
 
58a5e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd13883
58a5e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd13883
 
 
58a5e73
 
 
f4eb547
0950920
58a5e73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83207ef
58a5e73
83207ef
58a5e73
 
 
83207ef
 
dfc5e93
58a5e73
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
import gradio as gr
from huggingface_hub import InferenceClient
import re
import time # For potential brief pauses if needed

# --- Hugging Face Token (Optional but Recommended) ---
# from huggingface_hub import login
# login("YOUR_HUGGINGFACE_TOKEN")

# --- Inference Client ---
try:
    # You might need to specify the model URL directly if the alias isn't working
    # client = InferenceClient(model="https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta")
    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
    client.timeout = 120 # Increase timeout for potentially long generations
except Exception as e:
    print(f"Error initializing InferenceClient: {e}")
    client = None

# --- Parsing Function (from previous good version) ---
def parse_files(raw_response):
    """
    Parses filenames and code blocks from the raw AI output.
    """
    if not raw_response:
        return []

    # Pattern: Look for a filename line followed by content until the next filename line or end of string.
    pattern = re.compile(
        r"^\s*([\w\-.\/\\]+\.\w+)\s*\n"  # Filename line (must have an extension)
        r"(.*?)"                        # Capture content (non-greedy)
        r"(?=\n\s*[\w\-.\/\\]+\.\w+\s*\n|\Z)", # Lookahead for next filename or end of string
        re.DOTALL | re.MULTILINE
    )
    files = pattern.findall(raw_response)

    cleaned_files = []
    for name, content in files:
        # Remove common code block markers (``` optionally followed by lang)
        content_cleaned = re.sub(r"^\s*```[a-zA-Z]*\n?", "", content, flags=re.MULTILINE)
        content_cleaned = re.sub(r"\n?```\s*$", "", content_cleaned, flags=re.MULTILINE)
        cleaned_files.append((name.strip(), content_cleaned.strip()))

    # Fallback if no files parsed but content exists
    if not cleaned_files and raw_response.strip():
        if any(c in raw_response for c in ['<','>','{','}',';','(',')']):
            print("Warning: No filenames found, defaulting to index.html")
            lang = "html"
            if "{" in raw_response and "}" in raw_response and ":" in raw_response: lang = "css"
            elif "function" in raw_response or "const" in raw_response or "let" in raw_response: lang = "javascript"
            default_filename = "index.html"
            if lang == "css": default_filename = "style.css"
            elif lang == "javascript": default_filename = "script.js"
            cleaned_files.append((default_filename, raw_response.strip()))

    return cleaned_files

# --- Streaming and Parsing Orchestrator ---
def stream_and_parse_code(prompt, backend, system_message, max_tokens, temperature, top_p):
    """
    Streams raw output to one component and generates final tabs for another.
    This function acts as the main callback for the button click.
    """
    if not client:
        error_msg = "Error: Inference Client not available."
        yield {
            live_output: error_msg,
            final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Error", children=[gr.Textbox(value=error_msg)])])
        }
        return # Stop execution

    # --- Prepare for Streaming ---
    full_sys_msg = f"""
You are a code generation AI. Given a prompt, generate the necessary files for a website using the {backend} backend.
Always include an index.html file.
Respond ONLY with filenames and the raw code for each file.
Each file must start with its filename on a new line. Example:

index.html
<!DOCTYPE html>
<html></html>

style.css
body {{}}

Ensure the code is complete. NO commentary, NO explanations, NO markdown formatting like backticks (```).
Start generating the files now.
""".strip() + ("\n" + system_message if system_message else "")

    messages = [
        {"role": "system", "content": full_sys_msg},
        {"role": "user", "content": prompt}
    ]

    full_raw_response = ""
    error_occurred = False
    error_message = ""

    # Initial state update
    yield {
        live_output: "Generating stream...",
        # Set initial tabs state to indicate loading
        final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Generating...")])
    }

    # --- Streaming Loop ---
    try:
        stream = client.chat_completion(
            messages,
            max_tokens=int(max_tokens),
            stream=True,
            temperature=temperature,
            top_p=top_p
        )
        for chunk in stream:
            token = chunk.choices[0].delta.content
            if token:
                full_raw_response += token
                # Yield updates for the live raw output component
                # Keep tabs in a 'generating' state during the stream
                yield {
                    live_output: full_raw_response,
                    final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Streaming...")]) # Keep showing streaming
                }
                # time.sleep(0.01) # Optional small delay if updates are too fast

    except Exception as e:
        print(f"Error during AI streaming: {e}")
        error_message = f"Error during AI generation: {e}\n\nPartial Response:\n{full_raw_response}"
        error_occurred = True
        # Update live output with error, keep tabs showing error state
        yield {
            live_output: error_message,
            final_tabs: gr.Tabs(tabs=[gr.TabItem(label="Error")])
        }


    # --- Post-Streaming: Parsing and Final Tab Generation ---
    if error_occurred:
        # If an error happened during stream, create an error tab
         final_tabs_update = gr.Tabs(tabs=[
            gr.TabItem(label="Error", children=[gr.Textbox(value=error_message, label="Generation Error")])
        ])
    else:
        # Parse the complete raw response
        print("\n--- Final Raw AI Response ---")
        print(full_raw_response)
        print("--------------------------\n")
        files = parse_files(full_raw_response)

        if not files:
            # Handle case where parsing failed or AI gave empty/invalid response
            no_files_msg = "AI finished, but did not return recognizable file content. See raw output above."
            final_tabs_update = gr.Tabs(tabs=[
                gr.TabItem(label="Output", children=[gr.Textbox(value=no_files_msg, label="Result")])
            ])
            # Update live output as well if needed
            yield { live_output: full_raw_response + "\n\n" + no_files_msg, final_tabs: final_tabs_update }
            return # Exit if no files

        # --- Create Tabs (if files were parsed successfully) ---
        tabs_content = []
        for name, content in files:
            name = name.strip()
            content = content.strip()
            if not name or not content:
                print(f"Skipping file with empty name or content: Name='{name}'")
                continue

            lang = "text" # Default
            if name.endswith((".html", ".htm")): lang = "html"
            elif name.endswith(".css"): lang = "css"
            elif name.endswith(".js"): lang = "javascript"
            elif name.endswith(".py"): lang = "python"
            elif name.endswith(".json"): lang = "json"
            elif name.endswith(".md"): lang = "markdown"
            elif name.endswith((".sh", ".bash")): lang = "bash"

            tab_item = gr.TabItem(label=name, elem_id=f"tab_{name.replace('.', '_').replace('/', '_')}", children=[
                gr.Code(value=content, language=lang, label=name)
            ])
            tabs_content.append(tab_item)

        if not tabs_content: # Handle case where parsing found files, but they were filtered out
             final_tabs_update = gr.Tabs(tabs=[gr.TabItem(label="Output", children=[gr.Textbox(value="No valid files generated.", label="Result")])])
        else:
            final_tabs_update = gr.Tabs(tabs=tabs_content) # Create the final Tabs component


    # --- Final Update ---
    # Yield the final state for both components
    yield {
        live_output: full_raw_response if not error_occurred else error_message, # Show final raw response or error
        final_tabs: final_tabs_update # Show the generated tabs or error tab
    }


# --- Gradio UI Definition ---
with gr.Blocks(css=".gradio-container { max-width: 95% !important; }") as demo: # Wider interface
    gr.Markdown("## WebGen AI β€” One Prompt β†’ Full Website Generator")
    gr.Markdown("Generates website code based on your description. Raw output streams live, final files appear in tabs below.")

    with gr.Row():
        with gr.Column(scale=2):
            prompt = gr.Textbox(label="Describe your website", placeholder="E.g., a simple portfolio site with a dark mode toggle", lines=3)
            backend = gr.Dropdown(["Static", "Flask", "Node.js"], value="Static", label="Backend Technology")
            with gr.Accordion("Advanced Options", open=False):
                system_message = gr.Textbox(label="Extra instructions for the AI (System Message)", placeholder="Optional: e.g., 'Use Bootstrap 5', 'Prefer functional components in React'", value="")
                max_tokens = gr.Slider(minimum=256, maximum=4096, value=1536, step=64, label="Max Tokens (Length)")
                temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (Creativity)")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (Sampling)")
            generate_button = gr.Button("✨ Generate Code ✨", variant="primary")

        with gr.Column(scale=3):
            gr.Markdown("#### Live Raw Output Stream")
            # Component to show the live, unparsed stream
            live_output = gr.Code(label="Raw AI Stream", language="text", lines=15, interactive=False)

    gr.Markdown("---")
    gr.Markdown("#### Final Generated Files (Tabs)")
    # Placeholder for the final structured tabs
    final_tabs = gr.Tabs(elem_id="output_tabs")


    # Button click action - uses the orchestrator function
    generate_button.click(
        stream_and_parse_code, # Call the main function that handles streaming and parsing
        inputs=[prompt, backend, system_message, max_tokens, temperature, top_p],
        # Outputs dictionary maps function yields to components
        outputs=[live_output, final_tabs],
        show_progress="hidden" # Hide default progress bar as we show live stream
    )

if __name__ == "__main__":
    demo.launch(debug=True)