Lohia, Aditya committed on
Commit
03542ad
·
1 Parent(s): f6136a1

update: finalize changes

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -6,10 +6,11 @@ from dialog import get_dialog_box
6
  from gateway import check_server_health, request_generation
7
 
8
  # CONSTANTS
9
- MAX_NEW_TOKENS: int = 4096
10
 
11
  # GET ENVIRONMENT VARIABLES
12
- CLOUD_GATEWAY_API = os.getenv("API_ENDPOINT")
 
13
 
14
 
15
  def toggle_ui():
@@ -77,7 +78,11 @@ def generate(
77
  chat_interface = gr.ChatInterface(
78
  fn=generate,
79
  additional_inputs=[
80
- gr.Textbox(label="System prompt", lines=6),
 
 
 
 
81
  gr.Slider(
82
  label="Max New Tokens",
83
  minimum=1,
@@ -116,8 +121,9 @@ chat_interface = gr.ChatInterface(
116
  ],
117
  stop_btn=None,
118
  examples=[
 
119
  [
120
- "I need to be in Japan for 10 days, going to Tokyo, Kyoto and Osaka for Cherry Blossom. Think about number of attractions in each of them and allocate number of days to each city. Make public transport recommendations."
121
  ],
122
  ["Can you explain briefly to me what is the Python programming language?"],
123
  ["Explain the plot of Cinderella in a sentence."],
@@ -152,7 +158,8 @@ with gr.Blocks(css="style.css", fill_height=True) as demo:
152
 
153
 
154
  if __name__ == "__main__":
155
- demo.queue(
156
- max_size=int(os.getenv("QUEUE")),
157
- default_concurrency_limit=int(os.getenv("CONCURRENCY_LIMIT")),
158
- ).launch()
 
 
6
  from gateway import check_server_health, request_generation
7
 
8
  # CONSTANTS
9
+ MAX_NEW_TOKENS: int = 2048
10
 
11
  # GET ENVIRONMENT VARIABLES
12
+ # CLOUD_GATEWAY_API = os.getenv("API_ENDPOINT")
13
+ CLOUD_GATEWAY_API = "http://aac1.amd.com:7003"
14
 
15
 
16
  def toggle_ui():
 
78
  chat_interface = gr.ChatInterface(
79
  fn=generate,
80
  additional_inputs=[
81
+ gr.Textbox(
82
+ label="System prompt",
83
+ value="You are a highly capable AI assistant. Provide accurate, concise, and fact-based responses that are directly relevant to the user's query. Avoid speculation, ensure logical consistency, and maintain clarity in longer outputs. Keep answers well-structured and under 1200 tokens unless explicitly requested otherwise.",
84
+ lines=3,
85
+ ),
86
  gr.Slider(
87
  label="Max New Tokens",
88
  minimum=1,
 
121
  ],
122
  stop_btn=None,
123
  examples=[
124
+ ["Plan a three-day trips to Washington DC for Spring Blossom."],
125
  [
126
+ "Compose a joyful and short musical piece for kids to celebrate Spring sunshine and blossom."
127
  ],
128
  ["Can you explain briefly to me what is the Python programming language?"],
129
  ["Explain the plot of Cinderella in a sentence."],
 
158
 
159
 
160
  if __name__ == "__main__":
161
+ # demo.queue(
162
+ # max_size=int(os.getenv("QUEUE")),
163
+ # default_concurrency_limit=int(os.getenv("CONCURRENCY_LIMIT")),
164
+ # ).launch()
165
+ demo.queue().launch()