Commit 45c882e · Parent(s): c1d34f4

app.py CHANGED

@@ -44,15 +44,12 @@ def load_model(hf_token):
         token=hf_token
     )
 
-    # Load model with
+    # Load model with minimal configuration to avoid errors
     global_model = AutoModelForCausalLM.from_pretrained(
         model_name,
         torch_dtype=torch.float16,
         device_map="auto",
-        token=hf_token
-        use_cache=True,
-        low_cpu_mem_usage=True,
-        attn_implementation="flash_attention_2" if torch.cuda.is_available() else "eager"
+        token=hf_token
     )
 
     model_loaded = True
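
The hunk above strips the model load down to the arguments from_pretrained actually needs. For orientation, the loader after this commit reduces to roughly the sketch below; the module-level globals, the AutoTokenizer call, the placeholder model_name, and the return string are assumptions inferred from the surrounding diff, not code taken verbatim from the Space.

# Sketch of load_model as it stands after this change (assumed surrounding code).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

global_tokenizer = None
global_model = None
model_loaded = False
model_name = "some-org/some-causal-lm"  # placeholder; the Space defines its own model_name

def load_model(hf_token):
    global global_tokenizer, global_model, model_loaded
    global_tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        token=hf_token
    )
    # Load model with minimal configuration to avoid errors
    global_model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        token=hf_token
    )
    model_loaded = True
    return "Model loaded successfully."  # assumed status message for the auth_status box
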
@@ -162,28 +159,15 @@ def generate_text(prompt, max_length=1024, temperature=0.7, top_p=0.95):
         return "Please enter a prompt to generate text."
 
     try:
+        # Keep generation simple to avoid errors
         inputs = global_tokenizer(prompt, return_tensors="pt").to(global_model.device)
 
-        generation_config = {
-            "max_length": max_length,
-            "do_sample": True,
-            "pad_token_id": global_tokenizer.eos_token_id,
-        }
-
-        # Only add temperature if it's not too low (can cause probability issues)
-        if temperature >= 0.2:
-            generation_config["temperature"] = temperature
-        else:
-            generation_config["temperature"] = 0.2
-
-        # Only add top_p if it's valid
-        if 0 < top_p < 1:
-            generation_config["top_p"] = top_p
-
-        # Generate text with safer parameters
+        # Use simpler generation parameters that work reliably
         outputs = global_model.generate(
-            …
-            **generation_config
+            inputs.input_ids,
+            max_length=min(2048, max_length + len(inputs.input_ids[0])),
+            temperature=max(0.3, temperature),  # Prevent too low temperature
+            do_sample=True
        )
 
         # Decode and return the generated text
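
The new call bounds the total sequence at 2048 tokens while still allowing up to max_length freshly generated tokens, i.e. max_length=min(2048, max_length + prompt_length). The same bound can be written with max_new_tokens, which transformers' generate also accepts; the snippet below is only an illustrative sketch meant to sit inside the same try block, and the pad_token_id line is an assumption carried over from the removed generation_config rather than something this commit adds.

# Illustrative alternative inside generate_text, not part of the commit:
# the same length cap expressed with max_new_tokens.
prompt_len = inputs.input_ids.shape[1]
outputs = global_model.generate(
    inputs.input_ids,
    max_new_tokens=min(2048 - prompt_len, max_length),  # same total-length bound as min(2048, max_length + prompt_len)
    temperature=max(0.3, temperature),
    do_sample=True,
    pad_token_id=global_tokenizer.eos_token_id,  # assumption: kept from the removed config
)
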
@@ -191,8 +175,9 @@ def generate_text(prompt, max_length=1024, temperature=0.7, top_p=0.95):
         return generated_text
     except Exception as e:
         error_msg = str(e)
+        print(f"Generation error: {error_msg}")
         if "probability tensor" in error_msg:
-            return "Error: There was a problem with the generation parameters. Try using
+            return "Error: There was a problem with the generation parameters. Try using simpler parameters or a different prompt."
         else:
             return f"Error generating text: {error_msg}"
 
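
The "probability tensor" check targets the RuntimeError that torch.multinomial raises during sampling when the probabilities contain NaN, Inf, or negative values (for example after an fp16 overflow or with extreme sampling settings). A minimal standalone reproduction, independent of the Space:

# Standalone illustration of the error the except branch looks for.
import torch

probs = torch.tensor([float("nan"), 0.5])
try:
    torch.multinomial(probs, num_samples=1)
except RuntimeError as e:
    print(e)  # message complains about the probability tensor containing inf/nan or an element < 0
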
@@ -247,12 +232,27 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
             )
 
         with gr.Column(scale=1):
-            auth_button = gr.Button("Authenticate")
+            auth_button = gr.Button("Authenticate", variant="primary")
 
-            …
+            with gr.Group(visible=True) as auth_message_group:
+                auth_status = gr.Markdown("Please authenticate to use the model.")
 
+    def authenticate(token):
+        auth_message_group.visible = True
+        return "Loading model... Please wait, this may take a minute."
+
+    def auth_complete(token):
+        result = load_model(token)
+        return result
+
+    # Two-step authentication to show loading message
     auth_button.click(
-        fn=
+        fn=authenticate,
+        inputs=[hf_token],
+        outputs=[auth_status],
+        queue=False
+    ).then(
+        fn=auth_complete,
         inputs=[hf_token],
         outputs=[auth_status]
     )
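
The click(...).then(...) chain is the standard Gradio pattern for showing immediate feedback before a slow step: the first callback returns the loading message right away (queue=False so it is not held up by the queue), and the chained callback performs the actual model load. A self-contained sketch of the same pattern, using illustrative names rather than the Space's own:

# Standalone sketch of the two-step click pattern; names are illustrative.
import time
import gradio as gr

def show_loading(_token):
    return "Loading model... Please wait, this may take a minute."

def do_load(token):
    time.sleep(2)  # stand-in for the real load_model(token)
    return "Model loaded." if token else "No token provided."

with gr.Blocks() as demo:
    token_box = gr.Textbox(label="Hugging Face token", type="password")
    button = gr.Button("Authenticate", variant="primary")
    status = gr.Markdown("Please authenticate to use the model.")
    button.click(
        fn=show_loading, inputs=[token_box], outputs=[status], queue=False
    ).then(
        fn=do_load, inputs=[token_box], outputs=[status]
    )

if __name__ == "__main__":
    demo.launch()
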
@@ -1019,6 +1019,8 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
 
 # Load default token if available
 if DEFAULT_HF_TOKEN:
-    demo.load(fn=
+    demo.load(fn=authenticate, inputs=[hf_token], outputs=[auth_status]).then(
+        fn=auth_complete, inputs=[hf_token], outputs=[auth_status]
+    )
 
-demo.launch()
+demo.launch(share=False)
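
demo.load(...) fires when the page is opened, so with DEFAULT_HF_TOKEN set the Space now authenticates automatically through the same two callbacks as the button, and demo.launch(share=False) simply makes the default non-shared launch explicit. Where DEFAULT_HF_TOKEN comes from is outside this hunk; a common arrangement, assumed here and not confirmed by the diff, reads it from a Space secret exposed as an environment variable and pre-fills the token textbox with it.

# Assumed setup near the top of app.py (not part of this hunk); the secret
# name "HF_TOKEN" is a guess.
import os

DEFAULT_HF_TOKEN = os.environ.get("HF_TOKEN", "")

# ...and later, inside the Blocks layout, the textbox would presumably be pre-filled
# so the demo.load(...) chain receives a token on page load:
# hf_token = gr.Textbox(label="Hugging Face Token", type="password", value=DEFAULT_HF_TOKEN)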
|