import gradio as gr
import spaces
import torch
from transformers import BitsAndBytesConfig, pipeline

# 4-bit NF4 quantization so the 8B model fits in limited GPU memory
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

# Load the chat pipeline once at startup; quantization settings must be
# passed through model_kwargs, not as a direct pipeline() argument
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    model_kwargs={"quantization_config": quant_config},
    device_map="auto"
)
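
# Optional sanity check (illustrative, not part of the original app): run once to
# confirm the quantized pipeline answers in chat format before wiring up the UI.
#   print(model([{"role": "user", "content": "Hello"}],
#               max_new_tokens=32)[0]["generated_text"][-1]["content"])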
# ZeroGPU allocation window increased to 5 minutes (300 s)
@spaces.GPU(duration=300)
def chat_response(message, history):
    # Explicit initialization check: reuse the pipeline loaded at startup
    if not hasattr(chat_response, "pipe"):
        chat_response.pipe = model
    # history is expected as {"role", "content"} dicts (messages-style chatbot)
    messages = list(history) + [{"role": "user", "content": message}]
    # Timeout/OOM handling around generation (max_new_tokens is an illustrative value)
    try:
        response = chat_response.pipe(messages, max_new_tokens=512)
        return response[0]["generated_text"][-1]["content"]
    except RuntimeError as e:
        return f"GPU timeout: {str(e)}"
demo = gr.ChatInterface(
    chat_response,
    chatbot=gr.Chatbot(height=500, type="messages"),  # Explicit messages format
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
    retry_btn=None,   # Note: retry_btn/undo_btn exist only in Gradio 4.x;
    undo_btn=None     # remove both arguments when running on Gradio 5+
)
demo.launch() | |
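
# Assumed Space setup (not shown in the original): a requirements.txt listing
# gradio, spaces, torch, transformers, accelerate, and bitsandbytes is needed
# for the 4-bit quantized pipeline to load on Hugging Face Spaces.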