Spaces:

legaltextai
/

test_model

Runtime error

File size: 1,186 Bytes

21b2e67
c0eecae
a65b66d
609a014
 
 
 
 
 
 
 
c0eecae
d46b7dc
 
 
609a014
 
d46b7dc
 
609a014
5b130c8
d46b7dc
5b130c8
 
 
d46b7dc
5b130c8
 
 
 
 
 
 
c0eecae
d46b7dc
 
5b130c8
d46b7dc
5b130c8
 
 
 
21b2e67
5b130c8
c0eecae

import gradio as gr
import spaces
import torch
from transformers import BitsAndBytesConfig, pipeline

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True
)

model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    quantization_config=quant_config,
    device_map="auto"
)


@spaces.GPU(duration=300)  # Increased to 5 minutes
def chat_response(message, history):
    # Add explicit initialization check
    if not hasattr(chat_response, "pipe"):
        chat_response.pipe = pipeline(...)
    
    # Add timeout handling
    try:
        response = chat_response.pipe(...)
        return response[0]['generated_text'][-1]["content"]
    except RuntimeError as e:
        return f"GPU timeout: {str(e)}"
        

demo = gr.ChatInterface(
    chat_response,
    chatbot=gr.Chatbot(height=500, type="messages"),  # Explicit type
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
    retry_btn=None,
    undo_btn=None
)

demo.launch()