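"""Gradio chat demo: a small instruct LLM served through a transformers
text-generation pipeline, with optional Hugging Face ZeroGPU support
(see the is_hugging_face flag and the @spaces.GPU decorator below)."""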
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

# Module-level state: init() fills these in and generate_text() reuses them
# (they must be globals so the ZeroGPU re-initialization path can see them).
text_generator = None
tokenizer = None
huggingface_token = None
model_id = None
device = None
dtype = None
is_hugging_face = False
def init():
    global text_generator, tokenizer, huggingface_token, model_id, device, dtype
    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    if not huggingface_token:
        print("HUGGINGFACE_TOKEN is not set; add it as a secret if the model requires authentication")
        #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
    
    model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    model_id = "google/gemma-2b"
    model_id = "Qwen/Qwen2.5-0.5B-Instruct"
    
    device = "auto" # torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #device = "cuda"
    dtype = torch.bfloat16
    
    tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
    
    print(model_id, device, dtype)

    model = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
    )
    # The pipeline object has no .to(device); placement is handled by device_map.
    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer,
        torch_dtype=dtype, device_map=device,
    )
    
    if not is_hugging_face:
        # Sanity-check device placement when running outside a ZeroGPU Space.
        if next(model.parameters()).is_cuda:
            print("The model is on a GPU")
        else:
            print("The model is on a CPU")

        # text_generator.device is a torch.device such as cuda:0, so compare its type.
        if getattr(text_generator.device, "type", None) == "cuda":
            print("The pipeline is using a GPU")
        else:
            print("The pipeline is using a CPU")
    
    print("initialized")

@spaces.GPU
def generate_text(messages):
    global text_generator
    if is_hugging_face:  # ZeroGPU attaches a GPU only inside @spaces.GPU calls, so rebuild the pipeline every time
        model = AutoModelForCausalLM.from_pretrained(
            model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
        )
        text_generator = pipeline(
            "text-generation", model=model, tokenizer=tokenizer,
            torch_dtype=dtype, device_map=device,
        )
    result = text_generator(messages, max_new_tokens=32, do_sample=True, temperature=0.7)

    generated_output = result[0]["generated_text"]
    if isinstance(generated_output, list):
        # Chat-format output: the reply is the last assistant turn.
        for message in reversed(generated_output):
            if message.get("role") == "assistant":
                return message.get("content", "No content found.")
        return "No assistant response found."
    else:
        return "Unexpected output format."



def call_generate_text(message, history):
    if len(history) == 0:
        # Seed a short system prompt on the first turn.
        history = [{"role": "system", "content": "You respond in around 10 words."}]
    print(message)
    print(history)

    messages = history + [{"role": "user", "content": message}]
    try:
        text = generate_text(messages)
        messages += [{"role": "assistant", "content": text}]
        return "", messages
    except RuntimeError as e:
        print(f"An unexpected error occurred: {e}")

    return "", history


with gr.Blocks(title="LLM with TTS") as demo:
    gr.Markdown("LLM and TTS models will change without notice.")
    # Log the newest chat message to the browser console whenever the chatbot updates.
    js = """
    function(chatbot){
        console.log(chatbot[chatbot.length - 1]);
    }
    """
    chatbot = gr.Chatbot(type="messages")
    chatbot.change(None, [chatbot], [], js=js)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    #demo = gr.ChatInterface(call_generate_text, chatbot=chatbot, type="messages")
    msg.submit(call_generate_text, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    init()
    demo.launch(share=True)  # share=True opens a public Gradio link; unnecessary when running inside a Space