File size: 2,878 Bytes
709d394 486a2f6 cf5249f 709d394 349f644 1a5d1dc 5cf089b 4fa0d27 a13c01c 515f252 f7f4304 38fedf1 a0376c8 709d394 3f46449 9b3882c a0376c8 3f46449 709d394 3ea359d 709d394 dc0acc6 709d394 ef2fea2 600a2a9 709d394 29437cc 709d394 1cdad52 4f6966f b5aae38 6e1661f cd0aa02 6e1661f cd0aa02 a13c01c 6e1661f a13c01c b5aae38 acf224c 7fc9307 acf224c 7fc9307 a13c01c 8325138 1cdad52 4f6966f 1cdad52 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import spaces
import gradio as gr
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig,AwqConfig
import torch
import os
# key = os.environ.get("key")
from huggingface_hub import login
# login(key)
# Checkpoint identifier handed to both `from_pretrained` calls below.
model_id = "Qwen/Qwen1.5-14B-Chat"

# 4-bit NF4 quantization settings (double quantization, bfloat16 compute).
# NOTE: this config is built but is not passed to `from_pretrained` below,
# so it currently has no effect on how the model is loaded.
nf4_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Tokenizer and model are loaded once, at module import time.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
@spaces.GPU
def generate_response(user_input, max_new_tokens, temperature):
    """Generate the model's reply to a single user message.

    The message is wrapped in the tokenizer's chat template, sampled from with
    ``model.generate``, and only the newly generated tokens are decoded, so the
    returned text contains the reply without the prompt.

    Args:
        user_input: The user's message text.
        max_new_tokens: Upper bound on the number of tokens to generate.
            Converted to ``int`` before use.
        temperature: Sampling temperature forwarded to ``model.generate``.

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    # Prints the GPU status to the process output each time the function runs.
    os.system("nvidia-smi")

    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    input_ids = input_ids.to(model.device)
    prompt_length = input_ids.shape[-1]

    gen_tokens = model.generate(
        input_ids=input_ids,
        # The value comes from a UI slider and may arrive as a float;
        # a token count has to be an integer.
        max_new_tokens=int(max_new_tokens),
        do_sample=True,
        temperature=temperature,
    )

    # The generated sequence starts with the prompt tokens. Decoding it whole
    # would return the rendered chat template followed by the reply, and a
    # text-level `startswith(user_input)` check cannot remove that because the
    # template text precedes the user message. Slice at token level instead.
    reply_tokens = gen_tokens[0][prompt_length:]
    return tokenizer.decode(reply_tokens, skip_special_tokens=True).strip()
# Preset prompts with the generation settings to load alongside each one.
examples = [
    dict(message="What is the weather like today?", max_new_tokens=250, temperature=0.5),
    dict(message="Tell me a joke.", max_new_tokens=650, temperature=0.7),
    dict(message="Explain the concept of machine learning.", max_new_tokens=980, temperature=0.4),
]

# One label per preset, numbered from 1, in the same order as `examples`.
example_choices = [f"Example {number}" for number, _ in enumerate(examples, start=1)]
def load_example(choice):
    """Return the preset values for the example labelled ``choice``.

    Args:
        choice: One of the labels in ``example_choices``.

    Returns:
        A ``(message, max_new_tokens, temperature)`` tuple read from the entry
        of ``examples`` at the same position as ``choice`` in
        ``example_choices``.

    Raises:
        ValueError: If ``choice`` is not present in ``example_choices``.
    """
    # The label's position in `example_choices` selects the preset.
    preset = examples[example_choices.index(choice)]
    return tuple(preset[field] for field in ("message", "max_new_tokens", "temperature"))
# Short model name shown in the UI labels. It is derived from `model_id` so the
# interface always names the model that is actually being served.
model_label = model_id.rsplit("/", 1)[-1]

# Gradio interface: generation settings, a prompt box, the model's reply, and a
# small control for loading one of the preset examples into the inputs.
with gr.Blocks() as demo:
    # NOTE(review): the nesting here was reconstructed; it assumes only the two
    # sliders share the row and every other component sits below it - confirm.
    with gr.Row():
        max_new_tokens_slider = gr.Slider(minimum=100, maximum=4000, value=980, label="Max New Tokens")
        temperature_slider = gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.3, label="Temperature")

    message_box = gr.Textbox(lines=2, label="Your Message")
    generate_button = gr.Button(f"Try🫡{model_label}")
    output_box = gr.Textbox(label=f"🫡{model_label}")

    # Send the message and both slider values to the model; show the reply.
    generate_button.click(
        fn=generate_response,
        inputs=[message_box, max_new_tokens_slider, temperature_slider],
        outputs=output_box,
    )

    example_dropdown = gr.Dropdown(label="🫡Load Example", choices=example_choices)
    example_button = gr.Button("🫡Load")

    # Copy the selected preset into the message box and the two sliders.
    example_button.click(
        fn=load_example,
        inputs=example_dropdown,
        outputs=[message_box, max_new_tokens_slider, temperature_slider],
    )

demo.launch()
|