# NOTE: this file was copied from a Hugging Face Spaces page; the page chrome
# ("Spaces:" header and "Runtime error" status badges) came along with the paste
# and is preserved here as a comment — it is not part of the program.
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Model to load: GPTQ-quantized Pygmalion-13B with SuperHOT 8K-context patch.
model_name = "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ"

# Load the tokenizer that matches the model checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model in half precision; device_map="auto" lets accelerate place
# the weights on whatever devices are available (GPU(s), CPU offload, ...).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
def chat(prompt):
    """Generate a sampled model response for *prompt* and return the decoded text.

    Returns the full decoded sequence (prompt + continuation) with special
    tokens stripped.
    """
    # Move inputs to wherever the model was placed. The original hard-coded
    # .to("cuda"), which crashes on CPU-only hosts and contradicts
    # device_map="auto"; model.device is correct in both cases.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,       # sampling (not greedy) for more varied replies
        temperature=0.7,
        top_p=0.9,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Interactive test chat: read a line, print the model's reply; "exit"/"quit" stops.
# Guarded so that importing this module does not block on input().
if __name__ == "__main__":
    while True:
        user_input = input("You: ")
        # Accept exit commands regardless of case or surrounding whitespace.
        if user_input.strip().lower() in ("exit", "quit"):
            break
        response = chat(user_input)
        print("AI:", response)