# --- Page header captured along with the file (not part of the program) ---
# MoinRomanticbot / app.py
# syedmoinms's picture
# Update app.py
# f12abd4 verified
# raw
# history blame
# 824 Bytes
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Model identifier passed to from_pretrained; change this to switch models.
model_name = "TheBloke/Pygmalion-13B-SuperHOT-8K-GPTQ"

# Load the tokenizer that matches the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model with automatic device selection (device_map="auto") and
# float16 as the requested torch dtype.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
def chat(prompt):
    """Generate a sampled reply to ``prompt`` using the module-level model.

    Args:
        prompt: Text to tokenize and feed to the model.

    Returns:
        The newly generated text (at most 200 new tokens), decoded with
        special tokens removed. The prompt itself is not included.
    """
    # Send the encoded prompt to the device the model reports instead of a
    # hard-coded "cuda": the model is loaded with device_map="auto", so a CUDA
    # device is not guaranteed.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # For a causal LM the generated sequence starts with the prompt tokens;
    # skip them so the reply does not echo the user's own text.
    prompt_length = inputs["input_ids"].shape[-1]
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
# Interactive test chat: read lines from stdin until the user types
# "exit"/"quit" or the input stream ends. Guarded so that importing this
# module does not start a blocking prompt.
if __name__ == "__main__":
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user pressed Ctrl-C: leave quietly
            # instead of dumping a traceback.
            print()
            break
        command = user_input.strip().lower()
        if command in ("exit", "quit"):
            break
        if not command:
            # Nothing to send to the model; prompt again.
            continue
        response = chat(user_input)
        print("AI:", response)