from fastapi.staticfiles import StaticFiles
from fastapi.responses import FileResponse
from pydantic import BaseModel
from fastapi import FastAPI
import os
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AutoModelForCausalLM, AutoTokenizer
import torch
app = FastAPI()
name = "microsoft/DialoGPT-small"
# microsoft/DialoGPT-small
# microsoft/DialoGPT-medium
# microsoft/DialoGPT-large
# mistralai/Mixtral-8x7B-Instruct-v0.1
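# Note: switching `name` to one of the checkpoints listed above changes which branch of
# the /api handler below runs: the DialoGPT names take the chat-style path, any other
# checkpoint takes the plain generation path.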
# Load the model and tokenizer via the Auto classes (used for the DialoGPT chat path)
model = AutoModelForCausalLM.from_pretrained(name)
tokenizer = AutoTokenizer.from_pretrained(name)
# Also load the GPT-2 specific classes from the same checkpoint (used for the plain generation path)
gpt2model = GPT2LMHeadModel.from_pretrained(name)
gpt2tokenizer = GPT2Tokenizer.from_pretrained(name)
class req(BaseModel):
    prompt: str
    length: int
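# Illustrative JSON body for POST /api matching the model above (values are only an example):
#   {"prompt": "Hello, how are you?", "length": 60}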
@app.get("/")
def read_root():
return FileResponse(path="templates/index.html", media_type="text/html")
@app.post("/api")
def read_root(data: req):
print("Prompt:", data.prompt)
print("Length:", data.length)
if name == "microsoft/DialoGPT-small" or name == "microsoft/DialoGPT-medium" or name == "microsoft/DialoGPT-large":
# tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
# model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
step = 1
# encode the new user input, add the eos_token and return a tensor in Pytorch
new_user_input_ids = tokenizer.encode(data.prompt + tokenizer.eos_token, return_tensors='pt')
# append the new user input tokens to the chat history
bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids
# generated a response while limiting the total chat history to 1000 tokens,
chat_history_ids = model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
generated_text = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
answer_data = { "answer": generated_text }
print("Answer:", generated_text)
return answer_data
    else:
        input_text = data.prompt
        # Tokenize the input text
        input_ids = gpt2tokenizer.encode(input_text, return_tensors="pt")
        # Generate output with beam search, blocking repeated 2-grams
        output_ids = gpt2model.generate(input_ids, max_length=data.length, num_beams=5, no_repeat_ngram_size=2)
        # Decode the generated ids back into text
        generated_text = gpt2tokenizer.decode(output_ids[0], skip_special_tokens=True)
        answer_data = {"answer": generated_text}
        print("Answer:", generated_text)
        return answer_data
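# A minimal local test sketch, assuming this file is saved as app.py and served with uvicorn
# (the host, port, and prompt values below are assumptions, not part of the original Space):
#   uvicorn app:app --host 0.0.0.0 --port 8000
#   curl -X POST http://localhost:8000/api \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello there!", "length": 50}'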