RocksPers committed on
Commit
52af2c5
·
verified ·
1 Parent(s): 63587d0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -0
app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from pydantic import BaseModel
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
app = FastAPI()

# Load the model and tokenizer once, at import time, so every request reuses them.
# Device that request tensors are moved to: the GPU when CUDA is usable, else the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "nikravan/glm-4vq"
# SECURITY: trust_remote_code=True allows transformers to execute Python code
# shipped inside the model repository. Only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# The model is loaded from the same repository as the tokenizer, so it gets the
# same trust_remote_code setting; without it, a repository that relies on custom
# modeling code cannot be loaded non-interactively.
# NOTE(review): presumably this repo ships custom modeling code -- confirm.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
13
+
14
# Request body accepted by the /predict endpoint.
class Query(BaseModel):
    # Text of the question that is handed to the tokenizer.
    question: str
16
+
17
@app.post("/predict")
def predict(data: Query):
    """Generate a text answer for the submitted question.

    Args:
        data: Request body; ``data.question`` is the text that gets tokenized
            and passed to the model.

    Returns:
        A dict with a single key ``"answer"`` holding the decoded text of the
        first generated sequence.
    """
    # The model is placed by device_map="auto", so send the inputs to wherever
    # the model actually lives rather than to a separately computed device.
    inputs = tokenizer(data.question, return_tensors="pt").to(model.device)
    # max_new_tokens budgets only the generated continuation; max_length also
    # counts the prompt tokens, which left long questions with little or no
    # room for an answer.
    outputs = model.generate(**inputs, max_new_tokens=200)
    # Drop special marker tokens so they do not leak into the API response.
    return {"answer": tokenizer.decode(outputs[0], skip_special_tokens=True)}