Spaces:
Running
Running
Philippe Kaplan
committed on
Commit
·
1a7e5d6
1
Parent(s):
027d8aa
try fast api
Browse files
app.py
CHANGED
@@ -1,4 +1,8 @@
|
|
1 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
|
|
2 |
import gradio as gr
|
3 |
|
4 |
client = InferenceClient(
|
@@ -16,8 +20,7 @@ def format_prompt(message, history, system_message=None):
|
|
16 |
return prompt
|
17 |
|
18 |
def generate(
|
19 |
-
prompt, history, system_message=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0
|
20 |
-
):
|
21 |
temperature = float(temperature)
|
22 |
if temperature < 1e-2:
|
23 |
temperature = 1e-2
|
@@ -88,6 +91,21 @@ additional_inputs=[
|
|
88 |
)
|
89 |
]
|
90 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
92 |
gr.ChatInterface(
|
93 |
fn=generate,
|
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
+
from fastapi import FastAPI
|
3 |
+
from pydantic import BaseModel
|
4 |
+
|
5 |
+
|
6 |
import gradio as gr
|
7 |
|
8 |
client = InferenceClient(
|
|
|
20 |
return prompt
|
21 |
|
22 |
def generate(
|
23 |
+
prompt, history, system_message=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
|
|
|
24 |
temperature = float(temperature)
|
25 |
if temperature < 1e-2:
|
26 |
temperature = 1e-2
|
|
|
91 |
)
|
92 |
]
|
93 |
|
94 |
+
class Item(BaseModel):
    """Request body for the ``POST /generate/`` endpoint.

    Mirrors the parameters of the module-level ``generate`` helper; the
    sampling fields carry API-side defaults so callers only need to send
    ``prompt``, ``history`` and ``system_prompt``.
    """

    # Core inputs
    prompt: str
    history: list
    system_prompt: str
    # Sampling controls (defaults differ from generate()'s own defaults
    # on purpose: near-greedy decoding for the HTTP API)
    temperature: float = 0.0
    max_new_tokens: int = 1048  # NOTE(review): 1048 looks like a typo for 1024 — confirm
    top_p: float = 0.15
    repetition_penalty: float = 1.0
|
102 |
+
app = FastAPI()


@app.post("/generate/")
async def generate_text(item: Item):
    """Run text generation for an :class:`Item` request.

    Returns a JSON object ``{"response": <generated text>}`` produced by the
    module-level ``generate`` helper.
    """
    # BUG FIX: the model field is named ``system_prompt`` (see Item), but this
    # previously read ``item.system_message``, raising AttributeError on every
    # request. Arguments are passed positionally, matching generate()'s
    # (prompt, history, system_message, temperature, ...) signature.
    response = generate(
        item.prompt,
        item.history,
        item.system_prompt,
        item.temperature,
        item.max_new_tokens,
        item.top_p,
        item.repetition_penalty,
    )
    return {"response": response}
|
107 |
+
|
108 |
+
|
109 |
|
110 |
gr.ChatInterface(
|
111 |
fn=generate,
|