Philippe Kaplan committed
Commit 1a7e5d6 · 1 Parent(s): 027d8aa

try fast api

Files changed (1):
  1. app.py (+20 -2)
app.py CHANGED

@@ -1,4 +1,8 @@
 from huggingface_hub import InferenceClient
+from fastapi import FastAPI
+from pydantic import BaseModel
+
+
 import gradio as gr
 
 client = InferenceClient(
@@ -16,8 +20,7 @@ def format_prompt(message, history, system_message=None):
     return prompt
 
 def generate(
-    prompt, history, system_message=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
-):
+    prompt, history, system_message=None, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -88,6 +91,21 @@ additional_inputs=[
     )
 ]
 
+class Item(BaseModel):
+    prompt: str
+    history: list
+    system_prompt: str
+    temperature: float = 0.0
+    max_new_tokens: int = 1048
+    top_p: float = 0.15
+    repetition_penalty: float = 1.0
+app = FastAPI()
+
+@app.post("/generate/")
+async def generate_text(item: Item):
+    return {"response": generate(item.prompt, item.history, item.system_prompt, item.temperature, item.max_new_tokens, item.top_p, item.repetition_penalty)}
+
+
 
 gr.ChatInterface(
     fn=generate,
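
The commit exposes generate() over HTTP via the new POST /generate/ route. A minimal client sketch, assuming the app is served with uvicorn on localhost:8000 (the host, port, and all payload values below are illustrative assumptions, not part of the commit):

import requests

# Payload fields mirror the Item model in app.py: prompt, history, and
# system_prompt are required; the rest fall back to Item's defaults.
payload = {
    "prompt": "What is Gradio?",
    "history": [],
    "system_prompt": "You are a helpful assistant.",
    "temperature": 0.7,
    "max_new_tokens": 256,
}

# http://localhost:8000 assumes something like `uvicorn app:app --port 8000`.
resp = requests.post("http://localhost:8000/generate/", json=payload)
resp.raise_for_status()
# The endpoint wraps generate()'s return value under the "response" key.
print(resp.json()["response"])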
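Note that app.py now builds both a FastAPI app and a gr.ChatInterface, which would otherwise run as two separate servers. One way to serve the UI and the API from a single process is Gradio's mount_gradio_app helper; a sketch under that assumption (this is not what the commit does, and `demo` plus the echo handler are hypothetical stand-ins):

from fastapi import FastAPI
import gradio as gr

app = FastAPI()

def generate(message, history):
    # Hypothetical stand-in for the real generate() in app.py.
    return f"echo: {message}"

demo = gr.ChatInterface(fn=generate)
# Mount the Gradio UI under /ui; FastAPI routes such as /generate/ stay available.
app = gr.mount_gradio_app(app, demo, path="/ui")
# Run everything with: uvicorn app:app --port 8000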