Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -24,6 +24,11 @@ try:
|
|
24 |
max_length: int = 1024
|
25 |
temperature: float = 0.7
|
26 |
|
|
|
|
|
|
|
|
|
|
|
27 |
class GenerateResponse(BaseModel):
|
28 |
generated_text: str
|
29 |
|
@@ -38,14 +43,17 @@ try:
|
|
38 |
formatted_prompt += f"<|system|>\n{request.system_prompt}</s>\n"
|
39 |
for message in request.messages:
|
40 |
if message.role == "system":
|
41 |
-
formatted_prompt += f"
|
42 |
elif message.role == "user":
|
43 |
-
formatted_prompt += f"
|
44 |
elif message.role == "assistant":
|
45 |
-
formatted_prompt += f"
|
46 |
|
47 |
# Add final assistant prefix for generation
|
48 |
-
formatted_prompt += "
|
|
|
|
|
|
|
49 |
|
50 |
output = pipe(
|
51 |
formatted_prompt,
|
@@ -55,14 +63,26 @@ try:
|
|
55 |
)[0]['generated_text']
|
56 |
|
57 |
# Extract only the newly generated assistant response
|
58 |
-
response_text = output.split("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
return GenerateResponse(generated_text=response_text)
|
|
|
|
|
61 |
#try:
|
62 |
#output = pipe(request.prompt)[0]['generated_text']
|
63 |
#return GenerateResponse(generated_text=output)
|
64 |
#except Exception as e:
|
65 |
-
#
|
66 |
|
67 |
except Exception as e:
|
68 |
print(f"Error: {e}")
|
|
|
24 |
max_length: int = 1024
|
25 |
temperature: float = 0.7
|
26 |
|
27 |
+
class TokenUsage(BaseModel):
|
28 |
+
prompt_tokens: int
|
29 |
+
completion_tokens: int
|
30 |
+
total_tokens: int
|
31 |
+
|
32 |
class GenerateResponse(BaseModel):
|
33 |
generated_text: str
|
34 |
|
|
|
43 |
formatted_prompt += f"<|system|>\n{request.system_prompt}</s>\n"
|
44 |
for message in request.messages:
|
45 |
if message.role == "system":
|
46 |
+
formatted_prompt += f"<system>\n{message.content}\n</system>\n"
|
47 |
elif message.role == "user":
|
48 |
+
formatted_prompt += f"<user>\n{message.content}\n</user>\n"
|
49 |
elif message.role == "assistant":
|
50 |
+
formatted_prompt += f"<assistant>\n{message.content}\n</assistant>\n"
|
51 |
|
52 |
# Add final assistant prefix for generation
|
53 |
+
formatted_prompt += "<assistant>\n"
|
54 |
+
|
55 |
+
# Count tokens in the prompt
|
56 |
+
prompt_tokens = len(tokenizer.encode(formatted_prompt))
|
57 |
|
58 |
output = pipe(
|
59 |
formatted_prompt,
|
|
|
63 |
)[0]['generated_text']
|
64 |
|
65 |
# Extract only the newly generated assistant response
|
66 |
+
response_text = output.split("<assistant>\n")[-1].split("</assistant>")[0]
|
67 |
+
|
68 |
+
# Count tokens in the completion
|
69 |
+
full_output_tokens = len(tokenizer.encode(output))
|
70 |
+
completion_tokens = full_output_tokens - prompt_tokens
|
71 |
+
|
72 |
+
usage = TokenUsage(
|
73 |
+
prompt_tokens=prompt_tokens,
|
74 |
+
completion_tokens=completion_tokens,
|
75 |
+
total_tokens=prompt_tokens + completion_tokens
|
76 |
+
)
|
77 |
|
78 |
+
return GenerateResponse(generated_text=response_text,usage=usage)
|
79 |
+
except Exception as e:
|
80 |
+
raise HTTPException(status_code=500, detail=str(e))
|
81 |
#try:
|
82 |
#output = pipe(request.prompt)[0]['generated_text']
|
83 |
#return GenerateResponse(generated_text=output)
|
84 |
#except Exception as e:
|
85 |
+
#
|
86 |
|
87 |
except Exception as e:
|
88 |
print(f"Error: {e}")
|