petertill committed on
Commit
12e43e4
·
verified ·
1 Parent(s): 3e90d44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -7
app.py CHANGED
@@ -24,6 +24,11 @@ try:
24
  max_length: int = 1024
25
  temperature: float = 0.7
26
 
 
 
 
 
 
27
  class GenerateResponse(BaseModel):
28
  generated_text: str
29
 
@@ -38,14 +43,17 @@ try:
38
  formatted_prompt += f"<|system|>\n{request.system_prompt}</s>\n"
39
  for message in request.messages:
40
  if message.role == "system":
41
- formatted_prompt += f"<|system|>\n{message.content}</s>\n"
42
  elif message.role == "user":
43
- formatted_prompt += f"<|user|>\n{message.content}</s>\n"
44
  elif message.role == "assistant":
45
- formatted_prompt += f"<|assistant|>\n{message.content}</s>\n"
46
 
47
  # Add final assistant prefix for generation
48
- formatted_prompt += "<|assistant|>\n"
 
 
 
49
 
50
  output = pipe(
51
  formatted_prompt,
@@ -55,14 +63,26 @@ try:
55
  )[0]['generated_text']
56
 
57
  # Extract only the newly generated assistant response
58
- response_text = output.split("<|assistant|>\n")[-1].split("</s>")[0]
 
 
 
 
 
 
 
 
 
 
59
 
60
- return GenerateResponse(generated_text=response_text)
 
 
61
  #try:
62
  #output = pipe(request.prompt)[0]['generated_text']
63
  #return GenerateResponse(generated_text=output)
64
  #except Exception as e:
65
- #raise HTTPException(status_code=500, detail=str(e))
66
 
67
  except Exception as e:
68
  print(f"Error: {e}")
 
24
  max_length: int = 1024
25
  temperature: float = 0.7
26
 
27
+ class TokenUsage(BaseModel):
28
+ prompt_tokens: int
29
+ completion_tokens: int
30
+ total_tokens: int
31
+
32
  class GenerateResponse(BaseModel):
33
  generated_text: str
34
 
 
43
  formatted_prompt += f"<|system|>\n{request.system_prompt}</s>\n"
44
  for message in request.messages:
45
  if message.role == "system":
46
+ formatted_prompt += f"<system>\n{message.content}\n</system>\n"
47
  elif message.role == "user":
48
+ formatted_prompt += f"<user>\n{message.content}\n</user>\n"
49
  elif message.role == "assistant":
50
+ formatted_prompt += f"<assistant>\n{message.content}\n</assistant>\n"
51
 
52
  # Add final assistant prefix for generation
53
+ formatted_prompt += "<assistant>\n"
54
+
55
+ # Count tokens in the prompt
56
+ prompt_tokens = len(tokenizer.encode(formatted_prompt))
57
 
58
  output = pipe(
59
  formatted_prompt,
 
63
  )[0]['generated_text']
64
 
65
  # Extract only the newly generated assistant response
66
+ response_text = output.split("<assistant>\n")[-1].split("</assistant>")[0]
67
+
68
+ # Count tokens in the completion
69
+ full_output_tokens = len(tokenizer.encode(output))
70
+ completion_tokens = full_output_tokens - prompt_tokens
71
+
72
+ usage = TokenUsage(
73
+ prompt_tokens=prompt_tokens,
74
+ completion_tokens=completion_tokens,
75
+ total_tokens=prompt_tokens + completion_tokens
76
+ )
77
 
78
+ return GenerateResponse(generated_text=response_text,usage=usage)
79
+ except Exception as e:
80
+ raise HTTPException(status_code=500, detail=str(e))
81
  #try:
82
  #output = pipe(request.prompt)[0]['generated_text']
83
  #return GenerateResponse(generated_text=output)
84
  #except Exception as e:
85
+ #
86
 
87
  except Exception as e:
88
  print(f"Error: {e}")