Spaces:
Runtime error
Runtime error
File size: 6,232 Bytes
c194fcd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import uuid
from abc import ABC, abstractclassmethod, abstractmethod

import gradio as gr
from gradio.components import Component
class BaseTCOModel(ABC):
    """Abstract base for a TCO model rendered as a group of gradio components.

    Every gradio ``Component`` assigned as an instance attribute is recorded,
    in assignment order, and exposed through ``get_components()``.
    """

    # TO DO: Find way to specify which component should be used for computing cost
    def __setattr__(self, name, value):
        """Store the attribute and register it when it is a gradio Component."""
        if isinstance(value, Component):
            # Create the registry on demand so an assignment that happens
            # before __init__ has run does not raise AttributeError.
            self.__dict__.setdefault("_components", []).append(value)
        self.__dict__[name] = value

    def __init__(self):
        # setdefault keeps any component that was registered before this
        # initialiser ran instead of silently discarding it.
        self.__dict__.setdefault("_components", [])

    def get_components(self) -> list[Component]:
        """Return the registered components in the order they were assigned."""
        return self.__dict__.setdefault("_components", [])

    def get_name(self):
        """Return the name given through ``set_name``."""
        return self.name

    @abstractmethod
    def compute_cost_per_token(self, *args):
        """Compute the cost for this model; subclasses define the parameters."""

    @abstractmethod
    def render(self):
        """Create the gradio components of this model as instance attributes."""

    def set_name(self, name):
        """Set the display name and derive an id from it.

        The id is the name followed by a freshly generated UUID4 string.
        """
        self.name = name
        self.id = name + str(uuid.uuid4())
class OpenAIModel(BaseTCOModel):
    """TCO model for the "(SaaS) OpenAI" option, priced per 1K input tokens."""

    def __init__(self):
        self.set_name("(SaaS) OpenAI")
        super().__init__()

    def render(self):
        """Create the model, context-size and input-length components (hidden)."""

        def on_model_change(model):
            # Context sizes offered depend on the selected model.
            if model == "GPT-4":
                print("GPT4")
                choices = ["8K", "32K"]
            else:
                print("GPT3.5")
                choices = ["4K", "16K"]
            # Also reset the selection: the previous value may not be among
            # the new choices (e.g. "8K" after switching to GPT-3.5 Turbo).
            return gr.Dropdown.update(choices=choices, value=choices[0])

        self.model = gr.Dropdown(["GPT-4", "GPT-3.5 Turbo"], value="GPT-4",
                                 label="OpenAI model",
                                 interactive=True, visible=False)
        self.context_length = gr.Dropdown(["8K", "32K"], value="8K", interactive=True,
                                          label="Context size",
                                          visible=False)
        self.model.change(on_model_change, inputs=self.model, outputs=self.context_length)
        self.input_length = gr.Number(350, label="Average number of input tokens",
                                      interactive=True, visible=False)

    def compute_cost_per_token(self, model, context_length, input_length):
        """Return the input-token cost attributed to one output token.

        Cost = (price per 1K input tokens) * input_length / 1000.

        Args:
            model: Selected model name, e.g. "GPT-4" or "GPT-3.5 Turbo".
            context_length: Selected context size, e.g. "8K".
            input_length: Average number of input tokens.

        Returns:
            The computed cost as a number. Combinations without an explicit
            price fall back to 0.003 per 1K input tokens.
        """
        # The dropdowns here are single-select, so the values are expected to
        # be plain strings; indexing them would yield only the first
        # character. Unwrap only when a list/tuple is actually passed.
        if isinstance(model, (list, tuple)):
            model = model[0]
        if isinstance(context_length, (list, tuple)):
            context_length = context_length[0]

        # Price per 1K input tokens, keyed by (model, context size). Both
        # spellings of the 3.5 model are accepted because the dropdown offers
        # "GPT-3.5 Turbo".
        price_per_1k_input_tokens = {
            ("GPT-4", "8K"): 0.03,
            ("GPT-4", "32K"): 0.06,
            ("GPT-3.5", "4K"): 0.0015,
            ("GPT-3.5 Turbo", "4K"): 0.0015,
        }
        cost_per_1k_input_tokens = price_per_1k_input_tokens.get(
            (model, context_length), 0.003
        )
        cost_per_output_token = cost_per_1k_input_tokens * input_length / 1000
        return cost_per_output_token
class OpenSourceModel(BaseTCOModel):
    """TCO model for the "(Open source) Deploy yourself" option."""

    def __init__(self):
        self.set_name("(Open source) Deploy yourself")
        super().__init__()

    def render(self):
        """Create the model, VM, throughput, input-length and usage components.

        All components are created hidden (``visible=False``).
        """
        vm_choices = ["1x Nvidia A100 (Azure NC24ads A100 v4)",
                      "2x Nvidia A100 (Azure NC48ads A100 v4)"]

        def on_model_change(model):
            # "Llama 2 7B" may use every VM; any other model excludes the
            # single-A100 instance.
            if model == "Llama 2 7B":
                return gr.Dropdown.update(choices=vm_choices)
            not_supported_vm = ["1x Nvidia A100 (Azure NC24ads A100 v4)"]
            choices = [x for x in vm_choices if x not in not_supported_vm]
            return gr.Dropdown.update(choices=choices)

        def on_vm_change(model, vm):
            # TO DO: load info from CSV
            tokens_per_second = {
                ("Llama 2 7B", "1x Nvidia A100 (Azure NC24ads A100 v4)"): 900,
                ("Llama 2 7B", "2x Nvidia A100 (Azure NC48ads A100 v4)"): 1800,
            }
            if (model, vm) in tokens_per_second:
                return gr.Number.update(value=tokens_per_second[(model, vm)])
            # No figure is known for other combinations; None is returned,
            # exactly as the original implicit fall-through did.
            return None

        self.model = gr.Dropdown(["Llama 2 7B", "Llama 2 70B"], value="Llama 2 7B", visible=False)
        self.vm = gr.Dropdown(vm_choices,
                              visible=False,
                              label="Instance of VM with GPU"
                              )
        self.tokens_per_second = gr.Number(visible=False,
                                           label="Number of tokens per second for this specific model and VM instance",
                                           interactive=False
                                           )
        self.input_length = gr.Number(350, label="Average number of input tokens",
                                      interactive=True, visible=False)
        self.model.change(on_model_change, inputs=self.model, outputs=self.vm)
        self.vm.change(on_vm_change, inputs=[self.model, self.vm], outputs=self.tokens_per_second)
        self.maxed_out = gr.Slider(minimum=1, value=80, label="% maxed out",
                                   info="How much the GPU is fully used.",
                                   interactive=True,
                                   visible=False)

    def compute_cost_per_token(self, *component_values, tokens_per_second=None, maxed_out=None):
        """Return the cost figure for this model (currently just ``maxed_out``).

        ``render`` registers five components (model, vm, tokens_per_second,
        input_length, maxed_out), and ``ModelPage.compute_cost_per_token``
        forwards one positional value per registered component. The previous
        two-parameter signature therefore raised ``TypeError`` on that call.
        Both call shapes are accepted now:

        * ``compute_cost_per_token(tokens_per_second, maxed_out)``, positional
          or by keyword (legacy form);
        * ``compute_cost_per_token(model, vm, tokens_per_second, input_length,
          maxed_out)`` with one value per component.

        In both positional forms ``maxed_out`` is the last value.

        Returns:
            The ``maxed_out`` value, unchanged. ``tokens_per_second`` is not
            used yet; the actual cost formula is still to be written.
        """
        # An explicit keyword wins; otherwise take the trailing positional value.
        if maxed_out is None and len(component_values) >= 2:
            maxed_out = component_values[-1]
        return maxed_out
class ModelPage:
    """Holds one instance of each TCO model class and drives them as a group."""

    def __init__(self, Models: list[type[BaseTCOModel]]):
        """Instantiate every class in ``Models`` (each is called with no arguments)."""
        self.models: list[BaseTCOModel] = [Model() for Model in Models]

    def render(self):
        """Render every model, in the order the classes were given."""
        for model in self.models:
            model.render()

    def get_all_components(self) -> list[Component]:
        """Return the components of all models, concatenated in model order."""
        return [
            component
            for model in self.models
            for component in model.get_components()
        ]

    def make_model_visible(self, name: str):
        """Build one visibility update per component.

        Components of the model whose name equals ``name`` get
        ``visible=True``; all others get ``visible=False``. The result is
        ordered like ``get_all_components()``.
        """
        output = []
        for model in self.models:
            is_selected = model.get_name() == name
            output += [gr.update(visible=is_selected)] * len(model.get_components())
        return output

    def compute_cost_per_token(self, *args):
        """Dispatch the component values to the currently selected model.

        Args:
            *args: The component values laid out like ``get_all_components()``,
                followed by the selected model name as the last argument.

        Returns:
            A message with the selected model's result, or ``None`` when no
            arguments are given or no model has the requested name.
        """
        if not args:
            # Nothing to dispatch; avoids IndexError on args[-1].
            return None
        current_model = args[-1]
        begin = 0
        for model in self.models:
            # Each model owns a contiguous slice of the positional values.
            model_n_args = len(model.get_components())
            if current_model == model.get_name():
                model_args = args[begin:begin + model_n_args]
                model_tco = model.compute_cost_per_token(*model_args)
                return f"Model {current_model} has TCO {model_tco}"
            begin += model_n_args
        return None
|