garrethlee committed on
Commit
065f5a8
·
verified ·
1 Parent(s): b55a3eb

Add gemma model

Browse files
Files changed (1) hide show
  1. app.py +21 -16
app.py CHANGED
@@ -5,7 +5,20 @@ from huggingface_hub import InferenceClient
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  @spaces.GPU
11
  def respond(
@@ -16,7 +29,6 @@ def respond(
16
  temperature,
17
  top_p,
18
  ):
19
- messages = [{"role": "system", "content": system_message}]
20
 
21
  for val in history:
22
  if val[0]:
@@ -26,20 +38,13 @@ def respond(
26
 
27
  messages.append({"role": "user", "content": message})
28
 
29
- response = ""
30
-
31
- for message in client.chat_completion(
32
  messages,
33
- max_tokens=max_tokens,
34
- stream=True,
35
- temperature=temperature,
36
- top_p=top_p,
37
- ):
38
- token = message.choices[0].delta.content
39
-
40
- response += token
41
- yield response
42
-
43
 
44
  """
45
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -47,7 +52,7 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
47
  demo = gr.ChatInterface(
48
  respond,
49
  additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
  gr.Slider(
 
5
  """
6
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
  """
8
+
9
+ model_id = "GoToCompany/gemma2-9b-cpt-sahabatai-v1-instruct"
10
+
11
+ pipeline = transformers.pipeline(
12
+ "text-generation",
13
+ model=model_id,
14
+ model_kwargs={"torch_dtype": torch.bfloat16},
15
+ device_map="auto",
16
+ )
17
+
18
+ terminators = [
19
+ pipeline.tokenizer.eos_token_id,
20
+ pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
21
+ ]
22
 
23
  @spaces.GPU
24
  def respond(
 
29
  temperature,
30
  top_p,
31
  ):
 
32
 
33
  for val in history:
34
  if val[0]:
 
38
 
39
  messages.append({"role": "user", "content": message})
40
 
41
+ outputs = pipeline(
 
 
42
  messages,
43
+ max_new_tokens=256,
44
+ eos_token_id=terminators,
45
+ )
46
+
47
+ yield outputs[0]["generated_text"][-1]
 
 
 
 
 
48
 
49
  """
50
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
52
  demo = gr.ChatInterface(
53
  respond,
54
  additional_inputs=[
55
+ gr.Textbox(value="Kamu adalah seorang asisten yang baik", label="System message"),
56
  gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
57
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
58
  gr.Slider(