rtrm HF Staff nsarrazin HF Staff committed on
Commit
4ac7321
·
unverified ·
1 Parent(s): a8a9533

Kubernetes infra (#1088)

Browse files

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): create kube infra

* feat(chart): add hpa

* feat(chart): add service monitor

* feat(chart): test

* feat(chart): fix lint

* feat(chart): remove pm2

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* feat(chart): fix

* update replicas count

* json logs

* use internal image for prod env

* Add LOG_LEVEL in prod yaml

* Get rid of unused staging env

* add an image build for internal registry

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

.github/workflows/deploy-prod.yml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Deploy to k8s
2
+ on:
3
+ # run this workflow manually from the Actions tab
4
+ workflow_dispatch:
5
+
6
+ jobs:
7
+ build-and-publish-huggingchat-image:
8
+ runs-on: ubuntu-latest
9
+ steps:
10
+ - name: Checkout
11
+ uses: actions/checkout@v4
12
+
13
+ - name: Tailscale
14
+ uses: huggingface/tailscale-action@main
15
+ with:
16
+ authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
17
+
18
+ - name: Extract package version
19
+ id: package-version
20
+ run: |
21
+ VERSION=$(jq -r .version package.json)
22
+ echo "VERSION=$VERSION" >> $GITHUB_OUTPUT
23
+ MAJOR=$(echo $VERSION | cut -d '.' -f1)
24
+ echo "MAJOR=$MAJOR" >> $GITHUB_OUTPUT
25
+ MINOR=$(echo $VERSION | cut -d '.' -f1).$(echo $VERSION | cut -d '.' -f2)
26
+ echo "MINOR=$MINOR" >> $GITHUB_OUTPUT
27
+
28
+ - name: Docker metadata
29
+ id: meta
30
+ uses: docker/metadata-action@v5
31
+ with: # image name must live on the registry this job logs in to (registry.internal.huggingface.tech)
32
+ images: | # NOTE(review): path mirrors image.repository + "/" + image.name from chart/env/prod.yaml - confirm
33
+ registry.internal.huggingface.tech/chat-ui/chat-ui
34
+ tags: |
35
+ type=raw,value=${{ steps.package-version.outputs.VERSION }},enable=${{github.event_name == 'release'}}
36
+ type=raw,value=${{ steps.package-version.outputs.MAJOR }},enable=${{github.event_name == 'release'}}
37
+ type=raw,value=${{ steps.package-version.outputs.MINOR }},enable=${{github.event_name == 'release'}}
38
+ type=raw,value=latest,enable={{is_default_branch}}
39
+ type=sha,enable={{is_default_branch}}
40
+
41
+ - name: Set up QEMU
42
+ uses: docker/setup-qemu-action@v3
43
+
44
+ - name: Set up Docker Buildx
45
+ uses: docker/setup-buildx-action@v3
46
+
47
+ - name: Login to Registry
48
+ uses: docker/login-action@v2
49
+ with:
50
+ registry: registry.internal.huggingface.tech
51
+ username: ${{ secrets.DOCKER_INTERNAL_USERNAME }}
52
+ password: ${{ secrets.DOCKER_INTERNAL_PASSWORD }}
53
+
54
+ - name: Build and Publish Docker Image without DB
55
+ uses: docker/build-push-action@v5
56
+ with:
57
+ context: .
58
+ file: Dockerfile
59
+ push: ${{ github.event_name != 'pull_request' }}
60
+ tags: ${{ steps.meta.outputs.tags }}
61
+ labels: ${{ steps.meta.outputs.labels }}
62
+ platforms: linux/amd64
63
+ cache-to: type=gha,mode=max,scope=amd64
64
+ cache-from: type=gha,scope=amd64
65
+ provenance: false
66
+ build-args: |
67
+ INCLUDE_DB=false
68
+ APP_BASE=/chat
69
+ PUBLIC_APP_COLOR=yellow
.github/workflows/deploy-release.yml CHANGED
@@ -1,4 +1,4 @@
1
- name: Deploy to production
2
  on:
3
  # run this workflow manually from the Actions tab
4
  workflow_dispatch:
 
1
+ name: Deploy to production spaces
2
  on:
3
  # run this workflow manually from the Actions tab
4
  workflow_dispatch:
.github/workflows/deploy-staging.yml DELETED
@@ -1,24 +0,0 @@
1
- name: Deploy to staging environment
2
- on:
3
- push:
4
- branches: [main]
5
-
6
- # to run this workflow manually from the Actions tab
7
- workflow_dispatch:
8
-
9
- jobs:
10
- sync-to-hub:
11
- runs-on: ubuntu-latest
12
- steps:
13
- - name: Check large files
14
- uses: ActionsDesk/[email protected]
15
- with:
16
- filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
17
- - uses: actions/checkout@v3
18
- with:
19
- fetch-depth: 0
20
- lfs: true
21
- - name: Push to hub
22
- env:
23
- HF_DEPLOYMENT_TOKEN: ${{ secrets.HF_DEPLOYMENT_TOKEN }}
24
- run: git push https://nsarrazin:[email protected]/spaces/huggingchat/chat-ui-staging main
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.prettierignore CHANGED
@@ -3,6 +3,7 @@ node_modules
3
  /build
4
  /.svelte-kit
5
  /package
 
6
  .env
7
  .env.*
8
  !.env.example
 
3
  /build
4
  /.svelte-kit
5
  /package
6
+ /chart
7
  .env
8
  .env.*
9
  !.env.example
chart/Chart.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ apiVersion: v2
2
+ name: chat-ui
3
+ version: 0.0.0-latest
4
+ type: application
5
+ icon: https://huggingface.co/front/assets/huggingface_logo-noborder.svg
chart/env/prod.yaml ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: registry.internal.huggingface.tech/chat-ui
3
+ name: chat-ui
4
+
5
+ nodeSelector:
6
+ role-hub-utils: "true"
7
+
8
+ tolerations:
9
+ - key: CriticalAddonsOnly
10
+ operator: Equal
11
+
12
+ ingress:
13
+ path: "/chat"
14
+ annotations:
15
+ external-dns.alpha.kubernetes.io/hostname: "chat-ui.hub-alb.huggingface.tech"
16
+ alb.ingress.kubernetes.io/healthcheck-path: "/healthcheck"
17
+ alb.ingress.kubernetes.io/listen-ports: "[{\"HTTP\": 80}, {\"HTTPS\": 443}]"
18
+ alb.ingress.kubernetes.io/group.name: "hub-prod"
19
+ alb.ingress.kubernetes.io/scheme: "internet-facing"
20
+ alb.ingress.kubernetes.io/ssl-redirect: "443"
21
+ alb.ingress.kubernetes.io/tags: "Env=prod,Project=hub,Terraform=true"
22
+ alb.ingress.kubernetes.io/target-node-labels: "role-hub-utils=true"
23
+ kubernetes.io/ingress.class: "alb"
24
+
25
+ envVars:
26
+ ALTERNATIVE_REDIRECT_URLS: '["huggingchat://login/callback"]'
27
+ APP_BASE: "/chat"
28
+ ENABLE_ASSISTANTS: "true"
29
+ ENABLE_ASSISTANTS_RAG: "true"
30
+ EXPOSE_API: "true"
31
+ MESSAGES_BEFORE_LOGIN: 0
32
+ METRICS_PORT: 5565
33
+ LOG_LEVEL: "debug"
34
+ MODELS: >
35
+ [
36
+ {
37
+ "name" : "CohereForAI/c4ai-command-r-plus",
38
+ "tokenizer": "Xenova/c4ai-command-r-v01-tokenizer",
39
+ "description": "Command R+ is Cohere's latest LLM and is the first open weight model to beat GPT4 in the Chatbot Arena!",
40
+ "modelUrl": "https://huggingface.co/CohereForAI/c4ai-command-r-plus",
41
+ "websiteUrl": "https://docs.cohere.com/docs/command-r-plus",
42
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/cohere-logo.png",
43
+ "parameters": {
44
+ "stop": ["<|END_OF_TURN_TOKEN|>"],
45
+ "truncate" : 28672,
46
+ "max_new_tokens" : 4096,
47
+ "temperature" : 0.3
48
+ },
49
+ "promptExamples" : [
50
+ {
51
+ "title": "Write an email from bullet list",
52
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
53
+ }, {
54
+ "title": "Code a snake game",
55
+ "prompt": "Code a basic snake game in python, give explanations for each step."
56
+ }, {
57
+ "title": "Assist in a task",
58
+ "prompt": "How do I make a delicious lemon cheesecake?"
59
+ }
60
+ ]
61
+ },
62
+ {
63
+ "name" : "meta-llama/Meta-Llama-3-70B-Instruct",
64
+ "description": "Generation over generation, Meta Llama 3 demonstrates state-of-the-art performance on a wide range of industry benchmarks and offers new capabilities, including improved reasoning.",
65
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/meta-logo.png",
66
+ "modelUrl": "https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct",
67
+ "websiteUrl": "https://llama.meta.com/llama3/",
68
+ "tokenizer" : "philschmid/meta-llama-3-tokenizer",
69
+ "promptExamples" : [
70
+ {
71
+ "title": "Write an email from bullet list",
72
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
73
+ }, {
74
+ "title": "Code a snake game",
75
+ "prompt": "Code a basic snake game in python, give explanations for each step."
76
+ }, {
77
+ "title": "Assist in a task",
78
+ "prompt": "How do I make a delicious lemon cheesecake?"
79
+ }
80
+ ],
81
+ "parameters": {
82
+ "stop": ["<|eot_id|>"],
83
+ "truncate": 6144,
84
+ "max_new_tokens": 2047
85
+ }
86
+ },
87
+ {
88
+ "name" : "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
89
+ "tokenizer": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
90
+ "description": "Zephyr 141B-A35B is a fine-tuned version of Mistral 8x22B, trained using ORPO, a novel alignment algorithm.",
91
+ "modelUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
92
+ "websiteUrl": "https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
93
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/zephyr-logo.png",
94
+ "parameters": {
95
+ "truncate" : 24576,
96
+ "max_new_tokens" : 8192,
97
+ },
98
+ "preprompt" : "You are Zephyr, an assistant developed by KAIST AI, Argilla, and Hugging Face. You should give concise responses to very simple questions, but provide thorough responses to more complex and open-ended questions. You are happy to help with writing, analysis, question answering, math, coding, and all sorts of other tasks.",
99
+ "promptExamples" : [
100
+ {
101
+ "title": "Write a poem",
102
+ "prompt": "Write a poem to help me remember the first 10 elements on the periodic table, giving each element its own line."
103
+ }, {
104
+ "title": "Code a snake game",
105
+ "prompt": "Code a basic snake game in python, give explanations for each step."
106
+ }, {
107
+ "title": "Assist in a task",
108
+ "prompt": "How do I make a delicious lemon cheesecake?"
109
+ }
110
+ ]
111
+ },
112
+ {
113
+ "name" : "mistralai/Mixtral-8x7B-Instruct-v0.1",
114
+ "description" : "The latest MoE model from Mistral AI! 8x7B and outperforms Llama 2 70B in most benchmarks.",
115
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
116
+ "websiteUrl" : "https://mistral.ai/news/mixtral-of-experts/",
117
+ "modelUrl": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
118
+ "tokenizer": "mistralai/Mixtral-8x7B-Instruct-v0.1",
119
+ "preprompt" : "",
120
+ "chatPromptTemplate": "<s> {{#each messages}}{{#ifUser}}[INST]{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}} {{content}} [/INST]{{/ifUser}}{{#ifAssistant}} {{content}}</s> {{/ifAssistant}}{{/each}}",
121
+ "parameters" : {
122
+ "temperature" : 0.6,
123
+ "top_p" : 0.95,
124
+ "repetition_penalty" : 1.2,
125
+ "top_k" : 50,
126
+ "truncate" : 24576,
127
+ "max_new_tokens" : 8192,
128
+ "stop" : ["</s>"]
129
+ },
130
+ "promptExamples" : [
131
+ {
132
+ "title": "Write an email from bullet list",
133
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
134
+ }, {
135
+ "title": "Code a snake game",
136
+ "prompt": "Code a basic snake game in python, give explanations for each step."
137
+ }, {
138
+ "title": "Assist in a task",
139
+ "prompt": "How do I make a delicious lemon cheesecake?"
140
+ }
141
+ ]
142
+ },
143
+ {
144
+ "name" : "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
145
+ "description" : "Nous Hermes 2 Mixtral 8x7B DPO is the new flagship Nous Research model trained over the Mixtral 8x7B MoE LLM.",
146
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
147
+ "websiteUrl" : "https://nousresearch.com/",
148
+ "modelUrl": "https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
149
+ "tokenizer": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
150
+ "chatPromptTemplate" : "{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}",
151
+ "promptExamples": [
152
+ {
153
+ "title": "Write an email from bullet list",
154
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
155
+ }, {
156
+ "title": "Code a snake game",
157
+ "prompt": "Code a basic snake game in python, give explanations for each step."
158
+ }, {
159
+ "title": "Assist in a task",
160
+ "prompt": "How do I make a delicious lemon cheesecake?"
161
+ }
162
+ ],
163
+ "parameters": {
164
+ "temperature": 0.7,
165
+ "top_p": 0.95,
166
+ "repetition_penalty": 1,
167
+ "top_k": 50,
168
+ "truncate": 24576,
169
+ "max_new_tokens": 2048,
170
+ "stop": ["<|im_end|>"]
171
+ }
172
+ },
173
+ {
174
+ "name" : "google/gemma-1.1-7b-it",
175
+ "description": "Gemma 7B 1.1 is the latest release in the Gemma family of lightweight models built by Google, trained using a novel RLHF method.",
176
+ "websiteUrl" : "https://blog.google/technology/developers/gemma-open-models/",
177
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/google-logo.png",
178
+ "modelUrl": "https://huggingface.co/google/gemma-1.1-7b-it",
179
+ "preprompt": "",
180
+ "chatPromptTemplate" : "{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}",
181
+ "promptExamples": [
182
+ {
183
+ "title": "Write an email from bullet list",
184
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
185
+ }, {
186
+ "title": "Code a snake game",
187
+ "prompt": "Code a basic snake game in python, give explanations for each step."
188
+ }, {
189
+ "title": "Assist in a task",
190
+ "prompt": "How do I make a delicious lemon cheesecake?"
191
+ }
192
+ ],
193
+ "parameters": {
194
+ "do_sample": true,
195
+ "truncate": 7168,
196
+ "max_new_tokens": 1024,
197
+ "stop" : ["<end_of_turn>"]
198
+ }
199
+ },
200
+
201
+ {
202
+ "name": "mistralai/Mistral-7B-Instruct-v0.2",
203
+ "displayName": "mistralai/Mistral-7B-Instruct-v0.2",
204
+ "description": "Mistral 7B is a new Apache 2.0 model, released by Mistral AI that outperforms Llama2 13B in benchmarks.",
205
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
206
+ "websiteUrl": "https://mistral.ai/news/announcing-mistral-7b/",
207
+ "modelUrl": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
208
+ "tokenizer": "mistralai/Mistral-7B-Instruct-v0.2",
209
+ "preprompt": "",
210
+ "chatPromptTemplate" : "<s>{{#each messages}}{{#ifUser}}[INST] {{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}} [/INST]{{/ifUser}}{{#ifAssistant}}{{content}}</s>{{/ifAssistant}}{{/each}}",
211
+ "parameters": {
212
+ "temperature": 0.3,
213
+ "top_p": 0.95,
214
+ "repetition_penalty": 1.2,
215
+ "top_k": 50,
216
+ "truncate": 3072,
217
+ "max_new_tokens": 1024,
218
+ "stop": ["</s>"]
219
+ },
220
+ "promptExamples": [
221
+ {
222
+ "title": "Write an email from bullet list",
223
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
224
+ }, {
225
+ "title": "Code a snake game",
226
+ "prompt": "Code a basic snake game in python, give explanations for each step."
227
+ }, {
228
+ "title": "Assist in a task",
229
+ "prompt": "How do I make a delicious lemon cheesecake?"
230
+ }
231
+ ]
232
+ },
233
+ {
234
+ "name": "microsoft/Phi-3-mini-4k-instruct",
235
+ "tokenizer": "microsoft/Phi-3-mini-4k-instruct",
236
+ "description" : "Phi-3 Mini-4K-Instruct is a 3.8B parameters, lightweight, state-of-the-art open model built upon datasets used for Phi-2.",
237
+ "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
238
+ "modelUrl": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
239
+ "websiteUrl": "https://azure.microsoft.com/en-us/blog/introducing-phi-3-redefining-whats-possible-with-slms/",
240
+ "preprompt": "",
241
+ "chatPromptTemplate": "<s>{{preprompt}}{{#each messages}}{{#ifUser}}<|user|>\n{{content}}<|end|>\n<|assistant|>\n{{/ifUser}}{{#ifAssistant}}{{content}}<|end|>\n{{/ifAssistant}}{{/each}}",
242
+ "parameters": {
243
+ "stop": ["<|end|>", "<|endoftext|>", "<|assistant|>"],
244
+ "max_new_tokens": 1024,
245
+ "truncate": 3071
246
+ },
247
+ "promptExamples": [
248
+ {
249
+ "title": "Write an email from bullet list",
250
+ "prompt": "As a restaurant owner, write a professional email to the supplier to get these products every week: \n\n- Wine (x10)\n- Eggs (x24)\n- Bread (x12)"
251
+ }, {
252
+ "title": "Code a snake game",
253
+ "prompt": "Code a basic snake game in python, give explanations for each step."
254
+ }, {
255
+ "title": "Assist in a task",
256
+ "prompt": "How do I make a delicious lemon cheesecake?"
257
+ }
258
+ ]
259
+ },
260
+ {
261
+ "name": "meta-llama/Meta-Llama-3-8B-Instruct",
262
+ "tokenizer" : "philschmid/meta-llama-3-tokenizer",
263
+ "parameters": {
264
+ "temperature": 0.1,
265
+ "stop": ["<|eot_id|>"],
266
+ },
267
+ "unlisted": true
268
+ }
269
+ ]
270
+ NODE_ENV: "prod"
271
+ NODE_LOG_STRUCTURED_DATA: true
272
+ OLD_MODELS: >
273
+ [
274
+ { "name": "bigcode/starcoder" },
275
+ { "name": "OpenAssistant/oasst-sft-6-llama-30b-xor" },
276
+ { "name": "HuggingFaceH4/zephyr-7b-alpha" },
277
+ { "name": "openchat/openchat_3.5" },
278
+ { "name": "openchat/openchat-3.5-1210" },
279
+ { "name": "tiiuae/falcon-180B-chat" },
280
+ { "name": "codellama/CodeLlama-34b-Instruct-hf" },
281
+ { "name": "google/gemma-7b-it" },
282
+ { "name": "meta-llama/Llama-2-70b-chat-hf" },
283
+ { "name": "codellama/CodeLlama-70b-Instruct-hf" },
284
+ { "name": "openchat/openchat-3.5-0106" }
285
+ ]
286
+ PUBLIC_ORIGIN: "https://huggingface.co"
287
+ PUBLIC_SHARE_PREFIX: "https://hf.co/chat"
288
+ PUBLIC_ANNOUNCEMENT_BANNERS: "[]"
289
+ PUBLIC_APP_NAME: "HuggingChat"
290
+ PUBLIC_APP_ASSETS: "huggingchat"
291
+ PUBLIC_APP_COLOR: "yellow"
292
+ PUBLIC_APP_DESCRIPTION: "Making the community's best AI chat models available to everyone."
293
+ PUBLIC_APP_DISCLAIMER_MESSAGE: "Disclaimer: AI is an area of active research with known problems such as biased generation and misinformation. Do not use this application for high-stakes decisions or advice."
294
+ PUBLIC_APP_DATA_SHARING: 0
295
+ PUBLIC_APP_DISCLAIMER: 1
296
+ PUBLIC_PLAUSIBLE_SCRIPT_URL: "/js/script.js"
297
+ PUBLIC_APPLE_APP_ID: "6476778843"
298
+ REQUIRE_FEATURED_ASSISTANTS: "true"
299
+ TASK_MODEL: "meta-llama/Meta-Llama-3-8B-Instruct"
300
+ TEXT_EMBEDDING_MODELS: >
301
+ [{
302
+ "name": "bge-base-en-v1-5-sxa",
303
+ "displayName": "bge-base-en-v1-5-sxa",
304
+ "chunkCharLength": 512,
305
+ "endpoints": [{
306
+ "type": "tei",
307
+ "url": "https://huggingchat-tei.hf.space/"
308
+ }]
309
+ }]
310
+ WEBSEARCH_BLOCKLIST: '["youtube.com", "twitter.com"]'
311
+
312
+ externalSecrets:
313
+ enabled: true
314
+ secretStoreName: "chat-ui-prod-secretstore"
315
+ secretName: "chat-ui-prod-secrets"
316
+ parameters:
317
+ MONGODB_URL: "hub-prod-chat-ui-mongodb-url"
318
+ OPENID_CONFIG: "hub-prod-chat-ui-openid-config"
319
+ SERPER_API_KEY: "hub-prod-chat-ui-serper-api-key"
320
+ HF_TOKEN: "hub-prod-chat-ui-hf-token"
321
+ WEBHOOK_URL_REPORT_ASSISTANT: "hub-prod-chat-ui-webhook-report-assistant"
322
+ ADMIN_API_SECRET: "hub-prod-chat-ui-admin-api-secret"
323
+ USAGE_LIMITS: "hub-prod-chat-ui-usage-limits"
324
+
325
+ autoscaling:
326
+ enabled: true
327
+ minReplicas: 6
328
+ maxReplicas: 30
329
+ targetMemoryUtilizationPercentage: "70"
330
+ targetCPUUtilizationPercentage: "70"
331
+
332
+ monitoring:
333
+ enabled: true
chart/templates/_helpers.tpl ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- define "name" -}}
2
+ {{- default $.Release.Name | trunc 63 | trimSuffix "-" -}}
3
+ {{- end -}}
4
+
5
+ {{- define "app.name" -}}
6
+ chat-ui
7
+ {{- end -}}
8
+
9
+ {{- define "labels.standard" -}}
10
+ release: {{ $.Release.Name | quote }}
11
+ heritage: {{ $.Release.Service | quote }}
12
+ chart: "{{ include "name" . }}"
13
+ app: "{{ include "app.name" . }}"
14
+ {{- end -}}
15
+
16
+ {{- define "labels.resolver" -}}
17
+ release: {{ $.Release.Name | quote }}
18
+ heritage: {{ $.Release.Service | quote }}
19
+ chart: "{{ include "name" . }}"
20
+ app: "{{ include "app.name" . }}-resolver"
21
+ {{- end -}}
22
+
chart/templates/config.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: v1
2
+ kind: ConfigMap
3
+ metadata:
4
+ labels: {{ include "labels.standard" . | nindent 4 }}
5
+ name: {{ include "name" . }}
6
+ namespace: {{ .Release.Namespace }}
7
+ data:
8
+ {{- range $key, $value := $.Values.envVars }}
9
+ {{ $key }}: {{ $value | quote }}
10
+ {{- end }}
chart/templates/deployment.yaml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: apps/v1
2
+ kind: Deployment
3
+ metadata:
4
+ labels: {{ include "labels.standard" . | nindent 4 }}
5
+ name: {{ include "name" . }}
6
+ namespace: {{ .Release.Namespace }}
7
+ spec:
8
+ progressDeadlineSeconds: 600
9
+ {{- if not $.Values.autoscaling.enabled }}
10
+ replicas: {{ .Values.replicas }}
11
+ {{- end }}
12
+ revisionHistoryLimit: 10
13
+ selector:
14
+ matchLabels: {{ include "labels.standard" . | nindent 6 }}
15
+ strategy:
16
+ rollingUpdate:
17
+ maxSurge: 25%
18
+ maxUnavailable: 25%
19
+ type: RollingUpdate
20
+ template:
21
+ metadata:
22
+ labels: {{ include "labels.standard" . | nindent 8 }}
23
+ {{- if $.Values.envVars.NODE_LOG_STRUCTURED_DATA }}
24
+ annotations:
25
+ co.elastic.logs/json.expand_keys: "true"
26
+ {{- end }}
27
+ spec:
28
+ containers:
29
+ - name: chat-ui
30
+ image: "{{ .Values.image.repository }}/{{ .Values.image.name }}:{{ .Values.image.tag }}"
31
+ imagePullPolicy: {{ .Values.image.pullPolicy }}
32
+ readinessProbe:
33
+ failureThreshold: 30
34
+ periodSeconds: 10
35
+ httpGet:
36
+ path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck
37
+ port: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
38
+ livenessProbe:
39
+ failureThreshold: 30
40
+ periodSeconds: 10
41
+ httpGet:
42
+ path: {{ $.Values.envVars.APP_BASE | default "" }}/healthcheck
43
+ port: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
44
+ ports:
45
+ - containerPort: {{ $.Values.envVars.APP_PORT | default 3000 | int }}
46
+ name: http
47
+ protocol: TCP
48
+ {{- if $.Values.monitoring.enabled }}
49
+ - containerPort: {{ $.Values.envVars.METRICS_PORT | default 5565 | int }}
50
+ name: metrics
51
+ protocol: TCP
52
+ {{- end }}
53
+ resources: {{ toYaml .Values.resources | nindent 12 }}
54
+ envFrom:
55
+ - configMapRef:
56
+ name: {{ include "name" . }}
57
+ {{- if $.Values.externalSecrets.enabled }}
58
+ - secretRef:
59
+ name: {{ $.Values.externalSecrets.secretName }}
60
+ {{- end }}
61
+ nodeSelector: {{ toYaml .Values.nodeSelector | nindent 8 }}
62
+ tolerations: {{ toYaml .Values.tolerations | nindent 8 }}
63
+ volumes:
64
+ - name: config
65
+ configMap:
66
+ name: {{ include "name" . }}
chart/templates/hpa.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- if $.Values.autoscaling.enabled }}
2
+ apiVersion: autoscaling/v2
3
+ kind: HorizontalPodAutoscaler
4
+ metadata:
5
+ labels: {{ include "labels.standard" . | nindent 4 }}
6
+ name: {{ include "name" . }}
7
+ namespace: {{ .Release.Namespace }}
8
+ spec:
9
+ scaleTargetRef:
10
+ apiVersion: apps/v1
11
+ kind: Deployment
12
+ name: {{ include "name" . }}
13
+ minReplicas: {{ $.Values.autoscaling.minReplicas }}
14
+ maxReplicas: {{ $.Values.autoscaling.maxReplicas }}
15
+ metrics:
16
+ {{- if $.Values.autoscaling.targetMemoryUtilizationPercentage }}{{/* truthiness check: accepts "70" or 70; "" or unset disables the memory target */}}
17
+ - type: Resource
18
+ resource:
19
+ name: memory
20
+ target:
21
+ type: Utilization
22
+ averageUtilization: {{ $.Values.autoscaling.targetMemoryUtilizationPercentage | int }}
23
+ {{- end }}
24
+ {{- if $.Values.autoscaling.targetCPUUtilizationPercentage }}{{/* truthiness check: accepts "70" or 70; "" or unset disables the CPU target */}}
25
+ - type: Resource
26
+ resource:
27
+ name: cpu
28
+ target:
29
+ type: Utilization
30
+ averageUtilization: {{ $.Values.autoscaling.targetCPUUtilizationPercentage | int }}
31
+ {{- end }}
32
+ behavior:
33
+ scaleDown:
34
+ stabilizationWindowSeconds: 600
35
+ policies:
36
+ - type: Percent
37
+ value: 10
38
+ periodSeconds: 60
39
+ scaleUp:
40
+ stabilizationWindowSeconds: 0
41
+ policies:
42
+ - type: Pods
43
+ value: 1
44
+ periodSeconds: 30
45
+ {{- end }}
chart/templates/ingress.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: networking.k8s.io/v1
2
+ kind: Ingress
3
+ metadata:
4
+ annotations: {{ toYaml .Values.ingress.annotations | nindent 4 }}
5
+ labels: {{ include "labels.standard" . | nindent 4 }}
6
+ name: {{ include "name" . }}
7
+ namespace: {{ .Release.Namespace }}
8
+ spec:
9
+ rules:
10
+ - host: {{ .Values.domain }}
11
+ http:
12
+ paths:
13
+ - backend:
14
+ service:
15
+ name: {{ include "name" . }}
16
+ port:
17
+ name: http
18
+ path: {{ $.Values.ingress.path | default "/" }}
19
+ pathType: Prefix
chart/templates/secrets.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- if .Values.externalSecrets.enabled }}
2
+ apiVersion: "external-secrets.io/v1beta1"
3
+ kind: ExternalSecret
4
+ metadata:
5
+ labels: {{ include "labels.standard" . | nindent 4 }}
6
+ name: {{ include "name" $ }}-external-secret
7
+ namespace: {{ $.Release.Namespace }}
8
+ spec:
9
+ refreshInterval: 1h
10
+ secretStoreRef:
11
+ name: {{ .Values.externalSecrets.secretStoreName }}
12
+ kind: SecretStore
13
+ target:
14
+ name: {{ .Values.externalSecrets.secretName }}
15
+ data:
16
+ {{- range $key, $value := .Values.externalSecrets.parameters }}
17
+ - secretKey: {{ $key | quote }}
18
+ remoteRef:
19
+ key: {{ $value | quote }}
20
+ {{- end }}
21
+ {{- end }}
chart/templates/service-monitor.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- if $.Values.monitoring.enabled }}
2
+ apiVersion: monitoring.coreos.com/v1
3
+ kind: ServiceMonitor
4
+ metadata:
5
+ labels: {{ include "labels.standard" . | nindent 4 }}
6
+ name: {{ include "name" . }}
7
+ namespace: {{ .Release.Namespace }}
8
+ spec:
9
+ selector:
10
+ matchLabels: {{ include "labels.standard" . | nindent 6 }}
11
+ endpoints:
12
+ - port: metrics
13
+ path: /metrics
14
+ interval: 15s
15
+ {{- end }}
chart/templates/service.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiVersion: v1
2
+ kind: Service
3
+ metadata:
4
+ name: "{{ include "name" . }}"
5
+ annotations: {{ toYaml .Values.service.annotations | nindent 4 }}
6
+ namespace: {{ .Release.Namespace }}
7
+ labels: {{ include "labels.standard" . | nindent 4 }}
8
+ spec:
9
+ ports:
10
+ - name: http
11
+ port: 80
12
+ protocol: TCP
13
+ targetPort: http
14
+ {{- if $.Values.monitoring.enabled }}
15
+ - name: metrics
16
+ port: 5565
17
+ protocol: TCP
18
+ targetPort: metrics # named container port from deployment.yaml; "http" would route scrapes to the app port
19
+ {{- end }}
20
+ selector: {{ include "labels.standard" . | nindent 4 }}
21
+ type: {{.Values.service.type}}
chart/values.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ image:
2
+ repository: ghcr.io/huggingface
3
+ name: chat-ui
4
+ tag: 0.0.0-latest
5
+ pullPolicy: IfNotPresent
6
+
7
+ replicas: 3
8
+
9
+ domain: huggingface.co
10
+
11
+ service:
12
+ type: NodePort
13
+ annotations: { }
14
+
15
+ ingress:
16
+ path: "/"
17
+ annotations: { }
18
+
19
+ resources:
20
+ requests:
21
+ cpu: 1
22
+ memory: 8Gi
23
+ limits:
24
+ cpu: 1
25
+ memory: 8Gi
26
+ nodeSelector: {}
27
+ tolerations: []
28
+
29
+ envVars: { }
30
+ externalSecrets:
31
+ enabled: false
32
+ secretStoreName: ""
33
+ secretName: ""
34
+ parameters: { }
35
+
36
+ autoscaling:
37
+ enabled: false
38
+ minReplicas: 1
39
+ maxReplicas: 2
40
+ targetMemoryUtilizationPercentage: ""
41
+ targetCPUUtilizationPercentage: ""
42
+
43
+ monitoring:
44
+ enabled: false