Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat(chart): use inference proxy (#1688)
Browse files

* feat(chart): use inference proxy
* fix: also use `HF_API_ROOT` for embedding endpoints
chart/env/prod.yaml
CHANGED
@@ -159,7 +159,7 @@ envVars:
|
|
159 |
"endpoints": [
|
160 |
{
|
161 |
"type": "openai",
|
162 |
-
"baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
|
163 |
}
|
164 |
]
|
165 |
},
|
@@ -193,7 +193,7 @@ envVars:
|
|
193 |
"endpoints": [
|
194 |
{
|
195 |
"type": "openai",
|
196 |
-
"baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
|
197 |
}
|
198 |
]
|
199 |
},
|
@@ -261,7 +261,7 @@ envVars:
|
|
261 |
"endpoints": [
|
262 |
{
|
263 |
"type": "openai",
|
264 |
-
"baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
|
265 |
}
|
266 |
]
|
267 |
},
|
@@ -280,7 +280,7 @@ envVars:
|
|
280 |
"endpoints": [
|
281 |
{
|
282 |
"type": "openai",
|
283 |
-
"baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
|
284 |
"multimodal": {
|
285 |
"image": {
|
286 |
"maxSizeInMB": 10,
|
@@ -597,7 +597,7 @@ envVars:
|
|
597 |
]
|
598 |
HF_ORG_ADMIN: '644171cfbd0c97265298aa99'
|
599 |
HF_ORG_EARLY_ACCESS: '5e67bd5b1009063689407478'
|
600 |
-
|
601 |
infisical:
|
602 |
enabled: true
|
603 |
env: "prod-us-east-1"
|
|
|
159 |
"endpoints": [
|
160 |
{
|
161 |
"type": "openai",
|
162 |
+
"baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
|
163 |
}
|
164 |
]
|
165 |
},
|
|
|
193 |
"endpoints": [
|
194 |
{
|
195 |
"type": "openai",
|
196 |
+
"baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
|
197 |
}
|
198 |
]
|
199 |
},
|
|
|
261 |
"endpoints": [
|
262 |
{
|
263 |
"type": "openai",
|
264 |
+
"baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
|
265 |
}
|
266 |
]
|
267 |
},
|
|
|
280 |
"endpoints": [
|
281 |
{
|
282 |
"type": "openai",
|
283 |
+
"baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
|
284 |
"multimodal": {
|
285 |
"image": {
|
286 |
"maxSizeInMB": 10,
|
|
|
597 |
]
|
598 |
HF_ORG_ADMIN: '644171cfbd0c97265298aa99'
|
599 |
HF_ORG_EARLY_ACCESS: '5e67bd5b1009063689407478'
|
600 |
+
HF_API_ROOT: 'https://proxy.serverless.api-inference.huggingface.tech/models'
|
601 |
infisical:
|
602 |
enabled: true
|
603 |
env: "prod-us-east-1"
|
src/lib/server/embeddingEndpoints/hfApi/embeddingHfApi.ts
CHANGED
@@ -18,7 +18,7 @@ export async function embeddingEndpointHfApi(
|
|
18 |
input: z.input<typeof embeddingEndpointHfApiSchema>
|
19 |
): Promise<EmbeddingEndpoint> {
|
20 |
const { model, authorization } = embeddingEndpointHfApiSchema.parse(input);
|
21 |
-
const url =
|
22 |
|
23 |
return async ({ inputs }) => {
|
24 |
const batchesInputs = chunk(inputs, 128);
|
|
|
18 |
input: z.input<typeof embeddingEndpointHfApiSchema>
|
19 |
): Promise<EmbeddingEndpoint> {
|
20 |
const { model, authorization } = embeddingEndpointHfApiSchema.parse(input);
|
21 |
+
const url = `${env.HF_API_ROOT}/${model.id}`;
|
22 |
|
23 |
return async ({ inputs }) => {
|
24 |
const batchesInputs = chunk(inputs, 128);
|