nsarrazin HF Staff committed on
Commit
cf63293
·
unverified ·
1 Parent(s): baaed81

feat(chart): use inference proxy (#1688)

Browse files

* feat(chart): use inference proxy

* fix: also use `HF_API_ROOT` for embedding endpoints

chart/env/prod.yaml CHANGED
@@ -159,7 +159,7 @@ envVars:
159
  "endpoints": [
160
  {
161
  "type": "openai",
162
- "baseURL": "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
163
  }
164
  ]
165
  },
@@ -193,7 +193,7 @@ envVars:
193
  "endpoints": [
194
  {
195
  "type": "openai",
196
- "baseURL": "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
197
  }
198
  ]
199
  },
@@ -261,7 +261,7 @@ envVars:
261
  "endpoints": [
262
  {
263
  "type": "openai",
264
- "baseURL": "https://api-inference.huggingface.co/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
265
  }
266
  ]
267
  },
@@ -280,7 +280,7 @@ envVars:
280
  "endpoints": [
281
  {
282
  "type": "openai",
283
- "baseURL": "https://api-inference.huggingface.co/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
284
  "multimodal": {
285
  "image": {
286
  "maxSizeInMB": 10,
@@ -597,7 +597,7 @@ envVars:
597
  ]
598
  HF_ORG_ADMIN: '644171cfbd0c97265298aa99'
599
  HF_ORG_EARLY_ACCESS: '5e67bd5b1009063689407478'
600
-
601
  infisical:
602
  enabled: true
603
  env: "prod-us-east-1"
 
159
  "endpoints": [
160
  {
161
  "type": "openai",
162
+ "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B/v1"
163
  }
164
  ]
165
  },
 
193
  "endpoints": [
194
  {
195
  "type": "openai",
196
+ "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF/v1"
197
  }
198
  ]
199
  },
 
261
  "endpoints": [
262
  {
263
  "type": "openai",
264
+ "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/Qwen/Qwen2.5-Coder-32B-Instruct/v1"
265
  }
266
  ]
267
  },
 
280
  "endpoints": [
281
  {
282
  "type": "openai",
283
+ "baseURL": "https://proxy.serverless.api-inference.huggingface.tech/models/meta-llama/Llama-3.2-11B-Vision-Instruct/v1",
284
  "multimodal": {
285
  "image": {
286
  "maxSizeInMB": 10,
 
597
  ]
598
  HF_ORG_ADMIN: '644171cfbd0c97265298aa99'
599
  HF_ORG_EARLY_ACCESS: '5e67bd5b1009063689407478'
600
+ HF_API_ROOT: 'https://proxy.serverless.api-inference.huggingface.tech/models'
601
  infisical:
602
  enabled: true
603
  env: "prod-us-east-1"
src/lib/server/embeddingEndpoints/hfApi/embeddingHfApi.ts CHANGED
@@ -18,7 +18,7 @@ export async function embeddingEndpointHfApi(
18
  input: z.input<typeof embeddingEndpointHfApiSchema>
19
  ): Promise<EmbeddingEndpoint> {
20
  const { model, authorization } = embeddingEndpointHfApiSchema.parse(input);
21
- const url = "https://api-inference.huggingface.co/models/" + model.id;
22
 
23
  return async ({ inputs }) => {
24
  const batchesInputs = chunk(inputs, 128);
 
18
  input: z.input<typeof embeddingEndpointHfApiSchema>
19
  ): Promise<EmbeddingEndpoint> {
20
  const { model, authorization } = embeddingEndpointHfApiSchema.parse(input);
21
+ const url = `${env.HF_API_ROOT}/${model.id}`;
22
 
23
  return async ({ inputs }) => {
24
  const batchesInputs = chunk(inputs, 128);