Jun Siang Cheah nsarrazin HF Staff committed on
Commit
61e5613
·
unverified ·
1 Parent(s): 9e9bc7c

feat: add support for anthropic on vertex (#958)

Browse files

Co-authored-by: Nathan Sarrazin <[email protected]>

README.md CHANGED
@@ -480,8 +480,8 @@ MODELS=`[
480
  // optionals
481
  "apiKey": "sk-ant-...",
482
  "baseURL": "https://api.anthropic.com",
483
- defaultHeaders: {},
484
- defaultQuery: {}
485
  }
486
  ]
487
  },
@@ -498,8 +498,51 @@ MODELS=`[
498
  // optionals
499
  "apiKey": "sk-ant-...",
500
  "baseURL": "https://api.anthropic.com",
501
- defaultHeaders: {},
502
- defaultQuery: {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  }
504
  ]
505
  }
 
480
  // optionals
481
  "apiKey": "sk-ant-...",
482
  "baseURL": "https://api.anthropic.com",
483
+ "defaultHeaders": {},
484
+ "defaultQuery": {}
485
  }
486
  ]
487
  },
 
498
  // optionals
499
  "apiKey": "sk-ant-...",
500
  "baseURL": "https://api.anthropic.com",
501
+ "defaultHeaders": {},
502
+ "defaultQuery": {}
503
+ }
504
+ ]
505
+ }
506
+ ]`
507
+ ```
508
+
509
+ We also support using Anthropic models running on Vertex AI. Authentication is done using Google Application Default Credentials. The project ID can be provided through the `endpoints.projectId` field, as shown in the following example:
510
+
511
+ ```
512
+ MODELS=`[
513
+ {
514
+ "name": "claude-3-sonnet@20240229",
515
+ "displayName": "Claude 3 Sonnet",
516
+ "description": "Ideal balance of intelligence and speed",
517
+ "parameters": {
518
+ "max_new_tokens": 4096,
519
+ },
520
+ "endpoints": [
521
+ {
522
+ "type": "anthropic-vertex",
523
+ "region": "us-central1",
524
+ "projectId": "gcp-project-id",
525
+ // optionals
526
+ "defaultHeaders": {},
527
+ "defaultQuery": {}
528
+ }
529
+ ]
530
+ },
531
+ {
532
+ "name": "claude-3-haiku@20240307",
533
+ "displayName": "Claude 3 Haiku",
534
+ "description": "Fastest, most compact model for near-instant responsiveness",
535
+ "parameters": {
536
+ "max_new_tokens": 4096
537
+ },
538
+ "endpoints": [
539
+ {
540
+ "type": "anthropic-vertex",
541
+ "region": "us-central1",
542
+ "projectId": "gcp-project-id",
543
+ // optionals
544
+ "defaultHeaders": {},
545
+ "defaultQuery": {}
546
  }
547
  ]
548
  }
package-lock.json CHANGED
@@ -79,6 +79,7 @@
79
  },
80
  "optionalDependencies": {
81
  "@anthropic-ai/sdk": "^0.17.1",
 
82
  "@google-cloud/vertexai": "^1.1.0",
83
  "aws4fetch": "^1.0.17",
84
  "cohere-ai": "^7.9.0",
@@ -157,6 +158,42 @@
157
  "node": ">= 8"
158
  }
159
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  "node_modules/@cspotcode/source-map-support": {
161
  "version": "0.8.1",
162
  "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
 
79
  },
80
  "optionalDependencies": {
81
  "@anthropic-ai/sdk": "^0.17.1",
82
+ "@anthropic-ai/vertex-sdk": "^0.3.0",
83
  "@google-cloud/vertexai": "^1.1.0",
84
  "aws4fetch": "^1.0.17",
85
  "cohere-ai": "^7.9.0",
 
158
  "node": ">= 8"
159
  }
160
  },
161
+ "node_modules/@anthropic-ai/vertex-sdk": {
162
+ "version": "0.3.0",
163
+ "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.3.0.tgz",
164
+ "integrity": "sha512-RquU3sXAuGdxWnbx5luHovFnQVso7LuAtSmpLkZMOT6x5csldAJdp4TIgMX6/55pAefNVPDTtEYChwK5wpxRww==",
165
+ "optional": true,
166
+ "dependencies": {
167
+ "@anthropic-ai/sdk": "^0.14",
168
+ "google-auth-library": "^9.4.2"
169
+ }
170
+ },
171
+ "node_modules/@anthropic-ai/vertex-sdk/node_modules/@anthropic-ai/sdk": {
172
+ "version": "0.14.1",
173
+ "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.14.1.tgz",
174
+ "integrity": "sha512-/o0+6ijSF0WSxnzQ0GUZPKaxOE0y1dqAn9gM9KPU7hc/tqiI4lzCYqe/EFSEw8pFONgYi1IjcvevYjgOOc2vpg==",
175
+ "optional": true,
176
+ "dependencies": {
177
+ "@types/node": "^18.11.18",
178
+ "@types/node-fetch": "^2.6.4",
179
+ "abort-controller": "^3.0.0",
180
+ "agentkeepalive": "^4.2.1",
181
+ "digest-fetch": "^1.3.0",
182
+ "form-data-encoder": "1.7.2",
183
+ "formdata-node": "^4.3.2",
184
+ "node-fetch": "^2.6.7",
185
+ "web-streams-polyfill": "^3.2.1"
186
+ }
187
+ },
188
+ "node_modules/@anthropic-ai/vertex-sdk/node_modules/web-streams-polyfill": {
189
+ "version": "3.3.3",
190
+ "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.3.3.tgz",
191
+ "integrity": "sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==",
192
+ "optional": true,
193
+ "engines": {
194
+ "node": ">= 8"
195
+ }
196
+ },
197
  "node_modules/@cspotcode/source-map-support": {
198
  "version": "0.8.1",
199
  "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz",
package.json CHANGED
@@ -89,6 +89,7 @@
89
  },
90
  "optionalDependencies": {
91
  "@anthropic-ai/sdk": "^0.17.1",
 
92
  "@google-cloud/vertexai": "^1.1.0",
93
  "aws4fetch": "^1.0.17",
94
  "cohere-ai": "^7.9.0",
 
89
  },
90
  "optionalDependencies": {
91
  "@anthropic-ai/sdk": "^0.17.1",
92
+ "@anthropic-ai/vertex-sdk": "^0.3.0",
93
  "@google-cloud/vertexai": "^1.1.0",
94
  "aws4fetch": "^1.0.17",
95
  "cohere-ai": "^7.9.0",
src/lib/server/endpoints/anthropic/endpointAnthropicVertex.ts ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { z } from "zod";
2
+ import type { Endpoint } from "../endpoints";
3
+ import type { TextGenerationStreamOutput } from "@huggingface/inference";
4
+
5
/**
 * Zod schema describing the configuration of an `anthropic-vertex` endpoint.
 *
 * - `weight`: positive integer, defaults to 1.
 * - `model`: accepted as-is, without validation.
 * - `type`: discriminator, must be the literal `"anthropic-vertex"`.
 * - `region`: defaults to `"us-central1"`.
 * - `projectId`: required string.
 * - `defaultHeaders` / `defaultQuery`: optional string-to-string maps.
 */
export const endpointAnthropicVertexParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	model: z.any(),
	type: z.literal("anthropic-vertex"),
	region: z.string().default("us-central1"),
	projectId: z.string(),
	defaultHeaders: z.record(z.string(), z.string()).optional(),
	defaultQuery: z.record(z.string(), z.string()).optional(),
});
14
+
15
/**
 * Creates an endpoint that talks to Anthropic models through the
 * `@anthropic-ai/vertex-sdk` client.
 *
 * The SDK is loaded with a dynamic import so that a missing package surfaces
 * as an explicit error when this endpoint is created.
 *
 * @param input - Raw endpoint configuration, validated against
 *   `endpointAnthropicVertexParametersSchema`.
 * @returns An endpoint function that, given the conversation messages and a
 *   preprompt, returns an async generator of `TextGenerationStreamOutput`
 *   items: one per text delta, followed by a final special token carrying the
 *   complete generated text.
 * @throws Error when `@anthropic-ai/vertex-sdk` cannot be imported, or a zod
 *   validation error when `input` does not match the schema.
 */
export async function endpointAnthropicVertex(
	input: z.input<typeof endpointAnthropicVertexParametersSchema>
): Promise<Endpoint> {
	const { region, projectId, model, defaultHeaders, defaultQuery } =
		endpointAnthropicVertexParametersSchema.parse(input);
	let AnthropicVertex;
	try {
		AnthropicVertex = (await import("@anthropic-ai/vertex-sdk")).AnthropicVertex;
	} catch (e) {
		throw new Error("Failed to import @anthropic-ai/vertex-sdk", { cause: e });
	}

	const anthropic = new AnthropicVertex({
		baseURL: `https://${region}-aiplatform.googleapis.com/v1`,
		region,
		projectId,
		defaultHeaders,
		defaultQuery,
	});

	return async ({ messages, preprompt }) => {
		// A leading system message takes precedence over the preprompt.
		let system = preprompt;
		if (messages?.[0]?.from === "system") {
			system = messages[0].content;
		}

		// System messages are passed through the dedicated `system` field, so
		// only the remaining turns are sent as chat messages.
		const messagesFormatted = messages
			.filter((message) => message.from !== "system")
			.map((message) => ({
				// Safe to narrow: "system" entries were filtered out just above.
				role: message.from as "user" | "assistant",
				content: message.content,
			}));

		return (async function* () {
			const stream = anthropic.messages.stream({
				model: model.id ?? model.name,
				messages: messagesFormatted,
				max_tokens: model.parameters?.max_new_tokens,
				temperature: model.parameters?.temperature,
				top_p: model.parameters?.top_p,
				top_k: model.parameters?.top_k,
				stop_sequences: model.parameters?.stop,
				system,
			});

			let tokenId = 0;

			// Consume the stream through its async iterator, which queues events
			// until they are read. Re-registering a one-shot "text" listener on
			// every loop turn could miss deltas emitted while no listener was
			// attached (e.g. while the consumer was busy with the previous token).
			for await (const event of stream) {
				if (event.type !== "content_block_delta" || event.delta.type !== "text_delta") {
					continue;
				}

				// Text delta
				yield {
					token: {
						id: tokenId++,
						text: event.delta.text,
						special: false,
						logprob: 0,
					},
					generated_text: null,
					details: null,
				} satisfies TextGenerationStreamOutput;
			}

			// Stream end: emit a special empty token carrying the full answer.
			yield {
				token: {
					id: tokenId++,
					text: "",
					logprob: 0,
					special: true,
				},
				generated_text: await stream.finalText(),
				details: null,
			} satisfies TextGenerationStreamOutput;
		})();
	};
}
src/lib/server/endpoints/endpoints.ts CHANGED
@@ -12,6 +12,10 @@ import {
12
  endpointAnthropic,
13
  endpointAnthropicParametersSchema,
14
  } from "./anthropic/endpointAnthropic";
 
 
 
 
15
  import type { Model } from "$lib/types/Model";
16
  import endpointCloudflare, {
17
  endpointCloudflareParametersSchema,
@@ -44,6 +48,7 @@ export type EndpointGenerator<T extends CommonEndpoint> = (parameters: T) => End
44
  export const endpoints = {
45
  tgi: endpointTgi,
46
  anthropic: endpointAnthropic,
 
47
  aws: endpointAws,
48
  openai: endpointOai,
49
  llamacpp: endpointLlamacpp,
@@ -56,6 +61,7 @@ export const endpoints = {
56
 
57
  export const endpointSchema = z.discriminatedUnion("type", [
58
  endpointAnthropicParametersSchema,
 
59
  endpointAwsParametersSchema,
60
  endpointOAIParametersSchema,
61
  endpointTgiParametersSchema,
 
12
  endpointAnthropic,
13
  endpointAnthropicParametersSchema,
14
  } from "./anthropic/endpointAnthropic";
15
+ import {
16
+ endpointAnthropicVertex,
17
+ endpointAnthropicVertexParametersSchema,
18
+ } from "./anthropic/endpointAnthropicVertex";
19
  import type { Model } from "$lib/types/Model";
20
  import endpointCloudflare, {
21
  endpointCloudflareParametersSchema,
 
48
  export const endpoints = {
49
  tgi: endpointTgi,
50
  anthropic: endpointAnthropic,
51
+ anthropicvertex: endpointAnthropicVertex,
52
  aws: endpointAws,
53
  openai: endpointOai,
54
  llamacpp: endpointLlamacpp,
 
61
 
62
  export const endpointSchema = z.discriminatedUnion("type", [
63
  endpointAnthropicParametersSchema,
64
+ endpointAnthropicVertexParametersSchema,
65
  endpointAwsParametersSchema,
66
  endpointOAIParametersSchema,
67
  endpointTgiParametersSchema,
src/lib/server/models.ts CHANGED
@@ -159,6 +159,8 @@ const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
159
  return endpoints.tgi(args);
160
  case "anthropic":
161
  return endpoints.anthropic(args);
 
 
162
  case "aws":
163
  return await endpoints.aws(args);
164
  case "openai":
 
159
  return endpoints.tgi(args);
160
  case "anthropic":
161
  return endpoints.anthropic(args);
162
+ case "anthropic-vertex":
163
+ return endpoints.anthropicvertex(args);
164
  case "aws":
165
  return await endpoints.aws(args);
166
  case "openai":