evalstate nsarrazin HF Staff committed on
Commit
33d6f58
·
unverified ·
1 Parent(s): b157e67

Anthropic PDF Beta Support (#1571)

Browse files

* support anthropic PDF beta

* upstream merge, remove commented out console log line

* Fixing type errors.
the Anthropic API does not yet include a "DocumentBlock" for
supporting PDFs, so an extended type has been added to the endpoint.

* changed document processor to async (matching image processor)

* use the beta api types rather than custom extension

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

package-lock.json CHANGED
@@ -101,7 +101,7 @@
101
  "vitest": "^2.1.4"
102
  },
103
  "optionalDependencies": {
104
- "@anthropic-ai/sdk": "^0.25.0",
105
  "@anthropic-ai/vertex-sdk": "^0.4.1",
106
  "@aws-sdk/client-bedrock-runtime": "^3.631.0",
107
  "@google-cloud/vertexai": "^1.1.0",
@@ -265,9 +265,9 @@
265
  }
266
  },
267
  "node_modules/@anthropic-ai/sdk": {
268
- "version": "0.25.2",
269
- "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.25.2.tgz",
270
- "integrity": "sha512-F1Hck/asswwidFLtGdMg3XYgRxEUfygNbpkq5KEaEGsHNaSfxeX18/uZGQCL0oQNcj/tYNx8BaFXVwRhFDi45g==",
271
  "optional": true,
272
  "dependencies": {
273
  "@types/node": "^18.11.18",
 
101
  "vitest": "^2.1.4"
102
  },
103
  "optionalDependencies": {
104
+ "@anthropic-ai/sdk": "^0.32.1",
105
  "@anthropic-ai/vertex-sdk": "^0.4.1",
106
  "@aws-sdk/client-bedrock-runtime": "^3.631.0",
107
  "@google-cloud/vertexai": "^1.1.0",
 
265
  }
266
  },
267
  "node_modules/@anthropic-ai/sdk": {
268
+ "version": "0.32.1",
269
+ "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.32.1.tgz",
270
+ "integrity": "sha512-U9JwTrDvdQ9iWuABVsMLj8nJVwAyQz6QXvgLsVhryhCEPkLsbcP/MXxm+jYcAwLoV8ESbaTTjnD4kuAFa+Hyjg==",
271
  "optional": true,
272
  "dependencies": {
273
  "@types/node": "^18.11.18",
package.json CHANGED
@@ -111,7 +111,7 @@
111
  "zod": "^3.22.3"
112
  },
113
  "optionalDependencies": {
114
- "@anthropic-ai/sdk": "^0.25.0",
115
  "@anthropic-ai/vertex-sdk": "^0.4.1",
116
  "@aws-sdk/client-bedrock-runtime": "^3.631.0",
117
  "@google-cloud/vertexai": "^1.1.0",
 
111
  "zod": "^3.22.3"
112
  },
113
  "optionalDependencies": {
114
+ "@anthropic-ai/sdk": "^0.32.1",
115
  "@anthropic-ai/vertex-sdk": "^0.4.1",
116
  "@aws-sdk/client-bedrock-runtime": "^3.631.0",
117
  "@google-cloud/vertexai": "^1.1.0",
src/lib/server/endpoints/anthropic/endpointAnthropic.ts CHANGED
@@ -4,6 +4,8 @@ import { env } from "$env/dynamic/private";
4
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
5
  import { createImageProcessorOptionsValidator } from "../images";
6
  import { endpointMessagesToAnthropicMessages } from "./utils";
 
 
7
 
8
  export const endpointAnthropicParametersSchema = z.object({
9
  weight: z.number().int().positive().default(1),
@@ -23,6 +25,10 @@ export const endpointAnthropicParametersSchema = z.object({
23
  maxWidth: 4096,
24
  maxHeight: 4096,
25
  }),
 
 
 
 
26
  })
27
  .default({}),
28
  });
@@ -59,7 +65,10 @@ export async function endpointAnthropic(
59
  return (async function* () {
60
  const stream = anthropic.messages.stream({
61
  model: model.id ?? model.name,
62
- messages: await endpointMessagesToAnthropicMessages(messages, multimodal),
 
 
 
63
  max_tokens: parameters?.max_new_tokens,
64
  temperature: parameters?.temperature,
65
  top_p: parameters?.top_p,
 
4
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
5
  import { createImageProcessorOptionsValidator } from "../images";
6
  import { endpointMessagesToAnthropicMessages } from "./utils";
7
+ import { createDocumentProcessorOptionsValidator } from "../document";
8
+ import type { MessageParam } from "@anthropic-ai/sdk/resources/messages.mjs";
9
 
10
  export const endpointAnthropicParametersSchema = z.object({
11
  weight: z.number().int().positive().default(1),
 
25
  maxWidth: 4096,
26
  maxHeight: 4096,
27
  }),
28
+ document: createDocumentProcessorOptionsValidator({
29
+ supportedMimeTypes: ["application/pdf"],
30
+ maxSizeInMB: 32,
31
+ }),
32
  })
33
  .default({}),
34
  });
 
65
  return (async function* () {
66
  const stream = anthropic.messages.stream({
67
  model: model.id ?? model.name,
68
+ messages: (await endpointMessagesToAnthropicMessages(
69
+ messages,
70
+ multimodal
71
+ )) as MessageParam[],
72
  max_tokens: parameters?.max_new_tokens,
73
  temperature: parameters?.temperature,
74
  top_p: parameters?.top_p,
src/lib/server/endpoints/anthropic/endpointAnthropicVertex.ts CHANGED
@@ -3,6 +3,7 @@ import type { Endpoint } from "../endpoints";
3
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
4
  import { createImageProcessorOptionsValidator } from "../images";
5
  import { endpointMessagesToAnthropicMessages } from "./utils";
 
6
 
7
  export const endpointAnthropicVertexParametersSchema = z.object({
8
  weight: z.number().int().positive().default(1),
@@ -56,7 +57,10 @@ export async function endpointAnthropicVertex(
56
  return (async function* () {
57
  const stream = anthropic.messages.stream({
58
  model: model.id ?? model.name,
59
- messages: await endpointMessagesToAnthropicMessages(messages, multimodal),
 
 
 
60
  max_tokens: model.parameters?.max_new_tokens,
61
  temperature: model.parameters?.temperature,
62
  top_p: model.parameters?.top_p,
 
3
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
4
  import { createImageProcessorOptionsValidator } from "../images";
5
  import { endpointMessagesToAnthropicMessages } from "./utils";
6
+ import type { MessageParam } from "@anthropic-ai/sdk/resources/messages.mjs";
7
 
8
  export const endpointAnthropicVertexParametersSchema = z.object({
9
  weight: z.number().int().positive().default(1),
 
57
  return (async function* () {
58
  const stream = anthropic.messages.stream({
59
  model: model.id ?? model.name,
60
+ messages: (await endpointMessagesToAnthropicMessages(
61
+ messages,
62
+ multimodal
63
+ )) as MessageParam[],
64
  max_tokens: model.parameters?.max_new_tokens,
65
  temperature: model.parameters?.temperature,
66
  top_p: model.parameters?.top_p,
src/lib/server/endpoints/anthropic/utils.ts CHANGED
@@ -1,12 +1,17 @@
1
  import { makeImageProcessor, type ImageProcessorOptions } from "../images";
 
2
  import type { EndpointMessage } from "../endpoints";
3
  import type { MessageFile } from "$lib/types/Message";
4
- import type { ImageBlockParam, MessageParam } from "@anthropic-ai/sdk/resources/messages.mjs";
 
 
 
 
5
 
6
  export async function fileToImageBlock(
7
  file: MessageFile,
8
  opts: ImageProcessorOptions<"image/png" | "image/jpeg" | "image/webp">
9
- ): Promise<ImageBlockParam> {
10
  const processor = makeImageProcessor(opts);
11
  const { image, mime } = await processor(file);
12
 
@@ -20,21 +25,48 @@ export async function fileToImageBlock(
20
  };
21
  }
22
 
23
- type NonSystemMessage = EndpointMessage & { from: "user" | "assistant" };
 
 
 
 
 
24
 
 
 
 
 
 
 
 
 
 
 
 
25
  export async function endpointMessagesToAnthropicMessages(
26
  messages: EndpointMessage[],
27
- multimodal: { image: ImageProcessorOptions<"image/png" | "image/jpeg" | "image/webp"> }
28
- ): Promise<MessageParam[]> {
 
 
 
29
  return await Promise.all(
30
  messages
31
  .filter((message): message is NonSystemMessage => message.from !== "system")
32
- .map<Promise<MessageParam>>(async (message) => {
33
  return {
34
  role: message.from,
35
  content: [
36
  ...(await Promise.all(
37
- (message.files ?? []).map((file) => fileToImageBlock(file, multimodal.image))
 
 
 
 
 
 
 
 
38
  )),
39
  { type: "text", text: message.content },
40
  ],
 
1
  import { makeImageProcessor, type ImageProcessorOptions } from "../images";
2
+ import { makeDocumentProcessor, type FileProcessorOptions } from "../document";
3
  import type { EndpointMessage } from "../endpoints";
4
  import type { MessageFile } from "$lib/types/Message";
5
+ import type {
6
+ BetaImageBlockParam,
7
+ BetaMessageParam,
8
+ BetaBase64PDFBlock,
9
+ } from "@anthropic-ai/sdk/resources/beta/messages/messages.mjs";
10
 
11
  export async function fileToImageBlock(
12
  file: MessageFile,
13
  opts: ImageProcessorOptions<"image/png" | "image/jpeg" | "image/webp">
14
+ ): Promise<BetaImageBlockParam> {
15
  const processor = makeImageProcessor(opts);
16
  const { image, mime } = await processor(file);
17
 
 
25
  };
26
  }
27
 
28
+ export async function fileToDocumentBlock(
29
+ file: MessageFile,
30
+ opts: FileProcessorOptions<"application/pdf">
31
+ ): Promise<BetaBase64PDFBlock> {
32
+ const processor = makeDocumentProcessor(opts);
33
+ const { file: document, mime } = await processor(file);
34
 
35
+ return {
36
+ type: "document",
37
+ source: {
38
+ type: "base64",
39
+ media_type: mime,
40
+ data: document.toString("base64"),
41
+ },
42
+ };
43
+ }
44
+
45
+ type NonSystemMessage = EndpointMessage & { from: "user" | "assistant" };
46
  export async function endpointMessagesToAnthropicMessages(
47
  messages: EndpointMessage[],
48
+ multimodal: {
49
+ image: ImageProcessorOptions<"image/png" | "image/jpeg" | "image/webp">;
50
+ document?: FileProcessorOptions<"application/pdf">;
51
+ }
52
+ ): Promise<BetaMessageParam[]> {
53
  return await Promise.all(
54
  messages
55
  .filter((message): message is NonSystemMessage => message.from !== "system")
56
+ .map<Promise<BetaMessageParam>>(async (message) => {
57
  return {
58
  role: message.from,
59
  content: [
60
  ...(await Promise.all(
61
+ (message.files ?? []).map(async (file) => {
62
+ if (file.mime.startsWith("image/")) {
63
+ return fileToImageBlock(file, multimodal.image);
64
+ } else if (file.mime === "application/pdf" && multimodal.document) {
65
+ return fileToDocumentBlock(file, multimodal.document);
66
+ } else {
67
+ throw new Error(`Unsupported file type: ${file.mime}`);
68
+ }
69
+ })
70
  )),
71
  { type: "text", text: message.content },
72
  ],
src/lib/server/endpoints/document.ts CHANGED
@@ -34,15 +34,21 @@ export type DocumentProcessor<TMimeType extends string = string> = (file: Messag
34
  mime: TMimeType;
35
  };
36
 
 
 
 
 
 
 
 
37
  export function makeDocumentProcessor<TMimeType extends string = string>(
38
  options: FileProcessorOptions<TMimeType>
39
- ): DocumentProcessor<TMimeType> {
40
- return (file) => {
41
  const { supportedMimeTypes, maxSizeInMB } = options;
42
  const { mime, value } = file;
43
 
44
  const buffer = Buffer.from(value, "base64");
45
-
46
  const tooLargeInBytes = buffer.byteLength > maxSizeInMB * 1000 * 1000;
47
 
48
  if (tooLargeInBytes) {
@@ -50,7 +56,6 @@ export function makeDocumentProcessor<TMimeType extends string = string>(
50
  }
51
 
52
  const outputMime = validateMimeType(supportedMimeTypes, mime);
53
-
54
  return { file: buffer, mime: outputMime };
55
  };
56
  }
 
34
  mime: TMimeType;
35
  };
36
 
37
+ export type AsyncDocumentProcessor<TMimeType extends string = string> = (
38
+ file: MessageFile
39
+ ) => Promise<{
40
+ file: Buffer;
41
+ mime: TMimeType;
42
+ }>;
43
+
44
  export function makeDocumentProcessor<TMimeType extends string = string>(
45
  options: FileProcessorOptions<TMimeType>
46
+ ): AsyncDocumentProcessor<TMimeType> {
47
+ return async (file) => {
48
  const { supportedMimeTypes, maxSizeInMB } = options;
49
  const { mime, value } = file;
50
 
51
  const buffer = Buffer.from(value, "base64");
 
52
  const tooLargeInBytes = buffer.byteLength > maxSizeInMB * 1000 * 1000;
53
 
54
  if (tooLargeInBytes) {
 
56
  }
57
 
58
  const outputMime = validateMimeType(supportedMimeTypes, mime);
 
59
  return { file: buffer, mime: outputMime };
60
  };
61
  }