goupilew committed on
Commit
aa59751
·
unverified ·
1 Parent(s): 40d5d77

[vertex] Add PDF/plain text support (#1520)

Browse files

* [vertex] Add PDF support

* [vertex] Fix lint

* [vertex] Add support for text/plain

src/lib/components/chat/ChatWindow.svelte CHANGED
@@ -213,7 +213,7 @@
213
  ...(!$page.data?.assistant && currentModel.tools
214
  ? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
215
  : []),
216
- ...(currentModel.multimodal ? ["image/*"] : []),
217
  ];
218
 
219
  $: isFileUploadEnabled = activeMimeTypes.length > 0;
 
213
  ...(!$page.data?.assistant && currentModel.tools
214
  ? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
215
  : []),
216
+ ...(currentModel.multimodal ? currentModel.multimodalAcceptedMimetypes ?? ["image/*"] : []),
217
  ];
218
 
219
  $: isFileUploadEnabled = activeMimeTypes.length > 0;
src/lib/server/endpoints/document.ts ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { MessageFile } from "$lib/types/Message";
2
+ import { z } from "zod";
3
+
4
/**
 * Limits a file processor applies to an uploaded file.
 *
 * @typeParam TMimeType - String type of the MIME types the processor accepts.
 */
export interface FileProcessorOptions<TMimeType extends string = string> {
	/** MIME types the processor accepts. */
	supportedMimeTypes: TMimeType[];
	/** Upper bound on the file size, expressed in megabytes. */
	maxSizeInMB: number;
}
8
+
9
/**
 * Asynchronous file processor: receives a message file and resolves to its
 * binary content together with the MIME type of that content.
 *
 * @typeParam TMimeType - String type of the MIME type reported for the output.
 */
export type ImageProcessor<TMimeType extends string = string> = (
	file: MessageFile
) => Promise<{ file: Buffer; mime: TMimeType }>;
13
+
14
/**
 * Builds the zod schema that validates document-processor options.
 *
 * Both fields fall back to the matching value in `defaults`, and the schema as
 * a whole falls back to `defaults`. Entries of `supportedMimeTypes` are
 * restricted to the MIME types listed in `defaults.supportedMimeTypes`.
 *
 * @param defaults - Fallback options; its `supportedMimeTypes` also defines the allowed values.
 * @returns A zod schema describing `supportedMimeTypes` and `maxSizeInMB`.
 */
export const createDocumentProcessorOptionsValidator = <TMimeType extends string = string>(
	defaults: FileProcessorOptions<TMimeType>
) => {
	// z.enum is typed over a non-empty tuple, so the first entry is passed apart from the rest.
	const [firstMimeType, ...otherMimeTypes] = defaults.supportedMimeTypes;
	const mimeTypeSchema = z.enum<string, [TMimeType, ...TMimeType[]]>([
		firstMimeType,
		...otherMimeTypes,
	]);

	const optionsSchema = z.object({
		supportedMimeTypes: z.array(mimeTypeSchema).default(defaults.supportedMimeTypes),
		maxSizeInMB: z.number().positive().default(defaults.maxSizeInMB),
	});

	return optionsSchema.default(defaults);
};
31
+
32
/**
 * Synchronous file processor, as returned by `makeDocumentProcessor`: receives
 * a message file and returns its binary content together with its MIME type.
 *
 * @typeParam TMimeType - String type of the MIME type reported for the output.
 */
export type DocumentProcessor<TMimeType extends string = string> = (
	file: MessageFile
) => { file: Buffer; mime: TMimeType };
36
+
37
/**
 * Creates a processor that validates an uploaded document and decodes it to binary.
 *
 * The returned function checks the file's MIME type against
 * `options.supportedMimeTypes`, decodes the base64 `value` of the file, and
 * rejects content larger than `options.maxSizeInMB`.
 *
 * @param options - Accepted MIME types and the size limit in megabytes.
 * @returns A function mapping a message file to its decoded buffer and MIME type.
 * @throws Error if the MIME type is not supported or the decoded document is too large.
 */
export function makeDocumentProcessor<TMimeType extends string = string>(
	options: FileProcessorOptions<TMimeType>
): DocumentProcessor<TMimeType> {
	return (file) => {
		const { supportedMimeTypes, maxSizeInMB } = options;
		const { mime, value } = file;

		// Cheap check first: reject unsupported types before allocating a buffer for the payload.
		const outputMime = validateMimeType(supportedMimeTypes, mime);

		const buffer = Buffer.from(value, "base64");

		// The limit is expressed in decimal megabytes (1 MB = 1,000,000 bytes).
		const maxSizeInBytes = maxSizeInMB * 1000 * 1000;

		if (buffer.byteLength > maxSizeInBytes) {
			throw new Error("Document is too large");
		}

		return { file: buffer, mime: outputMime };
	};
}
57
+
58
/**
 * Checks that a MIME type is one of the supported ones.
 *
 * Media type names are case-insensitive and may carry parameters
 * (e.g. `text/plain; charset=utf-8`), so the comparison uses only the bare,
 * lower-cased `type/subtype` part of `mime`.
 *
 * @param supportedMimes - MIME types that are accepted.
 * @param mime - MIME type to validate.
 * @returns The entry of `supportedMimes` that `mime` matches.
 * @throws Error if `mime` matches none of the supported MIME types.
 */
const validateMimeType = <T extends readonly string[]>(
	supportedMimes: T,
	mime: string
): T[number] => {
	// Drop any parameters that follow the first ";" and normalize case and whitespace.
	const parametersStart = mime.indexOf(";");
	const bareMime = (parametersStart === -1 ? mime : mime.slice(0, parametersStart))
		.trim()
		.toLowerCase();

	const matchedMime = supportedMimes.find((supported) => supported.toLowerCase() === bareMime);

	if (matchedMime === undefined) {
		const supportedMimesStr = supportedMimes.join(", ");

		throw new Error(`Mimetype "${mime}" not found in supported mimes: ${supportedMimesStr}`);
	}

	// Return the configured entry so the result is always a member of the supported list.
	return matchedMime;
};
src/lib/server/endpoints/google/endpointVertex.ts CHANGED
@@ -10,6 +10,7 @@ import { z } from "zod";
10
  import type { Message } from "$lib/types/Message";
11
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
12
  import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
 
13
 
14
  export const endpointVertexParametersSchema = z.object({
15
  weight: z.number().int().positive().default(1),
@@ -39,12 +40,17 @@ export const endpointVertexParametersSchema = z.object({
39
  "image/avif",
40
  "image/tiff",
41
  "image/gif",
 
42
  ],
43
  preferredMimeType: "image/webp",
44
- maxSizeInMB: Infinity,
45
  maxWidth: 4096,
46
  maxHeight: 4096,
47
  }),
 
 
 
 
48
  })
49
  .default({}),
50
  });
@@ -109,17 +115,33 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSch
109
  const vertexMessages = await Promise.all(
110
  messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
111
  const imageProcessor = makeImageProcessor(multimodal.image);
112
- const processedFiles =
 
 
113
  files && files.length > 0
114
- ? await Promise.all(files.map(async (file) => imageProcessor(file)))
 
 
 
 
 
 
 
 
 
 
 
 
115
  : [];
116
 
 
 
117
  return {
118
  role: from === "user" ? "user" : "model",
119
  parts: [
120
  ...processedFiles.map((processedFile) => ({
121
  inlineData: {
122
- data: processedFile.image.toString("base64"),
123
  mimeType: processedFile.mime,
124
  },
125
  })),
 
10
  import type { Message } from "$lib/types/Message";
11
  import type { TextGenerationStreamOutput } from "@huggingface/inference";
12
  import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
13
+ import { createDocumentProcessorOptionsValidator, makeDocumentProcessor } from "../document";
14
 
15
  export const endpointVertexParametersSchema = z.object({
16
  weight: z.number().int().positive().default(1),
 
40
  "image/avif",
41
  "image/tiff",
42
  "image/gif",
43
+ "application/pdf",
44
  ],
45
  preferredMimeType: "image/webp",
46
+ maxSizeInMB: 20,
47
  maxWidth: 4096,
48
  maxHeight: 4096,
49
  }),
50
+ document: createDocumentProcessorOptionsValidator({
51
+ supportedMimeTypes: ["application/pdf", "text/plain"],
52
+ maxSizeInMB: 20,
53
+ }),
54
  })
55
  .default({}),
56
  });
 
115
  const vertexMessages = await Promise.all(
116
  messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
117
  const imageProcessor = makeImageProcessor(multimodal.image);
118
+ const documentProcessor = makeDocumentProcessor(multimodal.document);
119
+
120
+ const processedFilesWithNull =
121
  files && files.length > 0
122
+ ? await Promise.all(
123
+ files.map(async (file) => {
124
+ if (file.mime.includes("image")) {
125
+ const { image, mime } = await imageProcessor(file);
126
+
127
+ return { file: image, mime };
128
+ } else if (file.mime === "application/pdf" || file.mime === "text/plain") {
129
+ return documentProcessor(file);
130
+ }
131
+
132
+ return null;
133
+ })
134
+ )
135
  : [];
136
 
137
+ const processedFiles = processedFilesWithNull.filter((file) => file !== null);
138
+
139
  return {
140
  role: from === "user" ? "user" : "model",
141
  parts: [
142
  ...processedFiles.map((processedFile) => ({
143
  inlineData: {
144
+ data: processedFile.file.toString("base64"),
145
  mimeType: processedFile.mime,
146
  },
147
  })),
src/lib/server/models.ts CHANGED
@@ -63,6 +63,7 @@ const modelConfig = z.object({
63
  .passthrough()
64
  .optional(),
65
  multimodal: z.boolean().default(false),
 
66
  tools: z.boolean().default(false),
67
  unlisted: z.boolean().default(false),
68
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
 
63
  .passthrough()
64
  .optional(),
65
  multimodal: z.boolean().default(false),
66
+ multimodalAcceptedMimetypes: z.array(z.string()).optional(),
67
  tools: z.boolean().default(false),
68
  unlisted: z.boolean().default(false),
69
  embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
src/lib/types/Model.ts CHANGED
@@ -16,6 +16,7 @@ export type Model = Pick<
16
  | "datasetUrl"
17
  | "preprompt"
18
  | "multimodal"
 
19
  | "unlisted"
20
  | "tools"
21
  | "hasInferenceAPI"
 
16
  | "datasetUrl"
17
  | "preprompt"
18
  | "multimodal"
19
+ | "multimodalAcceptedMimetypes"
20
  | "unlisted"
21
  | "tools"
22
  | "hasInferenceAPI"
src/routes/+layout.server.ts CHANGED
@@ -190,6 +190,7 @@ export const load: LayoutServerLoad = async ({ locals, depends, request }) => {
190
  parameters: model.parameters,
191
  preprompt: model.preprompt,
192
  multimodal: model.multimodal,
 
193
  tools:
194
  model.tools &&
195
  // disable tools on huggingchat android app
 
190
  parameters: model.parameters,
191
  preprompt: model.preprompt,
192
  multimodal: model.multimodal,
193
+ multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
194
  tools:
195
  model.tools &&
196
  // disable tools on huggingchat android app