Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
goupilew
committed on
[vertex] Add PDF/plain text support (#1520)
Browse files
* [vertex] Add PDF support
* [vertex] Fix lint
* [vertex] Add support for text/plain
src/lib/components/chat/ChatWindow.svelte
CHANGED
@@ -213,7 +213,7 @@
|
|
213 |
...(!$page.data?.assistant && currentModel.tools
|
214 |
? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
|
215 |
: []),
|
216 |
-
...(currentModel.multimodal ? ["image/*"] : []),
|
217 |
];
|
218 |
|
219 |
$: isFileUploadEnabled = activeMimeTypes.length > 0;
|
|
|
213 |
...(!$page.data?.assistant && currentModel.tools
|
214 |
? activeTools.flatMap((tool: ToolFront) => tool.mimeTypes ?? [])
|
215 |
: []),
|
216 |
+
...(currentModel.multimodal ? currentModel.multimodalAcceptedMimetypes ?? ["image/*"] : []),
|
217 |
];
|
218 |
|
219 |
$: isFileUploadEnabled = activeMimeTypes.length > 0;
|
src/lib/server/endpoints/document.ts
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { MessageFile } from "$lib/types/Message";
|
2 |
+
import { z } from "zod";
|
3 |
+
|
4 |
+
/**
 * Limits that a file processor enforces on an uploaded message file.
 *
 * @typeParam TMimeType - Union of the MIME type strings the processor accepts.
 */
export interface FileProcessorOptions<TMimeType extends string = string> {
	/** MIME types the processor accepts; any other type is rejected. */
	supportedMimeTypes: TMimeType[];
	/** Upper bound on the decoded file size, in megabytes. */
	maxSizeInMB: number;
}
|
8 |
+
|
9 |
+
/**
 * Signature of an asynchronous file processor: it receives a message file and
 * resolves to the file's binary content together with the MIME type of that content.
 *
 * NOTE(review): this alias is named `ImageProcessor` but lives in the document
 * module and is not referenced by the other definitions here; it looks like a
 * leftover from the image processor module. Confirm before relying on it.
 *
 * @typeParam TMimeType - Union of the MIME type strings the processor may output.
 */
export type ImageProcessor<TMimeType extends string = string> = (file: MessageFile) => Promise<{
	file: Buffer;
	mime: TMimeType;
}>;
|
13 |
+
|
14 |
+
/**
 * Builds a zod schema for document-processor options in which each field, and the
 * options object as a whole, falls back to the supplied defaults when omitted.
 *
 * The MIME types listed in `defaults.supportedMimeTypes` are also the only values
 * the schema accepts for `supportedMimeTypes`.
 *
 * @param defaults - Fallback options, also used to derive the allowed MIME types.
 *   Assumed to contain at least one MIME type; an empty list is not guarded
 *   against here (TODO confirm callers never pass one).
 * @returns A zod schema describing `{ supportedMimeTypes, maxSizeInMB }`.
 */
export const createDocumentProcessorOptionsValidator = <TMimeType extends string = string>(
	defaults: FileProcessorOptions<TMimeType>
) => {
	return z
		.object({
			supportedMimeTypes: z
				.array(
					// z.enum is typed over a non-empty tuple, so the first element is
					// passed explicitly and the remainder is spread after it.
					z.enum<string, [TMimeType, ...TMimeType[]]>([
						defaults.supportedMimeTypes[0],
						...defaults.supportedMimeTypes.slice(1),
					])
				)
				.default(defaults.supportedMimeTypes),
			maxSizeInMB: z.number().positive().default(defaults.maxSizeInMB),
		})
		.default(defaults);
};
|
31 |
+
|
32 |
+
/**
 * Signature of a synchronous document processor: it receives a message file and
 * returns the file's binary content together with its validated MIME type.
 *
 * @typeParam TMimeType - Union of the MIME type strings the processor accepts.
 */
export type DocumentProcessor<TMimeType extends string = string> = (file: MessageFile) => {
	file: Buffer;
	mime: TMimeType;
};
|
36 |
+
|
37 |
+
/**
 * Creates a document processor bound to the given limits.
 *
 * The returned function decodes the file's base64 `value`, rejects it when the
 * decoded size exceeds `maxSizeInMB`, then rejects it when its MIME type is not
 * listed in `supportedMimeTypes`.
 *
 * @param options - Accepted MIME types and the maximum decoded size in megabytes.
 * @returns A synchronous processor mapping a message file to `{ file, mime }`.
 * @throws Error from the returned processor ("Document is too large", or a
 *   message naming the unsupported MIME type).
 */
export function makeDocumentProcessor<TMimeType extends string = string>(
	options: FileProcessorOptions<TMimeType>
): DocumentProcessor<TMimeType> {
	return (file) => {
		const { supportedMimeTypes, maxSizeInMB } = options;
		const { mime, value } = file;

		const buffer = Buffer.from(value, "base64");

		// The limit uses decimal megabytes: 1 MB = 1000 * 1000 bytes.
		const tooLargeInBytes = buffer.byteLength > maxSizeInMB * 1000 * 1000;

		if (tooLargeInBytes) {
			throw new Error("Document is too large");
		}

		const outputMime = validateMimeType(supportedMimeTypes, mime);

		return { file: buffer, mime: outputMime };
	};
}

/**
 * Checks that `mime` is one of `supportedMimes` and returns it unchanged.
 *
 * The comparison is an exact string match.
 *
 * @param supportedMimes - Allowed MIME type strings.
 * @param mime - MIME type to check.
 * @returns `mime`, typed as a member of `supportedMimes`.
 * @throws Error when `mime` is not in `supportedMimes`; the message lists the
 *   supported types.
 */
const validateMimeType = <T extends readonly string[]>(
	supportedMimes: T,
	mime: string
): T[number] => {
	if (!supportedMimes.includes(mime)) {
		const supportedMimesStr = supportedMimes.join(", ");

		throw new Error(`Mimetype "${mime}" not found in supported mimes: ${supportedMimesStr}`);
	}

	return mime;
};
|
src/lib/server/endpoints/google/endpointVertex.ts
CHANGED
@@ -10,6 +10,7 @@ import { z } from "zod";
|
|
10 |
import type { Message } from "$lib/types/Message";
|
11 |
import type { TextGenerationStreamOutput } from "@huggingface/inference";
|
12 |
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
|
|
|
13 |
|
14 |
export const endpointVertexParametersSchema = z.object({
|
15 |
weight: z.number().int().positive().default(1),
|
@@ -39,12 +40,17 @@ export const endpointVertexParametersSchema = z.object({
|
|
39 |
"image/avif",
|
40 |
"image/tiff",
|
41 |
"image/gif",
|
|
|
42 |
],
|
43 |
preferredMimeType: "image/webp",
|
44 |
-
maxSizeInMB:
|
45 |
maxWidth: 4096,
|
46 |
maxHeight: 4096,
|
47 |
}),
|
|
|
|
|
|
|
|
|
48 |
})
|
49 |
.default({}),
|
50 |
});
|
@@ -109,17 +115,33 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSch
|
|
109 |
const vertexMessages = await Promise.all(
|
110 |
messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
|
111 |
const imageProcessor = makeImageProcessor(multimodal.image);
|
112 |
-
const
|
|
|
|
|
113 |
files && files.length > 0
|
114 |
-
? await Promise.all(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
: [];
|
116 |
|
|
|
|
|
117 |
return {
|
118 |
role: from === "user" ? "user" : "model",
|
119 |
parts: [
|
120 |
...processedFiles.map((processedFile) => ({
|
121 |
inlineData: {
|
122 |
-
data: processedFile.
|
123 |
mimeType: processedFile.mime,
|
124 |
},
|
125 |
})),
|
|
|
10 |
import type { Message } from "$lib/types/Message";
|
11 |
import type { TextGenerationStreamOutput } from "@huggingface/inference";
|
12 |
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
|
13 |
+
import { createDocumentProcessorOptionsValidator, makeDocumentProcessor } from "../document";
|
14 |
|
15 |
export const endpointVertexParametersSchema = z.object({
|
16 |
weight: z.number().int().positive().default(1),
|
|
|
40 |
"image/avif",
|
41 |
"image/tiff",
|
42 |
"image/gif",
|
43 |
+
"application/pdf",
|
44 |
],
|
45 |
preferredMimeType: "image/webp",
|
46 |
+
maxSizeInMB: 20,
|
47 |
maxWidth: 4096,
|
48 |
maxHeight: 4096,
|
49 |
}),
|
50 |
+
document: createDocumentProcessorOptionsValidator({
|
51 |
+
supportedMimeTypes: ["application/pdf", "text/plain"],
|
52 |
+
maxSizeInMB: 20,
|
53 |
+
}),
|
54 |
})
|
55 |
.default({}),
|
56 |
});
|
|
|
115 |
const vertexMessages = await Promise.all(
|
116 |
messages.map(async ({ from, content, files }: Omit<Message, "id">): Promise<Content> => {
|
117 |
const imageProcessor = makeImageProcessor(multimodal.image);
|
118 |
+
const documentProcessor = makeDocumentProcessor(multimodal.document);
|
119 |
+
|
120 |
+
const processedFilesWithNull =
|
121 |
files && files.length > 0
|
122 |
+
? await Promise.all(
|
123 |
+
files.map(async (file) => {
|
124 |
+
if (file.mime.includes("image")) {
|
125 |
+
const { image, mime } = await imageProcessor(file);
|
126 |
+
|
127 |
+
return { file: image, mime };
|
128 |
+
} else if (file.mime === "application/pdf" || file.mime === "text/plain") {
|
129 |
+
return documentProcessor(file);
|
130 |
+
}
|
131 |
+
|
132 |
+
return null;
|
133 |
+
})
|
134 |
+
)
|
135 |
: [];
|
136 |
|
137 |
+
const processedFiles = processedFilesWithNull.filter((file) => file !== null);
|
138 |
+
|
139 |
return {
|
140 |
role: from === "user" ? "user" : "model",
|
141 |
parts: [
|
142 |
...processedFiles.map((processedFile) => ({
|
143 |
inlineData: {
|
144 |
+
data: processedFile.file.toString("base64"),
|
145 |
mimeType: processedFile.mime,
|
146 |
},
|
147 |
})),
|
src/lib/server/models.ts
CHANGED
@@ -63,6 +63,7 @@ const modelConfig = z.object({
|
|
63 |
.passthrough()
|
64 |
.optional(),
|
65 |
multimodal: z.boolean().default(false),
|
|
|
66 |
tools: z.boolean().default(false),
|
67 |
unlisted: z.boolean().default(false),
|
68 |
embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
|
|
|
63 |
.passthrough()
|
64 |
.optional(),
|
65 |
multimodal: z.boolean().default(false),
|
66 |
+
multimodalAcceptedMimetypes: z.array(z.string()).optional(),
|
67 |
tools: z.boolean().default(false),
|
68 |
unlisted: z.boolean().default(false),
|
69 |
embeddingModel: validateEmbeddingModelByName(embeddingModels).optional(),
|
src/lib/types/Model.ts
CHANGED
@@ -16,6 +16,7 @@ export type Model = Pick<
|
|
16 |
| "datasetUrl"
|
17 |
| "preprompt"
|
18 |
| "multimodal"
|
|
|
19 |
| "unlisted"
|
20 |
| "tools"
|
21 |
| "hasInferenceAPI"
|
|
|
16 |
| "datasetUrl"
|
17 |
| "preprompt"
|
18 |
| "multimodal"
|
19 |
+
| "multimodalAcceptedMimetypes"
|
20 |
| "unlisted"
|
21 |
| "tools"
|
22 |
| "hasInferenceAPI"
|
src/routes/+layout.server.ts
CHANGED
@@ -190,6 +190,7 @@ export const load: LayoutServerLoad = async ({ locals, depends, request }) => {
|
|
190 |
parameters: model.parameters,
|
191 |
preprompt: model.preprompt,
|
192 |
multimodal: model.multimodal,
|
|
|
193 |
tools:
|
194 |
model.tools &&
|
195 |
// disable tools on huggingchat android app
|
|
|
190 |
parameters: model.parameters,
|
191 |
preprompt: model.preprompt,
|
192 |
multimodal: model.multimodal,
|
193 |
+
multimodalAcceptedMimetypes: model.multimodalAcceptedMimetypes,
|
194 |
tools:
|
195 |
model.tools &&
|
196 |
// disable tools on huggingchat android app
|