Add support for Anthropic models via AWS Bedrock (#1413)
* Add support for Anthropic models via AWS Bedrock
* deps
* Fixed type errors
* Temporary fix for continue button showing up on Claude
* Fix continue button issue by setting the last message token's special to true
---------
Co-authored-by: Nathan Sarrazin <[email protected]>
- package-lock.json +0 -0
- package.json +1 -0
- src/lib/server/endpoints/aws/endpointBedrock.ts +150 -0
- src/lib/server/endpoints/endpoints.ts +3 -0
- src/lib/server/models.ts +2 -0
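With this PR, a Claude model served through Bedrock can be declared in chat-ui's `MODELS` environment variable. A minimal sketch of one entry, assuming the usual JSON list format; the Bedrock model ID is illustrative, and `region` and `anthropicVersion` can be omitted to take the schema defaults (`us-east-1` and `bedrock-2023-05-31`):

```json
{
  "name": "claude-3-sonnet",
  "id": "anthropic.claude-3-sonnet-20240229-v1:0",
  "endpoints": [
    {
      "type": "bedrock",
      "region": "us-east-1",
      "anthropicVersion": "bedrock-2023-05-31"
    }
  ]
}
```

Note that `BedrockRuntimeClient` is constructed with only a `region`, so AWS credentials are resolved by the SDK's default provider chain (environment variables, shared credentials file, or an attached IAM role).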
package-lock.json
CHANGED
The diff for this file is too large to render.
See raw diff
package.json
CHANGED
```diff
@@ -108,6 +108,7 @@
     "zod": "^3.22.3"
   },
   "optionalDependencies": {
+    "@aws-sdk/client-bedrock-runtime": "^3.631.0",
     "@anthropic-ai/sdk": "^0.25.0",
     "@anthropic-ai/vertex-sdk": "^0.4.1",
     "@google-cloud/vertexai": "^1.1.0",
```
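The AWS client lands under `optionalDependencies` next to the other provider SDKs, so a deployment that never configures a Bedrock endpoint still works even if `@aws-sdk/client-bedrock-runtime` fails to install; it can also be added explicitly with `npm install @aws-sdk/client-bedrock-runtime`.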
src/lib/server/endpoints/aws/endpointBedrock.ts
ADDED
@@ -0,0 +1,150 @@
```ts
import { z } from "zod";
import type { Endpoint } from "../endpoints";
import type { TextGenerationStreamOutput } from "@huggingface/inference";
import {
	BedrockRuntimeClient,
	InvokeModelWithResponseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";
import { createImageProcessorOptionsValidator, makeImageProcessor } from "../images";
import type { EndpointMessage } from "../endpoints";
import type { MessageFile } from "$lib/types/Message";

export const endpointBedrockParametersSchema = z.object({
	weight: z.number().int().positive().default(1),
	type: z.literal("bedrock"),
	region: z.string().default("us-east-1"),
	model: z.any(),
	anthropicVersion: z.string().default("bedrock-2023-05-31"),
	multimodal: z
		.object({
			image: createImageProcessorOptionsValidator({
				supportedMimeTypes: [
					"image/png",
					"image/jpeg",
					"image/webp",
					"image/avif",
					"image/tiff",
					"image/gif",
				],
				preferredMimeType: "image/webp",
				maxSizeInMB: Infinity,
				maxWidth: 4096,
				maxHeight: 4096,
			}),
		})
		.default({}),
});

export async function endpointBedrock(
	input: z.input<typeof endpointBedrockParametersSchema>
): Promise<Endpoint> {
	const { region, model, anthropicVersion, multimodal } =
		endpointBedrockParametersSchema.parse(input);
	const client = new BedrockRuntimeClient({
		region,
	});
	const imageProcessor = makeImageProcessor(multimodal.image);

	return async ({ messages, preprompt, generateSettings }) => {
		let system = preprompt;
		// Use the first message as the system prompt if it's of type "system"
		if (messages?.[0]?.from === "system") {
			system = messages[0].content;
			messages = messages.slice(1); // Remove the first system message from the array
		}

		const formattedMessages = await prepareMessages(messages, imageProcessor);

		let tokenId = 0;
		const parameters = { ...model.parameters, ...generateSettings };
		return (async function* () {
			const command = new InvokeModelWithResponseStreamCommand({
				body: Buffer.from(
					JSON.stringify({
						anthropic_version: anthropicVersion,
						max_tokens: parameters.max_new_tokens ? parameters.max_new_tokens : 4096,
						messages: formattedMessages,
						system,
					}),
					"utf-8"
				),
				contentType: "application/json",
				accept: "application/json",
				modelId: model.id,
				trace: "DISABLED",
			});

			const response = await client.send(command);

			let text = "";

			for await (const item of response.body ?? []) {
				const chunk = JSON.parse(new TextDecoder().decode(item.chunk?.bytes));
				const chunk_type = chunk.type;

				if (chunk_type === "content_block_delta") {
					text += chunk.delta.text;
					yield {
						token: {
							id: tokenId++,
							text: chunk.delta.text,
							logprob: 0,
							special: false,
						},
						generated_text: null,
						details: null,
					} satisfies TextGenerationStreamOutput;
				} else if (chunk_type === "message_stop") {
					yield {
						token: {
							id: tokenId++,
							text: "",
							logprob: 0,
							special: true,
						},
						generated_text: text,
						details: null,
					} satisfies TextGenerationStreamOutput;
				}
			}
		})();
	};
}

// Prepare the messages excluding system prompts
async function prepareMessages(
	messages: EndpointMessage[],
	imageProcessor: ReturnType<typeof makeImageProcessor>
) {
	const formattedMessages = [];

	for (const message of messages) {
		const content = [];

		if (message.files?.length) {
			content.push(...(await prepareFiles(imageProcessor, message.files)));
		}
		content.push({ type: "text", text: message.content });

		const lastMessage = formattedMessages[formattedMessages.length - 1];
		if (lastMessage && lastMessage.role === message.from) {
			// If the last message has the same role, merge the content
			lastMessage.content.push(...content);
		} else {
			formattedMessages.push({ role: message.from, content });
		}
	}
	return formattedMessages;
}

// Process files and convert them to base64 encoded strings
async function prepareFiles(
	imageProcessor: ReturnType<typeof makeImageProcessor>,
	files: MessageFile[]
) {
	const processedFiles = await Promise.all(files.map(imageProcessor));
	return processedFiles.map((file) => ({
		type: "image",
		source: { type: "base64", media_type: "image/jpeg", data: file.image.toString("base64") },
	}));
}
```
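The `message_stop` branch carries the continue-button fix from the commit list: the closing token is yielded with `special: true`, so the frontend sees a clean end of generation instead of offering to continue. A minimal consumption sketch, assuming a hypothetical model object and message shape (chat-ui wires these in from `models.ts`):

```ts
// Hypothetical driver: stream a completion and rely on the special
// end token, the same way the chat-ui generation pipeline does.
const generate = await endpointBedrock({
	type: "bedrock",
	model: { id: "anthropic.claude-3-sonnet-20240229-v1:0", parameters: {} },
});

const stream = await generate({
	messages: [{ from: "user", content: "Hello!" }],
	preprompt: "You are a helpful assistant.",
	generateSettings: { max_new_tokens: 256 },
});

for await (const output of stream) {
	if (output.token.special) {
		// The message_stop token: generated_text holds the full reply,
		// and special=true tells the UI the stream ended cleanly.
		console.log(output.generated_text);
	} else {
		process.stdout.write(output.token.text);
	}
}
```

Because ordinary deltas are emitted with `special: false` and only the terminal token is special, the stream no longer looks truncated to the frontend.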
src/lib/server/endpoints/endpoints.ts
CHANGED
```diff
@@ -9,6 +9,7 @@ import endpointLlamacpp, { endpointLlamacppParametersSchema } from "./llamacpp/e
 import endpointOllama, { endpointOllamaParametersSchema } from "./ollama/endpointOllama";
 import endpointVertex, { endpointVertexParametersSchema } from "./google/endpointVertex";
 import endpointGenAI, { endpointGenAIParametersSchema } from "./google/endpointGenAI";
+import { endpointBedrock, endpointBedrockParametersSchema } from "./aws/endpointBedrock";
 
 import {
 	endpointAnthropic,
@@ -61,6 +62,7 @@ export const endpoints = {
 	tgi: endpointTgi,
 	anthropic: endpointAnthropic,
 	anthropicvertex: endpointAnthropicVertex,
+	bedrock: endpointBedrock,
 	aws: endpointAws,
 	openai: endpointOai,
 	llamacpp: endpointLlamacpp,
@@ -76,6 +78,7 @@ export const endpointSchema = z.discriminatedUnion("type", [
 	endpointAnthropicParametersSchema,
 	endpointAnthropicVertexParametersSchema,
 	endpointAwsParametersSchema,
+	endpointBedrockParametersSchema,
 	endpointOAIParametersSchema,
 	endpointTgiParametersSchema,
 	endpointLlamacppParametersSchema,
```
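Registration is two-part: the `bedrock` key in the `endpoints` map supplies the factory, and `endpointBedrockParametersSchema` joins the `z.discriminatedUnion("type", [...])` so config validation dispatches on the `type` field. A self-contained zod sketch with stand-in schemas (not the real parameter schemas) shows the dispatch behavior:

```ts
import { z } from "zod";

// Stand-ins for the real endpoint parameter schemas.
const bedrockSchema = z.object({ type: z.literal("bedrock"), region: z.string().default("us-east-1") });
const tgiSchema = z.object({ type: z.literal("tgi"), url: z.string().url() });

const endpointSchema = z.discriminatedUnion("type", [bedrockSchema, tgiSchema]);

// The "type" value alone selects which branch validates the object.
endpointSchema.parse({ type: "bedrock" }); // -> { type: "bedrock", region: "us-east-1" }
endpointSchema.parse({ type: "tgi", url: "http://localhost:8080" }); // validated as tgi
```

This is why adding a provider only ever touches these two lists plus the `switch` in `models.ts` below.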
src/lib/server/models.ts
CHANGED
```diff
@@ -280,6 +280,8 @@ const addEndpoint = (m: Awaited<ReturnType<typeof processModel>>) => ({
 			return endpoints.anthropic(args);
 		case "anthropic-vertex":
 			return endpoints.anthropicvertex(args);
+		case "bedrock":
+			return endpoints.bedrock(args);
 		case "aws":
 			return await endpoints.aws(args);
 		case "openai":
```