# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#
# This source code is licensed under the BSD license found in the
# LICENSE file in the root directory of this source tree.

import pytest
import torch

from xformers.components import Activation
from xformers.components.feedforward import FEEDFORWARD_REGISTRY, build_feedforward
from xformers.components.feedforward.mixture_of_experts import GateConfig
from xformers.helpers.test_utils import init_torch_distributed_local
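
# Shared test dimensions and dropout, kept small so the forward/backward passes stay cheap.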
BATCH = 4
SEQ = 256
EMBD = 16
LATENT = 128
DROPOUT = 0.5

DEVICES = (
    [torch.device("cpu")] if not torch.cuda.is_available() else [torch.device("cuda")]
)
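
# The registry is populated at import time; fail early if no feedforward layer was registered.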
assert FEEDFORWARD_REGISTRY.keys(), "Feedforward layers should have been registered"


@pytest.mark.parametrize("feedforward_name", FEEDFORWARD_REGISTRY.keys())
@pytest.mark.parametrize("activation", [a.value for a in Activation])
@pytest.mark.parametrize("device", DEVICES)
def test_feedforward(
    feedforward_name: str, activation: Activation, device: torch.device
):
    test_config = {
        "name": feedforward_name,
        "dim_model": LATENT,
        "dropout": DROPOUT,
        "activation": activation,
        "hidden_layer_multiplier": 4,
        "number_of_experts": 4,  # MoE
        "gate": "top_2",  # MoE
    }

    if feedforward_name == "MixtureOfExperts":
        init_torch_distributed_local()

    # dummy, just check construction and dimensions in the FW pass
    ffw = build_feedforward(test_config)

    if ffw.requires_cuda and not device.type == "cuda":
        # pyre-fixme[29]: The library function `pytest.skip` is not supported by Pyre.
        pytest.skip("This MLP requires CUDA and current device does not match")

    inputs = torch.rand(BATCH, SEQ, LATENT, device=device)
    ffw = ffw.to(device)

    _ = ffw(inputs)
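
# Minimal expert module used to exercise the `expert_constructor` path in test_moe below.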
def get_expert():
    return torch.nn.Linear(LATENT, LATENT, bias=False)


@pytest.mark.skipif(not torch.cuda.is_available(), reason="This test requires CUDA")
@pytest.mark.parametrize("gate", [g.value for g in GateConfig])
@pytest.mark.parametrize("number_of_local_experts", [4, None])
@pytest.mark.parametrize("expert_constructor", [None, get_expert])
def test_moe(gate, number_of_local_experts, expert_constructor):
    test_config = {
        "name": "MixtureOfExperts",
        "dim_model": LATENT,
        "dropout": DROPOUT,
        "activation": Activation.ReLU,
        "hidden_layer_multiplier": 4,
        "number_of_experts": 4,
        "number_of_local_experts": number_of_local_experts,
        "gate": gate,
        "expert_constructor": expert_constructor,
    }

    init_torch_distributed_local()

    # dummy, just check construction and dimensions in the FW pass
    ffw = build_feedforward(test_config)
    inputs = torch.rand(BATCH, SEQ, LATENT, device=torch.device("cuda"))
    ffw = ffw.to(torch.device("cuda"))

    outputs = ffw(inputs)
    loss = torch.sum(outputs)
    loss.backward()