# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import pytest
from transformers.utils import is_flash_attn_2_available, is_torch_sdpa_available

from llamafactory.extras.packages import is_transformers_version_greater_than
from llamafactory.train.test_utils import load_infer_model


TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")

INFER_ARGS = {
    "model_name_or_path": TINY_LLAMA3,
    "template": "llama3",
}

# The per-implementation attention classes checked below were removed in the
# transformers 4.48 attention refactor, so skip this test on newer versions.
# This also puts the `pytest` and `is_transformers_version_greater_than` imports to use.
@pytest.mark.skipif(is_transformers_version_greater_than("4.48"), reason="Attention refactor.")
def test_attention():
    # Collect the attention implementations usable in the current environment.
    attention_available = ["disabled"]
    if is_torch_sdpa_available():
        attention_available.append("sdpa")

    if is_flash_attn_2_available():
        attention_available.append("fa2")

    # Expected attention module class name for each `flash_attn` setting.
    llama_attention_classes = {
        "disabled": "LlamaAttention",
        "sdpa": "LlamaSdpaAttention",
        "fa2": "LlamaFlashAttention2",
    }
    for requested_attention in attention_available:
        model = load_infer_model(flash_attn=requested_attention, **INFER_ARGS)
        # Every attention module in the loaded model must match the requested implementation.
        for module in model.modules():
            if "Attention" in module.__class__.__name__:
                assert module.__class__.__name__ == llama_attention_classes[requested_attention]
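
# A minimal usage sketch (not part of the original test) for manually inspecting
# which attention class a given `flash_attn` setting resolves to; the "sdpa" value
# and the expected printout assume torch SDPA support and transformers < 4.48.
if __name__ == "__main__":
    model = load_infer_model(flash_attn="sdpa", **INFER_ARGS)
    attention_names = {
        module.__class__.__name__ for module in model.modules() if "Attention" in module.__class__.__name__
    }
    print(attention_names)  # expected on transformers < 4.48: {"LlamaSdpaAttention"}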