File size: 3,088 Bytes
b4f09f9 6520a09 b4f09f9 e835e1e b4f09f9 e835e1e b4f09f9 149f453 b4f09f9 e835e1e b4f09f9 149f453 b4f09f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import math
from io import BytesIO
from os import environ
from random import seed

import numpy as np
import pytest
import soundfile as sf
# Endpoint configuration, read from the environment at import time.
# A missing variable raises KeyError; a non-integer value for the two numeric
# settings raises ValueError from int().
ENDPOINT_URL = environ["ENDPOINT_URL"]  # used as base_url for the OpenAI client below
ENDPOINT_NUM_SAMPLES = int(environ["ENDPOINT_NUM_SAMPLES"])  # passed to dataset.take() in the fixture
ENDPOINT_TEST_SEED = int(environ["ENDPOINT_TEST_SEED"])  # seed applied to the RNGs below
# Seed Python's `random` module and NumPy's global RNG with the same value.
seed(ENDPOINT_TEST_SEED)
np.random.seed(ENDPOINT_TEST_SEED)
# NOTE(review): these imports sit after the seeding calls, presumably so that
# anything drawing random numbers at import time is already seeded - confirm
# before moving them to the top of the file.
from datasets import load_dataset, Dataset
from openai import OpenAI
# Module-level client shared by every test; requests are sent to ENDPOINT_URL.
# NOTE(review): no api_key is passed here, so the client presumably resolves it
# from the environment - confirm against the OpenAI client documentation.
client = OpenAI(base_url=ENDPOINT_URL)
@pytest.fixture
def dataset():
    """Provide the audio samples used by the endpoint tests.

    Loads the ``test`` split of the ``ami`` configuration of
    ``hf-audio/esb-datasets-test-only-sorted`` and returns the result of
    ``take(ENDPOINT_NUM_SAMPLES)`` on it.
    """
    ami_test_split = load_dataset(
        "hf-audio/esb-datasets-test-only-sorted",
        "ami",
        split="test",
    )
    return ami_test_split.take(ENDPOINT_NUM_SAMPLES)
@pytest.mark.parametrize("response_format", ["text", "json", "verbose_json"])
def test_seq_openai_client_no_params(dataset: Dataset, response_format: str):
    """Transcribe every sample sequentially without extra request parameters.

    Each sample is encoded to an in-memory WAV file and sent through the
    module-level OpenAI client. For ``verbose_json`` responses, at least one
    segment must be returned and every segment must carry a non-NaN
    ``avg_logprob`` and ``compression_ratio`` and report a temperature of 0.0
    (no temperature is sent with the request).

    Args:
        dataset: Samples exposing ``sample["audio"]["array"]`` and
            ``sample["audio"]["sampling_rate"]``.
        response_format: Value forwarded as ``response_format`` to the request.
    """
    for sample in dataset:
        audio = sample["audio"]
        with BytesIO() as audio_buffer:
            sf.write(audio_buffer, audio["array"], audio["sampling_rate"], format="WAV")
            # Rewind after writing so the upload is read from the first byte,
            # regardless of where the encoder left the cursor.
            audio_buffer.seek(0)

            # Only the request itself is guarded: assertion failures below must
            # surface as-is instead of being re-reported as request errors.
            try:
                response = client.audio.transcriptions.create(
                    file=audio_buffer, model="", response_format=response_format
                )
            except Exception as e:
                pytest.fail(f"Caught error while sending audio/transcriptions request: {e}")

        if response_format == "verbose_json":
            segments = response.segments
            assert segments, "No segments returned"
            # NaN compares unequal to everything (itself included), so a
            # `!= float('nan')` check can never fail; math.isnan is required.
            assert not any(math.isnan(s.avg_logprob) for s in segments), "avg_logprob is NaN"
            assert not any(
                math.isnan(s.compression_ratio) for s in segments
            ), "compression_ratio is NaN"
            assert all(s.temperature == 0.0 for s in segments), "temperature not equals 0.0"
@pytest.mark.parametrize("response_format", ["text", "json", "verbose_json"])
def test_seq_openai_client_temperature(dataset: Dataset, response_format: str):
    """Transcribe every sample sequentially with an explicit temperature of 1.0.

    Each sample is encoded to an in-memory WAV file and sent through the
    module-level OpenAI client with ``temperature=1.0``. For ``verbose_json``
    responses, at least one segment must be returned and every segment must
    carry a non-NaN ``avg_logprob`` and ``compression_ratio`` and echo back a
    temperature of 1.0.

    Args:
        dataset: Samples exposing ``sample["audio"]["array"]`` and
            ``sample["audio"]["sampling_rate"]``.
        response_format: Value forwarded as ``response_format`` to the request.
    """
    for sample in dataset:
        audio = sample["audio"]
        with BytesIO() as audio_buffer:
            sf.write(audio_buffer, audio["array"], audio["sampling_rate"], format="WAV")
            # Rewind after writing so the upload is read from the first byte,
            # regardless of where the encoder left the cursor.
            audio_buffer.seek(0)

            # Only the request itself is guarded: assertion failures below must
            # surface as-is instead of being re-reported as request errors.
            try:
                response = client.audio.transcriptions.create(
                    file=audio_buffer,
                    model="",
                    temperature=1.0,
                    response_format=response_format,
                )
            except Exception as e:
                pytest.fail(f"Caught error while sending audio/transcriptions request: {e}")

        if response_format == "verbose_json":
            segments = response.segments
            assert segments, "No segments returned"
            # NaN compares unequal to everything (itself included), so a
            # `!= float('nan')` check can never fail; math.isnan is required.
            assert not any(math.isnan(s.avg_logprob) for s in segments), "avg_logprob is NaN"
            assert not any(
                math.isnan(s.compression_ratio) for s in segments
            ), "compression_ratio is NaN"
            assert all(s.temperature == 1.0 for s in segments), "temperature not equals 1.0"
|