|
/****************************************************************************** |
|
* Copyright (c) 2011, Duane Merrill. All rights reserved. |
|
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. |
|
* |
|
* Redistribution and use in source and binary forms, with or without |
|
* modification, are permitted provided that the following conditions are met: |
|
* * Redistributions of source code must retain the above copyright |
|
* notice, this list of conditions and the following disclaimer. |
|
* * Redistributions in binary form must reproduce the above copyright |
|
* notice, this list of conditions and the following disclaimer in the |
|
* documentation and/or other materials provided with the distribution. |
|
* * Neither the name of the NVIDIA CORPORATION nor the |
|
* names of its contributors may be used to endorse or promote products |
|
* derived from this software without specific prior written permission. |
|
* |
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY |
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
* |
|
******************************************************************************/ |
|
|
|
/****************************************************************************** |
|
* Test of iterator utilities |
|
******************************************************************************/ |
|
|
|
// Ensure printing of CUDA runtime errors to console |
|
#define CUB_STDERR |
|
|
|
#include <iterator> |
|
#include <stdio.h> |
|
#include <typeinfo> |
|
|
|
#include <cub/iterator/arg_index_input_iterator.cuh> |
|
#include <cub/iterator/cache_modified_input_iterator.cuh> |
|
#include <cub/iterator/cache_modified_output_iterator.cuh> |
|
#include <cub/iterator/constant_input_iterator.cuh> |
|
#include <cub/iterator/counting_input_iterator.cuh> |
|
#include <cub/iterator/tex_obj_input_iterator.cuh> |
|
#include <cub/iterator/tex_ref_input_iterator.cuh> |
|
#include <cub/iterator/transform_input_iterator.cuh> |
|
|
|
#include <cub/util_type.cuh> |
|
#include <cub/util_allocator.cuh> |
|
|
|
#include "test_util.h" |
|
|
|
#include <thrust/device_ptr.h> |
|
#include <thrust/copy.h> |
|
|
|
using namespace cub; |
|
|
|
|
|
//--------------------------------------------------------------------- |
|
// Globals, constants and typedefs |
|
//--------------------------------------------------------------------- |
|
|
|
bool g_verbose = false; |
|
CachingDeviceAllocator g_allocator(true); |
|
|
|
// Dispatch types |
|
enum Backend |
|
{ |
|
CUB, // CUB method |
|
THRUST, // Thrust method |
|
CDP, // GPU-based (dynamic parallelism) dispatch to CUB method |
|
}; |
|
|
|
|
|
template <typename T> |
|
struct TransformOp |
|
{ |
|
// Increment transform |
|
__host__ __device__ __forceinline__ T operator()(T input) const |
|
{ |
|
T addend; |
|
InitValue(INTEGER_SEED, addend, 1); |
|
return input + addend; |
|
} |
|
}; |
|
|
|
struct SelectOp |
|
{ |
|
template <typename T> |
|
__host__ __device__ __forceinline__ bool operator()(T input) |
|
{ |
|
return true; |
|
} |
|
}; |
|
|
|
|
|
//--------------------------------------------------------------------- |
|
// Test kernels |
|
//--------------------------------------------------------------------- |
|
|
|
/** |
|
* Test random access input iterator |
|
*/ |
|
template < |
|
typename InputIteratorT, |
|
typename T> |
|
__global__ void Kernel( |
|
InputIteratorT d_in, |
|
T *d_out, |
|
InputIteratorT *d_itrs) |
|
{ |
|
d_out[0] = *d_in; // Value at offset 0 |
|
d_out[1] = d_in[100]; // Value at offset 100 |
|
d_out[2] = *(d_in + 1000); // Value at offset 1000 |
|
d_out[3] = *(d_in + 10000); // Value at offset 10000 |
|
|
|
d_in++; |
|
d_out[4] = d_in[0]; // Value at offset 1 |
|
|
|
d_in += 20; |
|
d_out[5] = d_in[0]; // Value at offset 21 |
|
d_itrs[0] = d_in; // Iterator at offset 21 |
|
|
|
d_in -= 10; |
|
d_out[6] = d_in[0]; // Value at offset 11; |
|
|
|
d_in -= 11; |
|
d_out[7] = d_in[0]; // Value at offset 0 |
|
d_itrs[1] = d_in; // Iterator at offset 0 |
|
} |
|
|
|
|
|
|
|
//--------------------------------------------------------------------- |
|
// Host testing subroutines |
|
//--------------------------------------------------------------------- |
|
|
|
|
|
/** |
|
* Run iterator test on device |
|
*/ |
|
template < |
|
typename InputIteratorT, |
|
typename T, |
|
int TEST_VALUES> |
|
void Test( |
|
InputIteratorT d_in, |
|
T (&h_reference)[TEST_VALUES]) |
|
{ |
|
// Allocate device arrays |
|
T *d_out = NULL; |
|
InputIteratorT *d_itrs = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_itrs, sizeof(InputIteratorT) * 2)); |
|
|
|
int compare; |
|
|
|
// Run unguarded kernel |
|
Kernel<<<1, 1>>>(d_in, d_out, d_itrs); |
|
|
|
CubDebugExit(cudaPeekAtLastError()); |
|
CubDebugExit(cudaDeviceSynchronize()); |
|
|
|
// Check results |
|
compare = CompareDeviceResults(h_reference, d_out, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tValues: %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Check iterator at offset 21 |
|
InputIteratorT h_itr = d_in + 21; |
|
compare = CompareDeviceResults(&h_itr, d_itrs, 1, g_verbose, g_verbose); |
|
printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Check iterator at offset 0 |
|
compare = CompareDeviceResults(&d_in, d_itrs + 1, 1, g_verbose, g_verbose); |
|
printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Cleanup |
|
if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); |
|
if (d_itrs) CubDebugExit(g_allocator.DeviceFree(d_itrs)); |
|
} |
|
|
|
|
|
/** |
|
* Test constant iterator |
|
*/ |
|
template <typename T> |
|
void TestConstant(T base) |
|
{ |
|
printf("\nTesting constant iterator on type %s (base: %lld)\n", typeid(T).name(), (unsigned long long) (base)); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
T h_reference[8] = {base, base, base, base, base, base, base, base}; |
|
ConstantInputIterator<T> d_itr(base); |
|
Test(d_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
int copy_items = 100; |
|
T *h_copy = new T[copy_items]; |
|
T *d_copy = NULL; |
|
|
|
for (int i = 0; i < copy_items; ++i) |
|
h_copy[i] = d_itr[i]; |
|
|
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
|
|
thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
if (h_copy) delete[] h_copy; |
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
} |
|
|
|
|
|
/** |
|
* Test counting iterator |
|
*/ |
|
template <typename T> |
|
void TestCounting(T base) |
|
{ |
|
printf("\nTesting counting iterator on type %s (base: %d) \n", typeid(T).name(), int(base)); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = base + 0; // Value at offset 0 |
|
h_reference[1] = base + 100; // Value at offset 100 |
|
h_reference[2] = base + 1000; // Value at offset 1000 |
|
h_reference[3] = base + 10000; // Value at offset 10000 |
|
h_reference[4] = base + 1; // Value at offset 1 |
|
h_reference[5] = base + 21; // Value at offset 21 |
|
h_reference[6] = base + 11; // Value at offset 11 |
|
h_reference[7] = base + 0; // Value at offset 0; |
|
|
|
CountingInputIterator<T> d_itr(base); |
|
Test(d_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
unsigned long long max_items = ((1ull << ((sizeof(T) * 8) - 1)) - 1); |
|
size_t copy_items = (size_t) CUB_MIN(max_items - base, 100); // potential issue with differencing overflows when T is a smaller type than can handle the offset |
|
T *h_copy = new T[copy_items]; |
|
T *d_copy = NULL; |
|
|
|
for (unsigned long long i = 0; i < copy_items; ++i) |
|
h_copy[i] = d_itr[i]; |
|
|
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
if (h_copy) delete[] h_copy; |
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
} |
|
|
|
|
|
/** |
|
* Test modified iterator |
|
*/ |
|
template <typename T, typename CastT> |
|
void TestModified() |
|
{ |
|
printf("\nTesting cache-modified iterator on type %s\n", typeid(T).name()); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
constexpr int TEST_VALUES = 11000; |
|
|
|
T *h_data = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
{ |
|
RandomBits(h_data[i]); |
|
} |
|
|
|
// Allocate device arrays |
|
T *d_data = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = h_data[0]; // Value at offset 0 |
|
h_reference[1] = h_data[100]; // Value at offset 100 |
|
h_reference[2] = h_data[1000]; // Value at offset 1000 |
|
h_reference[3] = h_data[10000]; // Value at offset 10000 |
|
h_reference[4] = h_data[1]; // Value at offset 1 |
|
h_reference[5] = h_data[21]; // Value at offset 21 |
|
h_reference[6] = h_data[11]; // Value at offset 11 |
|
h_reference[7] = h_data[0]; // Value at offset 0; |
|
|
|
Test(CacheModifiedInputIterator<LOAD_DEFAULT, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_CA, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_CG, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_CS, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_CV, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_LDG, T>((CastT*) d_data), h_reference); |
|
Test(CacheModifiedInputIterator<LOAD_VOLATILE, T>((CastT*) d_data), h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
T *d_copy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); |
|
|
|
CacheModifiedInputIterator<LOAD_CG, T> d_in_itr((CastT*) d_data); |
|
CacheModifiedOutputIterator<STORE_CG, T> d_out_itr((CastT*) d_copy); |
|
|
|
thrust::copy_if(d_in_itr, d_in_itr + TEST_VALUES, d_out_itr, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Cleanup |
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
|
|
if (h_data) delete[] h_data; |
|
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); |
|
} |
|
|
|
|
|
/** |
|
* Test transform iterator |
|
*/ |
|
template <typename T, typename CastT> |
|
void TestTransform() |
|
{ |
|
printf("\nTesting transform iterator on type %s\n", typeid(T).name()); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
constexpr int TEST_VALUES = 11000; |
|
|
|
T *h_data = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
{ |
|
InitValue(INTEGER_SEED, h_data[i], i); |
|
} |
|
|
|
// Allocate device arrays |
|
T *d_data = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
TransformOp<T> op; |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = op(h_data[0]); // Value at offset 0 |
|
h_reference[1] = op(h_data[100]); // Value at offset 100 |
|
h_reference[2] = op(h_data[1000]); // Value at offset 1000 |
|
h_reference[3] = op(h_data[10000]); // Value at offset 10000 |
|
h_reference[4] = op(h_data[1]); // Value at offset 1 |
|
h_reference[5] = op(h_data[21]); // Value at offset 21 |
|
h_reference[6] = op(h_data[11]); // Value at offset 11 |
|
h_reference[7] = op(h_data[0]); // Value at offset 0; |
|
|
|
TransformInputIterator<T, TransformOp<T>, CastT*> d_itr((CastT*) d_data, op); |
|
Test(d_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
T *h_copy = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
h_copy[i] = op(h_data[i]); |
|
|
|
T *d_copy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
|
|
thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Cleanup |
|
if (h_copy) delete[] h_copy; |
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
|
|
if (h_data) delete[] h_data; |
|
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); |
|
} |
|
|
|
|
|
/** |
|
* Test tex-obj texture iterator |
|
*/ |
|
template <typename T, typename CastT> |
|
void TestTexObj() |
|
{ |
|
printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
const unsigned int TEST_VALUES = 11000; |
|
const unsigned int DUMMY_OFFSET = 500; |
|
const unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; |
|
|
|
T *h_data = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
{ |
|
RandomBits(h_data[i]); |
|
} |
|
|
|
// Allocate device arrays |
|
T *d_data = NULL; |
|
T *d_dummy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = h_data[0]; // Value at offset 0 |
|
h_reference[1] = h_data[100]; // Value at offset 100 |
|
h_reference[2] = h_data[1000]; // Value at offset 1000 |
|
h_reference[3] = h_data[10000]; // Value at offset 10000 |
|
h_reference[4] = h_data[1]; // Value at offset 1 |
|
h_reference[5] = h_data[21]; // Value at offset 21 |
|
h_reference[6] = h_data[11]; // Value at offset 11 |
|
h_reference[7] = h_data[0]; // Value at offset 0; |
|
|
|
// Create and bind obj-based test iterator |
|
TexObjInputIterator<T> d_obj_itr; |
|
CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); |
|
|
|
Test(d_obj_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
T *d_copy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
|
|
CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); |
|
thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Cleanup |
|
CubDebugExit(d_obj_itr.UnbindTexture()); |
|
|
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
|
|
if (h_data) delete[] h_data; |
|
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); |
|
if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); |
|
} |
|
|
|
|
|
#if CUDART_VERSION >= 5050 |
|
|
|
/** |
|
* Test tex-ref texture iterator |
|
*/ |
|
template <typename T, typename CastT> |
|
void TestTexRef() |
|
{ |
|
printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
constexpr int TEST_VALUES = 11000; |
|
constexpr unsigned int DUMMY_OFFSET = 500; |
|
constexpr unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; |
|
|
|
T *h_data = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
{ |
|
RandomBits(h_data[i]); |
|
} |
|
|
|
// Allocate device arrays |
|
T *d_data = NULL; |
|
T *d_dummy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = h_data[0]; // Value at offset 0 |
|
h_reference[1] = h_data[100]; // Value at offset 100 |
|
h_reference[2] = h_data[1000]; // Value at offset 1000 |
|
h_reference[3] = h_data[10000]; // Value at offset 10000 |
|
h_reference[4] = h_data[1]; // Value at offset 1 |
|
h_reference[5] = h_data[21]; // Value at offset 21 |
|
h_reference[6] = h_data[11]; // Value at offset 11 |
|
h_reference[7] = h_data[0]; // Value at offset 0; |
|
|
|
// Create and bind ref-based test iterator |
|
TexRefInputIterator<T, __LINE__> d_ref_itr; |
|
CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); |
|
|
|
// Create and bind dummy iterator of same type to check with interferance |
|
TexRefInputIterator<T, __LINE__> d_ref_itr2; |
|
CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); |
|
|
|
Test(d_ref_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
T *d_copy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
|
|
CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); |
|
thrust::copy_if(d_ref_itr, d_ref_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
|
|
CubDebugExit(d_ref_itr.UnbindTexture()); |
|
CubDebugExit(d_ref_itr2.UnbindTexture()); |
|
|
|
if (h_data) delete[] h_data; |
|
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); |
|
if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); |
|
} |
|
|
|
|
|
/** |
|
* Test texture transform iterator |
|
*/ |
|
template <typename T, typename CastT> |
|
void TestTexTransform() |
|
{ |
|
printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout); |
|
|
|
// |
|
// Test iterator manipulation in kernel |
|
// |
|
|
|
constexpr int TEST_VALUES = 11000; |
|
|
|
T *h_data = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
{ |
|
InitValue(INTEGER_SEED, h_data[i], i); |
|
} |
|
|
|
// Allocate device arrays |
|
T *d_data = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); |
|
CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); |
|
|
|
TransformOp<T> op; |
|
|
|
// Initialize reference data |
|
T h_reference[8]; |
|
h_reference[0] = op(h_data[0]); // Value at offset 0 |
|
h_reference[1] = op(h_data[100]); // Value at offset 100 |
|
h_reference[2] = op(h_data[1000]); // Value at offset 1000 |
|
h_reference[3] = op(h_data[10000]); // Value at offset 10000 |
|
h_reference[4] = op(h_data[1]); // Value at offset 1 |
|
h_reference[5] = op(h_data[21]); // Value at offset 21 |
|
h_reference[6] = op(h_data[11]); // Value at offset 11 |
|
h_reference[7] = op(h_data[0]); // Value at offset 0; |
|
|
|
// Create and bind texture iterator |
|
typedef TexRefInputIterator<T, __LINE__> TextureIterator; |
|
|
|
TextureIterator d_tex_itr; |
|
CubDebugExit(d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); |
|
|
|
// Create transform iterator |
|
TransformInputIterator<T, TransformOp<T>, TextureIterator> xform_itr(d_tex_itr, op); |
|
|
|
Test(xform_itr, h_reference); |
|
|
|
#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer |
|
|
|
// |
|
// Test with thrust::copy_if() |
|
// |
|
|
|
T *h_copy = new T[TEST_VALUES]; |
|
for (int i = 0; i < TEST_VALUES; ++i) |
|
h_copy[i] = op(h_data[i]); |
|
|
|
T *d_copy = NULL; |
|
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); |
|
thrust::device_ptr<T> d_copy_wrapper(d_copy); |
|
|
|
thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); |
|
|
|
int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); |
|
printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); |
|
AssertEquals(0, compare); |
|
|
|
// Cleanup |
|
if (h_copy) delete[] h_copy; |
|
if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); |
|
|
|
#endif // THRUST_VERSION |
|
|
|
CubDebugExit(d_tex_itr.UnbindTexture()); |
|
if (h_data) delete[] h_data; |
|
if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); |
|
} |
|
|
|
#endif // CUDART_VERSION |
|
|
|
|
|
|
|
|
|
/** |
|
* Run non-integer tests |
|
*/ |
|
template <typename T, typename CastT> |
|
void Test(Int2Type<false> /* is_integer */) |
|
{ |
|
TestModified<T, CastT>(); |
|
TestTransform<T, CastT>(); |
|
|
|
#if CUB_CDP |
|
// Test tex-obj iterators if CUDA dynamic parallelism enabled |
|
TestTexObj<T, CastT>(type_string); |
|
#endif // CUB_CDP |
|
|
|
#if CUDART_VERSION >= 5050 |
|
// Test tex-ref iterators for CUDA 5.5 |
|
TestTexRef<T, CastT>(); |
|
TestTexTransform<T, CastT>(); |
|
#endif // CUDART_VERSION |
|
} |
|
|
|
/** |
|
* Run integer tests |
|
*/ |
|
template <typename T, typename CastT> |
|
void Test(Int2Type<true> /* is_integer */) |
|
{ |
|
TestConstant<T>(0); |
|
TestConstant<T>(99); |
|
|
|
TestCounting<T>(0); |
|
TestCounting<T>(99); |
|
|
|
// Run non-integer tests |
|
Test<T, CastT>(Int2Type<false>()); |
|
} |
|
|
|
/** |
|
* Run tests |
|
*/ |
|
template <typename T> |
|
void Test() |
|
{ |
|
enum { |
|
IS_INTEGER = (Traits<T>::CATEGORY == SIGNED_INTEGER) || (Traits<T>::CATEGORY == UNSIGNED_INTEGER) |
|
}; |
|
|
|
// Test non-const type |
|
Test<T, T>(Int2Type<IS_INTEGER>()); |
|
|
|
// Test non-const type |
|
Test<T, const T>(Int2Type<IS_INTEGER>()); |
|
} |
|
|
|
|
|
/** |
|
* Main |
|
*/ |
|
int main(int argc, char** argv) |
|
{ |
|
// Initialize command line |
|
CommandLineArgs args(argc, argv); |
|
g_verbose = args.CheckCmdLineFlag("v"); |
|
|
|
// Print usage |
|
if (args.CheckCmdLineFlag("help")) |
|
{ |
|
printf("%s " |
|
"[--device=<device-id>] " |
|
"[--v] " |
|
"\n", argv[0]); |
|
exit(0); |
|
} |
|
|
|
// Initialize device |
|
CubDebugExit(args.DeviceInit()); |
|
|
|
// Get ptx version |
|
int ptx_version = 0; |
|
CubDebugExit(PtxVersion(ptx_version)); |
|
|
|
// Evaluate different data types |
|
Test<char>(); |
|
Test<short>(); |
|
Test<int>(); |
|
Test<long>(); |
|
Test<long long>(); |
|
Test<float>(); |
|
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted |
|
Test<double>(); |
|
|
|
Test<char2>(); |
|
Test<short2>(); |
|
Test<int2>(); |
|
Test<long2>(); |
|
Test<longlong2>(); |
|
Test<float2>(); |
|
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted |
|
Test<double2>(); |
|
|
|
Test<char3>(); |
|
Test<short3>(); |
|
Test<int3>(); |
|
Test<long3>(); |
|
Test<longlong3>(); |
|
Test<float3>(); |
|
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted |
|
Test<double3>(); |
|
|
|
Test<char4>(); |
|
Test<short4>(); |
|
Test<int4>(); |
|
Test<long4>(); |
|
Test<longlong4>(); |
|
Test<float4>(); |
|
if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted |
|
Test<double4>(); |
|
|
|
Test<TestFoo>(); |
|
Test<TestBar>(); |
|
|
|
printf("\nTest complete\n"); fflush(stdout); |
|
|
|
return 0; |
|
} |
|
|
|
|
|
|
|
|