[TESTS] Simplified testing of half-precision transposes
This commit is contained in:
committed by
Philippe Tillet
parent
4ae0e28b32
commit
ec2cb2155e
@@ -12,16 +12,16 @@ int main() {
|
||||
typedef std::tuple<std::vector<int>, std::vector<int>, std::vector<int>> config_t;
|
||||
std::vector<config_t> configs = {
|
||||
{{4096*4096}, {0}, {0}},
|
||||
{{4096, 4096}, {0, 1}, {1, 0}},
|
||||
{{4096, 4096}, {0, 1}, {0, 1}},
|
||||
{{4096, 4096}, {0, 1}, {1, 0}},
|
||||
{{4096, 4096}, {1, 0}, {0, 1}},
|
||||
{{4096, 4096}, {0, 1}, {0, 1}},
|
||||
{{4096, 4096}, {1, 0}, {1, 0}},
|
||||
{{256, 256, 256}, {0, 1, 2}, {0, 1, 2}},
|
||||
{{256, 256, 256}, {0, 1, 2}, {0, 2, 1}},
|
||||
{{256, 256, 256}, {1, 0, 2}, {1, 2, 0}},
|
||||
{{256, 256, 256}, {1, 2, 0}, {1, 0, 2}},
|
||||
{{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
|
||||
{{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
|
||||
{{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}
|
||||
// {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
|
||||
// {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
|
||||
};
|
||||
// does the work
|
||||
std::vector<int32_t> shape;
|
||||
@@ -29,7 +29,7 @@ int main() {
|
||||
for(const auto& c: configs){
|
||||
std::tie(shape, ord_x, ord_y) = c;
|
||||
std::cout << "// " << c << std::flush;
|
||||
for(auto perf: bench_copy_nd(stream, shape, ord_x, ord_y))
|
||||
for(auto perf: bench_copy_nd(stream, HALF, shape, ord_x, ord_y))
|
||||
std::cout << ", " << perf << std::flush;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
@@ -2,6 +2,7 @@
|
||||
#include "triton/driver/stream.h"
|
||||
#include "triton/runtime/function.h"
|
||||
#include "triton/tools/bench.hpp"
|
||||
#include "triton/external/half.hpp"
|
||||
#include "util.h"
|
||||
|
||||
int32_t off(const std::vector<int32_t>& idx, const std::vector<int32_t>& strides) {
|
||||
@@ -16,6 +17,13 @@ enum run_mode_t {
|
||||
TEST
|
||||
};
|
||||
|
||||
// Selects the element type used by the copy benchmark/test entry points.
// bench_copy_nd and test_copy_nd switch on this value to choose which
// triton_copy_nd<T> instantiation to call.
enum dtype_t {
|
||||
// Handled by `case FLOAT`, which calls triton_copy_nd<float>.
FLOAT,
|
||||
// Handled by `case HALF`, which calls triton_copy_nd<half_float::half>.
HALF,
|
||||
// NOTE(review): the visible switch statements have no `case DOUBLE`; this
// value falls through to `default: break` and nothing is run.
DOUBLE
|
||||
};
|
||||
|
||||
|
||||
template<class T>
|
||||
void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
|
||||
const std::vector<int32_t>& shape,
|
||||
@@ -46,13 +54,28 @@ void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
|
||||
y[off({i, j, k}, y_strides)] = x[off({i, j, k}, x_strides)];
|
||||
}
|
||||
|
||||
// Trait mapping an element type T to a type-name string, exposed as the
// static member `value`. triton_copy_nd reads it via to_string<T>::value.
// The primary template is only declared, never defined, so using it with a
// type that has no specialization below is a compile-time error.
template<class T>
|
||||
struct to_string;
|
||||
|
||||
// half_float::half -> "half"
template<> struct to_string<half_float::half>{
|
||||
static constexpr const char* value = "half";
|
||||
};
|
||||
|
||||
// float -> "float"
template<> struct to_string<float>{
|
||||
static constexpr const char* value = "float";
|
||||
};
|
||||
|
||||
// double -> "double"
template<> struct to_string<double>{
|
||||
static constexpr const char* value = "double";
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order,
|
||||
std::vector<std::vector<std::string>> TS,
|
||||
run_mode_t mode, std::vector<double>& bench, bool &test) {
|
||||
typedef float NumericT;
|
||||
std::string ty = "float";
|
||||
size_t dtsize = sizeof(NumericT);
|
||||
std::string ty = to_string<T>::value;
|
||||
size_t dtsize = sizeof(T);
|
||||
drv::context* context = stream->context();
|
||||
|
||||
// rank
|
||||
@@ -107,11 +130,11 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
|
||||
// test triton
|
||||
if(mode == TEST){
|
||||
std::vector<NumericT> hx(size);
|
||||
std::vector<NumericT> hy(size);
|
||||
std::vector<NumericT> ry(size);
|
||||
std::vector<T> hx(size);
|
||||
std::vector<T> hy(size);
|
||||
std::vector<T> ry(size);
|
||||
for(size_t i = 0; i < hx.size(); i++)
|
||||
hx[i] = static_cast<NumericT>((float)rand()/RAND_MAX);
|
||||
hx[i] = static_cast<T>((float)rand()/RAND_MAX);
|
||||
stream->write(&*dx, true, 0, hx);
|
||||
function(args, grid, stream);
|
||||
stream->synchronize();
|
||||
@@ -121,15 +144,23 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
}
|
||||
}
|
||||
|
||||
// Runs triton_copy_nd in BENCH mode for the element type selected by `dtype`
// and returns the `bench` vector that was passed to it by reference.
//   stream           - driver stream forwarded to triton_copy_nd
//   dtype            - element type selector (HALF or FLOAT are dispatched)
//   shape            - forwarded unchanged to triton_copy_nd
//   x_order, y_order - forwarded unchanged to triton_copy_nd
// An empty tile-size list `{}` is passed as the TS argument.
// NOTE(review): this listing is a rendered commit diff. The signature without
// `dtype` and the untemplated triton_copy_nd(...) call below appear to be the
// lines the commit replaced; confirm against the repository.
std::vector<double> bench_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
std::vector<double> bench_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,
|
||||
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
|
||||
std::vector<double> bench;
|
||||
// Passed by reference to triton_copy_nd; declared without an initializer and
// never read in this function.
bool test;
|
||||
triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test);
|
||||
// Pick the template instantiation matching the requested element type.
switch(dtype){
|
||||
case HALF:
|
||||
triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, {}, BENCH, bench, test);
|
||||
break;
|
||||
case FLOAT:
|
||||
triton_copy_nd<float>(stream, shape, x_order, y_order, {}, BENCH, bench, test);
|
||||
break;
|
||||
// Any other dtype (e.g. DOUBLE) runs nothing, so `bench` is returned empty.
default: break;
|
||||
}
|
||||
return bench;
|
||||
}
|
||||
|
||||
bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
bool test_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,
|
||||
const std::vector<int32_t>& TS,
|
||||
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
|
||||
std::vector<double> bench;
|
||||
@@ -137,6 +168,14 @@ bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
|
||||
std::vector<std::vector<std::string>> TSS;
|
||||
for(int32_t d: TS)
|
||||
TSS.push_back({std::to_string(d)});
|
||||
triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test);
|
||||
switch(dtype){
|
||||
case HALF:
|
||||
triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
|
||||
break;
|
||||
case FLOAT:
|
||||
triton_copy_nd<float>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
return test;
|
||||
}
|
||||
|
@@ -26,9 +26,10 @@ void copy2d(TYPE * X __noalias __readonly __aligned(16),
|
||||
int pid1 = get_program_id(1);
|
||||
int rs0[TS0] = pid0 * TS0 + 0 ... TS0;
|
||||
int rs1[TS1] = pid1 * TS1 + 0 ... TS1;
|
||||
bool in_bounds[TS0, TS1] = rs0[:, newaxis] < S0 && rs1[newaxis, :] < S1;
|
||||
TYPE* px[TS0, TS1] = X + rs0[:, newaxis] * STRIDE_XS0 + rs1[newaxis, :] * STRIDE_XS1;
|
||||
TYPE* py[TS0, TS1] = Y + rs0[:, newaxis] * STRIDE_YS0 + rs1[newaxis, :] * STRIDE_YS1;
|
||||
*py = *px;
|
||||
*?(in_bounds)py = *?(in_bounds)px;
|
||||
}
|
||||
)";
|
||||
|
||||
|
@@ -46,8 +46,8 @@ inline std::vector<std::vector<std::string>> tile_nd(size_t rank) {
|
||||
if(rank == 1)
|
||||
return {{"128", "256", "512", "1024"}};
|
||||
if(rank == 2)
|
||||
return {{"64"},
|
||||
{"64"}};
|
||||
return {{"16", "32", "64"},
|
||||
{"16", "32", "64"}};
|
||||
if(rank == 3)
|
||||
return {{"4", "16", "32"},
|
||||
{"4", "16", "32"},
|
||||
|
@@ -50,7 +50,7 @@ int main() {
|
||||
bool result = true;
|
||||
for(const auto& c: configs){
|
||||
std::tie(shape, tile, ord_x, ord_y) = c;
|
||||
bool pass = test_copy_nd(stream, shape, tile, ord_x, ord_y);
|
||||
bool pass = test_copy_nd(stream, FLOAT, shape, tile, ord_x, ord_y);
|
||||
result = result && pass;
|
||||
std::cout << "// " << c << ", " << pass << std::endl;
|
||||
}
|
||||
|
Reference in New Issue
Block a user