[TESTS] Simplified testing of half-precision transposes

This commit is contained in:
Philippe Tillet
2020-04-09 01:10:11 -04:00
committed by Philippe Tillet
parent 4ae0e28b32
commit ec2cb2155e
5 changed files with 61 additions and 21 deletions

View File

@@ -12,16 +12,16 @@ int main() {
typedef std::tuple<std::vector<int>, std::vector<int>, std::vector<int>> config_t;
std::vector<config_t> configs = {
{{4096*4096}, {0}, {0}},
{{4096, 4096}, {0, 1}, {1, 0}},
{{4096, 4096}, {0, 1}, {0, 1}},
{{4096, 4096}, {0, 1}, {1, 0}},
{{4096, 4096}, {1, 0}, {0, 1}},
{{4096, 4096}, {0, 1}, {0, 1}},
{{4096, 4096}, {1, 0}, {1, 0}},
{{256, 256, 256}, {0, 1, 2}, {0, 1, 2}},
{{256, 256, 256}, {0, 1, 2}, {0, 2, 1}},
{{256, 256, 256}, {1, 0, 2}, {1, 2, 0}},
{{256, 256, 256}, {1, 2, 0}, {1, 0, 2}},
{{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
{{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
{{256, 256, 256}, {1, 2, 0}, {1, 0, 2}}
// {{256, 256, 256}, {2, 0, 1}, {0, 1, 2}},
// {{256, 256, 256}, {2, 1, 0}, {0, 2, 1}}
};
// does the work
std::vector<int32_t> shape;
@@ -29,7 +29,7 @@ int main() {
for(const auto& c: configs){
std::tie(shape, ord_x, ord_y) = c;
std::cout << "// " << c << std::flush;
for(auto perf: bench_copy_nd(stream, shape, ord_x, ord_y))
for(auto perf: bench_copy_nd(stream, HALF, shape, ord_x, ord_y))
std::cout << ", " << perf << std::flush;
std::cout << std::endl;
}

View File

@@ -2,6 +2,7 @@
#include "triton/driver/stream.h"
#include "triton/runtime/function.h"
#include "triton/tools/bench.hpp"
#include "triton/external/half.hpp"
#include "util.h"
int32_t off(const std::vector<int32_t>& idx, const std::vector<int32_t>& strides) {
@@ -16,6 +17,13 @@ enum run_mode_t {
TEST
};
// Element types a caller can request from bench_copy_nd / test_copy_nd.
// Values are spelled out explicitly; they match the implicit numbering.
enum dtype_t {
  FLOAT  = 0,  // dispatched to triton_copy_nd<float>
  HALF   = 1,  // dispatched to triton_copy_nd<half_float::half>
  DOUBLE = 2   // declared here; has no case of its own in the dispatch switches shown in this file
};
template<class T>
void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
const std::vector<int32_t>& shape,
@@ -46,13 +54,28 @@ void cc_copy_nd(const std::vector<T>& x, std::vector<T>& y,
y[off({i, j, k}, y_strides)] = x[off({i, j, k}, x_strides)];
}
// Compile-time mapping from a C++ element type to a type-name string,
// exposed as the static member `value`.
// The primary template is declared but never defined, so only the types
// specialized below can be used; any other type fails to compile.
template<typename T>
struct to_string;

template<>
struct to_string<float> {
  static constexpr const char* value = "float";
};

template<>
struct to_string<double> {
  static constexpr const char* value = "double";
};

template<>
struct to_string<half_float::half> {
  static constexpr const char* value = "half";
};
template<typename T>
void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order,
std::vector<std::vector<std::string>> TS,
run_mode_t mode, std::vector<double>& bench, bool &test) {
typedef float NumericT;
std::string ty = "float";
size_t dtsize = sizeof(NumericT);
std::string ty = to_string<T>::value;
size_t dtsize = sizeof(T);
drv::context* context = stream->context();
// rank
@@ -107,11 +130,11 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
// test triton
if(mode == TEST){
std::vector<NumericT> hx(size);
std::vector<NumericT> hy(size);
std::vector<NumericT> ry(size);
std::vector<T> hx(size);
std::vector<T> hy(size);
std::vector<T> ry(size);
for(size_t i = 0; i < hx.size(); i++)
hx[i] = static_cast<NumericT>((float)rand()/RAND_MAX);
hx[i] = static_cast<T>((float)rand()/RAND_MAX);
stream->write(&*dx, true, 0, hx);
function(args, grid, stream);
stream->synchronize();
@@ -121,15 +144,23 @@ void triton_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
}
}
// Runs triton_copy_nd in BENCH mode for the requested element type and returns
// the `bench` vector that triton_copy_nd receives by reference.
// NOTE(review): this span is a rendered diff whose +/- markers were lost. The first
// signature line below appears to be the pre-commit version and the second (taking
// `dtype`) its replacement -- confirm against the repository before editing.
std::vector<double> bench_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
std::vector<double> bench_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
std::vector<double> bench;
bool test;
// NOTE(review): presumably the pre-commit, untemplated call that the switch below replaces.
triton_copy_nd(stream, shape, x_order, y_order, {}, BENCH, bench, test);
// Pick the template instantiation matching `dtype`; an empty tile-size list is passed.
switch(dtype){
case HALF:
triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, {}, BENCH, bench, test);
break;
case FLOAT:
triton_copy_nd<float>(stream, shape, x_order, y_order, {}, BENCH, bench, test);
break;
// Any other dtype (e.g. DOUBLE) runs nothing, so `bench` is returned empty.
default: break;
}
return bench;
}
bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
bool test_copy_nd(drv::stream* stream, dtype_t dtype, const std::vector<int32_t>& shape,
const std::vector<int32_t>& TS,
const std::vector<int32_t>& x_order, const std::vector<int32_t>& y_order) {
std::vector<double> bench;
@@ -137,6 +168,14 @@ bool test_copy_nd(drv::stream* stream, const std::vector<int32_t>& shape,
std::vector<std::vector<std::string>> TSS;
for(int32_t d: TS)
TSS.push_back({std::to_string(d)});
triton_copy_nd(stream, shape, x_order, y_order, TSS, TEST, bench, test);
switch(dtype){
case HALF:
triton_copy_nd<half_float::half>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
break;
case FLOAT:
triton_copy_nd<float>(stream, shape, x_order, y_order, TSS, TEST, bench, test);
break;
default: break;
}
return test;
}

View File

@@ -26,9 +26,10 @@ void copy2d(TYPE * X __noalias __readonly __aligned(16),
int pid1 = get_program_id(1);
int rs0[TS0] = pid0 * TS0 + 0 ... TS0;
int rs1[TS1] = pid1 * TS1 + 0 ... TS1;
bool in_bounds[TS0, TS1] = rs0[:, newaxis] < S0 && rs1[newaxis, :] < S1;
TYPE* px[TS0, TS1] = X + rs0[:, newaxis] * STRIDE_XS0 + rs1[newaxis, :] * STRIDE_XS1;
TYPE* py[TS0, TS1] = Y + rs0[:, newaxis] * STRIDE_YS0 + rs1[newaxis, :] * STRIDE_YS1;
*py = *px;
*?(in_bounds)py = *?(in_bounds)px;
}
)";

View File

@@ -46,8 +46,8 @@ inline std::vector<std::vector<std::string>> tile_nd(size_t rank) {
if(rank == 1)
return {{"128", "256", "512", "1024"}};
if(rank == 2)
return {{"64"},
{"64"}};
return {{"16", "32", "64"},
{"16", "32", "64"}};
if(rank == 3)
return {{"4", "16", "32"},
{"4", "16", "32"},

View File

@@ -50,7 +50,7 @@ int main() {
bool result = true;
for(const auto& c: configs){
std::tie(shape, tile, ord_x, ord_y) = c;
bool pass = test_copy_nd(stream, shape, tile, ord_x, ord_y);
bool pass = test_copy_nd(stream, FLOAT, shape, tile, ord_x, ord_y);
result = result && pass;
std::cout << "// " << c << ", " << pass << std::endl;
}