[jit/autotune] added support for multi-threaded auto-tuning
This commit is contained in:
@@ -14,13 +14,13 @@ int main() {
|
||||
|
||||
// initialize default compute device
|
||||
auto context = triton::driver::backend::contexts::get_default();
|
||||
auto op = triton::dnn::shift::BPROP;
|
||||
auto op = triton::dnn::shift::FPROP;
|
||||
|
||||
// initialization
|
||||
int32_t R = 3, S = 3;
|
||||
int32_t B = 16, F = 4096;
|
||||
int32_t B = 16, F = 512;
|
||||
int32_t H = 16, W = 16;
|
||||
int32_t C = 4096;
|
||||
int32_t C = 512;
|
||||
|
||||
// random shifts
|
||||
std::vector<int32_t> shift_h(C);
|
||||
@@ -66,7 +66,7 @@ int main() {
|
||||
stream->write(db, true, 0, hb);
|
||||
stream->write(dc, true, 0, hc);
|
||||
stream->synchronize();
|
||||
shift.enqueue(stream, {da, db, dc});
|
||||
shift.enqueue(stream, {da, db, dc}, true);
|
||||
// stream->read(dc, true, 0, hc);
|
||||
// shift.cpu_ref(rc.data(), ha.data(), hb.data());
|
||||
// for(size_t i = 0; i < hc.size(); i++)
|
||||
|
Reference in New Issue
Block a user