[examples] normalize benchmark by max_clock / current_clock
This commit is contained in:
@@ -87,19 +87,28 @@ T min(std::vector<T> x)
|
||||
|
||||
|
||||
template<class OP, class SYNC>
|
||||
double bench(OP const & op, SYNC const & sync, unsigned repeat = 20)
|
||||
double bench(OP const & op, SYNC const & sync, triton::driver::device const & device)
|
||||
{
|
||||
timer tmr;
|
||||
std::vector<size_t> times;
|
||||
double total_time = 0;
|
||||
op();
|
||||
sync();
|
||||
tmr.start();
|
||||
for(unsigned i = 0; i < repeat; i++)
|
||||
while(total_time*1e-9 < 1e-3){
|
||||
float norm = 1;
|
||||
// normalize clock if possible to get roughly constant result
|
||||
if(auto cu_device = dynamic_cast<const triton::driver::cu_device*>(&device))
|
||||
norm = (float)cu_device->current_sm_clock()/cu_device->max_sm_clock();
|
||||
tmr.start();
|
||||
op();
|
||||
sync();
|
||||
double time = tmr.get().count();
|
||||
return time / repeat;
|
||||
sync();
|
||||
times.push_back(norm*tmr.get().count());
|
||||
total_time+=times.back();
|
||||
}
|
||||
return min(times);
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
// initialize default compute device
|
||||
auto context = triton::driver::backend::contexts::get_default();
|
||||
@@ -159,7 +168,7 @@ int main() {
|
||||
stream->synchronize();
|
||||
// benchmark
|
||||
double ts = bench([&](){stream->enqueue(kernel, grid, {nthreads, 1, 1});},
|
||||
[&](){ stream->synchronize(); });
|
||||
[&](){ stream->synchronize(); }, *context->device());
|
||||
ts = ts * 1e-9;
|
||||
double tflops = 2.*M*N*K / ts * 1e-12;
|
||||
return tflops;
|
||||
@@ -175,7 +184,7 @@ int main() {
|
||||
4
|
||||
};
|
||||
|
||||
// jit.autotune(src, benchmark);
|
||||
jit.autotune(src, benchmark);
|
||||
jit.add_module(src, params);
|
||||
triton::driver::kernel* kernel = jit.get_function("matmul");
|
||||
triton::jit::launch_information info = jit.get_launch_info("matmul");
|
||||
|
@@ -115,6 +115,7 @@ public:
|
||||
size_t max_shared_memory() const;
|
||||
size_t max_sm_clock() const;
|
||||
size_t max_mem_clock() const;
|
||||
void set_max_clock();
|
||||
// Target
|
||||
std::unique_ptr<codegen::target> make_target() const;
|
||||
|
||||
|
@@ -165,6 +165,7 @@ public:
|
||||
static nvmlReturn_t nvmlDeviceGetHandleByPciBusId_v2( const char* pciBusId, nvmlDevice_t* device);
|
||||
static nvmlReturn_t nvmlDeviceGetClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceGetMaxClockInfo(nvmlDevice_t device, nvmlClockType_t type, unsigned int *clock);
|
||||
static nvmlReturn_t nvmlDeviceSetApplicationsClocks(nvmlDevice_t device, unsigned int mem_clock, unsigned int sm_clock);
|
||||
|
||||
static cublasHandle_t cublasHandle(driver::cu_context const & ctx);
|
||||
static cublasStatus_t cublasCreate_v2(cublasHandle_t* h);
|
||||
@@ -281,6 +282,7 @@ private:
|
||||
static void* nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
static void* nvmlDeviceGetClockInfo_;
|
||||
static void* nvmlDeviceGetMaxClockInfo_;
|
||||
static void* nvmlDeviceSetApplicationsClocks_;
|
||||
// cuBLAS
|
||||
static void* cublasCreate_v2_;
|
||||
static void* cublasGetStream_v2_;
|
||||
|
@@ -61,7 +61,6 @@ public:
|
||||
barriers.run(module);
|
||||
}
|
||||
vectorize.run(module);
|
||||
triton::ir::print(module, std::cout);
|
||||
}
|
||||
|
||||
codegen::tune tune;
|
||||
|
@@ -234,7 +234,7 @@ ir::type* tile::type_impl(ir::module *mod, ir::type *type, storage_spec_vec_cons
|
||||
// Pointer
|
||||
ir::type* pointer::type_impl(ir::module*, ir::type *type, storage_spec_vec_const_ref_t storage) const{
|
||||
bool is_ptr_to_const = std::find(storage.begin(), storage.end(), CONSTANT_SPACE_T) != storage.end();
|
||||
return ir::pointer_type::get(type, is_ptr_to_const?4:0);
|
||||
return ir::pointer_type::get(type, is_ptr_to_const?4:1);
|
||||
}
|
||||
|
||||
// Function
|
||||
|
@@ -145,7 +145,7 @@ void tune::run(ir::module &mod) {
|
||||
// Layout parameters
|
||||
while(!nodes_.empty()){
|
||||
ir::type *ty = mod.get_builder().get_int32_ty();
|
||||
ir::metaparameter *nts = ir::metaparameter::create(ctx, ty, 2, 2);
|
||||
ir::metaparameter *nts = ir::metaparameter::create(ctx, ty, 2, 4);
|
||||
ir::metaparameter *mts = ir::metaparameter::create(ctx, ty, 4, 32);
|
||||
connected_components(*nodes_.begin(), {nts, mts}, nodes_, dependencies_);
|
||||
}
|
||||
|
@@ -1,22 +1,22 @@
|
||||
/* Copyright 2015-2017 Philippe Tillet
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files
|
||||
* (the "Software"), to deal in the Software without restriction,
|
||||
* including without limitation the rights to use, copy, modify, merge,
|
||||
* publish, distribute, sublicense, and/or sell copies of the Software,
|
||||
* and to permit persons to whom the Software is furnished to do so,
|
||||
* subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
@@ -217,6 +217,11 @@ size_t cu_device::max_mem_clock() const{
|
||||
return result;
|
||||
}
|
||||
|
||||
// max memory clock
|
||||
void cu_device::set_max_clock() {
|
||||
dispatch::nvmlDeviceSetApplicationsClocks(nvml_device(), max_mem_clock(), max_sm_clock());
|
||||
}
|
||||
|
||||
// print infos
|
||||
std::string cu_device::infos() const{
|
||||
std::ostringstream oss;
|
||||
|
@@ -199,6 +199,7 @@ CUDA_DEFINE1(CUresult, cuCtxPopCurrent_v2, CUcontext*)
|
||||
NVML_DEFINE2(nvmlReturn_t, nvmlDeviceGetHandleByPciBusId_v2, const char *, nvmlDevice_t*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceGetMaxClockInfo, nvmlDevice_t, nvmlClockType_t, unsigned int*)
|
||||
NVML_DEFINE3(nvmlReturn_t, nvmlDeviceSetApplicationsClocks, nvmlDevice_t, unsigned int, unsigned int)
|
||||
|
||||
cublasHandle_t dispatch::cublasHandle(const cu_context &ctx){
|
||||
static std::map<CUcontext, cublasHandle_t> handles;
|
||||
@@ -391,6 +392,7 @@ void* dispatch::nvmlInit_v2_;
|
||||
void* dispatch::nvmlDeviceGetHandleByPciBusId_v2_;
|
||||
void* dispatch::nvmlDeviceGetClockInfo_;
|
||||
void* dispatch::nvmlDeviceGetMaxClockInfo_;
|
||||
void* dispatch::nvmlDeviceSetApplicationsClocks_;
|
||||
|
||||
void* dispatch::cublasCreate_v2_;
|
||||
void* dispatch::cublasGetStream_v2_;
|
||||
|
@@ -106,6 +106,7 @@ void module::compile_llvm_module(llvm::Module* module, const std::string& triple
|
||||
const std::string& features,
|
||||
file_type_t ft) {
|
||||
init_llvm();
|
||||
|
||||
// create machine
|
||||
module->setTargetTriple(triple);
|
||||
std::string error;
|
||||
@@ -179,10 +180,6 @@ host_module::host_module(driver::context * context, llvm::Module* src): module(c
|
||||
|
||||
|
||||
// create execution engine
|
||||
// llvm::legacy::PassManager pass;
|
||||
// pass.add(llvm::createPrintModulePass(llvm::outs()));
|
||||
// pass.add(llvm::createVerifierPass());
|
||||
// pass.run(*src);
|
||||
auto cloned = llvm::CloneModule(*src);
|
||||
for(llvm::Function& fn: cloned->functions())
|
||||
hst_->functions[fn.getName()] = &fn;
|
||||
|
Reference in New Issue
Block a user