Feature: Merged kernel-fusion branch

* Fuses multiple AXPY kernels
* Adds the possibility of thread-wise for-loops in AXPY-like kernels
This commit is contained in:
Philippe Tillet
2015-09-30 15:31:41 -04:00
parent 149441b9e2
commit feeb1e9862
64 changed files with 10047 additions and 1119 deletions

View File

@@ -1,9 +1,7 @@
#include "isaac/array.h"
#include "isaac/tools/timer.hpp"
#include <vector>
namespace isc = isaac;
namespace sc = isaac;
#ifdef BENCH_CUBLAS
// Empty CUDA kernel: takes no arguments and has no body.
// NOTE(review): appears to be compiled only under BENCH_CUBLAS (see the #ifdef
// just above); its purpose is not visible in this chunk — presumably a
// placeholder for the CUDA build path; confirm against the build setup.
__global__ void dummy(){}
@@ -12,7 +10,7 @@ __global__ void dummy(){}
int main()
{
for(isc::driver::backend::data_type::const_iterator it = isc::driver::queues.data().begin() ; it != isc::driver::queues.data().end() ; ++it)
for(sc::driver::backend::data_type::const_iterator it = sc::driver::queues.data().begin() ; it != sc::driver::queues.data().end() ; ++it)
{
cl::CommandQueue queue = it->second[0];
cl::Context context = it->first;