Database: Renamed GCN architectures and added some default profiles

This commit is contained in:
Philippe Tillet
2016-10-06 09:45:56 -04:00
parent 625dbf8de7
commit b21024cd37
13 changed files with 44 additions and 19811 deletions

View File

@@ -121,19 +121,17 @@ Currently supported functions are:
| BLAS1 | BLAS2 | BLAS3 |
| --------------| --------------| --------------|
| xAXPY | xGEMV | xGEMM |
| xCOPY | xGER | |
| xSCAL | | |
| xDOT | | |
| xASUM | | |
| sAXPY | sGEMV | sGEMM |
| sCOPY | sGER | |
| sSCAL | | |
| sDOT | | |
| sASUM | | |
### Contributing
Non-tuned GPUs are:
- Intel Skylake iGPU
- AMD GCN > 1.2
- Intel Xeon Phi
I'm planning on adding double precision support very soon.
You can contribute to further tuning isaac if you have one of the following architectures:
- NVidia: SM 2.x ; SM 3.5 ; SM 5.0 ; SM 6.0
- Intel: Skylake iGPU
I'm planning on adding double precision support soon.
I'm no longer very active on this project, although I'll fix bugs. I've been working on a more ambitious project lately.

View File

@@ -74,14 +74,16 @@ public:
SM_3_7,
SM_5_0,
SM_5_2,
SM_6_0,
SM_6_1,
//AMD
TERASCALE_2,
TERASCALE_3,
GCN_1_0,
GCN_1_1,
GCN_1_2,
GCN_1,
GCN_2,
GCN_3,
GCN_4,
UNKNOWN
};

View File

@@ -84,6 +84,7 @@ Device::Architecture Device::architecture() const
case 6:
switch(sm.second)
{
case 0: return Architecture::SM_6_0;
case 1: return Architecture::SM_6_1;
}
@@ -139,23 +140,25 @@ Device::Architecture Device::architecture() const
MAP_DEVICE("Scrapper",TERASCALE_3);
MAP_DEVICE("Devastator",TERASCALE_3);
//GCN 1.0
MAP_DEVICE("Cape",GCN_1_0);
MAP_DEVICE("Pitcairn",GCN_1_0);
MAP_DEVICE("Tahiti",GCN_1_0);
MAP_DEVICE("New Zealand",GCN_1_0);
MAP_DEVICE("Curacao",GCN_1_0);
MAP_DEVICE("Malta",GCN_1_0);
//GCN 1
MAP_DEVICE("Cape",GCN_1);
MAP_DEVICE("Pitcairn",GCN_1);
MAP_DEVICE("Tahiti",GCN_1);
MAP_DEVICE("New Zealand",GCN_1);
MAP_DEVICE("Curacao",GCN_1);
MAP_DEVICE("Malta",GCN_1);
//GCN 1.1
MAP_DEVICE("Bonaire",GCN_1_1);
MAP_DEVICE("Hawaii",GCN_1_1);
MAP_DEVICE("Vesuvius",GCN_1_1);
//GCN 2
MAP_DEVICE("Bonaire",GCN_2);
MAP_DEVICE("Hawaii",GCN_2);
MAP_DEVICE("Vesuvius",GCN_2);
//GCN 1.2
MAP_DEVICE("Tonga",GCN_1_2);
MAP_DEVICE("Fiji",GCN_1_2);
//GCN 3
MAP_DEVICE("Tonga",GCN_3);
MAP_DEVICE("Fiji",GCN_3);
//GCN 4
MAP_DEVICE("Polaris",GCN_4);
#undef MAP_DEVICE
}

View File

@@ -29,15 +29,12 @@
#include "database/intel/broadwell.hpp"
//NVidia
#include "database/nvidia/sm_2_0.hpp"
#include "database/nvidia/sm_3_0.hpp"
#include "database/nvidia/sm_3_5.hpp"
#include "database/nvidia/sm_5_2.hpp"
#include "database/nvidia/sm_6_1.hpp"
//AMD
#include "database/amd/gcn_1_1.hpp"
#include "database/amd/gcn_1_2.hpp"
#include "database/amd/gcn_3.hpp"
namespace isaac
{
@@ -54,18 +51,20 @@ const profiles::presets_type profiles::presets_ =
//INTEL
DATABASE_ENTRY(GPU, INTEL, BROADWELL, database::intel::broadwell),
//NVIDIA
DATABASE_ENTRY(GPU, NVIDIA, SM_2_0, database::nvidia::sm_2_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_2_1, database::nvidia::sm_2_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_2_0, database::nvidia::sm_3_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_2_1, database::nvidia::sm_3_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_3_0, database::nvidia::sm_3_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_3_5, database::nvidia::sm_3_5),
DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_5),
DATABASE_ENTRY(GPU, NVIDIA, SM_3_5, database::nvidia::sm_3_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_0),
DATABASE_ENTRY(GPU, NVIDIA, SM_5_0, database::nvidia::sm_5_2),
DATABASE_ENTRY(GPU, NVIDIA, SM_5_2, database::nvidia::sm_5_2),
DATABASE_ENTRY(GPU, NVIDIA, SM_6_0, database::nvidia::sm_6_1),
DATABASE_ENTRY(GPU, NVIDIA, SM_6_1, database::nvidia::sm_6_1),
//AMD
DATABASE_ENTRY(GPU, AMD, GCN_1_0, database::amd::gcn_1_1),
DATABASE_ENTRY(GPU, AMD, GCN_1_1, database::amd::gcn_1_1),
DATABASE_ENTRY(GPU, AMD, GCN_1_2, database::amd::gcn_1_2)
DATABASE_ENTRY(GPU, AMD, GCN_1, database::amd::gcn_3),
DATABASE_ENTRY(GPU, AMD, GCN_2, database::amd::gcn_3),
DATABASE_ENTRY(GPU, AMD, GCN_3, database::amd::gcn_3),
DATABASE_ENTRY(GPU, AMD, GCN_4, database::amd::gcn_3)
};
#undef DATABASE_ENTRY

File diff suppressed because it is too large Load Diff

View File

@@ -9,7 +9,7 @@ namespace database
namespace amd
{
static const char gcn_1_2[] = {
static const char gcn_3[] = {
0x7b, 0x22, 0x67, 0x65, 0x6d, 0x6d, 0x5f, 0x74, 0x6e, 0x22,
0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33,
0x32, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x70, 0x72, 0x65, 0x64,
@@ -20703,7 +20703,7 @@ static const char gcn_1_2[] = {
0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x5d, 0x5d,
0x7d, 0x7d, 0x7d, 0x0};
static const std::size_t gcn_1_2_len = 206914;
static const std::size_t gcn_3_len = 206914;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -73,7 +73,7 @@ def main():
libraries += ['gnustl_shared']
#Source files
src = 'src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/driver/device.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/handle.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/program_cache.cpp src/lib/driver/kernel.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/driver/command_queue.cpp src/lib/random/rand.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/runtime/profiles.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/database.cpp src/lib/runtime/execute.cpp src/lib/value_scalar.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/database.cpp src/lib/runtime/profiles.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
boostsrc = 'external/boost/libs/'
for s in ['numpy','python','smart_ptr','system','thread']:
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]