Database: Renamed GCN architectures and added some default profiles
This commit is contained in:
20
README.md
20
README.md
@@ -121,19 +121,17 @@ Currently supported functions are:
|
||||
|
||||
| BLAS1 | BLAS2 | BLAS3 |
|
||||
| --------------| --------------| --------------|
|
||||
| xAXPY | xGEMV | xGEMM |
|
||||
| xCOPY | xGER | |
|
||||
| xSCAL | | |
|
||||
| xDOT | | |
|
||||
| xASUM | | |
|
||||
| sAXPY | sGEMV | sGEMM |
|
||||
| sCOPY | sGER | |
|
||||
| sSCAL | | |
|
||||
| sDOT | | |
|
||||
| sASUM | | |
|
||||
|
||||
### Contributing
|
||||
|
||||
Non-tuned GPUs are:
|
||||
- Intel Skylake iGPU
|
||||
- AMD GCN > 1.2
|
||||
- Intel Xeon Phi
|
||||
|
||||
I'm planning on adding double precision support very soon.
|
||||
You can contribute to further tuning isaac if you have one of the following architectures:
|
||||
- NVIDIA: SM 2.x; SM 3.5; SM 5.0; SM 6.0
|
||||
- Intel: Skylake iGPU
|
||||
I'm planning on adding double precision support soon.
|
||||
|
||||
I'm no longer very active on this project, although I'll still fix bugs. I've been working on a more ambitious project lately.
|
||||
|
@@ -74,14 +74,16 @@ public:
|
||||
SM_3_7,
|
||||
SM_5_0,
|
||||
SM_5_2,
|
||||
SM_6_0,
|
||||
SM_6_1,
|
||||
|
||||
//AMD
|
||||
TERASCALE_2,
|
||||
TERASCALE_3,
|
||||
GCN_1_0,
|
||||
GCN_1_1,
|
||||
GCN_1_2,
|
||||
GCN_1,
|
||||
GCN_2,
|
||||
GCN_3,
|
||||
GCN_4,
|
||||
|
||||
UNKNOWN
|
||||
};
|
||||
|
@@ -84,6 +84,7 @@ Device::Architecture Device::architecture() const
|
||||
case 6:
|
||||
switch(sm.second)
|
||||
{
|
||||
case 0: return Architecture::SM_6_0;
|
||||
case 1: return Architecture::SM_6_1;
|
||||
}
|
||||
|
||||
@@ -139,23 +140,25 @@ Device::Architecture Device::architecture() const
|
||||
MAP_DEVICE("Scrapper",TERASCALE_3);
|
||||
MAP_DEVICE("Devastator",TERASCALE_3);
|
||||
|
||||
//GCN 1.0
|
||||
MAP_DEVICE("Cape",GCN_1_0);
|
||||
MAP_DEVICE("Pitcairn",GCN_1_0);
|
||||
MAP_DEVICE("Tahiti",GCN_1_0);
|
||||
MAP_DEVICE("New Zealand",GCN_1_0);
|
||||
MAP_DEVICE("Curacao",GCN_1_0);
|
||||
MAP_DEVICE("Malta",GCN_1_0);
|
||||
//GCN 1
|
||||
MAP_DEVICE("Cape",GCN_1);
|
||||
MAP_DEVICE("Pitcairn",GCN_1);
|
||||
MAP_DEVICE("Tahiti",GCN_1);
|
||||
MAP_DEVICE("New Zealand",GCN_1);
|
||||
MAP_DEVICE("Curacao",GCN_1);
|
||||
MAP_DEVICE("Malta",GCN_1);
|
||||
|
||||
//GCN 1.1
|
||||
MAP_DEVICE("Bonaire",GCN_1_1);
|
||||
MAP_DEVICE("Hawaii",GCN_1_1);
|
||||
MAP_DEVICE("Vesuvius",GCN_1_1);
|
||||
//GCN 2
|
||||
MAP_DEVICE("Bonaire",GCN_2);
|
||||
MAP_DEVICE("Hawaii",GCN_2);
|
||||
MAP_DEVICE("Vesuvius",GCN_2);
|
||||
|
||||
//GCN 1.2
|
||||
MAP_DEVICE("Tonga",GCN_1_2);
|
||||
MAP_DEVICE("Fiji",GCN_1_2);
|
||||
//GCN 3
|
||||
MAP_DEVICE("Tonga",GCN_3);
|
||||
MAP_DEVICE("Fiji",GCN_3);
|
||||
|
||||
//GCN 4
|
||||
MAP_DEVICE("Polaris",GCN_4);
|
||||
#undef MAP_DEVICE
|
||||
|
||||
}
|
||||
|
@@ -29,15 +29,12 @@
|
||||
#include "database/intel/broadwell.hpp"
|
||||
|
||||
//NVidia
|
||||
#include "database/nvidia/sm_2_0.hpp"
|
||||
#include "database/nvidia/sm_3_0.hpp"
|
||||
#include "database/nvidia/sm_3_5.hpp"
|
||||
#include "database/nvidia/sm_5_2.hpp"
|
||||
#include "database/nvidia/sm_6_1.hpp"
|
||||
|
||||
//AMD
|
||||
#include "database/amd/gcn_1_1.hpp"
|
||||
#include "database/amd/gcn_1_2.hpp"
|
||||
#include "database/amd/gcn_3.hpp"
|
||||
|
||||
namespace isaac
|
||||
{
|
||||
@@ -54,18 +51,20 @@ const profiles::presets_type profiles::presets_ =
|
||||
//INTEL
|
||||
DATABASE_ENTRY(GPU, INTEL, BROADWELL, database::intel::broadwell),
|
||||
//NVIDIA
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_2_0, database::nvidia::sm_2_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_2_1, database::nvidia::sm_2_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_2_0, database::nvidia::sm_3_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_2_1, database::nvidia::sm_3_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_0, database::nvidia::sm_3_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_5, database::nvidia::sm_3_5),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_5),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_5, database::nvidia::sm_3_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_0),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_5_0, database::nvidia::sm_5_2),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_5_2, database::nvidia::sm_5_2),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_6_0, database::nvidia::sm_6_1),
|
||||
DATABASE_ENTRY(GPU, NVIDIA, SM_6_1, database::nvidia::sm_6_1),
|
||||
//AMD
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1_0, database::amd::gcn_1_1),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1_1, database::amd::gcn_1_1),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1_2, database::amd::gcn_1_2)
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_1, database::amd::gcn_3),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_2, database::amd::gcn_3),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_3, database::amd::gcn_3),
|
||||
DATABASE_ENTRY(GPU, AMD, GCN_4, database::amd::gcn_3)
|
||||
};
|
||||
|
||||
#undef DATABASE_ENTRY
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -9,7 +9,7 @@ namespace database
|
||||
namespace amd
|
||||
{
|
||||
|
||||
static const char gcn_1_2[] = {
|
||||
static const char gcn_3[] = {
|
||||
0x7b, 0x22, 0x67, 0x65, 0x6d, 0x6d, 0x5f, 0x74, 0x6e, 0x22,
|
||||
0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33,
|
||||
0x32, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x70, 0x72, 0x65, 0x64,
|
||||
@@ -20703,7 +20703,7 @@ static const char gcn_1_2[] = {
|
||||
0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x31, 0x36, 0x5d, 0x5d,
|
||||
0x7d, 0x7d, 0x7d, 0x0};
|
||||
|
||||
static const std::size_t gcn_1_2_len = 206914;
|
||||
static const std::size_t gcn_3_len = 206914;
|
||||
|
||||
}
|
||||
}
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -73,7 +73,7 @@ def main():
|
||||
libraries += ['gnustl_shared']
|
||||
|
||||
#Source files
|
||||
src = 'src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/driver/device.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/handle.cpp src/lib/driver/check.cpp src/lib/driver/ndrange.cpp src/lib/driver/platform.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/program_cache.cpp src/lib/driver/kernel.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/driver/command_queue.cpp src/lib/random/rand.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/base.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/runtime/profiles.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/runtime/database.cpp src/lib/runtime/execute.cpp src/lib/value_scalar.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
src = 'src/lib/random/rand.cpp src/lib/jit/syntax/expression/preset.cpp src/lib/jit/syntax/expression/expression.cpp src/lib/jit/syntax/expression/operations.cpp src/lib/jit/syntax/engine/macro.cpp src/lib/jit/syntax/engine/object.cpp src/lib/jit/syntax/engine/process.cpp src/lib/jit/syntax/engine/binder.cpp src/lib/jit/generation/reduce_2d.cpp src/lib/jit/generation/elementwise_2d.cpp src/lib/jit/generation/engine/stream.cpp src/lib/jit/generation/engine/keywords.cpp src/lib/jit/generation/elementwise_1d.cpp src/lib/jit/generation/reduce_1d.cpp src/lib/jit/generation/gemm.cpp src/lib/jit/generation/base.cpp src/lib/runtime/execute.cpp src/lib/runtime/database.cpp src/lib/runtime/profiles.cpp src/lib/runtime/predictors/random_forest.cpp src/lib/array.cpp src/lib/value_scalar.cpp src/lib/driver/backend.cpp src/lib/driver/device.cpp src/lib/driver/kernel.cpp src/lib/driver/buffer.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/program.cpp src/lib/driver/command_queue.cpp src/lib/driver/dispatch.cpp src/lib/driver/program_cache.cpp src/lib/driver/context.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/api/blas/clBLAS.cpp src/lib/api/blas/cublas.cpp src/lib/exception/api.cpp src/lib/exception/driver.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']]
|
||||
boostsrc = 'external/boost/libs/'
|
||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||
|
Reference in New Issue
Block a user