diff --git a/include/isaac/driver/device.h b/include/isaac/driver/device.h index 26ecd458e..59e5c0556 100644 --- a/include/isaac/driver/device.h +++ b/include/isaac/driver/device.h @@ -62,10 +62,13 @@ public: BROADWELL, //NVidia - TESLA, - FERMI, - KEPLER, - MAXWELL, + SM_2_0, + SM_2_1, + SM_3_0, + SM_3_5, + SM_3_7, + SM_5_0, + SM_5_2, //AMD TERASCALE_2, diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 62a61c23f..b45be46b6 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,11 +1,3 @@ -if(NOT ANDROID) - #Presets - set(DATABASE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/profiles/presets/") - file(GLOB_RECURSE JSON_FILES "${DATABASE_PATH}/json/*.json") - CODE_TO_H(SOURCES ${JSON_FILES} VARNAME json_files EXTENSION "hpp" OUTPUT_DIR "${DATABASE_PATH}" - NAMESPACE "isaac presets" TARGET database EOF "1") -endif() - #Compilation if(ANDROID) add_library(isaac STATIC ${LIBISAAC_SRC}) @@ -16,7 +8,18 @@ else() set_target_properties(isaac PROPERTIES LINK_FLAGS "/DEF:${CMAKE_CURRENT_SOURCE_DIR}/wrap/clBLAS.def") endif() endif() -add_dependencies(isaac database) + +#Database +if(NOT ANDROID) + #Presets + foreach(VENDOR amd intel nvidia) + set(DATABASE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/profiles/database/${VENDOR}/") + file(GLOB_RECURSE JSON_FILES "${DATABASE_PATH}/json/*.json") + CODE_TO_H(SOURCES ${JSON_FILES} VARNAME database EXTENSION "hpp" OUTPUT_DIR "${DATABASE_PATH}" + NAMESPACE "isaac database ${VENDOR}" TARGET database_${VENDOR} EOF "1") + add_dependencies(isaac database_${VENDOR}) + endforeach() +endif() target_link_libraries(isaac "dl") diff --git a/lib/driver/device.cpp b/lib/driver/device.cpp index 48885c6c1..f19bb5189 100644 --- a/lib/driver/device.cpp +++ b/lib/driver/device.cpp @@ -92,10 +92,32 @@ Device::Architecture Device::architecture() const std::pair sm = nv_compute_capability(); switch(sm.first) { - case 5: return Architecture::MAXWELL; - case 3: return Architecture::KEPLER; - case 2: return Architecture::FERMI; - case 1: return Architecture::TESLA; + case 5: + switch(sm.second) + { + case 0: return Architecture::SM_5_0; + case 2: return Architecture::SM_5_2; + default: return Architecture::UNKNOWN; + } + + case 3: + switch(sm.second) + { + case 0: return Architecture::SM_3_0; + case 5: return Architecture::SM_3_5; + case 7: return Architecture::SM_3_7; + default: return Architecture::UNKNOWN; + } + + case 2: + switch(sm.second) + { + case 0: return Architecture::SM_2_0; + case 1: return Architecture::SM_2_1; + default: return Architecture::UNKNOWN; + } + + default: return Architecture::UNKNOWN; } } case Vendor::AMD: diff --git a/lib/driver/helpers/cuda/vector.hpp b/lib/driver/helpers/cuda/vector.hpp index 3a2559b1b..9bd40d6c1 100644 --- a/lib/driver/helpers/cuda/vector.hpp +++ b/lib/driver/helpers/cuda/vector.hpp @@ -1,23 +1,3 @@ -/* - * Copyright (c) 2015, PHILIPPE TILLET. All rights reserved. - * - * This file is part of ISAAC. - * - * ISAAC is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301 USA - */ #pragma once #include diff --git a/lib/kernels/templates/matrix_product.cpp b/lib/kernels/templates/matrix_product.cpp index 4036579d6..58114020b 100644 --- a/lib/kernels/templates/matrix_product.cpp +++ b/lib/kernels/templates/matrix_product.cpp @@ -166,7 +166,10 @@ matrix_product_parameters::matrix_product_parameters(unsigned int simd_width stream << " __attribute__((reqd_work_group_size(" << p_.local_size_0 << "," << p_.local_size_1 << ",1)))" << std::endl; break; } - stream << KernelPrefix(backend) << " void " << matrix_product_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, " + stream << KernelPrefix(backend) << " void "; + if(backend==driver::CUDA) + stream << "__launch_bounds__(" << p_.local_size_0*p_.local_size_1 << ") "; + stream << " " << matrix_product_name << "(" << _size_t << " M, " << _size_t << " N, " << _size_t << " K, " << Global(backend) << " " << sdtype << "* C, " << _size_t << " ldc," << _size_t << " offc," << _size_t << " Cstride1, " << sdtype << " alpha," << Global(backend) << " " << sdtype << "* A, " << _size_t << " lda," << _size_t << " offa," << _size_t << " Astride1," diff --git a/lib/profiles/presets.cpp b/lib/profiles/database.cpp similarity index 57% rename from lib/profiles/presets.cpp rename to lib/profiles/database.cpp index 6b494725e..291ce1be9 100644 --- a/lib/profiles/presets.cpp +++ b/lib/profiles/database.cpp @@ -22,13 +22,16 @@ #include "isaac/profiles/profiles.h" //Intel -#include "presets/broadwell.hpp" +#include "database/intel/broadwell.hpp" + //NVidia -#include "presets/maxwell.hpp" -#include "presets/kepler.hpp" +#include "database/nvidia/sm_3_0.hpp" +#include "database/nvidia/sm_3_5.hpp" +#include "database/nvidia/sm_5_2.hpp" + //AMD -#include "presets/fiji.hpp" -#include "presets/hawaii.hpp" +#include "database/amd/gcn_1_1.hpp" +#include "database/amd/gcn_1_2.hpp" namespace isaac { @@ -40,13 +43,19 @@ namespace isaac const profiles::presets_type profiles::presets_ = { //INTEL - DATABASE_ENTRY(GPU, INTEL, BROADWELL, presets::broadwell), + DATABASE_ENTRY(GPU, INTEL, BROADWELL, database::intel::broadwell), //NVIDIA - DATABASE_ENTRY(GPU, NVIDIA, KEPLER, presets::kepler), - DATABASE_ENTRY(GPU, NVIDIA, MAXWELL, presets::maxwell), + DATABASE_ENTRY(GPU, NVIDIA, SM_2_0, database::nvidia::sm_3_0), + DATABASE_ENTRY(GPU, NVIDIA, SM_2_1, database::nvidia::sm_3_0), + DATABASE_ENTRY(GPU, NVIDIA, SM_3_0, database::nvidia::sm_3_0), + DATABASE_ENTRY(GPU, NVIDIA, SM_3_5, database::nvidia::sm_3_5), + DATABASE_ENTRY(GPU, NVIDIA, SM_3_7, database::nvidia::sm_3_5), + DATABASE_ENTRY(GPU, NVIDIA, SM_5_0, database::nvidia::sm_5_2), + DATABASE_ENTRY(GPU, NVIDIA, SM_5_2, database::nvidia::sm_5_2), //AMD - DATABASE_ENTRY(GPU, AMD, GCN_1_1, presets::hawaii), - DATABASE_ENTRY(GPU, AMD, GCN_1_2, presets::fiji) + DATABASE_ENTRY(GPU, AMD, GCN_1_0, database::amd::gcn_1_1), + DATABASE_ENTRY(GPU, AMD, GCN_1_1, database::amd::gcn_1_1), + DATABASE_ENTRY(GPU, AMD, GCN_1_2, database::amd::gcn_1_2) }; diff --git a/lib/profiles/presets/hawaii.hpp b/lib/profiles/database/amd/gcn_1_1.hpp similarity index 99% rename from lib/profiles/presets/hawaii.hpp rename to lib/profiles/database/amd/gcn_1_1.hpp index 32fa8c02f..52dbda15f 100644 --- a/lib/profiles/presets/hawaii.hpp +++ b/lib/profiles/database/amd/gcn_1_1.hpp @@ -4,10 +4,12 @@ namespace isaac { -namespace presets +namespace database +{ +namespace amd { -static const char hawaii[] = { +static const char gcn_1_1[] = { 0x7b, 0x22, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x5f, 0x32, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x73, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x32, 0x22, 0x3a, @@ -7914,7 +7916,8 @@ static const char hawaii[] = { 0x2c, 0x20, 0x38, 0x2c, 0x20, 0x38, 0x5d, 0x5d, 0x7d, 0x7d, 0x7d, 0x0}; -static const std::size_t hawaii_len = 79042; +static const std::size_t gcn_1_1_len = 79042; } } +} diff --git a/lib/profiles/presets/fiji.hpp b/lib/profiles/database/amd/gcn_1_2.hpp similarity index 99% rename from lib/profiles/presets/fiji.hpp rename to lib/profiles/database/amd/gcn_1_2.hpp index 79fa7188f..259b31155 100644 --- a/lib/profiles/presets/fiji.hpp +++ b/lib/profiles/database/amd/gcn_1_2.hpp @@ -1,33 +1,15 @@ -/* - * Copyright (c) 2015, PHILIPPE TILLET. All rights reserved. - * - * This file is part of ISAAC. - * - * ISAAC is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301 USA - */ #pragma once #include namespace isaac { -namespace presets +namespace database +{ +namespace amd { -static const char fiji[] = { +static const char gcn_1_2[] = { 0x7b, 0x22, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x5f, 0x32, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x73, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x32, 0x22, 0x3a, @@ -5932,7 +5914,8 @@ static const char fiji[] = { 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x38, 0x2c, 0x20, 0x38, 0x5d, 0x5d, 0x7d, 0x7d, 0x7d, 0x0}; -static const std::size_t fiji_len = 59025; +static const std::size_t gcn_1_2_len = 59025; } } +} diff --git a/lib/profiles/presets/json/hawaii.json b/lib/profiles/database/amd/json/gcn_1_1.json similarity index 100% rename from lib/profiles/presets/json/hawaii.json rename to lib/profiles/database/amd/json/gcn_1_1.json diff --git a/lib/profiles/presets/json/fiji.json b/lib/profiles/database/amd/json/gcn_1_2.json similarity index 100% rename from lib/profiles/presets/json/fiji.json rename to lib/profiles/database/amd/json/gcn_1_2.json diff --git a/lib/profiles/presets/broadwell.hpp b/lib/profiles/database/intel/broadwell.hpp similarity index 99% rename from lib/profiles/presets/broadwell.hpp rename to lib/profiles/database/intel/broadwell.hpp index 7a6910e68..869d59e0f 100644 --- a/lib/profiles/presets/broadwell.hpp +++ b/lib/profiles/database/intel/broadwell.hpp @@ -1,30 +1,12 @@ -/* - * Copyright (c) 2015, PHILIPPE TILLET. All rights reserved. - * - * This file is part of ISAAC. - * - * ISAAC is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301 USA - */ #pragma once #include namespace isaac { -namespace presets +namespace database +{ +namespace intel { static const char broadwell[] = { @@ -5187,3 +5169,4 @@ static const std::size_t broadwell_len = 51536; } } +} diff --git a/lib/profiles/presets/json/broadwell.json b/lib/profiles/database/intel/json/broadwell.json similarity index 100% rename from lib/profiles/presets/json/broadwell.json rename to lib/profiles/database/intel/json/broadwell.json diff --git a/lib/profiles/presets/json/kepler.json b/lib/profiles/database/nvidia/json/sm_3_0.json similarity index 100% rename from lib/profiles/presets/json/kepler.json rename to lib/profiles/database/nvidia/json/sm_3_0.json diff --git a/lib/profiles/presets/json/titan.json b/lib/profiles/database/nvidia/json/sm_3_5.json similarity index 100% rename from lib/profiles/presets/json/titan.json rename to lib/profiles/database/nvidia/json/sm_3_5.json diff --git a/lib/profiles/presets/json/maxwell.json b/lib/profiles/database/nvidia/json/sm_5_2.json similarity index 100% rename from lib/profiles/presets/json/maxwell.json rename to lib/profiles/database/nvidia/json/sm_5_2.json diff --git a/lib/profiles/presets/kepler.hpp b/lib/profiles/database/nvidia/sm_3_0.hpp similarity index 99% rename from lib/profiles/presets/kepler.hpp rename to lib/profiles/database/nvidia/sm_3_0.hpp index c64e869e7..65b8dc513 100644 --- a/lib/profiles/presets/kepler.hpp +++ b/lib/profiles/database/nvidia/sm_3_0.hpp @@ -4,10 +4,12 @@ namespace isaac { -namespace presets +namespace database +{ +namespace nvidia { -static const char kepler[] = { +static const char sm_3_0[] = { 0x7b, 0x22, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x5f, 0x32, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x73, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x32, 0x22, 0x3a, @@ -3284,7 +3286,8 @@ static const char kepler[] = { 0x31, 0x36, 0x2c, 0x20, 0x38, 0x5d, 0x5d, 0x7d, 0x7d, 0x7d, 0x0}; -static const std::size_t kepler_len = 32741; +static const std::size_t sm_3_0_len = 32741; } } +} diff --git a/lib/profiles/presets/titan.hpp b/lib/profiles/database/nvidia/sm_3_5.hpp similarity index 99% rename from lib/profiles/presets/titan.hpp rename to lib/profiles/database/nvidia/sm_3_5.hpp index 4092fc2da..a567fbcc1 100644 --- a/lib/profiles/presets/titan.hpp +++ b/lib/profiles/database/nvidia/sm_3_5.hpp @@ -4,10 +4,12 @@ namespace isaac { -namespace presets +namespace database +{ +namespace nvidia { -static const char titan[] = { +static const char sm_3_5[] = { 0x7b, 0x22, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x5f, 0x32, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x73, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x32, 0x22, 0x3a, @@ -7562,7 +7564,8 @@ static const char titan[] = { 0x38, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x31, 0x36, 0x2c, 0x20, 0x38, 0x5d, 0x5d, 0x7d, 0x7d, 0x7d, 0x0}; -static const std::size_t titan_len = 75530; +static const std::size_t sm_3_5_len = 75530; } } +} diff --git a/lib/profiles/presets/maxwell.hpp b/lib/profiles/database/nvidia/sm_5_2.hpp similarity index 99% rename from lib/profiles/presets/maxwell.hpp rename to lib/profiles/database/nvidia/sm_5_2.hpp index 5c16ab460..c42909a35 100644 --- a/lib/profiles/presets/maxwell.hpp +++ b/lib/profiles/database/nvidia/sm_5_2.hpp @@ -1,33 +1,15 @@ -/* - * Copyright (c) 2015, PHILIPPE TILLET. All rights reserved. - * - * This file is part of ISAAC. - * - * ISAAC is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301 USA - */ #pragma once #include namespace isaac { -namespace presets +namespace database +{ +namespace nvidia { -static const char maxwell[] = { +static const char sm_5_2[] = { 0x7b, 0x22, 0x72, 0x65, 0x64, 0x75, 0x63, 0x65, 0x5f, 0x32, 0x64, 0x5f, 0x63, 0x6f, 0x6c, 0x73, 0x22, 0x3a, 0x20, 0x7b, 0x22, 0x66, 0x6c, 0x6f, 0x61, 0x74, 0x33, 0x32, 0x22, 0x3a, @@ -2833,7 +2815,8 @@ static const char maxwell[] = { 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x30, 0x2c, 0x20, 0x38, 0x2c, 0x20, 0x38, 0x5d, 0x5d, 0x7d, 0x7d, 0x7d, 0x0}; -static const std::size_t maxwell_len = 28038; +static const std::size_t sm_5_2_len = 28038; } } +} diff --git a/python/setup.py b/python/setup.py index 369c1e7b9..1da750921 100644 --- a/python/setup.py +++ b/python/setup.py @@ -73,7 +73,7 @@ def main(): libraries += ['gnustl_shared'] #Source files - src = 'src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/kernels/binder.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/stream.cpp src/lib/kernels/parse.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/keywords.cpp src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/wrap/clBLAS.cpp src/lib/wrap/cublas.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/presets.cpp src/lib/profiles/profiles.cpp src/lib/value_scalar.cpp src/lib/driver/event.cpp src/lib/driver/backend.cpp src/lib/driver/context.cpp src/lib/driver/dispatch.cpp src/lib/driver/program.cpp src/lib/driver/device.cpp src/lib/driver/buffer.cpp src/lib/driver/command_queue.cpp src/lib/driver/ndrange.cpp src/lib/driver/handle.cpp src/lib/driver/platform.cpp src/lib/driver/check.cpp src/lib/driver/kernel.cpp src/lib/driver/program_cache.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']] + src = 'src/lib/wrap/cublas.cpp src/lib/wrap/clBLAS.cpp src/lib/exception/operation_not_supported.cpp src/lib/exception/unknown_datatype.cpp src/lib/value_scalar.cpp src/lib/array.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/execute.cpp src/lib/kernels/binder.cpp src/lib/kernels/keywords.cpp src/lib/kernels/parse.cpp src/lib/kernels/templates/elementwise_1d.cpp src/lib/kernels/templates/matrix_product.cpp src/lib/kernels/templates/reduce_2d.cpp src/lib/kernels/templates/reduce_1d.cpp src/lib/kernels/templates/base.cpp src/lib/kernels/templates/elementwise_2d.cpp src/lib/kernels/mapped_object.cpp src/lib/kernels/stream.cpp src/lib/driver/dispatch.cpp src/lib/driver/kernel.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/buffer.cpp src/lib/driver/event.cpp src/lib/driver/ndrange.cpp src/lib/driver/device.cpp src/lib/driver/program_cache.cpp src/lib/driver/check.cpp src/lib/driver/command_queue.cpp src/lib/driver/handle.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp src/lib/profiles/predictors/random_forest.cpp src/lib/profiles/profiles.cpp src/lib/profiles/database.cpp '.split() + [os.path.join('src', 'bind', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'kernels.cpp', 'exceptions.cpp']] boostsrc = 'external/boost/libs/' for s in ['numpy','python','smart_ptr','system','thread']: src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]