Fixes for building on Windows (#382)
* make C++ code compatible with Windows + MSVC * added dlfcn-win32 for cross-platform dlopen * fixed building and pip install on Windows * fixed shared library file name under Windows
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,6 +1,9 @@
|
||||
build/
|
||||
|
||||
__pycache__
|
||||
.pytest_cache
|
||||
|
||||
python/build/
|
||||
python/triton.egg-info/
|
||||
python/triton/_C/libtriton.pyd
|
||||
python/triton/_C/libtriton.so
|
||||
|
3
.gitmodules
vendored
Normal file
3
.gitmodules
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
[submodule "deps/dlfcn-win32"]
|
||||
path = deps/dlfcn-win32
|
||||
url = https://github.com/dlfcn-win32/dlfcn-win32.git
|
@@ -1,6 +1,8 @@
|
||||
cmake_minimum_required(VERSION 3.6)
|
||||
include(ExternalProject)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
if(NOT TRITON_LLVM_BUILD_DIR)
|
||||
set(TRITON_LLVM_BUILD_DIR ${CMAKE_BINARY_DIR})
|
||||
endif()
|
||||
@@ -8,7 +10,9 @@ endif()
|
||||
|
||||
project(triton)
|
||||
include(CTest)
|
||||
if(NOT WIN32)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
endif()
|
||||
|
||||
# Options
|
||||
option(BUILD_TUTORIALS "Build C++ Triton tutorials" ON)
|
||||
@@ -20,10 +24,19 @@ if(NOT CMAKE_BUILD_TYPE)
|
||||
set(CMAKE_BUILD_TYPE "Release")
|
||||
endif()
|
||||
|
||||
if(NOT WIN32)
|
||||
find_library(TERMINFO_LIBRARY tinfo)
|
||||
endif()
|
||||
|
||||
# Compiler flags
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)
|
||||
|
||||
if(WIN32)
|
||||
SET(BUILD_SHARED_LIBS OFF)
|
||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/deps/dlfcn-win32/src)
|
||||
add_subdirectory(deps/dlfcn-win32/src ${CMAKE_BINARY_DIR}/dlfcn-win32)
|
||||
endif()
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||
|
||||
|
||||
@@ -31,7 +44,20 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__STDC_FORMAT_MACROS -std=gnu++17")
|
||||
# LLVM
|
||||
##########
|
||||
if("${LLVM_LIBRARY_DIR}" STREQUAL "")
|
||||
if(WIN32)
|
||||
find_package(LLVM 13 REQUIRED COMPONENTS nvptx amdgpu)
|
||||
|
||||
include_directories(${LLVM_INCLUDE_DIRS})
|
||||
separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS})
|
||||
add_definitions(${LLVM_DEFINITIONS_LIST})
|
||||
|
||||
llvm_map_components_to_libnames(LLVM_LIBRARIES support core
|
||||
NVPTXInfo nvptxcodegen
|
||||
AMDGPUInfo AMDGPUcodegen
|
||||
)
|
||||
else()
|
||||
find_package(LLVM 11 REQUIRED COMPONENTS "nvptx;amdgpu")
|
||||
endif()
|
||||
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
|
||||
if(APPLE)
|
||||
set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14")
|
||||
@@ -108,12 +134,25 @@ endif()
|
||||
|
||||
# Triton
|
||||
file(GLOB_RECURSE LIBTRITON_SRC lib/*.cc)
|
||||
if (WIN32 AND BUILD_PYTHON_MODULE)
|
||||
find_package(Python3 REQUIRED COMPONENTS Development)
|
||||
Python3_add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
set_target_properties(triton PROPERTIES SUFFIX ".pyd")
|
||||
set_target_properties(triton PROPERTIES PREFIX "lib")
|
||||
else()
|
||||
add_library(triton SHARED ${LIBTRITON_SRC} ${PYTHON_SRC})
|
||||
endif()
|
||||
|
||||
target_link_options(triton PRIVATE ${LLVM_LDFLAGS})
|
||||
|
||||
if(WIN32)
|
||||
target_link_libraries(triton PRIVATE ${LLVM_LIBRARIES} dl) # dl is from dlfcn-win32
|
||||
else()
|
||||
target_link_libraries(triton ${LLVM_LIBRARIES} z ${TERMINFO_LIBRARY})
|
||||
endif()
|
||||
|
||||
|
||||
if(BUILD_PYTHON_MODULE)
|
||||
if(BUILD_PYTHON_MODULE AND NOT WIN32)
|
||||
set(CMAKE_SHARED_LIBRARY_SUFFIX ".so")
|
||||
# Check if the platform is MacOS
|
||||
if(APPLE)
|
||||
|
1
deps/dlfcn-win32
vendored
Submodule
1
deps/dlfcn-win32
vendored
Submodule
Submodule deps/dlfcn-win32 added at 522c301ec3
@@ -13,6 +13,14 @@ namespace tools
|
||||
{
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
#define popen _popen
|
||||
#define pclose _pclose
|
||||
#endif
|
||||
|
||||
#ifndef WEXITSTATUS
|
||||
#define WEXITSTATUS(stat_val) ((unsigned)(stat_val) & 255)
|
||||
#endif
|
||||
|
||||
int exec(const std::string& cmd, std::string& result) {
|
||||
char buffer[128];
|
||||
|
@@ -33,19 +33,10 @@ namespace tools
|
||||
|
||||
inline std::string getenv(const char * name)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
char* cache_path = 0;
|
||||
std::size_t sz = 0;
|
||||
_dupenv_s(&cache_path, &sz, name);
|
||||
#else
|
||||
const char * cstr = std::getenv(name);
|
||||
#endif
|
||||
if(!cstr)
|
||||
return "";
|
||||
std::string result(cstr);
|
||||
#ifdef _MSC_VER
|
||||
free(cache_path);
|
||||
#endif
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -441,18 +441,18 @@ std::tuple<Value*, Value*, Value*, Value*> generator::fp8x4_to_fp16x4(Value *in0
|
||||
"lop3.b32 $1, b1, 0x80008000, a1, 0xf8; \n\t"
|
||||
"}", "=r,=r,r", false);
|
||||
Value *packed_in = UndefValue::get(vec_ty(i8_ty, 4));
|
||||
packed_in = insert_elt(packed_in, in0, (int)0);
|
||||
packed_in = insert_elt(packed_in, in1, (int)1);
|
||||
packed_in = insert_elt(packed_in, in2, (int)2);
|
||||
packed_in = insert_elt(packed_in, in3, (int)3);
|
||||
packed_in = insert_elt(packed_in, in0, (uint64_t)0);
|
||||
packed_in = insert_elt(packed_in, in1, (uint64_t)1);
|
||||
packed_in = insert_elt(packed_in, in2, (uint64_t)2);
|
||||
packed_in = insert_elt(packed_in, in3, (uint64_t)3);
|
||||
Value *in = bit_cast(packed_in, i32_ty);
|
||||
Value *ret = call(ptx, {in});
|
||||
Value *packed_ret0 = extract_val(ret, {0});
|
||||
Value *packed_ret1 = extract_val(ret, {1});
|
||||
Value *ret0 = extract_elt(packed_ret0, (int)0);
|
||||
Value *ret1 = extract_elt(packed_ret0, (int)1);
|
||||
Value *ret2 = extract_elt(packed_ret1, (int)0);
|
||||
Value *ret3 = extract_elt(packed_ret1, (int)1);
|
||||
Value *ret0 = extract_elt(packed_ret0, (uint64_t)0);
|
||||
Value *ret1 = extract_elt(packed_ret0, (uint64_t)1);
|
||||
Value *ret2 = extract_elt(packed_ret1, (uint64_t)0);
|
||||
Value *ret3 = extract_elt(packed_ret1, (uint64_t)1);
|
||||
return std::make_tuple(ret0, ret1, ret2, ret3);
|
||||
}
|
||||
|
||||
@@ -694,11 +694,11 @@ void generator::visit_load_inst(ir::load_inst* x){
|
||||
// ---
|
||||
// finally call inline ASM
|
||||
// ---
|
||||
InlineAsm *_asm = InlineAsm::get(asm_ty, asm_oss.str(), asm_cstrt, true);
|
||||
InlineAsm *inlineAsm = InlineAsm::get(asm_ty, asm_oss.str(), asm_cstrt, true);
|
||||
std::vector<Value*> args = {pred, ptr};
|
||||
for(Value *v: others)
|
||||
args.push_back(v);
|
||||
Value *_ret = call(_asm, args);
|
||||
Value *_ret = call(inlineAsm, args);
|
||||
// ---
|
||||
// extract and store return values
|
||||
// ---
|
||||
|
@@ -20,7 +20,9 @@
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <fstream>
|
||||
#if __has_include(<unistd.h>)
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <memory>
|
||||
#include <regex>
|
||||
#include "triton/driver/llvm.h"
|
||||
@@ -165,10 +167,10 @@ std::string ptx_to_cubin(const std::string& ptx, int cc) {
|
||||
return "";
|
||||
|
||||
// compile ptx with ptxas
|
||||
char _fsrc[] = "/tmp/triton_k_XXXXXX";
|
||||
char _flog[] = "/tmp/triton_l_XXXXXX";
|
||||
mkstemp(_fsrc);
|
||||
mkstemp(_flog);
|
||||
char _fsrc[L_tmpnam];
|
||||
char _flog[L_tmpnam];
|
||||
std::tmpnam(_fsrc);
|
||||
std::tmpnam(_flog);
|
||||
std::string fsrc = _fsrc;
|
||||
std::string flog = _flog;
|
||||
std::string fbin = fsrc + ".o";
|
||||
@@ -202,10 +204,10 @@ CUmodule ptx_to_cumodule(const std::string& ptx, int cc) {
|
||||
// Use PTXAS via system call
|
||||
if(use_system_ptxas){
|
||||
// compile ptx with ptxas
|
||||
char _fsrc[] = "/tmp/triton_k_XXXXXX";
|
||||
char _flog[] = "/tmp/triton_l_XXXXXX";
|
||||
mkstemp(_fsrc);
|
||||
mkstemp(_flog);
|
||||
char _fsrc[L_tmpnam];
|
||||
char _flog[L_tmpnam];
|
||||
std::tmpnam(_fsrc);
|
||||
std::tmpnam(_flog);
|
||||
std::string fsrc = _fsrc;
|
||||
std::string flog = _flog;
|
||||
std::string fbin = fsrc + ".o";
|
||||
@@ -232,8 +234,8 @@ CUmodule ptx_to_cumodule(const std::string& ptx, int cc) {
|
||||
CUjit_option opt[] = {CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, CU_JIT_ERROR_LOG_BUFFER,
|
||||
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, CU_JIT_INFO_LOG_BUFFER,
|
||||
CU_JIT_LOG_VERBOSE};
|
||||
unsigned int errbufsize = 8192;
|
||||
unsigned int logbufsize = 8192;
|
||||
const unsigned int errbufsize = 8192;
|
||||
const unsigned int logbufsize = 8192;
|
||||
char _err[errbufsize];
|
||||
char _log[logbufsize];
|
||||
void* optval[] = {(void*)(uintptr_t)errbufsize, (void*)_err, (void*)(uintptr_t)logbufsize, (void*)_log, (void*)1};
|
||||
@@ -344,8 +346,8 @@ hipModule_t amdgpu_to_hipmodule(const std::string& path) {
|
||||
hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes, hipJitOptionErrorLogBuffer,
|
||||
hipJitOptionInfoLogBufferSizeBytes, hipJitOptionInfoLogBuffer,
|
||||
hipJitOptionLogVerbose};
|
||||
unsigned int errbufsize = 8192;
|
||||
unsigned int logbufsize = 8192;
|
||||
const unsigned int errbufsize = 8192;
|
||||
const unsigned int logbufsize = 8192;
|
||||
char _err[errbufsize];
|
||||
char _log[logbufsize];
|
||||
void* optval[] = {(void*)(uintptr_t)errbufsize,
|
||||
|
@@ -24,6 +24,8 @@ def get_llvm():
|
||||
paths = [p for p in paths if p is not None]
|
||||
if paths:
|
||||
return '', ''
|
||||
if platform.system() == "Windows":
|
||||
return '', ''
|
||||
# download if nothing is installed
|
||||
name = 'clang+llvm-11.0.1-x86_64-linux-gnu-ubuntu-16.04'
|
||||
dir = '/tmp'
|
||||
@@ -105,7 +107,7 @@ class CMakeBuild(build_ext):
|
||||
build_args = ["--config", cfg]
|
||||
|
||||
if platform.system() == "Windows":
|
||||
cmake_args += ["-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)]
|
||||
cmake_args += ["-DCMAKE_RUNTIME_OUTPUT_DIRECTORY_{}={}".format(cfg.upper(), extdir)]
|
||||
if sys.maxsize > 2**32:
|
||||
cmake_args += ["-A", "x64"]
|
||||
build_args += ["--", "/m"]
|
||||
|
@@ -14,6 +14,7 @@
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/stl.h>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
|
Reference in New Issue
Block a user