[general] added missing files
This commit is contained in:
63
include/triton/codegen/target.h
Normal file
63
include/triton/codegen/target.h
Normal file
@@ -0,0 +1,63 @@
|
||||
#ifndef TDL_INCLUDE_IR_CODEGEN_TARGET_H
|
||||
#define TDL_INCLUDE_IR_CODEGEN_TARGET_H
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
|
||||
namespace llvm{
|
||||
class Instruction;
|
||||
class Value;
|
||||
class Module;
|
||||
class LLVMContext;
|
||||
class Function;
|
||||
}
|
||||
|
||||
namespace triton{
|
||||
namespace codegen{
|
||||
|
||||
class target {
|
||||
public:
|
||||
target(bool is_gpu): is_gpu_(is_gpu){}
|
||||
virtual void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn) = 0;
|
||||
virtual llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder) = 0;
|
||||
virtual llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax) = 0;
|
||||
virtual llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax) = 0;
|
||||
bool is_gpu() const;
|
||||
|
||||
private:
|
||||
bool is_gpu_;
|
||||
};
|
||||
|
||||
class amd_cl_target: public target {
|
||||
public:
|
||||
amd_cl_target(): target(true){}
|
||||
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
|
||||
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
|
||||
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
|
||||
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
|
||||
};
|
||||
|
||||
class nvidia_cu_target: public target {
|
||||
public:
|
||||
nvidia_cu_target(): target(true){}
|
||||
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
|
||||
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
|
||||
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
|
||||
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
|
||||
};
|
||||
|
||||
class cpu_target: public target {
|
||||
public:
|
||||
cpu_target(): target(false){}
|
||||
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
|
||||
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
|
||||
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
|
||||
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
413
include/triton/driver/helpers/CL/infos.hpp
Normal file
413
include/triton/driver/helpers/CL/infos.hpp
Normal file
@@ -0,0 +1,413 @@
|
||||
#ifndef ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
|
||||
#define ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
|
||||
|
||||
/* =========================================================================
|
||||
Copyright (c) 2010-2012, Institute for Microelectronics,
|
||||
Institute for Analysis and Scientific Computing,
|
||||
TU Wien.
|
||||
|
||||
-----------------
|
||||
ViennaCL - The Vienna Computing Library
|
||||
-----------------
|
||||
|
||||
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
|
||||
|
||||
(A list of authors and contributors can be found in the PDF manual)
|
||||
|
||||
License: MIT (X11), see file LICENSE in the base directory
|
||||
============================================================================= */
|
||||
|
||||
|
||||
|
||||
#include "triton/driver/error.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace triton
|
||||
{
|
||||
namespace driver
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
/** @brief Implementation details for the OpenCL managment layer in ViennaCL */
|
||||
namespace detail{
|
||||
|
||||
/** @brief Helper class for obtaining informations from the OpenCL backend. Deprecated! */
|
||||
template<typename T>
|
||||
struct info;
|
||||
|
||||
/** \cond */
|
||||
template<>
|
||||
struct info<cl_mem>
|
||||
{
|
||||
typedef cl_mem_info type;
|
||||
|
||||
static void get(cl_mem handle, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = dispatch::clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_device_id>
|
||||
{
|
||||
typedef cl_device_info type;
|
||||
|
||||
static void get(cl_device_id handle, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
|
||||
{
|
||||
cl_int err = dispatch::clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_kernel>
|
||||
{
|
||||
typedef cl_kernel_info type;
|
||||
|
||||
static void get(cl_kernel handle, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_kernel handle, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_context>
|
||||
{
|
||||
typedef cl_context_info type;
|
||||
|
||||
static void get(cl_context handle, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_program>
|
||||
{
|
||||
typedef cl_program_info type;
|
||||
|
||||
static void get(cl_program handle, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
|
||||
static void get(cl_program handle, cl_device_id device, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template<>
|
||||
struct info<cl_event>
|
||||
{
|
||||
typedef cl_profiling_info type;
|
||||
static void get(cl_event handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_command_queue>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_command_queue handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct info<cl_platform_id>
|
||||
{
|
||||
typedef cl_command_queue_info type;
|
||||
static void get(cl_platform_id handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
|
||||
cl_int err = dispatch::clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
|
||||
check(err);
|
||||
}
|
||||
};
|
||||
|
||||
//Info getter
|
||||
//Some intelligence is needed for some types
|
||||
template<class RES_T>
|
||||
struct get_info_impl{
|
||||
|
||||
template<class MEM_T, class INFO_T>
|
||||
RES_T operator()(MEM_T const & mem, INFO_T const & info){
|
||||
RES_T res;
|
||||
detail::info<MEM_T>::get(mem,info,sizeof(RES_T),&res,NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
RES_T operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
|
||||
RES_T res;
|
||||
detail::info<MEM_T>::get(mem,arg_mem, info,sizeof(RES_T),&res,NULL);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct get_info_impl<std::string>{
|
||||
|
||||
template<class MEM_T, class INFO_T>
|
||||
std::string operator()(const MEM_T &mem, const INFO_T &info){
|
||||
char buff[1024];
|
||||
detail::info<MEM_T>::get(mem,info,1024,buff,NULL);
|
||||
return std::string(buff);
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
std::string operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
|
||||
char buff[1024];
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,1024,buff,NULL);
|
||||
return std::string(buff);
|
||||
}
|
||||
};
|
||||
|
||||
template<class T>
|
||||
struct get_info_impl<std::vector<T> >
|
||||
{
|
||||
template<class MEM_T, class INFO_T>
|
||||
std::vector<T> operator()(const MEM_T &mem, const INFO_T &info)
|
||||
{
|
||||
size_t vec_size;
|
||||
detail::info<MEM_T>::get(mem,info,0,NULL,&vec_size);
|
||||
std::vector<T> res(vec_size/sizeof(T));
|
||||
detail::info<MEM_T>::get(mem,info,vec_size,res.data(),NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<class MEM_T, class ARG_MEM_T, class INFO_T>
|
||||
std::vector<T> operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info)
|
||||
{
|
||||
size_t vec_size;
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,0,NULL,&vec_size);
|
||||
std::vector<T> res(vec_size/sizeof(T));
|
||||
detail::info<MEM_T>::get(mem,arg_mem,info,vec_size,res.data(),NULL);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T, typename info<T>::type param>
|
||||
struct return_type;
|
||||
/** \endcond */
|
||||
|
||||
/** \cond */
|
||||
#define SET_INFO_RETURN_TYPE(DATA_TYPE,NAME,RETURN_TYPE) template<> struct return_type<DATA_TYPE, NAME> { typedef RETURN_TYPE Result; }
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_CONTEXT, cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_DEVICE, cl_device_id);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_PROPERTIES, cl_command_queue_properties);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_DEVICES, std::vector<cl_device_id>);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_NUM_DEVICES, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_PROPERTIES, cl_context_properties);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ADDRESS_BITS, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_AVAILABLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPILER_AVAILABLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ENDIAN_LITTLE, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXTENSIONS, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong);
|
||||
//SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE_SUPPORT, cl_bool);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_DEPTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_HEIGHT , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_WIDTH , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_COMPUTE_UNITS , cl_uint); //The minimum value is 1
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_ARGS , cl_uint); //The minimum value is 8
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE , cl_ulong); //The minimum value is 64 KB
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE , cl_ulong); //The minimum value is max (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024)
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_PARAMETER_SIZE , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_READ_IMAGE_ARGS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_SAMPLERS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES , std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_NAME , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PLATFORM , cl_platform_id);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILE , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILING_TIMER_RESOLUTION , size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_QUEUE_PROPERTIES , cl_command_queue_properties);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_SINGLE_FP_CONFIG , cl_device_fp_config);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_TYPE , cl_device_type);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR_ID , cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VERSION , std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_device_id, CL_DRIVER_VERSION , std::string);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_QUEUED, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_SUBMIT, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_START, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_END, cl_ulong);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_FUNCTION_NAME, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_NUM_ARGS, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_CONTEXT, cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PROGRAM, cl_program);
|
||||
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_WORK_GROUP_SIZE, size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_COMPILE_WORK_GROUP_SIZE, std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong);
|
||||
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_TYPE, cl_mem_object_type);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_FLAGS, cl_mem_flags);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_SIZE, size_t);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_HOST_PTR, void*);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_MAP_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_REFERENCE_COUNT, cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_CONTEXT, cl_context);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_CONTEXT,cl_context);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_DEVICES,std::vector<cl_device_id>);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_NUM_DEVICES,cl_uint);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_SOURCE,std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARY_SIZES,std::vector<size_t>);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARIES,std::vector<unsigned char*>);
|
||||
//Build
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_STATUS, cl_build_status);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_OPTIONS, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_LOG, std::string);
|
||||
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_PROFILE, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VERSION, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_NAME, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VENDOR, std::string);
|
||||
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_EXTENSIONS, std::string);
|
||||
|
||||
#undef SET_INFO_RETURN_TYPE
|
||||
|
||||
/** \endcond */
|
||||
}
|
||||
|
||||
template<cl_device_info param>
|
||||
typename detail::return_type<cl_device_id, param>::Result info(cl_device_id const & handle){
|
||||
typedef typename detail::return_type<cl_device_id, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<cl_mem_info param>
|
||||
typename detail::return_type<cl_mem, param>::Result info(cl_mem const & handle){
|
||||
typedef typename detail::return_type<cl_mem, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Program
|
||||
|
||||
template<cl_program_info param>
|
||||
typename detail::return_type<cl_program, param>::Result info(cl_program const & handle){
|
||||
typedef typename detail::return_type<cl_program, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline typename detail::return_type<cl_program, CL_PROGRAM_BINARIES>::Result info<CL_PROGRAM_BINARIES>(cl_program const & handle)
|
||||
{
|
||||
std::vector<unsigned char *> res;
|
||||
std::vector<size_t> sizes = info<CL_PROGRAM_BINARY_SIZES>(handle);
|
||||
for(size_t s: sizes)
|
||||
res.push_back(new unsigned char[s]);
|
||||
dispatch::clGetProgramInfo(handle, CL_PROGRAM_BINARIES, sizeof(unsigned char**), (void*)res.data(), NULL);
|
||||
return res;
|
||||
}
|
||||
|
||||
template<cl_program_build_info param>
|
||||
typename detail::return_type<cl_program, param>::Result info(cl_program const & phandle, cl_device_id const & dhandle){
|
||||
typedef typename detail::return_type<cl_program, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(phandle,dhandle,param);
|
||||
}
|
||||
|
||||
//Kernel
|
||||
template<cl_kernel_info param>
|
||||
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & handle){
|
||||
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<cl_kernel_work_group_info param>
|
||||
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & khandle, cl_device_id const & dhandle){
|
||||
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(khandle,dhandle,param);
|
||||
}
|
||||
|
||||
//Context
|
||||
template<cl_context_info param>
|
||||
typename detail::return_type<cl_context, param>::Result info(cl_context const & handle){
|
||||
typedef typename detail::return_type<cl_context, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Event
|
||||
template<cl_profiling_info param>
|
||||
typename detail::return_type<cl_event, param>::Result info(cl_event const & handle){
|
||||
typedef typename detail::return_type<cl_event, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Command queue
|
||||
template<cl_command_queue_info param>
|
||||
typename detail::return_type<cl_command_queue, param>::Result info(cl_command_queue const & handle){
|
||||
typedef typename detail::return_type<cl_command_queue, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
//Plaftform
|
||||
template<cl_platform_info param>
|
||||
typename detail::return_type<cl_platform_id, param>::Result info(cl_platform_id const & handle){
|
||||
typedef typename detail::return_type<cl_platform_id, param>::Result res_t;
|
||||
return detail::get_info_impl<res_t>()(handle,param);
|
||||
}
|
||||
|
||||
template<class OCL_TYPE, typename detail::info<OCL_TYPE>::type param>
|
||||
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle){
|
||||
return info(handle.get());
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<class OCL_TYPE, class OCL_TYPE_ARG, typename detail::info<OCL_TYPE>::type param>
|
||||
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle, OCL_TYPE_ARG const & arg_handle){
|
||||
return info(handle.get(), arg_handle.get());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // INFOS_HPP
|
20
include/triton/ir/cfg.h
Normal file
20
include/triton/ir/cfg.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef TDL_INCLUDE_IR_CFG_H
|
||||
#define TDL_INCLUDE_IR_CFG_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace triton{
|
||||
namespace ir{
|
||||
|
||||
class function;
|
||||
class basic_block;
|
||||
|
||||
class cfg {
|
||||
public:
|
||||
static std::vector<basic_block *> reverse_post_order(function* fn);
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
118
lib/codegen/target.cpp
Normal file
118
lib/codegen/target.cpp
Normal file
@@ -0,0 +1,118 @@
|
||||
#include "triton/codegen/target.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/Intrinsics.h"
|
||||
#include "llvm/IR/Value.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace triton{
|
||||
namespace codegen{
|
||||
|
||||
// base
|
||||
bool target::is_gpu() const {
|
||||
return is_gpu_;
|
||||
}
|
||||
|
||||
// AMD
|
||||
void amd_cl_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn) {
|
||||
fn->setCallingConv(CallingConv::AMDGPU_KERNEL);
|
||||
}
|
||||
|
||||
Instruction* amd_cl_target::add_barrier(Module *module, IRBuilder<>& builder) {
|
||||
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::amdgcn_s_barrier);
|
||||
return builder.CreateCall(barrier, {});
|
||||
}
|
||||
|
||||
Value* amd_cl_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
|
||||
static std::array<Intrinsic::ID, 3> ids = {
|
||||
Intrinsic::amdgcn_workgroup_id_x,
|
||||
Intrinsic::amdgcn_workgroup_id_y,
|
||||
Intrinsic::amdgcn_workgroup_id_z
|
||||
};
|
||||
Value* get_group_id = Intrinsic::getDeclaration(module, ids[ax]);
|
||||
Value* group_id = builder.CreateCall(get_group_id, {});
|
||||
Value* result = builder.CreateMul(builder.getInt32(stride), group_id);
|
||||
return result;
|
||||
}
|
||||
|
||||
Value* amd_cl_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
|
||||
static std::array<Intrinsic::ID, 3> ids = {
|
||||
Intrinsic::amdgcn_workitem_id_x,
|
||||
Intrinsic::amdgcn_workitem_id_y,
|
||||
Intrinsic::amdgcn_workitem_id_z
|
||||
};
|
||||
Function *get_local_id = Intrinsic::getDeclaration(module, ids[ax]);
|
||||
return builder.CreateCall(get_local_id, {});
|
||||
}
|
||||
|
||||
// NVIDIA
|
||||
|
||||
void nvidia_cu_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn){
|
||||
// set metadata
|
||||
Metadata *md_args[] = {
|
||||
ValueAsMetadata::get(fn),
|
||||
MDString::get(ctx, "kernel"),
|
||||
ValueAsMetadata::get(builder.getInt32(1))
|
||||
};
|
||||
module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(MDNode::get(ctx, md_args));
|
||||
}
|
||||
|
||||
Instruction* nvidia_cu_target::add_barrier(Module *module, IRBuilder<>& builder) {
|
||||
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::nvvm_barrier0);
|
||||
return builder.CreateCall(barrier, {});
|
||||
}
|
||||
|
||||
Value* nvidia_cu_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
|
||||
static std::array<Intrinsic::ID, 3> ids = {
|
||||
Intrinsic::nvvm_read_ptx_sreg_ctaid_x,
|
||||
Intrinsic::nvvm_read_ptx_sreg_ctaid_y,
|
||||
Intrinsic::nvvm_read_ptx_sreg_ctaid_z
|
||||
};
|
||||
Value* get_group_id = Intrinsic::getDeclaration(module, ids[ax]);
|
||||
Value* group_id = builder.CreateCall(get_group_id, {});
|
||||
Value* result = builder.CreateMul(builder.getInt32(stride), group_id);
|
||||
return result;
|
||||
}
|
||||
|
||||
Value* nvidia_cu_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
|
||||
static std::array<Intrinsic::ID, 3> ids = {
|
||||
Intrinsic::nvvm_read_ptx_sreg_tid_x,
|
||||
Intrinsic::nvvm_read_ptx_sreg_tid_y,
|
||||
Intrinsic::nvvm_read_ptx_sreg_tid_z
|
||||
};
|
||||
Function *get_local_id = Intrinsic::getDeclaration(module, ids[ax]);
|
||||
return builder.CreateCall(get_local_id, {});
|
||||
}
|
||||
|
||||
// CPU
|
||||
|
||||
void cpu_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn) {
|
||||
// normal cpu functions can be kernels
|
||||
}
|
||||
|
||||
Instruction* cpu_target::add_barrier(Module *module, IRBuilder<>& builder) {
|
||||
// no barrier on CPU
|
||||
return (Instruction*)builder.CreateAdd(builder.getInt32(0), builder.getInt32(0));
|
||||
}
|
||||
|
||||
Value* cpu_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
|
||||
const Function *fn = builder.GetInsertBlock()->getParent();
|
||||
size_t num_params = fn->getFunctionType()->getNumParams();
|
||||
static std::array<const Argument*, 3> ids = {
|
||||
fn->arg_begin() + num_params - 3,
|
||||
fn->arg_begin() + num_params - 2,
|
||||
fn->arg_begin() + num_params - 1
|
||||
};
|
||||
Value* result = builder.CreateMul(builder.getInt32(stride), (Argument*)ids[ax]);
|
||||
return result;
|
||||
}
|
||||
|
||||
Value* cpu_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
|
||||
return builder.getInt32(0);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
@@ -6,6 +6,7 @@
|
||||
#include "triton/ir/function.h"
|
||||
#include "triton/ir/context_impl.h"
|
||||
#include "triton/ir/constant.h"
|
||||
#include "triton/driver/device.h"
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
@@ -242,12 +243,11 @@ bool tune::check_constraints(std::map<ir::value *, std::vector<std::string>> &er
|
||||
errors[i].push_back("for dim " + strk + ": shape (" + to_string(shapes[k]->get_value()) + ")"
|
||||
" is not a multiple of layout (" + to_string(multiple) + ")");
|
||||
}
|
||||
// the number of thread per warp must be 32
|
||||
int num_threads = 1;
|
||||
for(size_t k = 0; k < shapes.size(); k++)
|
||||
num_threads *= params_[i]["mts.d" + to_string(k)]->get_value();
|
||||
if(num_threads % 64 != 0)
|
||||
errors[i].push_back("number of threads per block (" + to_string(num_threads) + ") must be multiple of 32");
|
||||
errors[i].push_back("number of threads per block (" + to_string(num_threads) + ") must be multiple of warp size");
|
||||
if(num_threads != num_threads_)
|
||||
errors[i].push_back("Number of threads must be the same for all tiles (" + to_string(num_threads_) + ")");
|
||||
}
|
||||
|
32
lib/ir/cfg.cpp
Normal file
32
lib/ir/cfg.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
#include "triton/ir/cfg.h"
|
||||
#include "triton/ir/basic_block.h"
|
||||
#include "triton/ir/function.h"
|
||||
#include <stack>
|
||||
#include <iostream>
|
||||
|
||||
namespace triton{
|
||||
namespace ir{
|
||||
|
||||
std::vector<basic_block*> cfg::reverse_post_order(function* fn) {
|
||||
std::stack<basic_block*> stack;
|
||||
std::set<basic_block*> visited;
|
||||
std::vector<basic_block*> result;
|
||||
// initialize stack
|
||||
for(ir::basic_block* block: fn->blocks())
|
||||
if(block->get_predecessors().empty())
|
||||
stack.push(block);
|
||||
// DFS
|
||||
while(!stack.empty()) {
|
||||
basic_block* current = stack.top();
|
||||
stack.pop();
|
||||
result.push_back(current);
|
||||
visited.insert(current);
|
||||
for(basic_block* succ: current->get_successors())
|
||||
if(visited.find(succ) == visited.end())
|
||||
stack.push(succ);
|
||||
}
|
||||
return std::move(result);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user