[general] added missing files

This commit is contained in:
Philippe Tillet
2019-03-27 20:01:35 -04:00
parent bc2a257d5c
commit fdf8559806
6 changed files with 648 additions and 2 deletions

63
include/triton/codegen/target.h Normal file
View File

@@ -0,0 +1,63 @@
#ifndef TDL_INCLUDE_IR_CODEGEN_TARGET_H
#define TDL_INCLUDE_IR_CODEGEN_TARGET_H
#include <map>
#include <set>
#include <vector>
#include "llvm/IR/IRBuilder.h"
namespace llvm{
class Instruction;
class Value;
class Module;
class LLVMContext;
class Function;
}
namespace triton{
namespace codegen{
class target {
public:
target(bool is_gpu): is_gpu_(is_gpu){}
virtual ~target(){}
virtual void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn) = 0;
virtual llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder) = 0;
virtual llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax) = 0;
virtual llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax) = 0;
bool is_gpu() const;
private:
bool is_gpu_;
};
class amd_cl_target: public target {
public:
amd_cl_target(): target(true){}
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
};
class nvidia_cu_target: public target {
public:
nvidia_cu_target(): target(true){}
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
};
class cpu_target: public target {
public:
cpu_target(): target(false){}
void set_kernel(llvm::IRBuilder<>& builder, llvm::LLVMContext &ctx, llvm::Module *module, llvm::Function* fn);
llvm::Instruction* add_barrier(llvm::Module *module, llvm::IRBuilder<>& builder);
llvm::Value* get_global_offset(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned stride, unsigned ax);
llvm::Value* get_local_id(llvm::Module *module, llvm::IRBuilder<>& builder, unsigned ax);
};
}
}
#endif
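
The snippet below is not part of the commit; it is a minimal usage sketch of the target interface declared above, assuming an llvm::Module and a non-empty llvm::Function already exist (the helper name lower_kernel is made up for illustration).

// Illustrative sketch only; not part of this commit.
#include "triton/codegen/target.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"

void lower_kernel(llvm::Module &module, llvm::Function &fn) {
  llvm::IRBuilder<> builder(&fn.getEntryBlock());
  triton::codegen::nvidia_cu_target tgt;
  // mark fn as a kernel for the selected backend
  tgt.set_kernel(builder, module.getContext(), &module, &fn);
  // linear global offset along axis 0 with a stride of 128
  llvm::Value *offset = tgt.get_global_offset(&module, builder, 128, 0);
  llvm::Value *tid = tgt.get_local_id(&module, builder, 0);
  (void)offset; (void)tid;
  // synchronize the block on GPU targets
  if(tgt.is_gpu())
    tgt.add_barrier(&module, builder);
}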

View File

@@ -0,0 +1,413 @@
#ifndef ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
#define ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
/* =========================================================================
Copyright (c) 2010-2012, Institute for Microelectronics,
Institute for Analysis and Scientific Computing,
TU Wien.
-----------------
ViennaCL - The Vienna Computing Library
-----------------
Project Head: Karl Rupp rupp@iue.tuwien.ac.at
(A list of authors and contributors can be found in the PDF manual)
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
#include "triton/driver/error.h"
#include <vector>
#include <string>
namespace triton
{
namespace driver
{
namespace ocl
{
/** @brief Implementation details for the OpenCL management layer in ViennaCL */
namespace detail{
/** @brief Helper class for obtaining information from the OpenCL backend. Deprecated! */
template<typename T>
struct info;
/** \cond */
template<>
struct info<cl_mem>
{
typedef cl_mem_info type;
static void get(cl_mem handle, cl_mem_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
{
cl_int err = dispatch::clGetMemObjectInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_device_id>
{
typedef cl_device_info type;
static void get(cl_device_id handle, cl_device_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret)
{
cl_int err = dispatch::clGetDeviceInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_kernel>
{
typedef cl_kernel_info type;
static void get(cl_kernel handle, cl_kernel_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetKernelInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
static void get(cl_kernel handle, cl_device_id dev_id, cl_kernel_work_group_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetKernelWorkGroupInfo(handle, dev_id, param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_context>
{
typedef cl_context_info type;
static void get(cl_context handle, cl_context_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetContextInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_program>
{
typedef cl_program_info type;
static void get(cl_program handle, cl_program_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetProgramInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
static void get(cl_program handle, cl_device_id device, cl_program_build_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetProgramBuildInfo(handle,device,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_event>
{
typedef cl_profiling_info type;
static void get(cl_event handle, cl_profiling_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetEventProfilingInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_command_queue>
{
typedef cl_command_queue_info type;
static void get(cl_command_queue handle, cl_command_queue_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetCommandQueueInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
template<>
struct info<cl_platform_id>
{
typedef cl_platform_info type;
static void get(cl_platform_id handle, cl_platform_info param_name,size_t param_value_size,void *param_value,size_t *param_value_size_ret){
cl_int err = dispatch::clGetPlatformInfo(handle,param_name,param_value_size,param_value,param_value_size_ret);
check(err);
}
};
//Generic info getter
//Specializations below handle result types that need special treatment (std::string, std::vector<T>)
template<class RES_T>
struct get_info_impl{
template<class MEM_T, class INFO_T>
RES_T operator()(MEM_T const & mem, INFO_T const & info){
RES_T res;
detail::info<MEM_T>::get(mem,info,sizeof(RES_T),&res,NULL);
return res;
}
template<class MEM_T, class ARG_MEM_T, class INFO_T>
RES_T operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
RES_T res;
detail::info<MEM_T>::get(mem,arg_mem, info,sizeof(RES_T),&res,NULL);
return res;
}
};
template<>
struct get_info_impl<std::string>{
template<class MEM_T, class INFO_T>
std::string operator()(const MEM_T &mem, const INFO_T &info){
char buff[1024];
detail::info<MEM_T>::get(mem,info,1024,buff,NULL);
return std::string(buff);
}
template<class MEM_T, class ARG_MEM_T, class INFO_T>
std::string operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info){
char buff[1024];
detail::info<MEM_T>::get(mem,arg_mem,info,1024,buff,NULL);
return std::string(buff);
}
};
template<class T>
struct get_info_impl<std::vector<T> >
{
template<class MEM_T, class INFO_T>
std::vector<T> operator()(const MEM_T &mem, const INFO_T &info)
{
size_t vec_size;
detail::info<MEM_T>::get(mem,info,0,NULL,&vec_size);
std::vector<T> res(vec_size/sizeof(T));
detail::info<MEM_T>::get(mem,info,vec_size,res.data(),NULL);
return res;
}
template<class MEM_T, class ARG_MEM_T, class INFO_T>
std::vector<T> operator()(MEM_T const & mem, ARG_MEM_T const & arg_mem, INFO_T const & info)
{
size_t vec_size;
detail::info<MEM_T>::get(mem,arg_mem,info,0,NULL,&vec_size);
std::vector<T> res(vec_size/sizeof(T));
detail::info<MEM_T>::get(mem,arg_mem,info,vec_size,res.data(),NULL);
return res;
}
};
template<typename T, typename info<T>::type param>
struct return_type;
/** \endcond */
/** \cond */
#define SET_INFO_RETURN_TYPE(DATA_TYPE,NAME,RETURN_TYPE) template<> struct return_type<DATA_TYPE, NAME> { typedef RETURN_TYPE Result; }
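// Illustration (not in the original source): for one of the entries below, the macro expands to
//   template<> struct return_type<cl_device_id, CL_DEVICE_NAME> { typedef std::string Result; };
// so info<CL_DEVICE_NAME>(device) returns std::string.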
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_CONTEXT, cl_context);
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_DEVICE, cl_device_id);
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_REFERENCE_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_command_queue, CL_QUEUE_PROPERTIES, cl_command_queue_properties);
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_DEVICES, std::vector<cl_device_id>);
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_NUM_DEVICES, cl_uint);
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_REFERENCE_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_context, CL_CONTEXT_PROPERTIES, cl_context_properties);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ADDRESS_BITS, cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_AVAILABLE, cl_bool);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPILER_AVAILABLE, cl_bool);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ENDIAN_LITTLE, cl_bool);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_EXTENSIONS, std::string);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong);
//SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE_SUPPORT, cl_bool);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_HEIGHT , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE2D_MAX_WIDTH , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_DEPTH , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_HEIGHT , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_IMAGE3D_MAX_WIDTH , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CLOCK_FREQUENCY , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_COMPUTE_UNITS , cl_uint); //The minimum value is 1
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_ARGS , cl_uint); //The minimum value is 8
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE , cl_ulong); //The minimum value is 64 KB
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_MEM_ALLOC_SIZE , cl_ulong); //The minimum value is max (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024)
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_PARAMETER_SIZE , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_READ_IMAGE_ARGS , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_SAMPLERS , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES , std::vector<size_t>);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MEM_BASE_ADDR_ALIGN , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_NAME , std::string);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PLATFORM , cl_platform_id);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILE , std::string);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_PROFILING_TIMER_RESOLUTION , size_t);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_QUEUE_PROPERTIES , cl_command_queue_properties);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_SINGLE_FP_CONFIG , cl_device_fp_config);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_TYPE , cl_device_type);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR , std::string);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VENDOR_ID , cl_uint);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DEVICE_VERSION , std::string);
SET_INFO_RETURN_TYPE(cl_device_id, CL_DRIVER_VERSION , std::string);
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_QUEUED, cl_ulong);
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_SUBMIT, cl_ulong);
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_START, cl_ulong);
SET_INFO_RETURN_TYPE(cl_event, CL_PROFILING_COMMAND_END, cl_ulong);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_FUNCTION_NAME, std::string);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_NUM_ARGS, cl_uint);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_REFERENCE_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_CONTEXT, cl_context);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PROGRAM, cl_program);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_WORK_GROUP_SIZE, size_t);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_COMPILE_WORK_GROUP_SIZE, std::vector<size_t>);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong);
SET_INFO_RETURN_TYPE(cl_kernel,CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_t);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_TYPE, cl_mem_object_type);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_FLAGS, cl_mem_flags);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_SIZE, size_t);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_HOST_PTR, void*);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_MAP_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_REFERENCE_COUNT, cl_uint);
SET_INFO_RETURN_TYPE(cl_mem,CL_MEM_CONTEXT, cl_context);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_CONTEXT,cl_context);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_DEVICES,std::vector<cl_device_id>);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_NUM_DEVICES,cl_uint);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_SOURCE,std::string);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARY_SIZES,std::vector<size_t>);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BINARIES,std::vector<unsigned char*>);
//Build
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_STATUS, cl_build_status);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_OPTIONS, std::string);
SET_INFO_RETURN_TYPE(cl_program,CL_PROGRAM_BUILD_LOG, std::string);
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_PROFILE, std::string);
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VERSION, std::string);
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_NAME, std::string);
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_VENDOR, std::string);
SET_INFO_RETURN_TYPE(cl_platform_id,CL_PLATFORM_EXTENSIONS, std::string);
#undef SET_INFO_RETURN_TYPE
/** \endcond */
}
template<cl_device_info param>
typename detail::return_type<cl_device_id, param>::Result info(cl_device_id const & handle){
typedef typename detail::return_type<cl_device_id, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
template<cl_mem_info param>
typename detail::return_type<cl_mem, param>::Result info(cl_mem const & handle){
typedef typename detail::return_type<cl_mem, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
//Program
template<cl_program_info param>
typename detail::return_type<cl_program, param>::Result info(cl_program const & handle){
typedef typename detail::return_type<cl_program, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
template<>
inline typename detail::return_type<cl_program, CL_PROGRAM_BINARIES>::Result info<CL_PROGRAM_BINARIES>(cl_program const & handle)
{
std::vector<unsigned char *> res;
std::vector<size_t> sizes = info<CL_PROGRAM_BINARY_SIZES>(handle);
for(size_t s: sizes)
res.push_back(new unsigned char[s]);
dispatch::clGetProgramInfo(handle, CL_PROGRAM_BINARIES, res.size()*sizeof(unsigned char*), (void*)res.data(), NULL);
return res;
}
template<cl_program_build_info param>
typename detail::return_type<cl_program, param>::Result info(cl_program const & phandle, cl_device_id const & dhandle){
typedef typename detail::return_type<cl_program, param>::Result res_t;
return detail::get_info_impl<res_t>()(phandle,dhandle,param);
}
//Kernel
template<cl_kernel_info param>
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & handle){
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
template<cl_kernel_work_group_info param>
typename detail::return_type<cl_kernel, param>::Result info(cl_kernel const & khandle, cl_device_id const & dhandle){
typedef typename detail::return_type<cl_kernel, param>::Result res_t;
return detail::get_info_impl<res_t>()(khandle,dhandle,param);
}
//Context
template<cl_context_info param>
typename detail::return_type<cl_context, param>::Result info(cl_context const & handle){
typedef typename detail::return_type<cl_context, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
//Event
template<cl_profiling_info param>
typename detail::return_type<cl_event, param>::Result info(cl_event const & handle){
typedef typename detail::return_type<cl_event, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
//Command queue
template<cl_command_queue_info param>
typename detail::return_type<cl_command_queue, param>::Result info(cl_command_queue const & handle){
typedef typename detail::return_type<cl_command_queue, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
//Platform
template<cl_platform_info param>
typename detail::return_type<cl_platform_id, param>::Result info(cl_platform_id const & handle){
typedef typename detail::return_type<cl_platform_id, param>::Result res_t;
return detail::get_info_impl<res_t>()(handle,param);
}
template<class OCL_TYPE, typename detail::info<OCL_TYPE>::type param>
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle){
return info<param>(handle.get());
}
template<class OCL_TYPE, class OCL_TYPE_ARG, typename detail::info<OCL_TYPE>::type param>
typename detail::return_type<OCL_TYPE, param>::Result info(OCL_TYPE const & handle, OCL_TYPE_ARG const & arg_handle){
return info<param>(handle.get(), arg_handle.get());
}
}
}
}
#endif // ISAAC_DRIVER_HELPERS_OCL_INFOS_HPP_
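
A hedged usage example, not part of the commit, of the info<> accessors defined above; the device handle is assumed to have been obtained elsewhere, and the infos header (with its OpenCL typedefs) is assumed to be included.

// Illustrative only; assumes a valid cl_device_id obtained elsewhere.
#include <iostream>
#include <string>
#include <vector>

void print_device_summary(cl_device_id dev) {
  using triton::driver::ocl::info;
  std::string name = info<CL_DEVICE_NAME>(dev);
  cl_uint compute_units = info<CL_DEVICE_MAX_COMPUTE_UNITS>(dev);
  std::vector<size_t> wi_sizes = info<CL_DEVICE_MAX_WORK_ITEM_SIZES>(dev);
  std::cout << name << ": " << compute_units << " compute units, "
            << wi_sizes.size() << " work-item dimensions" << std::endl;
}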

20
include/triton/ir/cfg.h Normal file
View File

@@ -0,0 +1,20 @@
#ifndef TDL_INCLUDE_IR_CFG_H
#define TDL_INCLUDE_IR_CFG_H
#include <vector>
namespace triton{
namespace ir{
class function;
class basic_block;
class cfg {
public:
static std::vector<basic_block *> reverse_post_order(function* fn);
};
}
}
#endif

118
lib/codegen/target.cpp Normal file
View File

@@ -0,0 +1,118 @@
#include "triton/codegen/target.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;
namespace triton{
namespace codegen{
// base
bool target::is_gpu() const {
return is_gpu_;
}
// AMD
void amd_cl_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn) {
fn->setCallingConv(CallingConv::AMDGPU_KERNEL);
}
Instruction* amd_cl_target::add_barrier(Module *module, IRBuilder<>& builder) {
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::amdgcn_s_barrier);
return builder.CreateCall(barrier, {});
}
Value* amd_cl_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
static std::array<Intrinsic::ID, 3> ids = {
Intrinsic::amdgcn_workgroup_id_x,
Intrinsic::amdgcn_workgroup_id_y,
Intrinsic::amdgcn_workgroup_id_z
};
Value* get_group_id = Intrinsic::getDeclaration(module, ids[ax]);
Value* group_id = builder.CreateCall(get_group_id, {});
Value* result = builder.CreateMul(builder.getInt32(stride), group_id);
return result;
}
Value* amd_cl_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
static std::array<Intrinsic::ID, 3> ids = {
Intrinsic::amdgcn_workitem_id_x,
Intrinsic::amdgcn_workitem_id_y,
Intrinsic::amdgcn_workitem_id_z
};
Function *get_local_id = Intrinsic::getDeclaration(module, ids[ax]);
return builder.CreateCall(get_local_id, {});
}
// NVIDIA
void nvidia_cu_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn){
// tag the function as a kernel by appending the standard NVVM annotation
// {fn, !"kernel", i32 1} to the module-level !nvvm.annotations metadata
Metadata *md_args[] = {
ValueAsMetadata::get(fn),
MDString::get(ctx, "kernel"),
ValueAsMetadata::get(builder.getInt32(1))
};
module->getOrInsertNamedMetadata("nvvm.annotations")->addOperand(MDNode::get(ctx, md_args));
}
Instruction* nvidia_cu_target::add_barrier(Module *module, IRBuilder<>& builder) {
Function *barrier = Intrinsic::getDeclaration(module, Intrinsic::nvvm_barrier0);
return builder.CreateCall(barrier, {});
}
Value* nvidia_cu_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
static std::array<Intrinsic::ID, 3> ids = {
Intrinsic::nvvm_read_ptx_sreg_ctaid_x,
Intrinsic::nvvm_read_ptx_sreg_ctaid_y,
Intrinsic::nvvm_read_ptx_sreg_ctaid_z
};
Value* get_group_id = Intrinsic::getDeclaration(module, ids[ax]);
Value* group_id = builder.CreateCall(get_group_id, {});
Value* result = builder.CreateMul(builder.getInt32(stride), group_id);
return result;
}
Value* nvidia_cu_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
static std::array<Intrinsic::ID, 3> ids = {
Intrinsic::nvvm_read_ptx_sreg_tid_x,
Intrinsic::nvvm_read_ptx_sreg_tid_y,
Intrinsic::nvvm_read_ptx_sreg_tid_z
};
Function *get_local_id = Intrinsic::getDeclaration(module, ids[ax]);
return builder.CreateCall(get_local_id, {});
}
// CPU
void cpu_target::set_kernel(IRBuilder<>& builder, LLVMContext &ctx, Module *module, Function* fn) {
// on the CPU target, ordinary functions can serve as kernels; nothing to do
}
Instruction* cpu_target::add_barrier(Module *module, IRBuilder<>& builder) {
// no barrier is needed on CPU; emit a no-op intrinsic call so the result is still an Instruction*
// (an add of two constants would be folded by the IRBuilder and could not be cast to Instruction*)
Function *nop = Intrinsic::getDeclaration(module, Intrinsic::donothing);
return builder.CreateCall(nop, {});
}
Value* cpu_target::get_global_offset(Module *module, IRBuilder<>& builder, unsigned stride, unsigned ax) {
// by convention, the last three kernel parameters carry the group indices along x, y and z
Function *fn = builder.GetInsertBlock()->getParent();
size_t num_params = fn->getFunctionType()->getNumParams();
std::array<Argument*, 3> ids = {
fn->arg_begin() + num_params - 3,
fn->arg_begin() + num_params - 2,
fn->arg_begin() + num_params - 1
};
Value* result = builder.CreateMul(builder.getInt32(stride), ids[ax]);
return result;
}
Value* cpu_target::get_local_id(Module *module, IRBuilder<>& builder, unsigned ax) {
return builder.getInt32(0);
}
}
}

View File

@@ -6,6 +6,7 @@
#include "triton/ir/function.h"
#include "triton/ir/context_impl.h"
#include "triton/ir/constant.h"
#include "triton/driver/device.h"
#include <cstdlib>
@@ -242,12 +243,11 @@ bool tune::check_constraints(std::map<ir::value *, std::vector<std::string>> &er
errors[i].push_back("for dim " + strk + ": shape (" + to_string(shapes[k]->get_value()) + ")"
" is not a multiple of layout (" + to_string(multiple) + ")");
}
// the number of threads per block must be a multiple of the warp size
int num_threads = 1;
for(size_t k = 0; k < shapes.size(); k++)
num_threads *= params_[i]["mts.d" + to_string(k)]->get_value();
if(num_threads % 64 != 0)
errors[i].push_back("number of threads per block (" + to_string(num_threads) + ") must be a multiple of the warp size");
if(num_threads != num_threads_)
errors[i].push_back("Number of threads must be the same for all tiles (" + to_string(num_threads_) + ")");
}
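
For intuition, a small self-contained restatement (not from the commit) of the thread-count constraint enforced in this hunk; the 64 mirrors the hard-coded warp-size multiple above, and the function name is illustrative.

#include <functional>
#include <numeric>
#include <vector>

// Illustrative check only: the per-dimension "mts.d{k}" values multiply to the block size,
// which must be a multiple of the warp size (64 in the hunk above).
bool block_size_ok(const std::vector<int>& mts, int warp_size = 64) {
  int num_threads = std::accumulate(mts.begin(), mts.end(), 1, std::multiplies<int>());
  return num_threads % warp_size == 0;  // e.g. {16, 8} -> 128: ok; {8, 4} -> 32: rejected
}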

32
lib/ir/cfg.cpp Normal file
View File

@@ -0,0 +1,32 @@
#include "triton/ir/cfg.h"
#include "triton/ir/basic_block.h"
#include "triton/ir/function.h"
#include <set>
#include <stack>
#include <iostream>
namespace triton{
namespace ir{
std::vector<basic_block*> cfg::reverse_post_order(function* fn) {
std::stack<basic_block*> stack;
std::set<basic_block*> visited;
std::vector<basic_block*> result;
// initialize stack
for(ir::basic_block* block: fn->blocks())
if(block->get_predecessors().empty())
stack.push(block);
// DFS
while(!stack.empty()) {
basic_block* current = stack.top();
stack.pop();
result.push_back(current);
visited.insert(current);
for(basic_block* succ: current->get_successors())
if(visited.find(succ) == visited.end())
stack.push(succ);
}
return result;
}
}
}
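
The snippet below is an illustrative driver, not part of the commit, showing how a pass might consume the traversal defined above; the function name run_pass is hypothetical.

#include <vector>
#include "triton/ir/basic_block.h"
#include "triton/ir/cfg.h"
#include "triton/ir/function.h"

// Hypothetical pass driver; iterates blocks in the order computed by cfg::reverse_post_order.
void run_pass(triton::ir::function* fn) {
  std::vector<triton::ir::basic_block*> order = triton::ir::cfg::reverse_post_order(fn);
  for (triton::ir::basic_block* block : order) {
    // process each block here
    (void)block;
  }
}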