[PYTHON] Compiling Triton in Release mode now...
This commit is contained in:
committed by
Philippe Tillet
parent
5d84fde733
commit
073fddffc1
@@ -713,12 +713,10 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
|
|||||||
|
|
||||||
// vector size
|
// vector size
|
||||||
int vector_size = 1;
|
int vector_size = 1;
|
||||||
/*
|
|
||||||
int ld = ptrs->get_order()[0];
|
int ld = ptrs->get_order()[0];
|
||||||
unsigned alignment = alignment_->get(ptr, ld);
|
unsigned alignment = alignment_->get(ptr, ld);
|
||||||
vector_size = gcd(ptrs->axis(ld).contiguous, alignment);
|
vector_size = gcd(ptrs->axis(ld).contiguous, alignment);
|
||||||
vector_size = std::min(vector_size, val->get_type()->get_tile_element_ty()->is_half_ty() ? 2 : 1);
|
vector_size = std::min(vector_size, val->get_type()->get_tile_element_ty()->is_half_ty() ? 2 : 1);
|
||||||
vector_size = 1;
|
|
||||||
|
|
||||||
std::map<unsigned, Value*> packets;
|
std::map<unsigned, Value*> packets;
|
||||||
for_each(val, [&](indices_t idx){
|
for_each(val, [&](indices_t idx){
|
||||||
@@ -729,19 +727,18 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
|
|||||||
packets[id] = UndefValue::get(VectorType::get(in_value->getType(), vector_size));
|
packets[id] = UndefValue::get(VectorType::get(in_value->getType(), vector_size));
|
||||||
packets[id] = builder_->CreateInsertElement(packets.at(id), in_value, linear % vector_size);
|
packets[id] = builder_->CreateInsertElement(packets.at(id), in_value, linear % vector_size);
|
||||||
});
|
});
|
||||||
*/
|
|
||||||
|
|
||||||
for_each(ptr, [&](indices_t idx){
|
for_each(ptr, [&](indices_t idx){
|
||||||
unsigned linear = vals->get_linear_index(idx);
|
unsigned linear = vals->get_linear_index(idx);
|
||||||
unsigned id = linear / vector_size;
|
unsigned id = linear / vector_size;
|
||||||
//if(linear % vector_size != 0)
|
if(linear % vector_size != 0)
|
||||||
// return;
|
return;
|
||||||
// num bytes
|
// num bytes
|
||||||
Value *rmw_ptr = ptrs->get_value(idx);
|
Value *rmw_ptr = ptrs->get_value(idx);
|
||||||
Value *rmw_msk = msks->get_value(idx);
|
Value *rmw_msk = msks->get_value(idx);
|
||||||
Value *rmw_val = vals->get_value(idx);
|
Value *rmw_val = packets[id];
|
||||||
//if(vector_size == 1)
|
if(vector_size == 1)
|
||||||
// rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
|
rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
|
||||||
Type* ty = rmw_val->getType();
|
Type* ty = rmw_val->getType();
|
||||||
size_t nbits = ty->getScalarSizeInBits();
|
size_t nbits = ty->getScalarSizeInBits();
|
||||||
// extract pointer offset
|
// extract pointer offset
|
||||||
|
@@ -82,6 +82,7 @@ void host_stream::synchronize() {
|
|||||||
|
|
||||||
void host_stream::enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event* event, void **args, size_t args_size) {
|
void host_stream::enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event* event, void **args, size_t args_size) {
|
||||||
auto hst = kernel->module()->hst();
|
auto hst = kernel->module()->hst();
|
||||||
|
hst_->futures->reserve(hst_->futures->size() + grid[0]*grid[1]*grid[2]);
|
||||||
char* params = new char[args_size];
|
char* params = new char[args_size];
|
||||||
std::memcpy((void*)params, (void*)args, args_size);
|
std::memcpy((void*)params, (void*)args, args_size);
|
||||||
for(size_t i = 0; i < grid[0]; i++)
|
for(size_t i = 0; i < grid[0]; i++)
|
||||||
|
@@ -55,7 +55,7 @@ class CMakeBuild(build_ext):
|
|||||||
self.build_extension(ext)
|
self.build_extension(ext)
|
||||||
|
|
||||||
def build_extension(self, ext):
|
def build_extension(self, ext):
|
||||||
self.debug = True
|
#self.debug = True
|
||||||
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
||||||
# python directories
|
# python directories
|
||||||
python_include_dirs = distutils.sysconfig.get_python_inc()
|
python_include_dirs = distutils.sysconfig.get_python_inc()
|
||||||
@@ -75,7 +75,7 @@ class CMakeBuild(build_ext):
|
|||||||
'-DLLVM_CONFIG=' + find_llvm()]
|
'-DLLVM_CONFIG=' + find_llvm()]
|
||||||
# configuration
|
# configuration
|
||||||
cfg = 'Debug' if self.debug else 'Release'
|
cfg = 'Debug' if self.debug else 'Release'
|
||||||
cfg = 'Debug'
|
cfg = 'Release'
|
||||||
build_args = ['--config', cfg]
|
build_args = ['--config', cfg]
|
||||||
|
|
||||||
if platform.system() == "Windows":
|
if platform.system() == "Windows":
|
||||||
|
Reference in New Issue
Block a user