[PYTHON] Compiling Triton in Release mode now...

This commit is contained in:
Philippe Tillet
2020-11-13 01:44:52 -05:00
committed by Philippe Tillet
parent 5d84fde733
commit 073fddffc1
3 changed files with 8 additions and 10 deletions

View File

@@ -713,12 +713,10 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
// vector size
int vector_size = 1;
/*
int ld = ptrs->get_order()[0];
unsigned alignment = alignment_->get(ptr, ld);
vector_size = gcd(ptrs->axis(ld).contiguous, alignment);
vector_size = std::min(vector_size, val->get_type()->get_tile_element_ty()->is_half_ty() ? 2 : 1);
vector_size = 1;
std::map<unsigned, Value*> packets;
for_each(val, [&](indices_t idx){
@@ -729,19 +727,18 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
packets[id] = UndefValue::get(VectorType::get(in_value->getType(), vector_size));
packets[id] = builder_->CreateInsertElement(packets.at(id), in_value, linear % vector_size);
});
*/
for_each(ptr, [&](indices_t idx){
unsigned linear = vals->get_linear_index(idx);
unsigned id = linear / vector_size;
//if(linear % vector_size != 0)
// return;
if(linear % vector_size != 0)
return;
// num bytes
Value *rmw_ptr = ptrs->get_value(idx);
Value *rmw_msk = msks->get_value(idx);
Value *rmw_val = vals->get_value(idx);
//if(vector_size == 1)
// rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
Value *rmw_val = packets[id];
if(vector_size == 1)
rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
Type* ty = rmw_val->getType();
size_t nbits = ty->getScalarSizeInBits();
// extract pointer offset

View File

@@ -82,6 +82,7 @@ void host_stream::synchronize() {
void host_stream::enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event* event, void **args, size_t args_size) {
auto hst = kernel->module()->hst();
hst_->futures->reserve(hst_->futures->size() + grid[0]*grid[1]*grid[2]);
char* params = new char[args_size];
std::memcpy((void*)params, (void*)args, args_size);
for(size_t i = 0; i < grid[0]; i++)

View File

@@ -55,7 +55,7 @@ class CMakeBuild(build_ext):
self.build_extension(ext)
def build_extension(self, ext):
self.debug = True
#self.debug = True
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
# python directories
python_include_dirs = distutils.sysconfig.get_python_inc()
@@ -75,7 +75,7 @@ class CMakeBuild(build_ext):
'-DLLVM_CONFIG=' + find_llvm()]
# configuration
cfg = 'Debug' if self.debug else 'Release'
cfg = 'Debug'
cfg = 'Release'
build_args = ['--config', cfg]
if platform.system() == "Windows":