[PYTHON] Compiling Triton in Release mode now...
This commit is contained in:
committed by
Philippe Tillet
parent
5d84fde733
commit
073fddffc1
@@ -713,12 +713,10 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
|
||||
|
||||
// vector size
|
||||
int vector_size = 1;
|
||||
/*
|
||||
int ld = ptrs->get_order()[0];
|
||||
unsigned alignment = alignment_->get(ptr, ld);
|
||||
vector_size = gcd(ptrs->axis(ld).contiguous, alignment);
|
||||
vector_size = std::min(vector_size, val->get_type()->get_tile_element_ty()->is_half_ty() ? 2 : 1);
|
||||
vector_size = 1;
|
||||
|
||||
std::map<unsigned, Value*> packets;
|
||||
for_each(val, [&](indices_t idx){
|
||||
@@ -729,19 +727,18 @@ void generator::visit_atomic_add_inst(ir::atomic_add_inst* add) {
|
||||
packets[id] = UndefValue::get(VectorType::get(in_value->getType(), vector_size));
|
||||
packets[id] = builder_->CreateInsertElement(packets.at(id), in_value, linear % vector_size);
|
||||
});
|
||||
*/
|
||||
|
||||
for_each(ptr, [&](indices_t idx){
|
||||
unsigned linear = vals->get_linear_index(idx);
|
||||
unsigned id = linear / vector_size;
|
||||
//if(linear % vector_size != 0)
|
||||
// return;
|
||||
if(linear % vector_size != 0)
|
||||
return;
|
||||
// num bytes
|
||||
Value *rmw_ptr = ptrs->get_value(idx);
|
||||
Value *rmw_msk = msks->get_value(idx);
|
||||
Value *rmw_val = vals->get_value(idx);
|
||||
//if(vector_size == 1)
|
||||
// rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
|
||||
Value *rmw_val = packets[id];
|
||||
if(vector_size == 1)
|
||||
rmw_val = builder_->CreateExtractElement(rmw_val, builder_->getInt32(0));
|
||||
Type* ty = rmw_val->getType();
|
||||
size_t nbits = ty->getScalarSizeInBits();
|
||||
// extract pointer offset
|
||||
|
@@ -82,6 +82,7 @@ void host_stream::synchronize() {
|
||||
|
||||
void host_stream::enqueue(driver::kernel* kernel, std::array<size_t, 3> grid, std::array<size_t, 3> block, std::vector<event> const *, event* event, void **args, size_t args_size) {
|
||||
auto hst = kernel->module()->hst();
|
||||
hst_->futures->reserve(hst_->futures->size() + grid[0]*grid[1]*grid[2]);
|
||||
char* params = new char[args_size];
|
||||
std::memcpy((void*)params, (void*)args, args_size);
|
||||
for(size_t i = 0; i < grid[0]; i++)
|
||||
|
@@ -55,7 +55,7 @@ class CMakeBuild(build_ext):
|
||||
self.build_extension(ext)
|
||||
|
||||
def build_extension(self, ext):
|
||||
self.debug = True
|
||||
#self.debug = True
|
||||
extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.path)))
|
||||
# python directories
|
||||
python_include_dirs = distutils.sysconfig.get_python_inc()
|
||||
@@ -75,7 +75,7 @@ class CMakeBuild(build_ext):
|
||||
'-DLLVM_CONFIG=' + find_llvm()]
|
||||
# configuration
|
||||
cfg = 'Debug' if self.debug else 'Release'
|
||||
cfg = 'Debug'
|
||||
cfg = 'Release'
|
||||
build_args = ['--config', cfg]
|
||||
|
||||
if platform.system() == "Windows":
|
||||
|
Reference in New Issue
Block a user