[BUILD] Fix compilation problems in the release build (#897)

This commit is contained in:
Keren Zhou
2022-11-20 21:40:36 -08:00
committed by GitHub
parent 23f71daa27
commit 85cccfb81f
3 changed files with 21 additions and 16 deletions

View File

@@ -1438,9 +1438,10 @@ struct BroadcastOpConversion
SmallVector<int64_t> resultLogicalShape(2 * rank);
SmallVector<unsigned> broadcastDims;
for (unsigned d = 0; d < rank; ++d) {
unsigned resultShapePerCTA = triton::gpu::getSizePerThread(resultLayout)[d] *
triton::gpu::getThreadsPerWarp(resultLayout)[d] *
triton::gpu::getWarpsPerCTA(resultLayout)[d];
unsigned resultShapePerCTA =
triton::gpu::getSizePerThread(resultLayout)[d] *
triton::gpu::getThreadsPerWarp(resultLayout)[d] *
triton::gpu::getWarpsPerCTA(resultLayout)[d];
int64_t numCtas = ceil<unsigned>(resultShape[d], resultShapePerCTA);
if (srcShape[d] != resultShape[d]) {
assert(srcShape[d] == 1);
@@ -1450,10 +1451,12 @@ struct BroadcastOpConversion
std::max<unsigned>(1, triton::gpu::getSizePerThread(srcLayout)[d]);
} else {
srcLogicalShape[d] = numCtas;
srcLogicalShape[d + rank] = triton::gpu::getSizePerThread(resultLayout)[d];
srcLogicalShape[d + rank] =
triton::gpu::getSizePerThread(resultLayout)[d];
}
resultLogicalShape[d] = numCtas;
resultLogicalShape[d + rank] = triton::gpu::getSizePerThread(resultLayout)[d];
resultLogicalShape[d + rank] =
triton::gpu::getSizePerThread(resultLayout)[d];
srcLogicalOrder[d] = order[d] + rank;
srcLogicalOrder[d + rank] = order[d];
@@ -1968,6 +1971,7 @@ struct PrintfOpConversion
return "%u";
}
assert(false && "not supported type");
return "";
}
// declare vprintf(i8*, i8*) as external function
@@ -5482,6 +5486,7 @@ Value convertSplatLikeOpWithMmaLayout(const MmaEncodingAttr &layout,
}
assert(false && "Unsupported mma layout found");
return {};
}
class TritonGPUToLLVMTypeConverter : public LLVMTypeConverter {