[GENERAL] Merged einsum feature branch. Various features, performance
improvements, and bugfixes:
* Added preliminary support for extended Einstein summation in PyTriton
* Significant performance improvement on FP32 kernels containing matrix multiplication
* Added re-coalescing pass for FP16 kernels containing matrix multiplication
* Various bugfixes
This commit is contained in:
@@ -1083,14 +1083,12 @@ QualType Parser::ParseDeclSpec(int* storageSpec, int* funcSpec, int* alignSpec)
|
||||
*storageSpec |= S_THREAD;
|
||||
break;
|
||||
|
||||
case Token::AUTO:
|
||||
EnsureAndSetStorageSpec(tok, storageSpec, S_AUTO);
|
||||
break;
|
||||
|
||||
// Type qualifier
|
||||
case Token::CONST: qualSpec |= Qualifier::CONST; break;
|
||||
case Token::RESTRICT: qualSpec |= Qualifier::RESTRICT; break;
|
||||
case Token::VOLATILE: qualSpec |= Qualifier::VOLATILE; break;
|
||||
case Token::CMEM: qualSpec |= Qualifier::CMEM; break;
|
||||
|
||||
// Type specifier
|
||||
case Token::SIGNED:
|
||||
@@ -1551,6 +1549,7 @@ int Parser::ParseQual() {
|
||||
case Token::CONST: qualSpec |= Qualifier::CONST; break;
|
||||
case Token::RESTRICT: qualSpec |= Qualifier::RESTRICT; break;
|
||||
case Token::VOLATILE: qualSpec |= Qualifier::VOLATILE; break;
|
||||
case Token::CMEM: qualSpec |= Qualifier::CMEM; break;
|
||||
case Token::ATOMIC: Error(tok, "do not support 'atomic'"); break;
|
||||
default: ts_.PutBack(); return qualSpec;
|
||||
}
|
||||
@@ -1769,6 +1768,7 @@ QualType Parser::ParseArrayFuncDeclarator(const Token* ident, QualType base) {
|
||||
if (!base->Complete()) {
|
||||
Error(ident, "'%s' has incomplete element type", ident->str_.c_str());
|
||||
}
|
||||
// return a pointer for tiles in constant memory:
|
||||
return TileType::New(shape, base);
|
||||
|
||||
} else if (ts_.Try('(')) { // Function declaration
|
||||
|
Reference in New Issue
Block a user