[GENERAL] Merged einsum feature branch. Various features, performance
improvements, and bugfixes:

* Added preliminary support for extended Einstein summation in PyTriton
* Significant performance improvement on FP32 kernels containing matrix
multiplication
* Added re-coalescing pass for FP16 kernels containing matrix
multiplication
* Various bugfixes
This commit is contained in:
Philippe Tillet
2020-01-16 12:09:50 -05:00
parent 50a52df489
commit f278d9741a
49 changed files with 1923 additions and 994 deletions

View File

@@ -1083,14 +1083,12 @@ QualType Parser::ParseDeclSpec(int* storageSpec, int* funcSpec, int* alignSpec)
*storageSpec |= S_THREAD;
break;
case Token::AUTO:
EnsureAndSetStorageSpec(tok, storageSpec, S_AUTO);
break;
// Type qualifier
case Token::CONST: qualSpec |= Qualifier::CONST; break;
case Token::RESTRICT: qualSpec |= Qualifier::RESTRICT; break;
case Token::VOLATILE: qualSpec |= Qualifier::VOLATILE; break;
case Token::CMEM: qualSpec |= Qualifier::CMEM; break;
// Type specifier
case Token::SIGNED:
@@ -1551,6 +1549,7 @@ int Parser::ParseQual() {
case Token::CONST: qualSpec |= Qualifier::CONST; break;
case Token::RESTRICT: qualSpec |= Qualifier::RESTRICT; break;
case Token::VOLATILE: qualSpec |= Qualifier::VOLATILE; break;
case Token::CMEM: qualSpec |= Qualifier::CMEM; break;
case Token::ATOMIC: Error(tok, "do not support 'atomic'"); break;
default: ts_.PutBack(); return qualSpec;
}
@@ -1769,6 +1768,7 @@ QualType Parser::ParseArrayFuncDeclarator(const Token* ident, QualType base) {
if (!base->Complete()) {
Error(ident, "'%s' has incomplete element type", ident->str_.c_str());
}
// return a pointer for tiles in constant memory:
return TileType::New(shape, base);
} else if (ts_.Try('(')) { // Function declaration