GEMM: Safer bounds checking for K
This commit is contained in:
@@ -171,7 +171,6 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
|||||||
stream << _size_t << " idt;" << std::endl;
|
stream << _size_t << " idt;" << std::endl;
|
||||||
if(has_depth)
|
if(has_depth)
|
||||||
stream << _size_t << " gidz, div, offz;" << std::endl;
|
stream << _size_t << " gidz, div, offz;" << std::endl;
|
||||||
stream << "int Ky, Kx;" << std::endl;
|
|
||||||
|
|
||||||
stream << "A += offa;" << std::endl;
|
stream << "A += offa;" << std::endl;
|
||||||
stream << "B += offb;" << std::endl;
|
stream << "B += offb;" << std::endl;
|
||||||
@@ -200,7 +199,16 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
|||||||
stream << "idT.x *= " << p_.simd_width << ";" << std::endl;
|
stream << "idT.x *= " << p_.simd_width << ";" << std::endl;
|
||||||
|
|
||||||
stream << "M -= ids.x;" << std::endl;
|
stream << "M -= ids.x;" << std::endl;
|
||||||
|
if(A_trans_=='N')
|
||||||
|
stream << "M -= idT.x;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "M -= idT.y;" << std::endl;
|
||||||
|
|
||||||
stream << "N -= ids.y;" << std::endl;
|
stream << "N -= ids.y;" << std::endl;
|
||||||
|
if(B_trans_=='T')
|
||||||
|
stream << "N -= idT.x;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "N -= idT.y;" << std::endl;
|
||||||
|
|
||||||
if (A_trans_=='N')
|
if (A_trans_=='N')
|
||||||
{
|
{
|
||||||
@@ -247,206 +255,230 @@ gemm_parameters::gemm_parameters(unsigned int simd_width
|
|||||||
|
|
||||||
for(unsigned int i = 0 ; i < npA ; i++ )
|
for(unsigned int i = 0 ; i < npA ; i++ )
|
||||||
if (A_trans_=='N')
|
if (A_trans_=='N')
|
||||||
stream << "if(idT.x + " << i*p_.local_fetch_0*p_.simd_width << " < M) Ai[" << i << "] += (idT.x + " << i*p_.local_fetch_0*p_.simd_width << ")" << ASTRIDE1 << ";" << std::endl;
|
stream << "if( " << i*p_.local_fetch_0*p_.simd_width << " < M) Ai[" << i << "] += (idT.x + " << i*p_.local_fetch_0*p_.simd_width << ")" << ASTRIDE1 << ";" << std::endl;
|
||||||
else
|
else
|
||||||
stream << "if(idT.y + " << i*p_.local_fetch_1 << " < M) Ai[" << i << "] += (idT.y + " << i*p_.local_fetch_1 << ")*lda;" << std::endl;
|
stream << "if(" << i*p_.local_fetch_1 << " < M) Ai[" << i << "] += (idT.y + " << i*p_.local_fetch_1 << ")*lda;" << std::endl;
|
||||||
|
|
||||||
for(unsigned int i = 0 ; i < npB ; i++ )
|
for(unsigned int i = 0 ; i < npB ; i++ )
|
||||||
if (B_trans_=='T')
|
if (B_trans_=='T')
|
||||||
stream << "if(idT.x + " << i*p_.local_fetch_0*p_.simd_width << " < N) Bi[" << i << "] += (idT.x + " << i*p_.local_fetch_0*p_.simd_width << ")" << BSTRIDE1 << ";" << std::endl;
|
stream << "if(" << i*p_.local_fetch_0*p_.simd_width << " < N) Bi[" << i << "] += (idT.x + " << i*p_.local_fetch_0*p_.simd_width << ")" << BSTRIDE1 << ";" << std::endl;
|
||||||
else
|
else
|
||||||
stream << "if(idT.y + " << i*p_.local_fetch_1 << " < N) Bi[" << i << "] += (idT.y + " << i*p_.local_fetch_1 << ")*ldb;" << std::endl;
|
stream << "if(" << i*p_.local_fetch_1 << " < N) Bi[" << i << "] += (idT.y + " << i*p_.local_fetch_1 << ")*ldb;" << std::endl;
|
||||||
|
|
||||||
stream << "storeA = lA + idT.y*" << llda << " + idT.x;" << std::endl;
|
stream << "storeA = lA + idT.y*" << llda << " + idT.x;" << std::endl;
|
||||||
stream << "storeB = lB + idT.y*" << lldb << " + idT.x;" << std::endl;
|
stream << "storeB = lB + idT.y*" << lldb << " + idT.x;" << std::endl;
|
||||||
|
|
||||||
if(A_trans_=='N' || B_trans_=='T')
|
|
||||||
stream << "Ky = K - idT.y;" << std::endl;
|
|
||||||
if(A_trans_=='T' || B_trans_=='N')
|
|
||||||
stream << "Kx = K - idT.x;" << std::endl;
|
|
||||||
|
|
||||||
stream << "//Outer loop" << std::endl;
|
stream << "//Outer loop" << std::endl;
|
||||||
stream << "while(K > 0){" << std::endl;
|
stream << "while(K >=" << p_.kL << "){" << std::endl;
|
||||||
stream.inc_tab();
|
stream.inc_tab();
|
||||||
stream << LocalBarrier(backend) << ";" << std::endl;
|
|
||||||
|
|
||||||
|
auto fetch_to_lds = [&](bool last_iteration)
|
||||||
|
{
|
||||||
|
|
||||||
|
stream << LocalBarrier(backend) << ";" << std::endl;
|
||||||
|
|
||||||
|
stream << "//Fetch A to local memory" << std::endl;
|
||||||
|
if (A_trans_=='N')
|
||||||
|
{
|
||||||
|
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
||||||
|
for(unsigned int m = 0; m < p_.mL; m += p_.local_fetch_0*p_.simd_width)
|
||||||
|
{
|
||||||
|
std::string mm = to_string(m/(p_.simd_width*p_.local_fetch_0));
|
||||||
|
std::string kk = to_string(k);
|
||||||
|
if(last_iteration)
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << "storeA[" << k*llda + m + s << "] = (condy" << k << " && " << s << "< M)? Ai[" << mm << "][" << k << "*lda + " << s << "] : 0;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << VSTORE(VLOAD("0" ,"&Ai[" + mm +"][" + kk + "*lda]"), "0", "storeA + " + to_string(k*llda+m)) << ";" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_0*p_.simd_width)
|
||||||
|
for(unsigned int m = 0; m < p_.mL; m += p_.local_fetch_1)
|
||||||
|
{
|
||||||
|
std::string mm = to_string(m/p_.local_fetch_1);
|
||||||
|
std::string kk = to_string(k);
|
||||||
|
if(last_iteration)
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << "storeA[" << m*llda + k + s << "] = condx" << k + s << "? Ai[" << mm << "][" << k + s << ASTRIDE1 << "] : 0;" << std::endl;
|
||||||
|
|
||||||
|
else
|
||||||
|
stream << VSTORE(VLOAD("0", "&Ai[" + mm + "][" + kk + ASTRIDE1 + "]"), "0", "storeA + " + to_string(m*llda+k)) << ";" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stream << "//Fetch B to local memory" << std::endl;
|
||||||
|
if (B_trans_=='T')
|
||||||
|
{
|
||||||
|
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
||||||
|
for(unsigned int n = 0; n < p_.nL; n += p_.local_fetch_0*p_.simd_width)
|
||||||
|
{
|
||||||
|
std::string nn = to_string(n/(p_.simd_width*p_.local_fetch_0));
|
||||||
|
std::string kk = to_string(k);
|
||||||
|
if(last_iteration)
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << "storeB[" << k*lldb + n + s << "] = (condy" << k << " && " << s << "< N)? Bi[" << nn << "][" << kk << "*ldb +" << s << "] : 0;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << VSTORE(VLOAD("0" ,"&Bi[" + nn +"][" + kk + "*ldb]"), "0", "storeB + " + to_string(k*lldb+n)) << ";" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_0*p_.simd_width)
|
||||||
|
for(unsigned int n = 0; n < p_.nL; n += p_.local_fetch_1)
|
||||||
|
{
|
||||||
|
std::string nn = to_string(n/p_.local_fetch_1);
|
||||||
|
std::string kk = to_string(k);
|
||||||
|
if(last_iteration)
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << "storeB[" << n*lldb + k + s << "] = condx" << k + s << "? Bi[" << nn << "][" << k + s << BSTRIDE1 << "] : 0;" << std::endl;
|
||||||
|
|
||||||
|
else
|
||||||
|
stream << VSTORE(VLOAD("0", "&Bi[" + nn + "][" + kk + BSTRIDE1 + "]"), "0", "storeB + " + to_string(n*lldb+k)) << ";" << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(A_trans_=='N')
|
||||||
|
stream << "readA = lA + ids.z*" << p_.simd_width << ";" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "readA = lA + ids.z*" << llda*p_.simd_width << ";" << std::endl;
|
||||||
|
|
||||||
|
if(B_trans_=='T')
|
||||||
|
stream << "readB = lB + ids.w*" << p_.simd_width << ";" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "readB = lB + ids.w*" << lldb*p_.simd_width << ";" << std::endl;
|
||||||
|
|
||||||
|
stream << LocalBarrier(backend) << ";" << std::endl;
|
||||||
|
|
||||||
|
stream << "//Inner loop" << std::endl;
|
||||||
|
stream << "for(unsigned int k = 0; k < " << p_.kL << "; k+=" << p_.kS << "){" << std::endl;
|
||||||
|
stream.inc_tab();
|
||||||
|
|
||||||
|
stream << "//Fetch A to registers" << std::endl;
|
||||||
|
stream << "#pragma unroll" << std::endl;
|
||||||
|
stream << "for(unsigned int kk = 0; kk < " << p_.kS << "; kk++)" << std::endl;
|
||||||
|
stream << "#pragma unroll " << p_.mS/p_.simd_width << std::endl;
|
||||||
|
stream << "for(unsigned int mm = 0; mm < " << p_.mS/p_.simd_width << "; mm++)" << std::endl;
|
||||||
|
stream << "{" << std::endl;
|
||||||
|
stream.inc_tab();
|
||||||
|
if(A_trans_=='N')
|
||||||
|
stream << "rA[kk][mm] = " << VLOAD("0", "readA + k*" + to_string(llda) + " + mm*" + to_string(p_.local_size_0*p_.simd_width) + "+ kk*" + to_string(llda)) << ";" << std::endl;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(p_.simd_width==1)
|
||||||
|
stream << "rA[kk][mm] = readA[k + mm*" << p_.local_size_0*llda << "+ kk" << "];" << std::endl;
|
||||||
|
else
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << access_vector_type("rA[kk][mm]", s) << " = readA[k + (mm*" << p_.simd_width*p_.local_size_0 << " + " << s << ")*" << llda << "+ kk];" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
stream.dec_tab();
|
||||||
|
stream << "}" << std::endl;
|
||||||
|
|
||||||
|
stream << "//Fetch B to registers" << std::endl;
|
||||||
|
stream << "#pragma unroll " << p_.kS << std::endl;
|
||||||
|
stream << "for(unsigned int kk = 0; kk < " << p_.kS << "; kk++)" << std::endl;
|
||||||
|
stream << "#pragma unroll " << p_.nS/p_.simd_width << std::endl;
|
||||||
|
stream << "for(unsigned int nn = 0; nn < " << p_.nS/p_.simd_width << "; nn++)" << std::endl;
|
||||||
|
stream << "{" << std::endl;
|
||||||
|
stream.inc_tab();
|
||||||
|
if(B_trans_=='T')
|
||||||
|
stream << "rB[kk][nn] = " << VLOAD("0", "readB + k*" + to_string(lldb) + " + nn*" + to_string(p_.local_size_1*p_.simd_width) + "+ kk*" + to_string(lldb)) << ";" << std::endl;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(p_.simd_width==1)
|
||||||
|
stream << "rB[kk][nn] = readB[k" << " + nn*" << p_.local_size_1*lldb << "+ kk" << "];" << std::endl;
|
||||||
|
else
|
||||||
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
|
stream << access_vector_type("rB[kk][nn]", s) << " = readB[k" << " + (nn*" << p_.simd_width*p_.local_size_1 << " + " << s << ")*" << lldb << "+ kk];" << std::endl;
|
||||||
|
}
|
||||||
|
stream.dec_tab();
|
||||||
|
stream << "}" << std::endl;
|
||||||
|
|
||||||
|
stream << "//FMA computations" << std::endl;
|
||||||
|
for(unsigned int kk=0 ; kk < p_.kS; ++kk)
|
||||||
|
for(unsigned int nn=0; nn < p_.nS; ++nn)
|
||||||
|
for(unsigned int mm=0; mm < p_.mS; ++mm)
|
||||||
|
{
|
||||||
|
string res_str, lhs_str, rhs_str;
|
||||||
|
res_str = "rC[" + to_string(mm) + "][" + to_string(nn) + "]";
|
||||||
|
if (p_.simd_width==1)
|
||||||
|
lhs_str = "rA[" + to_string(kk) + "][" + to_string(mm) + "]";
|
||||||
|
else
|
||||||
|
lhs_str = access_vector_type("rA[" + to_string(kk) + "][" + to_string(mm/p_.simd_width) + "]", mm%p_.simd_width);
|
||||||
|
if (p_.simd_width==1)
|
||||||
|
rhs_str = "rB[" + to_string(kk) + "]["+to_string(nn)+"]";
|
||||||
|
else
|
||||||
|
rhs_str = access_vector_type("rB[" + to_string(kk) + "]["+to_string(nn/p_.simd_width)+"]", nn%p_.simd_width);
|
||||||
|
stream << res_str << "=" << "fma(" << lhs_str << "," << rhs_str << "," << res_str << ");" << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
stream.dec_tab();
|
||||||
|
stream << "}" << std::endl;
|
||||||
|
|
||||||
|
stream << "K -= " << p_.kL << ";" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//Increment A pointers to global memory
|
||||||
|
if (A_trans_=='N')
|
||||||
|
for(unsigned int i = 0 ; i < npA ; ++i)
|
||||||
|
stream << "Ai[" << i << "] += " << p_.kL << "*lda;" << std::endl;
|
||||||
|
else
|
||||||
|
for(unsigned int i = 0 ; i < npA ; ++i)
|
||||||
|
stream << "Ai[" << i << "] += " << p_.kL << ASTRIDE1 << ";" << std::endl;
|
||||||
|
|
||||||
|
//Increment B pointers to global memory
|
||||||
|
if (B_trans_=='T')
|
||||||
|
for(unsigned int i = 0 ; i < npB ; ++i)
|
||||||
|
stream << "Bi[" << i << "] += " << p_.kL << "*ldb;" << std::endl;
|
||||||
|
else
|
||||||
|
for(unsigned int i = 0 ; i < npB ; ++i)
|
||||||
|
stream << "Bi[" << i << "] += " << p_.kL << BSTRIDE1 << ";" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
fetch_to_lds(false);
|
||||||
|
|
||||||
|
|
||||||
|
stream.dec_tab();
|
||||||
|
stream << "}" << std::endl;
|
||||||
|
|
||||||
|
|
||||||
if(A_trans_=='N' || B_trans_=='T')
|
if(A_trans_=='N' || B_trans_=='T')
|
||||||
{
|
stream << "int Ky = K - idT.y;" << std::endl;
|
||||||
|
if(A_trans_=='T' || B_trans_=='N')
|
||||||
|
stream << "int Kx = K - idT.x;" << std::endl;
|
||||||
|
|
||||||
|
if(A_trans_=='N' || B_trans_=='T')
|
||||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
||||||
stream << vint << " condy" << k << " = (" << vint << ")(" << k << ") < Ky;" << std::endl;
|
stream << "int condy" << k << " = " << k << " < Ky;" << std::endl;
|
||||||
}
|
|
||||||
|
|
||||||
if(A_trans_=='T' || B_trans_=='N')
|
if(A_trans_=='T' || B_trans_=='N')
|
||||||
{
|
{
|
||||||
for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
|
for(unsigned int k = 0 ; k < p_.kL ; k += p_.local_fetch_0*p_.simd_width)
|
||||||
{
|
|
||||||
stream << vint << " condx" << k << " = (" << vint << ")(";
|
|
||||||
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
||||||
stream << (s>0?",":"") << k + s;
|
stream << "int condx" << k + s << " = " << k + s << " < Kx;" << std::endl;
|
||||||
stream << ") < Kx;" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
stream << "//Fetch A to local memory" << std::endl;
|
fetch_to_lds(true);
|
||||||
if (A_trans_=='N')
|
|
||||||
{
|
|
||||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
|
||||||
for(unsigned int m = 0; m < p_.mL; m += p_.local_fetch_0*p_.simd_width)
|
|
||||||
{
|
|
||||||
std::string mm = to_string(m/(p_.simd_width*p_.local_fetch_0));
|
|
||||||
std::string kk = to_string(k);
|
|
||||||
string to_load = VLOAD("0" ,"&Ai[" + mm +"][" + kk + "*lda]");
|
|
||||||
to_load = "(" + kk + " < Ky)?select((" + vdtype + ")0, " + to_load + ", condy" + kk + "):0";
|
|
||||||
stream << VSTORE(to_load, "0", "storeA + " + to_string(k*llda+m)) << ";" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_0*p_.simd_width)
|
|
||||||
for(unsigned int m = 0; m < p_.mL; m += p_.local_fetch_1)
|
|
||||||
{
|
|
||||||
std::string mm = to_string(m/p_.local_fetch_1);
|
|
||||||
std::string kk = to_string(k);
|
|
||||||
string to_load = VLOAD("0", "&Ai[" + mm + "][" + kk + ASTRIDE1 + "]");
|
|
||||||
to_load = "(" + kk + " < Kx)?select((" + vdtype + ")0, " + to_load + ", condx" + kk + "):0";
|
|
||||||
stream << VSTORE(to_load, "0", "storeA + " + to_string(m*llda+k)) << ";" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stream << "//Fetch B to local memory" << std::endl;
|
|
||||||
if (B_trans_=='T')
|
|
||||||
{
|
|
||||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_1)
|
|
||||||
for(unsigned int n = 0; n < p_.nL; n += p_.local_fetch_0*p_.simd_width)
|
|
||||||
{
|
|
||||||
std::string nn = to_string(n/(p_.simd_width*p_.local_fetch_0));
|
|
||||||
std::string kk = to_string(k);
|
|
||||||
string to_load = VLOAD("0", "&Bi[" + nn + "][" + kk + "*ldb]");
|
|
||||||
to_load = "(" + kk + " < Ky)?select((" + vdtype + ")0, " + to_load + ", condy" + kk + "):0";
|
|
||||||
stream << VSTORE(to_load, "0", "storeB + " + to_string(k*lldb+n)) << ";" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for(unsigned int k = 0; k < p_.kL; k += p_.local_fetch_0*p_.simd_width)
|
|
||||||
for(unsigned int n = 0; n < p_.nL; n += p_.local_fetch_1)
|
|
||||||
{
|
|
||||||
std::string nn = to_string(n/p_.local_fetch_1);
|
|
||||||
std::string kk = to_string(k);
|
|
||||||
string to_load = VLOAD("0", "&Bi[" + nn + "][" + kk + BSTRIDE1 + "]");
|
|
||||||
to_load = "(" + kk + " < Kx)?select((" + vdtype + ")0, " + to_load + ", condx" + kk + "):0";
|
|
||||||
stream << VSTORE(to_load, "0", "storeB + " + to_string(n*lldb+k)) << ";" << std::endl;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(A_trans_=='N')
|
|
||||||
stream << "readA = lA + ids.z*" << p_.simd_width << ";" << std::endl;
|
|
||||||
else
|
|
||||||
stream << "readA = lA + ids.z*" << llda*p_.simd_width << ";" << std::endl;
|
|
||||||
|
|
||||||
if(B_trans_=='T')
|
|
||||||
stream << "readB = lB + ids.w*" << p_.simd_width << ";" << std::endl;
|
|
||||||
else
|
|
||||||
stream << "readB = lB + ids.w*" << lldb*p_.simd_width << ";" << std::endl;
|
|
||||||
|
|
||||||
stream << LocalBarrier(backend) << ";" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
stream << "//Inner loop" << std::endl;
|
|
||||||
stream << "for(unsigned int k = 0; k < " << p_.kL << "; k+=" << p_.kS << "){" << std::endl;
|
|
||||||
stream.inc_tab();
|
|
||||||
|
|
||||||
stream << "//Fetch A to registers" << std::endl;
|
|
||||||
stream << "#pragma unroll" << std::endl;
|
|
||||||
stream << "for(unsigned int kk = 0; kk < " << p_.kS << "; kk++)" << std::endl;
|
|
||||||
stream << "#pragma unroll " << p_.mS/p_.simd_width << std::endl;
|
|
||||||
stream << "for(unsigned int mm = 0; mm < " << p_.mS/p_.simd_width << "; mm++)" << std::endl;
|
|
||||||
stream << "{" << std::endl;
|
|
||||||
stream.inc_tab();
|
|
||||||
if(A_trans_=='N')
|
|
||||||
stream << "rA[kk][mm] = " << VLOAD("0", "readA + k*" + to_string(llda) + " + mm*" + to_string(p_.local_size_0*p_.simd_width) + "+ kk*" + to_string(llda)) << ";" << std::endl;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(p_.simd_width==1)
|
|
||||||
stream << "rA[kk][mm] = readA[k + mm*" << p_.local_size_0*llda << "+ kk" << "];" << std::endl;
|
|
||||||
else
|
|
||||||
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
|
||||||
stream << access_vector_type("rA[kk][mm]", s) << " = readA[k + (mm*" << p_.simd_width*p_.local_size_0 << " + " << s << ")*" << llda << "+ kk];" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
|
|
||||||
stream << "//Fetch B to registers" << std::endl;
|
|
||||||
stream << "#pragma unroll " << p_.kS << std::endl;
|
|
||||||
stream << "for(unsigned int kk = 0; kk < " << p_.kS << "; kk++)" << std::endl;
|
|
||||||
stream << "#pragma unroll " << p_.nS/p_.simd_width << std::endl;
|
|
||||||
stream << "for(unsigned int nn = 0; nn < " << p_.nS/p_.simd_width << "; nn++)" << std::endl;
|
|
||||||
stream << "{" << std::endl;
|
|
||||||
stream.inc_tab();
|
|
||||||
if(B_trans_=='T')
|
|
||||||
stream << "rB[kk][nn] = " << VLOAD("0", "readB + k*" + to_string(lldb) + " + nn*" + to_string(p_.local_size_1*p_.simd_width) + "+ kk*" + to_string(lldb)) << ";" << std::endl;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if(p_.simd_width==1)
|
|
||||||
stream << "rB[kk][nn] = readB[k" << " + nn*" << p_.local_size_1*lldb << "+ kk" << "];" << std::endl;
|
|
||||||
else
|
|
||||||
for(unsigned int s = 0 ; s < p_.simd_width ; ++s)
|
|
||||||
stream << access_vector_type("rB[kk][nn]", s) << " = readB[k" << " + (nn*" << p_.simd_width*p_.local_size_1 << " + " << s << ")*" << lldb << "+ kk];" << std::endl;
|
|
||||||
}
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
|
|
||||||
stream << "//FMA computations" << std::endl;
|
|
||||||
for(unsigned int kk=0 ; kk < p_.kS; ++kk)
|
|
||||||
for(unsigned int nn=0; nn < p_.nS; ++nn)
|
|
||||||
for(unsigned int mm=0; mm < p_.mS; ++mm)
|
|
||||||
{
|
|
||||||
string res_str, lhs_str, rhs_str;
|
|
||||||
res_str = "rC[" + to_string(mm) + "][" + to_string(nn) + "]";
|
|
||||||
if (p_.simd_width==1)
|
|
||||||
lhs_str = "rA[" + to_string(kk) + "][" + to_string(mm) + "]";
|
|
||||||
else
|
|
||||||
lhs_str = access_vector_type("rA[" + to_string(kk) + "][" + to_string(mm/p_.simd_width) + "]", mm%p_.simd_width);
|
|
||||||
if (p_.simd_width==1)
|
|
||||||
rhs_str = "rB[" + to_string(kk) + "]["+to_string(nn)+"]";
|
|
||||||
else
|
|
||||||
rhs_str = access_vector_type("rB[" + to_string(kk) + "]["+to_string(nn/p_.simd_width)+"]", nn%p_.simd_width);
|
|
||||||
stream << res_str << "=" << "fma(" << lhs_str << "," << rhs_str << "," << res_str << ");" << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
|
|
||||||
|
|
||||||
stream << "K -= " << p_.kL << ";" << std::endl;
|
|
||||||
if(A_trans_=='N' || B_trans_=='T')
|
|
||||||
stream << "Ky -= " << p_.kL << ";" << std::endl;
|
|
||||||
if(A_trans_=='T' || B_trans_=='N')
|
|
||||||
stream << "Kx -= " << p_.kL << ";" << std::endl;
|
|
||||||
|
|
||||||
//Increment A pointers to global memory
|
|
||||||
if (A_trans_=='N')
|
|
||||||
for(unsigned int i = 0 ; i < npA ; ++i)
|
|
||||||
stream << "Ai[" << i << "] += " << p_.kL << "*lda;" << std::endl;
|
|
||||||
else
|
|
||||||
for(unsigned int i = 0 ; i < npA ; ++i)
|
|
||||||
stream << "Ai[" << i << "] += " << p_.kL << ASTRIDE1 << ";" << std::endl;
|
|
||||||
|
|
||||||
//Increment B pointers to global memory
|
|
||||||
if (B_trans_=='T')
|
|
||||||
for(unsigned int i = 0 ; i < npB ; ++i)
|
|
||||||
stream << "Bi[" << i << "] += " << p_.kL << "*ldb;" << std::endl;
|
|
||||||
else
|
|
||||||
for(unsigned int i = 0 ; i < npB ; ++i)
|
|
||||||
stream << "Bi[" << i << "] += " << p_.kL << BSTRIDE1 << ";" << std::endl;
|
|
||||||
|
|
||||||
stream.dec_tab();
|
|
||||||
stream << "}" << std::endl;
|
|
||||||
|
|
||||||
stream << "//Write back C" << std::endl;
|
stream << "//Write back C" << std::endl;
|
||||||
stream << "M += ids.x;" << std::endl;
|
stream << "M += ids.x;" << std::endl;
|
||||||
|
if(A_trans_=='N')
|
||||||
|
stream << "M += idT.x;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "M += idT.y;" << std::endl;
|
||||||
|
|
||||||
|
if(B_trans_=='T')
|
||||||
|
stream << "N += idT.x;" << std::endl;
|
||||||
|
else
|
||||||
|
stream << "N += idT.y;" << std::endl;
|
||||||
stream << "N += ids.y;" << std::endl;
|
stream << "N += ids.y;" << std::endl;
|
||||||
stream << _size_t << " offx = (ids.x + ids.z*" << p_.simd_width << ")" << ";" << std::endl;
|
stream << _size_t << " offx = (ids.x + ids.z*" << p_.simd_width << ")" << ";" << std::endl;
|
||||||
stream << _size_t << " offy = (ids.y + ids.w*" << p_.simd_width << ");" << std::endl;
|
stream << _size_t << " offy = (ids.y + ids.w*" << p_.simd_width << ");" << std::endl;
|
||||||
|
340
python/setup.py
Executable file → Normal file
340
python/setup.py
Executable file → Normal file
@@ -1,170 +1,170 @@
|
|||||||
#Thanks to Andreas Knoeckler for providing stand-alone boost.python
|
#Thanks to Andreas Knoeckler for providing stand-alone boost.python
|
||||||
#through PyOpenCL and PyCUDA
|
#through PyOpenCL and PyCUDA
|
||||||
|
|
||||||
import os, sys
|
import os, sys
|
||||||
from distutils.ccompiler import show_compilers,new_compiler
|
from distutils.ccompiler import show_compilers,new_compiler
|
||||||
from distutils.command.build_ext import build_ext
|
from distutils.command.build_ext import build_ext
|
||||||
from distutils.command.build_py import build_py
|
from distutils.command.build_py import build_py
|
||||||
from distutils.core import setup, Extension
|
from distutils.core import setup, Extension
|
||||||
from distutils.sysconfig import get_python_inc
|
from distutils.sysconfig import get_python_inc
|
||||||
from distutils import sysconfig
|
from distutils import sysconfig
|
||||||
from imp import find_module
|
from imp import find_module
|
||||||
from glob import glob
|
from glob import glob
|
||||||
from os.path import dirname
|
from os.path import dirname
|
||||||
|
|
||||||
platform_cflags = {}
|
platform_cflags = {}
|
||||||
platform_ldflags = {}
|
platform_ldflags = {}
|
||||||
platform_libs = {}
|
platform_libs = {}
|
||||||
|
|
||||||
class build_ext_subclass(build_ext):
|
class build_ext_subclass(build_ext):
|
||||||
def build_extensions(self):
|
def build_extensions(self):
|
||||||
c = self.compiler.compiler_type
|
c = self.compiler.compiler_type
|
||||||
if c in platform_cflags.keys():
|
if c in platform_cflags.keys():
|
||||||
for e in self.extensions:
|
for e in self.extensions:
|
||||||
e.extra_compile_args = platform_cflags[c]
|
e.extra_compile_args = platform_cflags[c]
|
||||||
if c in platform_ldflags.keys():
|
if c in platform_ldflags.keys():
|
||||||
for e in self.extensions:
|
for e in self.extensions:
|
||||||
e.extra_link_args = platform_ldflags[c]
|
e.extra_link_args = platform_ldflags[c]
|
||||||
if c in platform_libs.keys():
|
if c in platform_libs.keys():
|
||||||
for e in self.extensions:
|
for e in self.extensions:
|
||||||
try:
|
try:
|
||||||
e.libraries += platform_libs[c]
|
e.libraries += platform_libs[c]
|
||||||
except:
|
except:
|
||||||
e.libraries = platform_libs[c]
|
e.libraries = platform_libs[c]
|
||||||
build_ext.build_extensions(self)
|
build_ext.build_extensions(self)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
||||||
def recursive_glob(rootdir='.', suffix=''):
|
def recursive_glob(rootdir='.', suffix=''):
|
||||||
return [os.path.join(looproot, filename)
|
return [os.path.join(looproot, filename)
|
||||||
for looproot, _, filenames in os.walk(rootdir)
|
for looproot, _, filenames in os.walk(rootdir)
|
||||||
for filename in filenames if filename.endswith(suffix)]
|
for filename in filenames if filename.endswith(suffix)]
|
||||||
|
|
||||||
def remove_prefixes(optlist, bad_prefixes):
|
def remove_prefixes(optlist, bad_prefixes):
|
||||||
for bad_prefix in bad_prefixes:
|
for bad_prefix in bad_prefixes:
|
||||||
for i, flag in enumerate(optlist):
|
for i, flag in enumerate(optlist):
|
||||||
if flag.startswith(bad_prefix):
|
if flag.startswith(bad_prefix):
|
||||||
optlist.pop(i)
|
optlist.pop(i)
|
||||||
break
|
break
|
||||||
return optlist
|
return optlist
|
||||||
|
|
||||||
def find_library(name, cmake_glob_list):
|
def find_library(name, cmake_glob_list):
|
||||||
cvars = sysconfig.get_config_vars()
|
cvars = sysconfig.get_config_vars()
|
||||||
compiler = new_compiler()
|
compiler = new_compiler()
|
||||||
dirs = []
|
dirs = []
|
||||||
for gpath in cmake_glob_list.split(';'):
|
for gpath in cmake_glob_list.split(';'):
|
||||||
path = glob(gpath)
|
path = glob(gpath)
|
||||||
if path:
|
if path:
|
||||||
dirs += [path[0]]
|
dirs += [path[0]]
|
||||||
return compiler.find_library_file(cvars['LIBDIR'].split(';') + dirs, name)
|
return compiler.find_library_file(cvars['LIBDIR'].split(';') + dirs, name)
|
||||||
|
|
||||||
def find_opencl():
|
def find_opencl():
|
||||||
cvars = sysconfig.get_config_vars()
|
cvars = sysconfig.get_config_vars()
|
||||||
is_on_android = '-mandroid' in cvars['PY_CFLAGS']
|
is_on_android = '-mandroid' in cvars['PY_CFLAGS']
|
||||||
lib = find_library('OpenCL', '' if is_on_android else '/opt/AMDAPPSDK*/lib/x86_64')
|
lib = find_library('OpenCL', '' if is_on_android else '/opt/AMDAPPSDK*/lib/x86_64')
|
||||||
return {'include': '', 'lib': dirname(lib)} if lib else None
|
return {'include': '', 'lib': dirname(lib)} if lib else None
|
||||||
|
|
||||||
def find_in_path(name, path):
|
def find_in_path(name, path):
|
||||||
"Find a file in a search path"
|
"Find a file in a search path"
|
||||||
#adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
|
#adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
|
||||||
for dir in path.split(os.pathsep):
|
for dir in path.split(os.pathsep):
|
||||||
binpath = os.path.join(dir, name)
|
binpath = os.path.join(dir, name)
|
||||||
if os.path.exists(binpath):
|
if os.path.exists(binpath):
|
||||||
return os.path.abspath(binpath)
|
return os.path.abspath(binpath)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def find_cuda():
|
def find_cuda():
|
||||||
if 'CUDAHOME' in os.environ:
|
if 'CUDAHOME' in os.environ:
|
||||||
home = os.environ['CUDAHOME']
|
home = os.environ['CUDAHOME']
|
||||||
nvcc = os.path.join(home, 'bin', 'nvcc')
|
nvcc = os.path.join(home, 'bin', 'nvcc')
|
||||||
else:
|
else:
|
||||||
nvcc = find_in_path('nvcc', os.environ['PATH'])
|
nvcc = find_in_path('nvcc', os.environ['PATH'])
|
||||||
|
|
||||||
if nvcc:
|
if nvcc:
|
||||||
home = dirname(os.path.dirname(nvcc))
|
home = dirname(os.path.dirname(nvcc))
|
||||||
return {'include': os.path.join(home, 'include'),
|
return {'include': os.path.join(home, 'include'),
|
||||||
'lib': os.path.join(home, 'lib64')}
|
'lib': os.path.join(home, 'lib64')}
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
#Tweaks warning, because boost-numpy and boost-python won't compile cleanly without these changes
|
#Tweaks warning, because boost-numpy and boost-python won't compile cleanly without these changes
|
||||||
cvars = sysconfig.get_config_vars()
|
cvars = sysconfig.get_config_vars()
|
||||||
cvars['OPT'] = str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-Wstrict-prototypes']))
|
cvars['OPT'] = str.join(' ', remove_prefixes(cvars['OPT'].split(), ['-g', '-Wstrict-prototypes']))
|
||||||
cvars["CFLAGS"] = cvars["BASECFLAGS"] + ' ' + cvars['OPT']
|
cvars["CFLAGS"] = cvars["BASECFLAGS"] + ' ' + cvars['OPT']
|
||||||
cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"]
|
cvars["LDFLAGS"] = '-Wl,--no-as-needed ' + cvars["LDFLAGS"]
|
||||||
|
|
||||||
#OpenCL
|
#OpenCL
|
||||||
opencl_config = find_opencl()
|
opencl_config = find_opencl()
|
||||||
|
|
||||||
#CUDA
|
#CUDA
|
||||||
cuda_config = find_cuda()
|
cuda_config = find_cuda()
|
||||||
|
|
||||||
#Libraries
|
#Libraries
|
||||||
libraries = ['OpenCL']
|
libraries = ['OpenCL']
|
||||||
if cuda_config: libraries += ['cuda', 'nvrtc']
|
if cuda_config: libraries += ['cuda', 'nvrtc']
|
||||||
|
|
||||||
#Backends:
|
#Backends:
|
||||||
backend_defines = ['-DISAAC_WITH_OPENCL']
|
backend_defines = ['-DISAAC_WITH_OPENCL']
|
||||||
if cuda_config: backend_defines += ['-DISAAC_WITH_CUDA']
|
if cuda_config: backend_defines += ['-DISAAC_WITH_CUDA']
|
||||||
|
|
||||||
#Library directories
|
#Library directories
|
||||||
library_dirs = [config['lib'] for config in [opencl_config, cuda_config] if config is not None]
|
library_dirs = [config['lib'] for config in [opencl_config, cuda_config] if config is not None]
|
||||||
|
|
||||||
#Include directories
|
#Include directories
|
||||||
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
|
include =' src/include'.split() + ['external/boost/include', os.path.join(find_module("numpy")[1], "core", "include")]
|
||||||
|
|
||||||
#Source files
|
#Source files
|
||||||
src = 'src/lib/wrap/clBLAS.cpp src/lib/value_scalar.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/symbolic/execute.cpp src/lib/model/predictors/random_forest.cpp src/lib/model/model.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/program.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/context.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/driver/backend.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/dot.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp src/lib/backend/stream.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp src/lib/array.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
src = 'src/lib/array.cpp src/lib/value_scalar.cpp src/lib/wrap/clBLAS.cpp src/lib/symbolic/execute.cpp src/lib/symbolic/preset.cpp src/lib/symbolic/io.cpp src/lib/symbolic/expression.cpp src/lib/model/model.cpp src/lib/model/predictors/random_forest.cpp src/lib/exception/unknown_datatype.cpp src/lib/exception/operation_not_supported.cpp src/lib/driver/context.cpp src/lib/driver/program.cpp src/lib/driver/backend.cpp src/lib/driver/platform.cpp src/lib/driver/ndrange.cpp src/lib/driver/kernel.cpp src/lib/driver/handle.cpp src/lib/driver/event.cpp src/lib/driver/device.cpp src/lib/driver/command_queue.cpp src/lib/driver/check.cpp src/lib/driver/buffer.cpp src/lib/backend/parse.cpp src/lib/backend/mapped_object.cpp src/lib/backend/templates/gemm.cpp src/lib/backend/templates/base.cpp src/lib/backend/templates/axpy.cpp src/lib/backend/templates/ger.cpp src/lib/backend/templates/gemv.cpp src/lib/backend/templates/dot.cpp src/lib/backend/stream.cpp src/lib/backend/keywords.cpp src/lib/backend/binder.cpp '.split() + [os.path.join('src', 'wrap', sf) for sf in ['_isaac.cpp', 'core.cpp', 'driver.cpp', 'model.cpp', 'exceptions.cpp']]
|
||||||
boostsrc = 'external/boost/libs/'
|
boostsrc = 'external/boost/libs/'
|
||||||
for s in ['numpy','python','smart_ptr','system','thread']:
|
for s in ['numpy','python','smart_ptr','system','thread']:
|
||||||
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
src = src + [x for x in recursive_glob('external/boost/libs/' + s + '/src/','.cpp') if 'win32' not in x and 'pthread' not in x]
|
||||||
# make sure next line succeeds even on Windows
|
# make sure next line succeeds even on Windows
|
||||||
src = [f.replace("\\", "/") for f in src]
|
src = [f.replace("\\", "/") for f in src]
|
||||||
if sys.platform == "win32":
|
if sys.platform == "win32":
|
||||||
src += glob(boostsrc + "/thread/src/win32/*.cpp")
|
src += glob(boostsrc + "/thread/src/win32/*.cpp")
|
||||||
src += glob(boostsrc + "/thread/src/tss_null.cpp")
|
src += glob(boostsrc + "/thread/src/tss_null.cpp")
|
||||||
else:
|
else:
|
||||||
src += glob(boostsrc + "/thread/src/pthread/*.cpp")
|
src += glob(boostsrc + "/thread/src/pthread/*.cpp")
|
||||||
src= [f for f in src if not f.endswith("once_atomic.cpp")]
|
src= [f for f in src if not f.endswith("once_atomic.cpp")]
|
||||||
|
|
||||||
#Setup
|
#Setup
|
||||||
setup(
|
setup(
|
||||||
name='isaac',
|
name='isaac',
|
||||||
version='1.0',
|
version='1.0',
|
||||||
description="Input-specific architecture-aware computations",
|
description="Input-specific architecture-aware computations",
|
||||||
author='Philippe Tillet',
|
author='Philippe Tillet',
|
||||||
author_email='ptillet@g.harvard.edu',
|
author_email='ptillet@g.harvard.edu',
|
||||||
license='MPL 2.0',
|
license='MPL 2.0',
|
||||||
packages=["isaac"],
|
packages=["isaac"],
|
||||||
ext_package="isaac",
|
ext_package="isaac",
|
||||||
ext_modules=[Extension(
|
ext_modules=[Extension(
|
||||||
'_isaac',src,
|
'_isaac',src,
|
||||||
extra_compile_args= backend_defines + ['-std=c++11', '-Wno-unused-function', '-Wno-unused-local-typedefs', '-Wno-sign-compare'],
|
extra_compile_args= backend_defines + ['-std=c++11', '-Wno-unused-function', '-Wno-unused-local-typedefs', '-Wno-sign-compare'],
|
||||||
extra_link_args=['-Wl,-soname=_isaac.so'],
|
extra_link_args=['-Wl,-soname=_isaac.so'],
|
||||||
undef_macros=[],
|
undef_macros=[],
|
||||||
include_dirs=include,
|
include_dirs=include,
|
||||||
library_dirs=library_dirs,
|
library_dirs=library_dirs,
|
||||||
libraries=libraries
|
libraries=libraries
|
||||||
)],
|
)],
|
||||||
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},
|
cmdclass={'build_py': build_py, 'build_ext': build_ext_subclass},
|
||||||
classifiers=[
|
classifiers=[
|
||||||
'Environment :: Console',
|
'Environment :: Console',
|
||||||
'Development Status :: 1 - Experimental',
|
'Development Status :: 1 - Experimental',
|
||||||
'Intended Audience :: Developers',
|
'Intended Audience :: Developers',
|
||||||
'Intended Audience :: Other Audience',
|
'Intended Audience :: Other Audience',
|
||||||
'Intended Audience :: Science/Research',
|
'Intended Audience :: Science/Research',
|
||||||
'License :: OSI Approved :: MIT License',
|
'License :: OSI Approved :: MIT License',
|
||||||
'Natural Language :: English',
|
'Natural Language :: English',
|
||||||
'Programming Language :: C++',
|
'Programming Language :: C++',
|
||||||
'Programming Language :: Python',
|
'Programming Language :: Python',
|
||||||
'Programming Language :: Python :: 3',
|
'Programming Language :: Python :: 3',
|
||||||
'Topic :: Scientific/Engineering',
|
'Topic :: Scientific/Engineering',
|
||||||
'Topic :: Scientific/Engineering :: Mathematics',
|
'Topic :: Scientific/Engineering :: Mathematics',
|
||||||
'Topic :: Scientific/Engineering :: Physics',
|
'Topic :: Scientific/Engineering :: Physics',
|
||||||
'Topic :: Scientific/Engineering :: Machine Learning',
|
'Topic :: Scientific/Engineering :: Machine Learning',
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user