1 line
22 KiB
JavaScript
1 line
22 KiB
JavaScript
Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index","programming-guide/chapter-1/introduction","programming-guide/chapter-2/related-work","programming-guide/chapter-3/triton-c","programming-guide/chapter-4/triton-ir","programming-guide/introduction","programming-guide/related-work","programming-guide/triton-c"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst","programming-guide/chapter-1/introduction.rst","programming-guide/chapter-2/related-work.rst","programming-guide/chapter-3/triton-c.rst","programming-guide/chapter-4/triton-ir.rst","programming-guide/introduction.rst","programming-guide/related-work.rst","programming-guide/triton-c.rst"],objects:{},objnames:{},objtypes:{},terms:{"000":5,"0000":3,"1024":1,"10mn":2,"1250":3,"128":3,"182":3,"1823":2,"184":3,"185":3,"186":3,"188":3,"190":3,"191":3,"192":3,"193":3,"194":3,"195":3,"196":3,"198":3,"1983":[7,11],"1984":[8,12],"1989":[8,12],"199":3,"1991":[7,8,11,12],"1999":[8,10,12],"200":3,"2001":10,"2004":8,"2006":[8,12],"2012":[8,12],"2013":[7,10,11],"2014":[7,11],"2016":[7,8,11,12],"2017":[7,11],"2018":[7,8,11,12],"2019":[1,8,12],"202":3,"2021":[7,8,11,12],"2048":2,"2141":1,"220":3,"245":3,"2500":3,"256":[2,3],"2mn":2,"3076":1,"3713":1,"3750":3,"3mn":2,"4096":2,"4940":1,"497":1,"5000":3,"512":3,"6250":3,"654":[2,5],"6724":1,"7500":3,"768":3,"781":2,"7mn":2,"861":3,"8750":3,"896":3,"9733":1,"98432":1,"abstract":[7,8,10,11,12],"break":[8,12],"byte":2,"case":[2,3,7,8,9,10,11,12,13],"class":[1,2,3,7,8,9,11,12,13],"export":3,"final":10,"float":[1,2,3,7,8,9,11,12,13],"function":[1,2,8,9,12,13],"import":[1,2,3,7,8,9,11,12,13],"int":[1,2,3,7,8,9,11,12,13],"return":[1,2,3,10],"short":3,"static":[7,8,9,10,11,12,13],"super":3,"switch":3,"true":[1,2,3],"typeof":3,"var":[8,12],"void":[1,2,3],"while":[0,7,9,11,13],For:[1,3,7,8,9,10,11,12,13],One:[3,9,13],SMs:[8,12],T_S:[8,12],The:[1,2,6,7,8,11,12],These:[3,8,9,10,12,13],Used:[1,2,3],__expf:2,__global__:[1,2,3,9,13],_add:1,_align8:3,_dot:3,_softmax:2,_src:[1,2],a100:[3,8,12],abl:[3,8,12],about:[1,2,3,6],abov:[1,2,3,8,9,12,13],abs:1,absolut:3,academ:[7,11],acc:[3,7,8,11,12],acceler:[7,11],access:[1,7,8,10,11,12],accomod:[3,9,13],accordingli:[8,12],account:[3,8,12],accumul:[8,12],accuraci:[7,11],achiev:[1,7,8,9,11,12,13],across:[7,8,11,12],activ:3,actual:[3,7,8,11,12],add:[1,5,9,10,13],added:[9,10,13],addit:[2,3,4,5,7,9,10,11,13],addition:[8,9,12,13],address:[2,7,11],adequ:10,adopt:[8,12],advanc:[7,11],advantag:1,advoc:[8,12],affect:3,affin:[8,12],after:[3,10],again:[9,13],against:[1,2,3,6,10],aggreg:10,aggress:[2,7,8,10,11,12],agnost:[7,8,11,12],ahead:[8,12],aim:6,algebra:[8,12],algorithm:[3,7,8,9,10,11,12,13],alia:[8,12],alias:10,align8:3,align:10,all:[2,3,4,7,8,9,10,11,12,13],allclos:[2,3],allen1984:8,allen:[8,12],alloc:[1,7,11],allow:[1,2,7,8,10,11,12],almost:2,along:[1,9,10,13],also:[1,2,3,7,8,9,10,11,12,13],alwai:[8,12],amd:[7,11],amen:[8,12],amount:[2,7,11],ampl:[8,12],analysi:[7,8,9,11,12,13],analyz:[8,12],ancourt1991:8,ancourt:[8,12],ani:[2,3,8,9,12,13],anoth:[2,8,10,12],ansi:[9,13],anywai:10,apart:[8,12],api:[0,1],appli:[1,2,3,7,8,11,12],applic:[8,10,12],approach:[7,8,9,11,12,13],appropri:[9,13],approxim:2,architectur:[3,7,11],area:[8,12],arg:[1,2,3],argument:[1,2,3,9,10,13],arithmet:10,around:2,arrai:[1,8,9,10,12,13],arrang:3,art:[7,8,11,12],arxiv:[7,8,11,12],ask:2,aspect:[8,9,12,13],asplo:[7,11],assembl:3,assert:[1,2,3],assign:10,associ:[9,13],assum:2,ast:10,asynchron:[7,11],atol:3,atom:[9,10,13],atomic_add:[9,13],attribut:[1,9,10,13],auguin1983:7,auguin:[7,11],auto:[2,8,10,12],autograd:[1,2],autom:[7,11],automat:[2,3,7,8,9,11,12,13],autotun:[3,8,12],autotune_config:3,autotune_kei:3,avail:[3,7,8,11,12],awar:[7,9,11,13],axi:[1,2,3,9,13],back:[1,2,3],backend:10,backward:1,baghdadi2021:[7,8],baghdadi:[7,8,11,12],balanc:[8,12],bandwidth:2,base:[7,8,9,10,11,12,13],basic:[1,3,4,8,9,12,13],becaus:[2,9,13],becom:[7,11],been:[7,8,11,12],befor:[3,10],begin:[8,12],behavior:[8,12],being:10,believ:[8,12],below:[1,4,8,9,10,12,13],bench:0,benchmark:0,benefit:[2,7,8,9,10,11,12,13],best:[1,3,7,11],better:[3,10],between:[1,7,11],bin:3,binari:[9,13],bit:[2,10],block:[1,2,3,7,8,9,11,12,13],block_siz:[8,12],blockidx:1,bodi:10,boil:[8,12],bool:[1,2,9,13],both:[8,9,12,13],bound:[1,2,8,12],branch:[8,10,12],braun13:10,braun:10,broad:[7,11],broadcast:10,build:[0,3],built:[1,3,8,9,12,13],c_0:3,c_1:3,c_2:3,cach:[1,2,7,8,9,10,11,12,13],call:[3,8,10,12],callabl:1,can:[0,1,2,3,7,8,9,10,11,12,13],cannot:[3,7,8,9,11,12,13],capabl:[2,6,7,9,11,13],carri:10,carter99:10,carter:10,cast:3,categori:[9,13],cdiv:[1,3],ceil:1,center:10,certain:[9,13],cfg:10,cgo:[7,8,11,12],chang:[3,9,13],chapter:[6,9,10,13],characterist:[8,12],cheap:[7,11],check:[1,2,6],chen2018:7,chen:[7,11],chip:2,choic:6,chunk:1,cite:[8,12],citizen:[1,9,13],click:[1,2,3],clone:[0,3],close:[8,10,12],cmake:[0,3],cmp:10,coalesc:[7,9,11,13],code:[1,2,3,4,7,8,9,10,11,12,13],codegen:0,coher:10,col:[8,12],column:[2,3,9,13],com:[0,3,8,12],combin:[7,11],come:[1,2,3,8,12],command:[0,3],comment:1,common:[8,12],commonli:[8,12],commun:[9,13],compar:[2,3,6,8,12],compil:[0,1,2,3,6,7,9,10,11,13],complet:[8,9,12,13],complex:[8,9,10,12,13],compos:[7,10,11],composit:[8,12],comprehens:[7,8,11,12],comput:[6,7,8,11,12],computation:[7,8,11,12],concern:[8,12],concis:1,concurr:[9,13],cond:[1,10],condit:[1,8,9,10,12,13],config:3,connectom:[7,11],consecut:[8,12],consequ:[7,11],consid:2,consist:[2,10],constant:[9,10,13],constraint:[1,2,8,9,12,13],construct:[3,7,9,10,11,13],constructor:3,contain:[8,10,12],content:[9,13],context:1,contextu:[8,12],contigu:[1,3],contrari:1,control:[1,7,8,9,11,12,13],convers:[9,13],convolut:[7,11],coordin:1,copi:[1,7,11],core:[3,7,8,9,11,12,13],correct:1,correspond:[1,2,3,10],cost:[8,12],could:[2,3,8,10,12],cours:[1,7,11],creat:[1,3,7,9,10,11,13],crucial:10,csv:1,ctx:[1,2,3],cubla:[3,7,11],cuda:[1,2,3,7,9,11,13],cudnn:[7,11],current:[3,9,13],custom:[1,2,3,6],cutlass_include_dir:3,cutlass_library_dir:3,cutlass_library_kernel:3,cutlass_matmul:3,cutlass_tensorop_f16_s16816gemm_:3,cutlass_tensorop_f16_s884gemm_f16_:3,cvpr:[7,11],dart:[8,12],darte1999:8,data:[1,2,3,7,8,9,10,11,12,13],data_ptr:[1,2,3],dataflow:[8,12],dblock:1,dcutlass_library_kernel:3,dcutlass_nvcc_archs_en:3,decad:[7,11],declar:[1,9,10,13],decompos:[8,9,12,13],decor:1,decreas:3,deep:[3,7,8,11,12],def:[1,2,3],defin:[1,2,3,8,9,10,12,13],definit:[8,9,10,12,13],denom:2,denomin:2,dens:[8,12],depend:[8,12],deploi:[7,11],dereferenc:[1,9,13],describ:[8,9,10,12,13],design:[8,12],desir:[3,10],detail:[1,8,9,12,13],detect:[7,11],develop:[7,8,9,11,12,13],devic:[1,2,3],dfg:10,dialect:[8,12],dict:[1,2,3],diesel:[8,12],differ:[1,2,3,7,8,11,12],difficult:[8,12],difficulti:[3,7,11],dijkstra82:[8,12],dim:[8,12],dimens:[3,9,10,13],dimension:[3,8,9,10,12,13],dir:0,direct:0,directli:[0,9,10,13],discuss:[9,13],disjoint:[8,12],disk:1,dissert:[8,12],distribut:[8,12],diverg:10,divis:1,dnn:[6,7,8,11,12],do_bench:[1,2,3],document:[9,10,13],doe:[1,2,8,9,12,13],doesn:[8,12],doing:[7,10,11],domain:[7,8,11,12],don:2,done:[0,3,7,9,10,11,13],door:[9,13],dot:[3,10],doubl:[9,13],doubli:3,doubt:[8,12],down:[8,12],download:[0,1,2,3,4],dram:2,driver:0,dsl:[6,7,8,10,11,12],dtype:[1,2,3],dure:3,each:[1,2,3,7,8,9,10,11,12,13],eas:[8,12],easi:[1,3],easier:[1,2,7,11],education:2,effect:[8,12],effici:[3,7,10,11],effort:[8,12],egg:3,either:[9,10,13],elango2018:8,elango:[8,12],element:[1,2,9,10,13],element_s:2,elementwis:2,ellips:[9,13],emerg:[7,11],emit:[9,13],empti:[3,10],empty_lik:[1,2],enabl:[8,12],encod:[8,12],end:[7,8,9,10,11,12,13],enforc:[8,12],engin:[8,9,12,13],ensur:[8,12],entir:[2,8,12],environ:[3,6,10],equal:[8,12],error:[9,13],especi:[7,11],etc:[3,9,13],euromicro:[7,11],even:[8,12],eventu:10,evidenc:[7,11],evolv:[7,11],exampl:[1,2,3,4,7,8,9,10,11,12,13],execut:[1,5,7,8,9,10,11,12,13],exemplifi:[9,13],exist:[1,7,8,9,11,12,13],exp:2,expect:[1,2],expens:[7,8,11,12],expert:3,explor:[7,11],exponenti:2,expos:[3,10],express:[7,8,11,12],extend:[3,9,10,13],extens:[1,10],extrem:[8,12],f32_infin:2,facilit:[7,8,9,11,12,13],fact:[3,8,12],fairli:3,fall:[3,9,13],false_valu:10,familiar:[9,13],far:2,fast:[2,7,8,11,12],faster:[2,3],fastest:[8,12],featur:[9,13],feel:3,fetch:[7,11],few:[1,8,9,12,13],field:[7,11],figur:[8,12],file:[1,2,3,5],find:3,first:[1,6,8,9,12,13],fit:2,flexibl:[7,11],float16:3,float32:[1,2,3],flow:[1,7,8,9,11,12,13],focu:[3,8,12],follow:[0,2,3,6,7,8,9,10,11,12,13],forbidden:[9,13],forc:3,form:10,formal:[8,9,12,13],format:[8,12],former:10,forward:[1,2,3,10],found:[0,9,10,13],foundat:[8,12],fp16:3,framework:[7,8,11,12],free:3,from:[1,2,3,7,8,9,10,11,12,13],full:[1,2,3,10],fulli:[8,12],func:[8,12],fundament:[8,12],further:[8,12],fuse:[3,4,5],fusion:[2,8,12],futur:[3,9,10,13],galleri:[1,2,3,4],gbp:[1,2],gener:[1,2,3,4,7,8,10,11,12],geq:[8,12],get:[1,2,3,5],get_program_id:[1,2,3,9,13],getelementptr:10,girbal2006:8,girbal:[8,12],git:[0,3],github:[0,3],give:[7,11],given:[2,3],global:[8,9,10,12,13],good:[1,3,8,12],gpgpu:[7,11],gpu:[2,6,7,8,9,10,11,12,13],grammat:[8,12],graph:10,graphic:[7,11],greater:2,grid:[1,2,3],grid_m:3,grid_n:3,grosser2012:8,grosser:[8,12],group:3,group_id:3,group_siz:3,grow:[8,12],guard:[2,10],guess:2,guid:[7,11],had:[9,13],half:[3,9,13],halid:[7,8,11,12],hand:[8,12],handl:[2,3,8,9,10,12,13],handwritten:[3,7,11],happen:3,hard:3,harder:[8,12],hardwar:[3,6,8,9,12,13],has:[1,2,7,8,11,12],have:[2,7,8,9,11,12,13],haven:[9,13],header:10,heavi:[7,11],helper:2,henc:[3,10],here:[0,1,2,3],heurist:2,hide:[9,13],hierarch:[7,11],hierarchi:[8,12],high:[3,7,8,10,11,12],higher:10,highest:10,highli:[3,7,11],highlight:[1,8,12],hint:[8,10,12],hit:3,hook:[9,13],how:[1,2,3,6,7,9,11,13],howev:[3,8,9,10,12,13],http:[0,3],i32:10,idea:[7,11],ident:[2,9,13],identifi:[9,13],ids:[9,13],idx:10,imag:[7,8,11,12],imper:[6,9,13],implement:[1,2,3,7,8,9,10,11,12,13],implicitli:[3,9,10,13],importantli:[8,12],impos:[8,12],improv:3,includ:[3,10],incompat:[3,8,9,12,13],increas:1,incred:[7,11],increment:[8,12],inde:[8,12],independ:[8,10,12],index:2,indic:[2,8,10,12],individu:10,induc:[8,12],industri:[7,11],inequ:[8,12],infanc:[9,13],inform:[8,12],infrastructur:[8,12],initi:[3,9,13],inlin:10,inner:3,input:[1,2,3,8,10,12],instal:6,instanc:[2,3,7,9,10,11,13],instead:[1,2,3],instrins:[9,13],instruct:[6,7,9,11,13],int16:[9,13],int32:[3,9,13],int64:[3,9,13],int8:[9,13],integ:[8,9,10,12,13],integr:0,inter:[9,13],interchang:[8,12],interdepend:10,interest:[0,3,7,8,11,12],intermedi:[0,6,8,9,12,13],intern:[0,2,8,12],interpret:10,intra:10,intrins:[8,12],introduc:[3,9,10,13],introduct:[6,9,13],intuit:[9,13],invari:[8,12],ipynb:[1,2,3],irregular:[2,8,12],is_contigu:3,issu:[7,8,11,12],iter:[3,7,8,11,12],its:[2,8,9,10,12,13],itself:[9,10,13],jit:[9,10,13],journal:[8,12],jrk2013:7,jupyt:[1,2,3,4],just:[1,2,3,8,9,12,13],kei:[1,2,3,7,11],kellei:[7,11],kernel:[6,7,9,11,13],kick:3,kind:[9,13],kitwar:3,known:[8,10,12],label:[1,2,3],lam1991:7,lam:[7,11],lambda:[1,2,3],languag:[1,6,7,11],larg:[2,7,8,11,12],larger:2,later:[8,12],latest:0,lattner2004:8,lattner2019:8,lattner:[8,12],launch:[1,2,3],law:[8,12],layer:[7,8,11,12],lda:3,ldb:3,ldc:3,lead:[7,8,9,11,12,13],learn:[1,2,3,6,7,8,11,12],least:[8,12],lee2017:7,lee:[7,11],left:[8,9,12,13],legal:[8,12],less:[2,7,8,11,12],let:2,letter:[8,12],level:[3,7,8,9,11,12,13],lib:3,librari:[3,7,8,11,12],lies:[7,11],lifelong:[8,12],like:[1,7,8,9,11,12,13],limit:[2,9,13],line:[1,2,3,8,10,12],linear:[7,8,11,12],linkag:10,linker:10,linux:3,list:[3,10],literatur:10,litteratur:[8,12],live:3,llvm:[0,8,10,12],load:[1,2,8,9,10,12,13],local:[7,8,11,12],locat:3,logarithm:1,longer:2,look:[2,6,7,11],loop:[3,8,9,10,12,13],lot:10,low:[8,12],lowest:[9,13],lstlist:[9,13],machin:[7,8,11,12],machineri:[7,8,11,12],macro:[1,9,13],made:[7,9,11,13],mai:[0,1,8,9,10,12,13],main:[3,7,8,9,11,12,13],maintain:[1,2,8,12],major:[3,8,12],make:[0,1,2,3,7,8,10,11,12],make_add_kernel:1,make_kernel:[2,3],manag:[7,9,11,13],mani:[1,7,8,9,10,11,12,13],manual:[2,8,12],manual_se:[1,2],mapl:[1,8,12],markedli:[7,11],mask:[9,13],masked_load:10,masked_stor:10,match:3,mathbb:[8,12],mathbf:[8,12],mathcal:[8,12],mathemat:[8,12],matmul:[3,8,12],matric:[2,3],matrix:[2,4,5,7,8,9,10,11,12,13],matrix_s:[8,12],matter:[1,3,7,8,11,12],max:[1,2,9,13],max_group_s:3,max_m:[1,2,3],maxim:[6,8,12],maximum:[1,9,13],mean:[2,3,8,9,10,12,13],mechan:[2,8,12],memori:[1,2,3,7,8,9,10,11,12,13],mention:[2,3],merg:10,metadata:10,method:[1,8,9,12,13],methodolog:[8,12],micro:[7,10,11],might:[9,13],min:[2,3,9,13],min_m:[1,2,3],minut:[0,1,2,3],miscellan:10,miss:[8,12],mitig:[8,12],mixtur:3,mkdir:[0,3],mlir:[8,12],model:[1,7,8,11,12],modern:[3,6,7,8,9,11,12,13],modifi:[1,3,9,13],modular:[8,12],moor:[8,12],more:[1,2,6,7,8,10,11,12],most:[3,8,12],move:2,much:10,mullapudi2016:8,mullapudi:[8,12],multi:[3,7,8,9,10,11,12,13],multipl:[1,4,5,7,8,9,10,11,12,13],multipli:[8,12],must:[2,3,9,10,13],naiv:[2,3],naive_softmax:2,name:[1,2,3,10],nativ:1,natur:[2,7,11],necessari:[2,10],need:[1,2,3,9,10,13],nelement:2,nest:[3,8,9,12,13],net:[8,12],network:[7,8,11,12],neural:[7,8,11,12],neurosci:[7,11],newaxi:[3,9,13],next:[2,3,9,10,13],next_power_of_2:2,nightli:0,nip:[7,11],non:[7,9,10,11,13],none:[2,10],nonetheless:[9,13],normal:2,note:[0,1,2,3,8,9,10,12,13],notebook:[1,2,3,4],notic:[8,12],notori:[3,7,11],novel:[7,11],now:[1,2,3,9,13],num:2,num_warp:[2,3],number:[1,2,8,10,12],numer:[2,7,11],numpi:[9,13],nvidia:[3,7,10,11],object:[1,3,7,11],obvious:2,occur:[8,12],offer:[7,9,11,13],offici:0,offset:[1,3],often:3,omega:[8,12],onc:[2,7,8,10,11,12],one:[2,4,7,8,9,10,11,12,13],ones:[9,10,13],onli:[1,2,3,7,8,10,11,12],open:[9,13],opencl:[7,11],oper:[1,2,3,4,7,9,10,11,13],operand:[3,9,13],opportun:[7,11],ops:1,opsila:[7,11],opt:[1,2,3],optim:[7,8,9,10,11,12,13],order:[3,4,8,12],origin:[8,12],osdi:[7,11],other:[1,3,6,8,10,12],our:[1,2,3,7,9,10,11,13],out:[1,2,3,6,8,10,12],outlin:[8,10,12],outperform:[2,3],output:[1,2,3],over:[7,8,11,12],overach:1,pact:[8,10,12],pad:[2,9,10,13],paper:1,paradigm:[7,8,11,12],parallel:[1,2,3,6,7,8,9,11,12,13],paralleliz:[7,11],paramet:[1,3,8,9,10,12,13],parametr:[7,9,10,11,13],parenthes:3,pars:10,parser:0,part:[3,8,12],partial:[2,10],particular:[9,13],particularli:[7,8,10,11,12],partit:[7,11],pass:[1,3,8,9,12,13],past:[1,7,8,11,12],path:[1,3,10],pattern:[7,11],peak:[8,12],per:2,perf:3,perf_report:[1,2,3],perform:[1,2,7,8,9,10,11,12,13],pertain:[9,13],phase:[8,12],philosophi:[8,12],pid:[1,3],pid_m:3,pid_n:3,piec:3,pip:[0,3],pipelin:[7,8,11,12],platform:[6,8,12],pldi:[7,11],plot:[0,1,2,3],plot_nam:[1,2,3],pmatrix:[8,12],point:[8,9,12,13],pointer:[1,9,13],polli:[8,12],polyhedr:[7,11],polyhedra:[8,12],popular:[8,12],portabl:[1,7,8,11,12],pose:[7,11],possibl:[1,2,3,8,12],potenti:10,power:[2,8,12],ppopp:[8,12],practic:[1,2,3,7,11],pragma:[7,11],pre:[0,1,7,9,11,13],preced:[9,13],predic:[2,10],predict:[2,8,12],prefer:[1,2],premis:[7,11],prepar:10,preprocessor:10,present:[9,13],preserv:[8,10,12],pressur:3,pretti:[1,3,9,13],prevent:[8,12],previou:[2,10],primer:[8,12],primit:[1,7,9,11,13],principl:[8,9,12,13],print:[1,2,3],probabl:[3,8,12],problem:1,problemat:[8,10,12],procedur:[8,12],process:[1,3,7,8,11,12],processor:[1,7,9,11,13],product:[3,6,8,12],program:[1,2,3,7,11],program_id:3,program_id_m:3,program_id_n:3,programm:[7,8,9,11,12,13],project:[7,9,11,13],prologu:3,promot:[3,8,12],properli:2,properti:[8,12],propos:[7,11],proprietari:3,provid:[1,2,3,6,8,9,10,12,13],pseudo:[3,9,13],pssa:10,ptillet:[0,3],ptr:[1,9,13],publicli:3,purpos:[3,7,8,9,10,11,12,13],push:[8,12],put:3,putat:[8,12],puzzl:[9,13],pytest:0,python:[1,2,3,4],pytorch:[1,2],qquad:[8,12],queri:[9,13],quit:[2,9,13],ragan:[7,11],rand:[1,3],randn:[2,3],rang:[1,2,3,7,8,9,11,12,13],rapidli:[7,8,11,12],rate:3,rather:[3,7,11],raw:[1,10],rdom:[8,12],read:[2,3,4],reader:[8,12],readi:3,readonli:10,real:[7,11],reason:[3,8,9,12,13],reblock:10,recent:[3,7,10,11],recommend:4,recomput:[7,11],rectifi:[7,11],redmon2016:7,redmon:[7,11],reduc:10,reduct:[2,3,9,10,13],refer:1,regist:3,regrett:[7,11],regular:[8,12],rel:[1,8,12],relat:6,releas:[0,3,7,11],reli:[8,12],remain:[7,11],remateri:3,reorder:[8,12],replic:[3,9,10,13],repres:[8,12],represent:[0,6],requir:[2,8,9,12,13],research:[7,8,11,12],reshap:[9,10,13],resolut:[8,12],resolv:10,resourc:[7,11],resp:[8,12],respect:[8,12],restrict:[8,12],result:[0,1,2,7,8,10,11,12],ret:2,retriev:[1,8,9,12,13],reus:3,revisit:[7,11],right:[8,9,12,13],rise:[8,12],role:10,roughli:3,round:2,row:[2,3,9,13],rtol:3,rule:[9,13],run:[0,1,2,3,6,8,12],runtim:[0,8,12],rvar:[8,12],said:[8,12],same:[7,9,10,11,13],satisfi:[9,13],sato2019:8,sato:[8,12],save:[1,2,3],save_path:1,scalabl:[8,12],scalar:[7,9,10,11,13],scan:[8,12],schedul:[7,11],scienc:[8,12],scop:[8,12],scope:[8,12],script:[1,2,3],second:[1,2,3,8,9,12,13],section:[3,8,12],see:[1,2,3,8,12],seem:[1,8,12],select:[7,10,11],semant:[8,10,12],semi:[8,12],sens:[1,7,8,9,11,12,13],separ:[8,12],sequenc:[7,10,11],serial:10,set:[1,3,8,10,12],sever:[7,8,9,11,12,13],shall:[8,12],shape:[1,2,3,8,9,10,12,13],share:[2,7,9,10,11,13],shortcut:1,should:[1,3,7,8,11,12],show:3,show_plot:[1,2,3],shown:[1,8,9,10,12,13],side:[8,12],sight:[8,12],signal:[7,11],signifi:10,significantli:2,sigplan:[8,12],simd:[7,11],similar:[1,2,9,10,13],simpl:[1,2,10],simpler:[9,10,13],simplest:4,simpli:[3,8,12],simplifi:[9,10,13],sinc:[3,10],singl:[1,7,9,10,11,13],size:[1,3,8,10,12],slice:[9,13],slower:[7,8,11,12],slowest:[8,12],smaller:2,smallest:2,snemi3d:[7,11],softmax:[4,5],solid:[8,12],solut:3,solv:[8,10,12],some:[3,9,13],sometim:[8,12],sourc:[1,2,3,4,8,12],space:[7,8,11,12],spars:[7,8,11,12],spatial:[8,12],speak:3,spec:2,special:[7,9,10,11,13],specif:[3,7,11],specifi:[3,8,10,12],speed:2,sphinx:[1,2,3,4],split:[8,12],spmd:[1,7,8,9,11,12,13],src:3,ssa:10,stabil:2,stack:[9,13],standard:[1,8,12],start:4,started_tutori:5,state:[7,8,11,12],statement:[8,12],staticmethod:[1,2,3],step:[8,12],still:[8,9,12,13],store:[1,2,9,13],stoutchinin01:10,stoutchinin:10,straight:10,straightforward:3,strategi:[8,12],strength:[7,11],stress:[9,13],strict:[9,13],stride:[2,3],stride_a_0:3,stride_b_0:3,stride_c_0:3,stride_xm:2,stride_ym:2,string:[1,3],strongli:[9,13],struct:[9,13],structur:[7,8,9,11,12,13],sub:[9,13],subdirectori:3,subscript:[8,12],subsect:[9,13],substanti:[7,11],successfulli:[8,12],suffer:[8,12],suit:[7,11],suitabl:10,sum:[1,2,9,13],superhuman:[7,11],support:[0,8,9,10,12,13],sure:[1,2],surprisingli:[7,11],surround:[8,12],sutskev:[7,11],sutskever2014:7,swizzl:[7,11],symbol:10,synchron:[7,9,11,13],syntax:[1,2,9,10,13],system:[3,7,8,11,12],taco:[8,12],take:[0,1,3,6],taken:[8,12],tar:3,target:[7,11],techniqu:[7,8,11,12],tempor:[8,12],temporari:2,tend:[8,12],tension:[7,11],tensor:[1,3,7,8,9,11,12,13],tensorrt:[7,11],termin:10,ternari:[1,9,13],test:0,text:[8,12],tflop:3,than:[2,3,7,8,11,12],thei:[1,3,7,8,9,10,11,12,13],them:1,themselv:3,theoret:2,therebi:[8,12],therefor:[3,10],theta:[8,12],theta_:[8,12],thi:[0,1,2,3,7,8,9,10,11,12,13],thing:1,think:2,those:[0,1,9,13],though:[2,7,8,9,11,12,13],thread:[7,9,11,13],three:1,through:[3,4,8,10,12],throughout:[8,12],throughput:6,tile:[2,3,8,12],time:[1,2,3,7,8,9,10,11,12,13],tiramisu:[7,8,11,12],tmp:[0,3],tog:[8,12],togeth:3,too:2,tool:3,topic:[8,12],total:[1,2,3,5],tradit:[7,8,10,11,12],transcompil:[9,13],transfer:2,transform:[1,8,10,12],travers:[8,12],tree:10,trend:[7,11],trick:2,trigger:3,triton:[0,1,2,3,4,7,8,11,12],trivial:[7,11],true_addr:10,true_valu:10,ts1:10,tsn:10,tune:[2,8,12],tuner:[3,10],tupl:1,tutori:[0,1,2,3,6,9,13],tutorials_jupyt:4,tutorials_python:4,tvm:[7,8,11,12],twice:2,two:[1,2,3,8,9,12,13],type:[1,3,9,13],typic:[3,8,12],uint16:[9,13],uint32:[9,13],uint64:[9,13],uint8:[9,13],unclear:[9,13],uncommon:[8,12],underneath:[8,12],understand:2,unfortun:[3,8,12],unifi:[7,11],uninstal:3,union:[9,13],unit:[0,7,10,11],univers:[8,12],unless:[9,13],unlik:3,unnecessari:10,unrol:[8,12],until:[9,13],updat:[3,8,12],usag:[0,2],use:[0,1,2,3,7,8,9,10,11,12,13],used:[1,3,9,10,13],useful:[9,10,13],uses:[3,10],using:[1,3,7,8,9,10,11,12,13],usual:[9,10,13],util:1,v100:[3,8,12],val_fals:1,val_tru:1,valid:1,valu:[1,2,3,10],valuabl:2,vari:[9,13],variabl:[3,9,10,13],variant:[7,11],variou:4,vasilach:[7,8,11,12],vasilache2018:[7,8],vast:[8,12],vec:[8,12],vector:[2,4,5,7,8,10,11,12],vendor:3,veri:[2,8,12],verif:[8,12],verifi:[2,8,12],version:[3,10],via:[8,12],violat:10,visibl:[8,10,12],vision:[7,11],wai:[2,3,10],want:[0,1,2,3],warp:[2,9,10,13],wast:2,well:[7,8,11,12],were:[9,10,13],wget:3,what:1,when:[1,2,3,7,8,10,11,12],where:[2,3,8,9,10,12,13],whether:[1,7,11],which:[1,2,3,7,8,9,11,12,13],whose:[1,2,3,8,10,12],why:[9,13],wide:[8,12],width:3,wise:[1,2,9,10,13],wish:[3,8,12],within:10,without:[1,8,12],wolf:[8,12],wolfe1989:8,won:2,word:[1,8,12],work:[2,6,7,11],workload:3,wors:[7,8,11,12],would:[2,9,10,13],wouldn:[8,12],wrapper:3,write:[1,2,3,4,6,8,12],written:3,wrote:2,x86_64:3,x_1:[9,13],x_2:[9,13],x_log:1,x_max:2,x_name:[1,2,3],x_val:[1,2,3],xii:[8,12],xzvf:3,y_line:[1,2,3],y_name:[1,2,3],y_ref:2,y_tri:2,y_val:[1,2,3],year:[8,12],yet:[7,8,9,11,12,13],yii:[8,12],ylabel:[1,2,3],you:[0,1,2,3,4,7,11],your:[0,6],yourself:[2,3],zero:3,zip:4},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Tutorials","Computation times","Welcome to Triton\u2019s documentation!","Introduction","Related Work","The Triton-C Language","The Triton-IR Intermediate Representation","Introduction","Related Work","The Triton-C Language"],titleterms:{"final":3,"function":[3,10],The:[3,9,10,13],addit:1,advantag:[8,12],analysi:10,arithmet:3,auto:3,autograd:3,basic:10,benchmark:[1,2,3],binari:0,bind:[1,2,3],block:10,broadcast:[9,13],cach:3,challeng:[7,11],compil:[8,12],comput:[1,2,3,5],control:10,cutlass:3,dataflow:10,differ:[9,13],distribut:0,document:6,extens:[9,13],flow:10,from:0,fuse:2,get:6,guid:6,instal:[0,3],instruct:10,intermedi:10,introduct:[7,11],kernel:[1,2,3],languag:[8,9,12,13],level:10,limit:[8,12],matrix:3,model:[9,13],modul:10,motiv:[2,3,7,11],multipl:3,optim:3,packag:0,perform:3,pointer:3,polyhedr:[8,12],program:[6,8,9,10,12,13],python:0,refer:[7,8,10,11,12],relat:[8,12],represent:[8,10,12],restrict:[9,13],result:3,schedul:[8,12],semant:[9,13],softmax:2,sourc:0,squar:3,start:6,structur:10,test:[1,2,3],time:5,torch:[1,2,3],triton:[6,9,10,13],tune:3,tutori:4,type:10,unit:[1,2,3],vector:1,welcom:6,work:[8,12]}}) |