Files
triton/searchindex.js
2021-03-23 17:10:07 -04:00

1 line
18 KiB
JavaScript

Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index","programming-guide/chapter-1/introduction","programming-guide/chapter-2/related-work","programming-guide/chapter-3/triton-c","programming-guide/chapter-4/triton-ir"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst","programming-guide/chapter-1/introduction.rst","programming-guide/chapter-2/related-work.rst","programming-guide/chapter-3/triton-c.rst","programming-guide/chapter-4/triton-ir.rst"],objects:{},objnames:{},objtypes:{},terms:{"0000":3,"1024":1,"10mn":2,"1250":3,"128":3,"182":3,"1823":2,"184":3,"185":3,"186":3,"188":3,"190":[3,5],"191":3,"192":3,"193":3,"194":3,"195":3,"196":3,"198":3,"1983":7,"1984":8,"1989":8,"199":3,"1991":[7,8],"1999":[8,10],"200":3,"2001":10,"2004":8,"2006":8,"2012":8,"2013":[7,10],"2014":7,"2016":[7,8],"2017":7,"2018":[7,8],"2019":[1,8],"202":3,"2021":[7,8],"2048":2,"2141":1,"220":3,"245":3,"2500":3,"256":[2,3],"2mn":2,"3076":1,"3713":1,"3750":3,"3mn":2,"4096":2,"4940":1,"5000":3,"502":[3,5],"512":3,"6250":3,"6724":1,"7500":3,"756":[1,5],"768":3,"781":2,"7mn":2,"8750":3,"896":3,"933":[2,5],"9733":1,"98432":1,"abstract":[7,8,10],"break":8,"byte":2,"case":[2,3,7,8,9,10],"class":[1,2,3,7,8,9],"export":3,"final":10,"float":[1,2,3,7,8,9],"function":[1,2,8,9],"import":[1,2,3,7,8,9],"int":[1,2,3,7,8,9],"return":[1,2,3,10],"short":3,"static":[7,8,9,10],"super":3,"switch":3,"true":[1,2,3],"typeof":3,"var":8,"void":[1,2,3],"while":[0,7,9],For:[1,3,7,8,9,10],One:[3,9],SMs:8,T_S:8,The:[1,2,6,7,8],These:[3,8,9,10],Used:[1,2,3],__expf:2,__global__:[1,2,3,9],_add:1,_align8:3,_dot:3,_softmax:2,_src:[1,2],a100:[3,8],abl:[3,8],about:[1,2,3,6],abov:[1,2,3,8,9],abs:1,absolut:3,academ:7,acc:[3,7,8],acceler:7,access:[1,7,8,10],accomod:[3,9],accordingli:8,account:[3,8],accumul:8,accuraci:7,achiev:[1,7,8,9],across:[7,8],activ:3,actual:[3,7,8],add:[1,5,9,10],added:[9,10],addit:[2,3,4,5,7,9,10],addition:[8,9],address:[2,7],adequ:10,adopt:8,advanc:7,advantag:1,advoc:8,affect:3,affin:8,after:[3,10],again:9,against:[1,2,3,6,10],aggreg:10,aggress:[2,7,8,10],agnost:[7,8],ahead:8,aim:6,algebra:8,algorithm:[3,7,8,9,10],alia:8,alias:10,align8:3,align:10,all:[2,3,4,7,8,9,10],allclos:[2,3],allen1984:8,allen:8,alloc:[1,7],allow:[1,2,7,8,10],almost:2,along:[1,9,10],also:[1,2,3,7,8,9,10],alwai:8,amd:7,amen:8,amount:[2,7],ampl:8,analysi:[7,8,9],analyz:8,ancourt1991:8,ancourt:8,ani:[2,3,8,9],anoth:[2,8,10],ansi:9,anywai:10,apart:8,api:[0,1],appli:[1,2,3,7,8],applic:[8,10],approach:[7,8,9],appropri:9,approxim:2,architectur:[3,7],area:8,arg:[1,2,3],argument:[1,2,3,9,10],arithmet:10,around:2,arrai:[1,8,9,10],arrang:3,art:[7,8],arxiv:[7,8],ask:2,aspect:[8,9],asplo:7,assembl:3,assert:[1,2,3],assign:10,associ:9,assum:2,ast:10,asynchron:7,atol:3,atom:[9,10],atomic_add:9,attribut:[1,9,10],auguin1983:7,auguin:7,auto:[2,8,10],autograd:[1,2],autom:7,automat:[2,3,7,8,9],autotun:[3,8],autotune_config:3,autotune_kei:3,autotune_v:3,avail:[3,7,8],awar:[7,9],axi:[1,2,3,9],back:[1,2,3],backend:10,backward:1,baghdadi2021:[7,8],baghdadi:[7,8],balanc:8,bandwidth:2,base:[7,8,9,10],basic:[1,3,4,8,9],becaus:[2,9],becom:7,been:[7,8],befor:[3,10],begin:8,behavior:8,being:10,believ:8,below:[1,4,8,9,10],bench:0,benchmark:0,benefit:[2,7,8,9,10],best:[1,3,7],better:[3,10],between:[1,7],bin:3,binari:9,bit:[2,10],block:[1,2,3,7,8,9],block_siz:8,blockidx:1,bodi:10,boil:8,bool:[1,2,9],both:[8,9],bound:[1,2,8],branch:[8,10],braun13:10,braun:10,broad:7,broadcast:10,build:[0,3],built:[1,3,8,9],c_0:3,c_1:3,c_2:3,cach:[1,2,7,8,9,10],call:[3,8,10],callabl:1,can:[0,1,2,3,7,8,9,10],cannot:[3,7,8,9],capabl:[2,6,7,9],carri:10,carter99:10,carter:10,cast:3,categori:9,cdiv:[1,3],ceil:1,center:10,certain:9,cfg:10,cgo:[7,8],chang:[3,9],chapter:[6,9,10],characterist:8,cheap:7,check:[1,2,6],chen2018:7,chen:7,chip:2,choic:6,chunk:1,cite:8,citizen:[1,9],click:[1,2,3],clone:[0,3],close:[8,10],cmake:[0,3],cmp:10,coalesc:[7,9],code:[1,2,3,4,7,8,9,10],codegen:0,coher:10,col:8,column:[2,3,9],com:[0,3,8],combin:7,come:[1,2,3,8],command:[0,3],comment:1,common:8,commonli:8,commun:9,compar:[2,3,6,8],compil:[0,1,2,3,6,7,9,10],complet:[8,9],complex:[8,9,10],compos:[7,10],composit:8,comprehens:[7,8],comput:[6,7,8],computation:[7,8],concern:8,concis:1,concurr:9,cond:[1,10],condit:[1,8,9,10],config:3,connectom:7,consecut:8,consequ:7,consid:2,consist:[2,10],constant:[9,10],constraint:[1,2,8,9],construct:[3,7,9,10],constructor:3,contain:[8,10],content:9,context:1,contextu:8,contigu:[1,3],contrari:1,control:[1,7,8,9],convers:9,convolut:7,coordin:1,copi:[1,7],core:[3,7,8,9],correct:1,correspond:[1,2,3,10],cost:8,could:[2,3,8,10],cours:[1,7],creat:[1,3,7,9,10],crucial:10,csv:1,ctx:[1,2,3],cubla:[3,7],cuda:[1,2,3,7,9],cudnn:7,current:[3,9],custom:[1,2,3,6],cutlass_include_dir:3,cutlass_library_dir:3,cutlass_library_kernel:3,cutlass_matmul:3,cutlass_tensorop_f16_s16816gemm_:3,cutlass_tensorop_f16_s884gemm_f16_:3,cvpr:7,dart:8,darte1999:8,data:[1,2,3,7,8,9,10],data_ptr:[1,2,3],dataflow:8,dblock:1,dcutlass_library_kernel:3,dcutlass_nvcc_archs_en:3,decad:7,declar:[1,9,10],decompos:[8,9],decor:1,decreas:3,deep:[3,7,8],def:[1,2,3],defin:[1,2,3,8,9,10],definit:[8,9,10],denom:2,denomin:2,dens:8,depend:8,deploi:7,dereferenc:[1,9],describ:[8,9,10],design:8,desir:[3,10],detail:[1,8,9],detect:7,develop:[7,8,9],devic:[1,2,3],dfg:10,dialect:8,dict:[1,2,3],diesel:8,differ:[1,2,3,7,8],difficult:8,difficulti:[3,7],dijkstra82:8,dim:8,dimens:[3,9,10],dimension:[3,8,9,10],dir:0,direct:0,directli:[0,9,10],discuss:9,disjoint:8,disk:1,dissert:8,distribut:8,diverg:10,divis:1,dnn:[6,7,8],do_bench:[1,2,3],document:[9,10],doe:[1,2,8,9],doesn:8,doing:[7,10],domain:[7,8],don:2,done:[0,3,7,9,10],door:9,dot:[3,10],doubl:9,doubli:3,doubt:8,down:8,download:[0,1,2,3,4],dram:2,driver:0,dsl:[6,7,8,10],dtype:[1,2,3],dure:3,each:[1,2,3,7,8,9,10],eas:8,easi:[1,3],easier:[1,2,7],education:2,effect:8,effici:[3,7,10],effort:8,egg:3,either:[9,10],elango2018:8,elango:8,element:[1,2,9,10],element_s:2,elementwis:2,ellips:9,emerg:7,emit:9,empti:[3,10],empty_lik:[1,2],enabl:8,encod:8,end:[7,8,9,10],enforc:8,engin:[8,9],ensur:8,entir:[2,8],environ:[3,6,10],equal:8,error:9,especi:7,etc:[3,9],euromicro:7,even:8,eventu:10,evidenc:7,evolv:7,exampl:[1,2,3,4,7,8,9,10],execut:[1,5,7,8,9,10],exemplifi:9,exist:[1,7,8,9],exp:2,expect:[1,2],expens:[7,8],expert:3,explor:7,exponenti:2,expos:[3,10],express:[7,8],extend:[3,9,10],extens:[1,10],extrem:8,f32_infin:2,facilit:[7,8,9],fact:[3,8],fairli:3,fall:[3,9],false_valu:10,familiar:9,far:2,fast:[2,7,8],faster:[2,3],fastest:8,featur:9,feel:3,fetch:7,few:[1,8,9],field:7,figur:8,file:[1,2,3,5],find:3,first:[1,6,8,9],fit:2,flexibl:7,float16:3,float32:[1,2,3],flow:[1,7,8,9],focu:[3,8],follow:[0,2,3,6,7,8,9,10],forbidden:9,forc:3,form:10,formal:[8,9],format:8,former:10,forward:[1,2,3,10],found:[0,9,10],foundat:8,fp16:3,framework:[7,8],free:3,from:[1,2,3,7,8,9,10],full:[1,2,3,10],fulli:8,func:8,fundament:8,further:8,fuse:[3,4,5],fusion:[2,8],futur:[3,9,10],galleri:[1,2,3,4],gbp:[1,2],gener:[1,2,3,4,7,8,10],geq:8,get:[1,2,3,5],get_program_id:[1,2,3,9],getelementptr:10,girbal2006:8,girbal:8,git:[0,3],github:[0,3],give:7,given:[2,3],global:[8,9,10],good:[1,3,8],gpgpu:7,gpu:[2,6,7,8,9,10],grammat:8,graph:10,graphic:7,greater:2,grid:[1,2,3],grid_m:3,grid_n:3,grosser2012:8,grosser:8,group:3,group_id:3,group_siz:3,grow:8,guard:[2,10],guess:2,guid:7,had:9,half:[3,9],halid:[7,8],hand:8,handl:[2,3,8,9,10],handwritten:[3,7],happen:3,hard:3,harder:8,hardwar:[3,6,8,9],has:[1,2,7,8],have:[2,7,8,9],haven:9,header:10,heavi:7,helper:2,henc:[3,10],here:[0,1,2,3],heurist:2,hide:9,hierarch:7,hierarchi:8,high:[3,7,8,10],higher:10,highest:10,highli:[3,7],highlight:[1,8],hint:[8,10],hit:3,hook:9,how:[1,2,3,6,7,9],howev:[3,8,9,10],http:[0,3],i32:10,idea:7,ident:[2,9],identifi:9,ids:9,idx:10,imag:[7,8],imper:[6,9],implement:[1,2,3,7,8,9,10],implicitli:[3,9,10],importantli:8,impos:8,improv:3,includ:[3,10],incompat:[3,8,9],increas:1,incred:7,increment:8,inde:8,independ:[8,10],index:2,indic:[2,8,10],individu:10,induc:8,industri:7,inequ:8,infanc:9,inform:8,infrastructur:8,initi:[3,9],inlin:10,inner:3,input:[1,2,3,8,10],instal:6,instanc:[2,3,7,9,10],instead:[1,2,3],instrins:9,instruct:[6,7,9],int16:9,int32:[3,9],int64:[3,9],int8:9,integ:[8,9,10],integr:0,inter:9,interchang:8,interdepend:10,interest:[0,3,7,8],intermedi:[0,6,8,9],intern:[0,2,8],interpret:10,intra:10,intrins:8,introduc:[3,9,10],introduct:[6,9],intuit:9,invari:8,ipynb:[1,2,3],irregular:[2,8],is_contigu:3,issu:[7,8],iter:[3,7,8],its:[2,8,9,10],itself:[9,10],jit:[9,10],journal:8,jrk2013:7,jupyt:[1,2,3,4],just:[1,2,3,8,9],kei:[1,2,3,7],kellei:7,kernel:[6,7,9],kick:3,kind:9,kitwar:3,known:[8,10],label:[1,2,3],lam1991:7,lam:7,lambda:[1,2,3],languag:[1,6,7],larg:[2,7,8],larger:2,later:8,latest:0,lattner2004:8,lattner2019:8,lattner:8,launch:[1,2,3],law:8,layer:[7,8],lda:3,ldb:3,ldc:3,lead:[7,8,9],learn:[1,2,3,6,7,8],least:8,lee2017:7,lee:7,left:[8,9],legal:8,less:[2,7,8],let:2,letter:8,level:[3,7,8,9],lib:3,librari:[3,7,8],lies:7,lifelong:8,like:[1,7,8,9],limit:[2,9],line:[1,2,3,8,10],linear:[7,8],linkag:10,linker:10,linux:3,list:[3,10],literatur:10,litteratur:8,live:3,llvm:[0,8,10],load:[1,2,8,9,10],local:[7,8],locat:3,logarithm:1,longer:2,look:[2,6,7],loop:[3,8,9,10],lot:10,low:8,lowest:9,lstlist:9,machin:[7,8],machineri:[7,8],macro:[1,9],made:[7,9],mai:[0,1,8,9,10],main:[3,7,8,9],maintain:[1,2,8],major:[3,8],make:[0,1,2,3,7,8,10],make_add_kernel:1,make_kernel:[2,3],manag:[7,9],mani:[1,7,8,9,10],manual:[2,8],manual_se:[1,2],mapl:[1,8],markedli:7,mask:9,masked_load:10,masked_stor:10,match:3,mathbb:8,mathbf:8,mathcal:8,mathemat:8,matmul:[3,8],matric:[2,3],matrix:[2,4,5,7,8,9,10],matrix_s:8,matter:[1,3,7,8],max:[1,2,9],max_group_s:3,max_m:[1,2,3],maxim:[6,8],maximum:[1,9],mean:[2,3,8,9,10],mechan:[2,8],memori:[1,2,3,7,8,9,10],mention:[2,3],merg:10,metadata:10,method:[1,8,9],methodolog:8,micro:[7,10],might:9,min:[2,3,9],min_m:[1,2,3],minut:[0,1,2,3],miscellan:10,miss:8,mitig:8,mixtur:3,mkdir:[0,3],mlir:8,model:[1,7,8],modern:[3,6,7,8,9],modifi:[1,3,9],modular:8,moor:8,more:[1,2,6,7,8,10],most:[3,8],move:2,much:10,mullapudi2016:8,mullapudi:8,multi:[3,7,8,9,10],multipl:[1,4,5,7,8,9,10],multipli:8,must:[2,3,9,10],naiv:[2,3],naive_softmax:2,name:[1,2,3,10],nativ:1,natur:[2,7],necessari:[2,10],need:[1,2,3,9,10],nelement:2,nest:[3,8,9],net:8,network:[7,8],neural:[7,8],neurosci:7,newaxi:[3,9],next:[2,3,9,10],next_power_of_2:2,nightli:0,nip:7,non:[7,9,10],none:[2,10],nonetheless:9,normal:2,note:[0,1,2,3,8,9,10],notebook:[1,2,3,4],notic:8,notori:[3,7],novel:7,now:[1,2,3,9],num:2,num_warp:[2,3],number:[1,2,8,10],numer:[2,7],numpi:9,nvidia:[3,7,10],object:[1,3,7],obvious:2,occur:8,offer:[7,9],offici:0,offset:[1,3],often:3,omega:8,onc:[2,7,8,10],one:[2,4,7,8,9,10],ones:[9,10],onli:[1,2,3,7,8,10],open:9,opencl:7,oper:[1,2,3,4,7,9,10],operand:[3,9],opportun:7,ops:1,opsila:7,opt:[1,2,3],optim:[7,8,9,10],order:[3,4,8],origin:8,osdi:7,other:[1,3,6,8,10],our:[1,2,3,7,9,10],out:[1,2,3,6,8,10],outlin:[8,10],outperform:[2,3],output:[1,2,3],over:[7,8],overach:1,pact:[8,10],pad:[2,9,10],paper:1,paradigm:[7,8],parallel:[1,2,3,6,7,8,9],paralleliz:7,paramet:[1,3,8,9,10],parametr:[7,9,10],parenthes:3,pars:10,parser:0,part:[3,8],partial:[2,10],particular:9,particularli:[7,8,10],partit:7,pass:[1,3,8,9],past:[1,7,8],path:[1,3,10],pattern:7,peak:8,per:2,perf:3,perf_report:[1,2,3],perform:[1,2,7,8,9,10],pertain:9,phase:8,philosophi:8,pid:[1,3],pid_m:3,pid_n:3,piec:3,pip:[0,3],pipelin:[7,8],platform:[6,8],pldi:7,plot:[0,1,2,3],plot_nam:[1,2,3],pmatrix:8,point:[8,9],pointer:[1,9],polli:8,polyhedr:7,polyhedra:8,popular:8,portabl:[1,7,8],pose:7,possibl:[1,2,3,8],potenti:10,power:[2,8],ppopp:8,practic:[1,2,3,7],pragma:7,pre:[0,1,7,9],preced:9,predic:[2,10],predict:[2,8],prefer:[1,2],premis:7,prepar:10,preprocessor:10,present:9,preserv:[8,10],pressur:3,pretti:[1,3,9],prevent:8,previou:[2,10],primer:8,primit:[1,7,9],principl:[8,9],print:[1,2,3],probabl:[3,8],problem:1,problemat:[8,10],procedur:8,process:[1,3,7,8],processor:[1,7,9],product:[3,6,8],program:[1,2,3,7],program_id:3,program_id_m:3,program_id_n:3,programm:[7,8,9],project:[7,9],prologu:3,promot:[3,8],properli:2,properti:8,propos:7,proprietari:3,provid:[1,2,3,6,8,9,10],pseudo:[3,9],pssa:10,ptillet:[0,3],ptr:[1,9],publicli:3,purpos:[3,7,8,9,10],push:8,put:3,putat:8,puzzl:9,pytest:0,python:[1,2,3,4],pytorch:[1,2],qquad:8,queri:9,quit:[2,9],ragan:7,rand:[1,3],randn:[2,3],rang:[1,2,3,7,8,9],rapidli:[7,8],rate:3,rather:[3,7],raw:[1,10],rdom:8,read:[2,3,4],reader:8,readi:3,readonli:10,real:7,reason:[3,8,9],reblock:10,recent:[3,7,10],recommend:4,recomput:7,rectifi:7,redmon2016:7,redmon:7,reduc:10,reduct:[2,3,9,10],refer:1,regist:3,regrett:7,regular:8,rel:[1,8],relat:6,releas:[0,3,7],reli:8,remain:7,remateri:3,reorder:8,replic:[3,9,10],repres:8,represent:[0,6],requir:[2,8,9],research:[7,8],reshap:[9,10],resolut:8,resolv:10,resourc:7,resp:8,respect:8,restrict:8,result:[0,1,2,7,8,10],ret:2,retriev:[1,8,9],reus:3,revisit:7,right:[8,9],rise:8,role:10,roughli:3,round:2,row:[2,3,9],rtol:3,rule:9,run:[0,1,2,3,6,8],runtim:[0,8],rvar:8,said:8,same:[7,9,10],satisfi:9,sato2019:8,sato:8,save:[1,2,3],save_path:1,scalabl:8,scalar:[7,9,10],scan:8,schedul:7,scienc:8,scop:8,scope:8,script:[1,2,3],second:[1,2,3,8,9],section:[3,8],see:[1,2,3,8],seem:[1,8],select:[7,10],semant:[8,10],semi:8,sens:[1,7,8,9],separ:8,sequenc:[7,10],serial:10,set:[1,3,8,10],sever:[7,8,9],shall:8,shape:[1,2,3,8,9,10],share:[2,7,9,10],shortcut:1,should:[1,3,7,8],show:3,show_plot:[1,2,3],shown:[1,8,9,10],side:8,sight:8,signal:7,signifi:10,significantli:2,sigplan:8,simd:7,similar:[1,2,9,10],simpl:[1,2,10],simpler:[9,10],simplest:4,simpli:[3,8],simplifi:[9,10],sinc:[3,10],singl:[1,7,9,10],size:[1,3,8,10],slice:9,slower:[7,8],slowest:8,smaller:2,smallest:2,snemi3d:7,softmax:[4,5],solid:8,solut:3,solv:[8,10],some:[3,9],sometim:8,sourc:[1,2,3,4,8],space:[7,8],spars:[7,8],spatial:8,speak:3,spec:2,special:[7,9,10],specif:[3,7],specifi:[3,8,10],speed:2,sphinx:[1,2,3,4],split:8,spmd:[1,7,8,9],src:3,ssa:10,stabil:2,stack:9,standard:[1,8],start:4,started_tutori:5,state:[7,8],statement:8,staticmethod:[1,2,3],step:8,still:[8,9],store:[1,2,9],stoutchinin01:10,stoutchinin:10,straight:10,straightforward:3,strategi:8,strength:7,stress:9,strict:9,stride:[2,3],stride_a_0:3,stride_b_0:3,stride_c_0:3,stride_xm:2,stride_ym:2,string:[1,3],strongli:9,struct:9,structur:[7,8,9],sub:9,subdirectori:3,subscript:8,subsect:9,substanti:7,successfulli:8,suffer:8,suit:7,suitabl:10,sum:[1,2,9],superhuman:7,support:[0,8,9,10],sure:[1,2],surprisingli:7,surround:8,sutskev:7,sutskever2014:7,swizzl:7,symbol:10,synchron:[7,9],syntax:[1,2,9,10],system:[3,7,8],taco:8,take:[0,1,3,6],taken:8,tar:3,target:7,techniqu:[7,8],tempor:8,temporari:2,tend:8,tension:7,tensor:[1,3,7,8,9],tensorrt:7,termin:10,ternari:[1,9],test:0,text:8,tflop:3,than:[2,3,7,8],thei:[1,3,7,8,9,10],them:1,themselv:3,theoret:2,therebi:8,therefor:[3,10],theta:8,theta_:8,thi:[0,1,2,3,7,8,9,10],thing:1,think:2,those:[0,1,9],though:[2,7,8,9],thread:[7,9],three:1,through:[3,4,8,10],throughout:8,throughput:6,tile:[2,3,8],time:[1,2,3,7,8,9,10],tiramisu:[7,8],tmp:[0,3],tog:8,togeth:3,too:2,tool:3,topic:8,total:[1,2,3,5],tradit:[7,8,10],transcompil:9,transfer:2,transform:[1,8,10],travers:8,tree:10,trend:7,trick:2,trigger:3,triton:[0,1,2,3,4,7,8],trivial:7,true_addr:10,true_valu:10,ts1:10,tsn:10,tune:[2,8],tuner:[3,10],tupl:1,tutori:[0,1,2,3,6,9],tutorials_jupyt:4,tutorials_python:4,tvm:[7,8],twice:2,two:[1,2,3,8,9],type:[1,3,9],typic:[3,8],uint16:9,uint32:9,uint64:9,uint8:9,unclear:9,uncommon:8,underneath:8,understand:2,unfortun:[3,8],unifi:7,uninstal:3,union:9,unit:[0,7,10],univers:8,unless:9,unlik:3,unnecessari:10,unrol:8,until:9,updat:[3,8],usag:[0,2],use:[0,1,2,3,7,8,9,10],used:[1,3,9,10],useful:[9,10],uses:[3,10],using:[1,3,7,8,9,10],usual:[9,10],util:1,v100:[3,8],val_fals:1,val_tru:1,valid:1,valu:[1,2,3,10],valuabl:2,vari:9,variabl:[3,9,10],variant:7,variou:4,vasilach:[7,8],vasilache2018:[7,8],vast:8,vec:8,vector:[2,4,5,7,8,10],vendor:3,veri:[2,8],verif:8,verifi:[2,8],version:[3,10],via:8,violat:10,visibl:[8,10],vision:7,wai:[2,3,10],want:[0,1,2,3],warp:[2,9,10],wast:2,well:[7,8],were:[9,10],wget:3,what:1,when:[1,2,3,7,8,10],where:[2,3,8,9,10],whether:[1,7],which:[1,2,3,7,8,9],whose:[1,2,3,8,10],why:9,wide:8,width:3,wise:[1,2,9,10],wish:[3,8],within:10,without:[1,8],wolf:8,wolfe1989:8,won:2,word:[1,8],work:[2,6,7],workload:3,wors:[7,8],would:[2,9,10],wouldn:8,wrapper:3,write:[1,2,3,4,6,8],written:3,wrote:2,x86_64:3,x_1:9,x_2:9,x_log:1,x_max:2,x_name:[1,2,3],x_val:[1,2,3],xii:8,xzvf:3,y_line:[1,2,3],y_name:[1,2,3],y_ref:2,y_tri:2,y_val:[1,2,3],year:8,yet:[7,8,9],yii:8,ylabel:[1,2,3],you:[0,1,2,3,4,7],your:[0,6],yourself:[2,3],zero:3,zip:4},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Tutorials","Computation times","Welcome to Triton\u2019s documentation!","Introduction","Related Work","The Triton-C Language","The Triton-IR Intermediate Representation"],titleterms:{"final":3,"function":[3,10],The:[3,9,10],addit:1,advantag:8,analysi:10,arithmet:3,auto:3,autograd:3,basic:10,benchmark:[1,2,3],binari:0,bind:[1,2,3],block:10,broadcast:9,cach:3,challeng:7,compil:8,comput:[1,2,3,5],control:10,cutlass:3,dataflow:10,differ:9,distribut:0,document:6,extens:9,flow:10,from:0,fuse:2,get:6,guid:6,instal:[0,3],instruct:10,intermedi:10,introduct:7,kernel:[1,2,3],languag:[8,9],level:10,limit:8,matrix:3,model:9,modul:10,motiv:[2,3,7],multipl:3,optim:3,packag:0,perform:3,pointer:3,polyhedr:8,program:[6,8,9,10],python:0,refer:[7,8,10],relat:8,represent:[8,10],restrict:9,result:3,schedul:8,semant:9,softmax:2,sourc:0,squar:3,start:6,structur:10,test:[1,2,3],time:5,torch:[1,2,3],triton:[6,9,10],tune:3,tutori:4,type:10,unit:[1,2,3],vector:1,welcom:6,work:8}})