Files
triton/searchindex.js
2021-03-19 16:19:50 -04:00

1 line
17 KiB
JavaScript

Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index","programming-guide/c-language-contractions","programming-guide/c-language-extensions","programming-guide/introduction","programming-guide/performance-optimization","programming-guide/programming-interface","programming-guide/programming-model","programming-guide/related-work","programming-guide/triton-c"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst","programming-guide/c-language-contractions.rst","programming-guide/c-language-extensions.rst","programming-guide/introduction.rst","programming-guide/performance-optimization.rst","programming-guide/programming-interface.rst","programming-guide/programming-model.rst","programming-guide/related-work.rst","programming-guide/triton-c.rst"],objects:{},objnames:{},objtypes:{},terms:{"000":5,"0000":3,"094":[3,5],"1024":1,"10mn":2,"1250":3,"128":3,"175":3,"180":3,"181":3,"182":3,"1823":2,"185":3,"186":3,"187":3,"188":3,"189":3,"190":3,"191":3,"192":3,"194":3,"195":3,"196":3,"197":3,"198":3,"1983":9,"1984":13,"1989":13,"199":3,"1991":[9,13],"1999":13,"200":3,"2006":13,"201":3,"2012":13,"2013":9,"2014":9,"2016":[9,13],"2017":9,"2018":[9,13],"2019":[1,13],"2021":[9,13],"2048":2,"2141":1,"220":3,"245":3,"2500":3,"256":[2,3],"299":2,"2mn":2,"3076":1,"3713":1,"3750":3,"3mn":2,"4096":2,"442":1,"4940":1,"5000":3,"512":3,"6250":3,"6724":1,"7500":3,"768":3,"781":2,"7mn":2,"8750":3,"896":3,"9733":1,"98432":1,"abstract":[9,13],"break":13,"byte":2,"case":[2,3,9,13,14],"class":[1,2,3,9,13,14],"export":3,"float":[1,2,3,9,13,14],"function":[1,2,13,14],"import":[1,2,3,9,13,14],"int":[1,2,3,9,13,14],"return":[1,2,3],"short":3,"static":[9,13,14],"super":3,"switch":3,"true":[1,2,3],"typeof":3,"var":13,"void":[1,2,3],"while":[0,9,14],For:[1,3,9,13,14],One:[3,14],SMs:13,T_S:13,The:[1,2,6,9,13],These:[3,13,14],Used:[1,2,3],__expf:2,__global__:[1,2,3,14],_add:1,_align8:3,_dot:3,_softmax:2,_src:[1,2],a100:[3,13],abl:[3,13],about:[1,2,3,6],abov:[1,2,3,13,14],abs:1,absolut:3,academ:9,acc:[3,9,13],acceler:9,access:[1,9,13],accomod:[3,14],accordingli:13,account:[3,13],accumul:13,accuraci:9,achiev:[1,9,13,14],across:[9,13],activ:3,actual:[3,9,13],add:[1,5,14],added:14,addit:[2,3,4,5,9,14],addition:[13,14],address:[2,9],adopt:13,advanc:9,advantag:1,advoc:13,affect:3,affin:13,after:3,again:14,against:[1,2,3,6],aggress:[2,9,13],agnost:[9,13],ahead:13,aim:6,algebra:13,algorithm:[3,9,13,14],alia:13,align8:3,all:[2,3,4,9,13,14],allclos:[2,3],allen:13,alloc:[1,9],allow:[1,2,9,13],almost:2,along:[1,14],also:[0,1,2,3,9,13,14],alwai:13,amd:9,amen:13,amount:[2,9],ampl:13,analysi:[9,13,14],analyz:13,ancourt:13,ani:[2,3,13,14],anoth:[2,13],ansi:14,apart:13,api:[0,1],appli:[1,2,3,9,13],applic:13,approach:[9,13,14],appropri:14,approxim:2,apt:0,architectur:[3,9],area:13,arg:[1,2,3],argument:[1,2,3,14],around:2,arrai:[1,13,14],arrang:3,art:[9,13],arxiv:[9,13],ask:2,aspect:[13,14],asplo:9,assembl:3,assert:[1,2,3],associ:14,assum:2,asynchron:9,atol:3,atom:14,atomic_add:14,attribut:[1,14],auguin:9,auto:[2,13],autograd:[1,2],autom:9,automat:[2,3,9,13,14],autotun:[3,13],autotune_config:3,autotune_kei:3,autotune_v:3,avail:[3,9,13],awar:[9,14],axi:[1,2,3,14],back:[1,2,3],backward:1,baghdadi:[9,13],balanc:13,bandwidth:2,base:[9,13,14],basic:[1,3,4,13,14],becaus:[2,14],becom:9,been:[9,13],befor:3,begin:13,behavior:13,believ:13,below:[1,4,13,14],bench:0,benchmark:0,benefit:[2,9,13,14],best:[1,3,9],better:3,between:[1,9],bin:3,binari:[0,14],bit:2,block:[1,2,3,9,13,14],block_siz:13,blockidx:1,boil:13,bool:[1,2,14],both:[13,14],bound:[1,2,13],branch:13,broad:9,build:[0,3],built:[1,3,13,14],c_0:3,c_1:3,c_2:3,cach:[1,2,9,13,14],call:[3,13],callabl:1,can:[0,1,2,3,9,13,14],cannot:[3,9,13,14],capabl:[2,6,9,14],cast:3,categori:14,cdiv:[1,3],ceil:1,certain:14,cgo:[9,13],chang:[3,14],chapter:[6,14],characterist:13,cheap:9,check:[1,2,6],chen:9,chip:2,choic:6,chunk:1,cite:13,citizen:[1,14],click:[1,2,3],clone:[0,3],close:13,cmake:[0,3],coalesc:[9,14],code:[1,2,3,4,9,13,14],codegen:0,col:13,column:[2,3,14],com:[0,3,13],combin:9,come:[1,2,3,13],command:[0,3],comment:1,common:13,commonli:13,commun:14,compar:[2,3,6,13],compil:[1,2,3,6,9,14],complet:[13,14],complex:[13,14],compos:9,composit:13,comprehens:[9,13],comput:[6,9,13],computation:[9,13],concern:13,concis:1,concurr:14,cond:1,condit:[1,13,14],config:[0,3],connectom:9,consecut:13,consequ:9,consid:2,consist:2,constant:14,constraint:[1,2,13,14],construct:[3,9,14],constructor:3,contain:13,content:14,context:1,contextu:13,contigu:[1,3],contrari:1,control:[1,9,13,14],convers:14,convolut:9,coordin:1,copi:[1,9],core:[3,9,13,14],correct:1,correspond:[1,2,3],cost:13,could:[2,3,13],cours:[1,9],creat:[1,3,9,14],csv:1,ctx:[1,2,3],cubla:[3,9],cuda:[1,2,3,9,14],cudnn:9,current:[3,14],custom:[0,1,2,3,6],cutlass_include_dir:3,cutlass_library_dir:3,cutlass_library_kernel:3,cutlass_matmul:3,cutlass_tensorop_f16_s16816gemm_:3,cutlass_tensorop_f16_s884gemm_f16_:3,cvpr:9,dart:13,data:[1,2,3,9,13,14],data_ptr:[1,2,3],dataflow:13,dblock:1,dcutlass_library_kernel:3,dcutlass_nvcc_archs_en:3,decad:9,declar:[1,14],decompos:[13,14],decor:1,decreas:3,deep:[3,9,13],def:[1,2,3],defin:[1,2,3,13,14],definit:[13,14],denom:2,denomin:2,dens:13,depend:13,deploi:9,dereferenc:[1,14],describ:[13,14],design:13,desir:3,detail:[1,13,14],detect:9,dev:0,develop:[9,13,14],devic:[1,2,3],dialect:13,dict:[1,2,3],diesel:13,differ:[1,2,3,9,13],difficult:13,difficulti:[3,9],dijkstra82:13,dim:13,dimens:[3,14],dimension:[3,13,14],dir:0,direct:0,directli:[0,14],discuss:14,disjoint:13,disk:1,dissert:13,distribut:13,divis:1,dllvm_config:0,dnn:[6,9,13],do_bench:[1,2,3],document:14,doe:[1,2,13,14],doesn:13,doing:9,domain:[9,13],don:2,done:[0,3,9,14],door:14,dot:3,doubl:14,doubli:3,doubt:13,down:13,download:[1,2,3,4],dram:2,driver:0,dsl:[6,9,13],dtype:[1,2,3],dure:3,each:[1,2,3,9,13,14],eas:13,easi:[1,3],easier:[1,2,9],education:2,effect:13,effici:[3,9],effort:13,egg:3,either:14,elango:13,element:[1,2,14],element_s:2,elementwis:2,ellips:14,emerg:9,emit:14,empti:3,empty_lik:[1,2],enabl:13,encod:13,end:[9,13,14],enforc:13,engin:[13,14],ensur:13,entir:[2,13],environ:[3,6],equal:13,error:14,especi:9,etc:[3,14],euromicro:9,even:13,evidenc:9,evolv:9,exampl:[1,2,3,4,9,13,14],execut:[1,5,9,13,14],exemplifi:14,exist:[1,9,13,14],exp:2,expect:[1,2],expens:[9,13],expert:3,explor:9,exponenti:2,expos:3,express:[9,13],extend:[3,14],extens:1,extrem:13,f32_infin:2,facilit:[9,13,14],fact:[3,13],fairli:3,fall:[3,14],familiar:14,far:2,fast:[2,9,13],faster:[2,3],fastest:13,featur:14,feel:3,fetch:9,few:[1,13,14],field:9,figur:13,file:[1,2,3,5],find:3,first:[1,6,13,14],fit:2,flexibl:9,float16:3,float32:[1,2,3],flow:[1,9,13,14],focu:[3,13],follow:[0,2,3,6,9,13,14],forbidden:14,forc:3,formal:[13,14],format:13,forward:[1,2,3],found:[0,14],foundat:13,fp16:3,framework:[9,13],free:3,from:[1,2,3,9,13,14],full:[1,2,3],fulli:13,func:13,fundament:13,further:13,fuse:[3,4,5],fusion:[2,13],futur:[3,14],galleri:[1,2,3,4],gbp:[1,2],gener:[1,2,3,4,9,13],geq:13,get:[0,1,2,3,5],get_program_id:[1,2,3,14],girbal:13,git:[0,3],github:[0,3],give:9,given:[2,3],global:[13,14],good:[1,3,13],gpgpu:9,gpu:[2,6,9,13,14],grammat:13,graphic:9,greater:2,grid:[1,2,3],grid_m:3,grid_n:3,grosser:13,group:3,group_id:3,group_siz:3,grow:13,guard:2,guess:2,guid:9,had:14,half:[3,14],halid:[9,13],hand:13,handl:[2,3,13,14],handwritten:[3,9],happen:3,hard:3,harder:13,hardwar:[3,6,13,14],has:[1,2,9,13],have:[2,9,13,14],haven:14,heavi:9,helper:2,henc:3,here:[0,1,2,3],heurist:2,hide:14,hierarch:9,hierarchi:13,high:[3,9,13],highli:[3,9],highlight:[1,13],hint:13,hit:3,hook:14,how:[1,2,3,6,9,14],howev:[3,13,14],http:[0,3],idea:9,ident:[2,14],identifi:14,ids:14,imag:[9,13],imper:[6,14],implement:[1,2,3,9,13,14],implicitli:[3,14],importantli:13,impos:13,improv:3,includ:3,incompat:[3,13,14],increas:1,incred:9,increment:13,inde:13,independ:13,index:2,indic:[2,13],induc:13,industri:9,inequ:13,infanc:14,inform:13,infrastructur:13,initi:[3,14],inner:3,input:[1,2,3,13],instal:6,instanc:[2,3,9,14],instead:[1,2,3],instrins:14,instruct:[6,9,14],int16:14,int32:[3,14],int64:[3,14],int8:14,integ:[13,14],integr:0,inter:14,interchang:13,interest:[0,3,9,13],intermedi:[0,13,14],intern:[0,2,13],intrins:13,introduc:[3,14],introduct:[6,14],intuit:14,invari:13,ipynb:[1,2,3],irregular:[2,13],is_contigu:3,issu:[9,13],iter:[3,9,13],its:[2,13,14],itself:14,jit:14,journal:13,jupyt:[1,2,3,4],just:[1,2,3,13,14],kei:[1,2,3,9],kellei:9,kernel:[6,9,14],kick:3,kind:14,kitwar:3,known:13,label:[1,2,3],lam:9,lambda:[1,2,3],languag:[1,6,9],larg:[2,9,13],larger:2,later:13,lattner:13,launch:[1,2,3],law:13,layer:[9,13],lda:3,ldb:3,ldc:3,lead:[9,13,14],learn:[1,2,3,6,9,13],least:13,lee:9,left:[13,14],legal:13,less:[2,9,13],let:2,letter:13,level:[3,9,13,14],lib:3,librari:[3,9,13],lies:9,lifelong:13,like:[1,9,13,14],limit:[2,14],line:[1,2,3,13],linear:[9,13],linux:3,list:3,litteratur:13,live:3,llvm:[0,13],load:[1,2,13,14],local:[9,13],locat:3,logarithm:1,longer:2,look:[2,6,9],loop:[3,13,14],low:13,lowest:14,lstlist:14,machin:[9,13],machineri:[9,13],macro:[1,14],made:[9,14],mai:[0,1,13,14],main:[3,9,13,14],maintain:[1,2,13],major:[3,13],make:[0,1,2,3,9,13],make_add_kernel:1,make_kernel:[2,3],manag:[9,14],mani:[1,9,13,14],manual:[2,13],manual_se:[1,2],mapl:[1,13],markedli:9,mask:14,match:3,mathbb:13,mathbf:13,mathcal:13,mathemat:13,matmul:[3,13],matric:[2,3],matrix:[2,4,5,9,13,14],matrix_s:13,matter:[1,3,9,13],max:[1,2,14],max_group_s:3,max_m:[1,2,3],maxim:[6,13],maximum:[1,14],mean:[2,3,13,14],mechan:[2,13],memori:[1,2,3,9,13,14],mention:[2,3],method:[1,13,14],methodolog:13,micro:9,might:14,min:[2,3,14],min_m:[1,2,3],minut:[1,2,3],miss:13,mitig:13,mixtur:3,mkdir:[0,3],mlir:13,model:[1,9,13],modern:[3,6,9,13,14],modifi:[1,3,14],modular:13,moor:13,more:[1,2,6,9,13],most:[3,13],move:2,mullapudi:13,multi:[3,9,13,14],multipl:[1,4,5,9,13,14],multipli:13,must:[2,3,14],naiv:[2,3],naive_softmax:2,name:[1,2,3],nativ:1,natur:[2,9],necessari:2,need:[1,2,3,14],nelement:2,nest:[3,13,14],net:13,network:[9,13],neural:[9,13],neurosci:9,newaxi:[3,14],next:[2,3,14],next_power_of_2:2,nip:9,non:[9,14],none:2,nonetheless:14,normal:2,note:[0,1,2,3,13,14],notebook:[1,2,3,4],notic:13,notori:[3,9],novel:9,now:[1,2,3,14],num:2,num_warp:[2,3],number:[1,2,13],numer:[2,9],numpi:14,nvidia:[3,9],object:[1,3,9],obvious:2,occur:13,offer:[9,14],offici:0,offset:[1,3],often:3,omega:13,onc:[2,9,13],one:[2,4,9,13,14],ones:14,onli:[1,2,3,9,13],open:14,opencl:9,oper:[1,2,3,4,9,14],operand:[3,14],opportun:9,ops:1,opsila:9,opt:[1,2,3],optim:[9,13,14],order:[3,4,13],origin:13,osdi:9,other:[1,3,6,13],our:[1,2,3,9,14],out:[1,2,3,6,13],outlin:13,outperform:[2,3],output:[1,2,3],over:[9,13],overach:1,pact:13,pad:[2,14],paper:1,paradigm:[9,13],parallel:[1,2,3,6,9,13,14],paralleliz:9,paramet:[1,3,13,14],parametr:[9,14],parenthes:3,parser:0,part:[3,13],partial:2,particular:14,particularli:[9,13],partit:9,pass:[1,3,13,14],past:[1,9,13],path:[0,1],pattern:9,peak:13,per:2,perf:3,perf_report:[1,2,3],perform:[1,2,9,13,14],pertain:14,phase:13,philosophi:13,pid:[1,3],pid_m:3,pid_n:3,piec:3,pip:[0,3],pipelin:[9,13],platform:[6,13],pldi:9,plot:[0,1,2,3],plot_nam:[1,2,3],pmatrix:13,point:[13,14],pointer:[1,14],polli:13,polyhedr:9,polyhedra:13,popular:13,portabl:[1,9,13],pose:9,possibl:[1,2,3,13],power:[2,13],ppopp:13,practic:[1,2,3,9],pragma:9,pre:[1,9,14],preced:14,predic:2,predict:[2,13],prefer:[1,2],premis:9,present:14,preserv:13,pressur:3,pretti:[1,3,14],prevent:13,previou:2,primer:13,primit:[1,9,14],principl:[13,14],print:[1,2,3],probabl:[3,13],problem:1,problemat:13,procedur:13,process:[1,3,9,13],processor:[1,9,14],product:[3,6,13],program:[1,2,3,9],program_id:3,program_id_m:3,program_id_n:3,programm:[9,13,14],project:[9,14],prologu:3,promot:[3,13],properli:2,properti:13,propos:9,proprietari:3,provid:[0,1,2,3,6,13,14],pseudo:[3,14],ptillet:[0,3],ptr:[1,14],publicli:3,purpos:[3,9,13,14],push:13,put:3,putat:13,puzzl:14,pytest:0,python:[1,2,3,4],pytorch:[1,2],qquad:13,queri:14,quit:[2,14],ragan:9,rand:[1,3],randn:[2,3],rang:[1,2,3,9,13,14],rapidli:[9,13],rate:3,rather:[3,9],raw:1,rdom:13,read:[2,3,4],reader:13,readi:3,real:9,reason:[3,13,14],recent:[3,9],recommend:4,recomput:9,rectifi:9,redmon:9,reduct:[2,3,14],refer:1,regist:3,regrett:9,regular:13,rel:[1,13],relat:6,releas:[3,9],reli:13,remain:9,remateri:3,reorder:13,replic:[3,14],repres:13,represent:0,requir:[2,13,14],research:[9,13],reshap:14,resolut:13,resourc:9,resp:13,respect:13,restrict:13,result:[0,1,2,9,13],ret:2,retriev:[1,13,14],reus:3,revisit:9,right:[13,14],rise:13,roughli:3,round:2,row:[2,3,14],rtol:3,rule:14,run:[0,1,2,3,6,13],runtim:[0,13],rvar:13,said:13,same:[9,14],satisfi:14,sato:13,save:[1,2,3],save_path:1,scalabl:13,scalar:[9,14],scan:13,schedul:9,scienc:13,scop:13,scope:13,script:[1,2,3],second:[1,2,3,13,14],section:[3,13],see:[1,2,3,13],seem:[1,13],select:9,semant:13,semi:13,sens:[1,9,13,14],separ:13,sequenc:9,set:[1,3,13],sever:[9,13,14],shall:13,shape:[1,2,3,13,14],share:[2,9,14],shortcut:1,should:[1,3,9,13],show:3,show_plot:[1,2,3],shown:[1,13,14],side:13,sight:13,signal:9,significantli:2,sigplan:13,simd:9,similar:[1,2,14],simpl:[1,2],simpler:14,simplest:4,simpli:[3,13],simplifi:14,sinc:3,singl:[1,9,14],size:[1,3,13],slice:14,slower:[9,13],slowest:13,smaller:2,smallest:2,snemi3d:9,softmax:[4,5],solid:13,solut:3,solv:13,some:[3,14],sometim:13,sourc:[1,2,3,4,13],space:[9,13],spars:[9,13],spatial:13,speak:3,spec:2,special:[9,14],specif:[3,9],specifi:[3,13],speed:2,sphinx:[1,2,3,4],split:13,spmd:[1,9,13,14],src:3,stabil:2,stack:14,standard:[1,13],start:4,started_tutori:5,state:[9,13],statement:13,staticmethod:[1,2,3],step:13,still:[13,14],store:[1,2,14],straightforward:3,strategi:13,strength:9,stress:14,strict:14,stride:[2,3],stride_a_0:3,stride_b_0:3,stride_c_0:3,stride_xm:2,stride_ym:2,string:[1,3],strongli:14,struct:14,structur:[9,13,14],sub:14,subdirectori:3,subscript:13,subsect:14,substanti:9,successfulli:13,sudo:0,suffer:13,suit:9,sum:[1,2,14],superhuman:9,support:[0,13,14],sure:[1,2],surprisingli:9,surround:13,sutskev:9,swizzl:9,synchron:[9,14],syntax:[1,2,14],system:[3,9,13],taco:13,take:[1,3,6],taken:13,tar:3,target:9,techniqu:[9,13],tempor:13,temporari:2,tend:13,tension:9,tensor:[1,3,9,13,14],tensorrt:9,ternari:[1,14],test:0,text:13,tflop:3,than:[2,3,9,13],thei:[1,3,9,13,14],them:1,themselv:3,theoret:2,therebi:13,therefor:3,theta:13,theta_:13,thi:[0,1,2,3,9,13,14],thing:1,think:2,those:[0,1,14],though:[2,9,13,14],thread:[9,14],three:1,through:[3,4,13],throughout:13,throughput:6,tile:[2,3,13],time:[1,2,3,9,13,14],tiramisu:[9,13],tmp:[0,3],tog:13,togeth:3,too:2,tool:3,topic:13,total:[1,2,3,5],tradit:[9,13],transcompil:14,transfer:2,transform:[1,13],travers:13,trend:9,trick:2,trigger:3,triton:[0,1,2,3,4,9,13],trivial:9,tune:[2,13],tuner:3,tupl:1,tutori:[0,1,2,3,6,14],tutorials_jupyt:4,tutorials_python:4,tvm:[9,13],twice:2,two:[1,2,3,13,14],type:[1,3,14],typic:[3,13],uint16:14,uint32:14,uint64:14,uint8:14,unclear:14,uncommon:13,underneath:13,understand:2,unfortun:[3,13],unifi:9,uninstal:3,union:14,unit:[0,9],univers:13,unless:14,unlik:3,unrol:13,until:14,updat:[3,13],usag:[0,2],use:[0,1,2,3,9,13,14],used:[1,3,14],useful:14,uses:3,using:[1,3,9,13,14],usual:14,util:1,v100:[3,13],val_fals:1,val_tru:1,valid:1,valu:[1,2,3],valuabl:2,vari:14,variabl:[3,14],variant:9,variou:4,vasilach:[9,13],vast:13,vec:13,vector:[2,4,5,9,13],vendor:3,veri:[2,13],verif:13,verifi:[2,13],version:3,via:13,visibl:13,vision:9,wai:[2,3],want:[0,1,2,3],warp:[2,14],wast:2,well:[9,13],were:14,wget:3,what:1,when:[1,2,3,9,13],where:[2,3,13,14],whether:[1,9],which:[1,2,3,9,13,14],whose:[1,2,3,13],why:14,wide:13,width:3,wise:[1,2,14],wish:[3,13],without:[1,13],wolf:13,won:2,word:[1,13],work:[2,6,9],workload:3,wors:[9,13],would:[2,14],wouldn:13,wrapper:3,write:[1,2,3,4,6,13],written:3,wrote:2,x86_64:3,x_1:14,x_2:14,x_log:1,x_max:2,x_name:[1,2,3],x_val:[1,2,3],xii:13,xzvf:3,y_line:[1,2,3],y_name:[1,2,3],y_ref:2,y_tri:2,y_val:[1,2,3],year:13,yet:[9,13,14],yii:13,ylabel:[1,2,3],you:[0,1,2,3,4,9],your:[0,6],yourself:[2,3],zero:3,zip:4},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Tutorials","Computation times","Welcome to Triton\u2019s documentation!","<no title>","<no title>","Introduction","<no title>","<no title>","<no title>","Related Work","The Triton-C Language"],titleterms:{"final":3,"function":3,The:[3,14],addit:1,advantag:13,arithmet:3,auto:3,autograd:3,benchmark:[1,2,3],bind:[1,2,3],broadcast:14,cach:3,challeng:9,compil:13,comput:[1,2,3,5],cutlass:3,differ:14,document:6,extens:14,from:0,fuse:2,get:6,guid:6,instal:[0,3],introduct:9,kernel:[1,2,3],languag:[13,14],limit:13,matrix:3,model:14,motiv:[2,3,9],multipl:3,optim:3,packag:0,perform:3,pointer:3,polyhedr:13,program:[6,13,14],python:0,refer:[9,13],relat:13,represent:13,restrict:14,result:3,schedul:13,semant:14,softmax:2,sourc:0,squar:3,start:6,test:[1,2,3],time:5,torch:[1,2,3],triton:[6,14],tune:3,tutori:4,unit:[1,2,3],vector:1,welcom:6,work:13}})