Files
triton/searchindex.js
2021-03-15 13:58:20 -04:00

1 line
9.8 KiB
JavaScript

Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":3,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":2,"sphinx.domains.rst":2,"sphinx.domains.std":2,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst"],objects:{},objnames:{},objtypes:{},terms:{"000":5,"0000":3,"1024":1,"10mn":2,"1250":3,"128":3,"157":[],"173":3,"176":3,"179":[],"181":[3,5],"182":3,"1823":2,"183":3,"184":3,"185":3,"186":3,"187":3,"188":[],"189":[],"190":3,"191":3,"192":3,"193":3,"194":3,"195":3,"196":3,"197":3,"198":[],"199":3,"200":3,"201":[],"2019":1,"202":[],"203":[],"2048":2,"205":[],"207":[],"2141":1,"220":3,"245":3,"2500":3,"256":[2,3],"270":[],"2mn":2,"3076":1,"320":[],"3713":1,"3750":3,"3mn":2,"4096":2,"420":[],"438":[],"4940":1,"5000":3,"502":[],"512":3,"515":[],"521":1,"571":[],"6250":3,"6724":1,"684":[],"716":[],"739":[],"7500":3,"768":3,"781":2,"7mn":2,"816":[],"8192":[],"8750":3,"896":[2,3],"916":[],"9733":1,"98432":1,"990":[],"byte":2,"case":[2,3],"class":[1,2,3],"export":3,"float":[1,2,3],"function":[1,2],"import":[1,2,3],"int":[1,2,3],"return":[1,2,3],"short":3,"super":3,"switch":3,"true":[1,2,3],"typeof":3,"void":[1,2,3],"while":0,For:[1,3],One:3,The:[1,2],These:3,Used:[1,2,3],__expf:2,__global__:[1,2,3],_add:1,_align8:3,_dot:3,_softmax:2,_src:[1,2],a100:3,abl:3,about:[1,2,3],abov:[1,2,3],abs:1,absolut:3,acc:3,access:1,accomod:3,account:3,achiev:1,activ:3,actual:3,add:[1,5],addit:[2,3,4,5],address:2,advantag:1,affect:3,after:3,against:[1,2,3],aggress:2,aim:6,algorithm:3,align8:3,all:[2,3,4],allclos:[2,3],alloc:1,allow:[1,2],almost:2,along:1,also:[0,1,2,3],amount:2,ani:[2,3],anoth:2,api:[0,1],appli:[1,2,3],approxim:2,apt:0,architectur:3,arg:[1,2,3],argument:[1,2,3],around:2,arrai:1,arrang:3,ask:2,assembl:3,assert:[1,2,3],assum:2,atol:3,attribut:1,auto:2,autograd:[1,2],automat:[2,3],autotun:3,autotune_config:3,autotune_kei:3,autotune_v:3,avail:3,axi:[1,2,3],back:[1,2,3],backward:1,bandwidth:2,basic:[1,3,4],becaus:2,befor:3,below:[1,4],bench:0,benchmark:0,benefit:2,best:[1,3],better:3,between:1,bin:3,binari:0,bind:[],bit:2,block:[1,2,3],blockidx:1,bool:[1,2],bound:[1,2],build:[0,3],built:[1,3],c_0:3,c_1:3,c_2:3,cach:[1,2],call:3,callabl:1,can:[0,1,2,3],cannot:3,capabl:[2,6],cast:3,cdiv:[1,3],ceil:1,challeng:[],chang:3,check:[1,2],chip:2,choic:6,chunk:1,citizen:1,click:[1,2,3],clone:[0,3],cmake:[0,3],code:[1,2,3,4],codegen:0,column:[2,3],com:[0,3],come:[1,2,3],command:[0,3],comment:1,compar:[2,3],compil:[1,2,3,6],comput:6,concis:1,cond:1,condit:1,config:[0,3],consid:2,consist:2,constraint:[1,2],construct:3,constructor:3,context:1,contigu:[1,3],contrari:1,control:1,coordin:1,copi:1,core:3,correct:1,correspond:[1,2,3],could:[2,3],cours:1,creat:[1,3],csv:1,ctx:[1,2,3],cubla:3,cuda:[1,2,3],current:3,custom:[0,1,2,3,6],cutlass:[],cutlass_include_dir:3,cutlass_library_dir:3,cutlass_library_kernel:3,cutlass_matmul:3,cutlass_tensorop_f16_s16816gemm_:3,cutlass_tensorop_f16_s884gemm_f16_:3,data:[1,2,3],data_ptr:[1,2,3],dblock:1,dcutlass_library_kernel:3,dcutlass_nvcc_archs_en:3,declar:1,decor:1,decreas:3,deep:3,def:[1,2,3],defin:[1,2,3],denom:2,denomin:2,dereferenc:1,desir:3,detail:1,dev:0,devic:[1,2,3],dict:[1,2,3],differ:[1,2,3],difficulti:3,dimens:3,dimension:3,dir:0,direct:0,directli:0,disk:1,divis:1,dllvm_config:0,dnn:6,do_bench:[1,2,3],doe:[1,2],don:2,done:[0,3],dot:3,doubli:3,download:[1,2,3,4],dram:2,driver:0,dtype:[1,2,3],dure:3,each:[1,2,3],easi:[1,3],easier:[1,2],education:2,effici:3,egg:3,element:[1,2],element_s:2,elementwis:2,empti:3,empty_lik:[1,2],entir:2,environ:[3,6],etc:3,exampl:[1,2,3,4],execut:[1,5],exist:1,exp:2,expect:[1,2],expert:3,exponenti:2,expos:3,extend:3,extens:1,f32_infin:2,fact:3,fairli:3,fall:3,far:2,fast:2,faster:[2,3],feel:3,few:1,file:[1,2,3,5],find:3,first:[1,6],fit:2,float16:3,float32:[1,2,3],flow:1,focu:3,follow:[0,2,3,6],forc:3,forward:[1,2,3],found:0,fp16:3,free:3,from:[1,2,3],full:[1,2,3],fuse:[3,4,5],fusion:2,futur:3,galleri:[1,2,3,4],gbp:[1,2],gener:[1,2,3,4],get:[0,1,2,3,5],get_program_id:[1,2,3],git:[0,3],github:[0,3],given:[2,3],good:[1,3],gpu:[2,6],greater:2,grid:[1,2,3],grid_m:3,grid_n:3,group:3,group_id:3,group_siz:3,guard:2,guess:2,half:3,handl:[2,3],handwritten:3,happen:3,hard:3,hardwar:[3,6],has:[1,2],have:2,helper:2,henc:3,here:[0,1,2,3],heurist:2,high:3,highli:3,highlight:1,hit:3,how:[1,2,3,6],howev:3,http:[0,3],ident:2,imper:6,implement:[1,2,3],implicitli:3,improv:3,includ:3,incompat:3,increas:1,index:2,indic:2,initi:3,inner:3,input:[1,2,3],instal:6,instanc:[2,3],instead:[1,2,3],instruct:6,int32:3,int64:3,integr:0,interest:[0,3],intermedi:0,intern:[0,2],introduc:3,ipynb:[1,2,3],irregular:2,is_contigu:3,iter:3,its:2,jupyt:[1,2,3,4],just:[1,2,3],kei:[1,2,3],kernel:6,kick:3,kitwar:3,label:[1,2,3],lambda:[1,2,3],languag:[1,6],larg:2,larger:2,launch:[1,2,3],lda:3,ldb:3,ldc:3,learn:[1,2,3,6],less:2,let:2,level:3,lib:3,librari:3,like:1,limit:2,line:[1,2,3],linux:3,list:3,live:3,llvm:0,load:[1,2],locat:3,logarithm:1,longer:2,look:[2,6],loop:3,macro:1,mai:[0,1],main:3,maintain:[1,2],major:3,make:[0,1,2,3],make_add_kernel:1,make_kernel:[2,3],mani:1,manual:2,manual_se:[1,2],mapl:1,match:3,matmul:3,matric:[2,3],matrix:[2,4,5],matter:[1,3],max:[1,2],max_group_s:3,max_m:[1,2,3],maxim:6,maximum:1,mean:[2,3],mechan:2,memori:[1,2,3],mention:[2,3],method:1,min:[2,3],min_m:[1,2,3],minut:[1,2,3],mixtur:3,mkdir:[0,3],model:1,modern:[3,6],modifi:[1,3],more:[1,2],most:3,move:2,multi:3,multipl:[1,4,5],must:[2,3],naiv:[2,3],naive_softmax:2,name:[1,2,3],nativ:1,natur:2,necessari:2,need:[1,2,3],nelement:2,nest:3,newaxi:3,next:[2,3],next_power_of_2:2,none:2,normal:2,note:[0,1,2,3],notebook:[1,2,3,4],notori:3,now:[1,2,3],num:2,num_warp:[2,3],number:[1,2],numer:2,nvidia:3,object:[1,3],obvious:2,offici:0,offset:[1,3],often:3,onc:2,one:[2,4],onli:[1,2,3],oper:[1,2,3,4],operand:3,ops:1,opt:[1,2,3],optim:[],order:[3,4],other:[1,3],our:[1,2,3],out:[1,2,3],outperform:[2,3],output:[1,2,3],overach:1,pad:2,paper:1,parallel:[1,2,3,6],paramet:[1,3],parenthes:3,parser:0,part:3,partial:2,pass:[1,3],past:1,path:[0,1,3],per:2,perf:3,perf_report:[1,2,3],perform:[1,2],pid:[1,3],pid_m:3,pid_n:3,piec:3,pip:[0,3],platform:6,plot:[0,1,2,3],plot_nam:[1,2,3],pointer:1,portabl:1,possibl:[1,2,3],power:2,practic:[1,2,3],pragma:[],pre:1,predic:2,predict:2,prefer:[1,2],pressur:3,pretti:[1,3],previou:2,primit:1,print:[1,2,3],probabl:3,problem:1,process:[1,3],processor:1,product:[3,6],program:[1,2,3,6],program_id:3,program_id_m:3,program_id_n:3,prologu:3,promot:3,properli:2,proprietari:3,provid:[0,1,2,3,6],pseudo:3,ptillet:[0,3],ptr:1,publicli:3,purpos:3,put:3,pytest:0,python:[1,2,3,4],pytorch:[1,2],quit:2,rand:[1,3],randn:[2,3],rang:[1,2,3],rate:3,rather:3,raw:1,read:[2,3,4],readi:3,reason:3,recent:3,recommend:4,reduct:[2,3],refer:1,regist:3,rel:1,releas:3,remateri:3,replic:3,represent:0,requir:2,result:[0,1,2],ret:2,retriev:1,reus:3,roughli:3,round:2,row:[2,3],rtol:3,run:[0,1,2,3,6],runtim:0,save:[1,2,3],save_path:1,script:[1,2,3],second:[1,2,3],section:3,see:[1,2,3],seem:1,sens:1,set:[1,3],shape:[1,2,3],share:2,shortcut:1,should:[1,3],show:3,show_plot:[1,2,3],shown:1,significantli:2,similar:[1,2],simpl:[1,2],simplest:4,simpli:3,sinc:3,singl:1,size:[1,3],smaller:2,smallest:2,softmax:[4,5],solut:3,some:3,sourc:[1,2,3,4],speak:3,spec:2,specif:3,specifi:3,speed:2,sphinx:[1,2,3,4],spmd:1,squar:[],src:3,stabil:2,standard:1,start:4,started_tutori:5,staticmethod:[1,2,3],store:[1,2],straightforward:3,stride:[2,3],stride_a_0:3,stride_b_0:3,stride_c_0:3,stride_xm:2,stride_ym:2,string:[1,3],subdirectori:3,sudo:0,sum:[1,2],support:0,sure:[1,2],syntax:[1,2],system:3,take:[1,3,6],tar:3,temporari:2,tensor:[1,3],ternari:1,test:0,tflop:3,than:[2,3],thei:[1,3],them:1,themselv:3,theoret:2,therefor:3,thi:[0,1,2,3],thing:1,think:2,those:[0,1],though:2,three:1,through:[3,4],throughput:6,tile:[2,3],time:[1,2,3],tmp:[0,3],togeth:3,too:2,tool:3,torch:[],total:[1,2,3,5],transfer:2,transform:1,trick:2,trigger:3,triton:[0,1,2,3,4],tune:2,tuner:3,tupl:1,tutori:[0,1,2,3,6],tutorials_jupyt:4,tutorials_python:4,twice:2,two:[1,2,3],type:[1,3],typic:3,understand:2,unfortun:3,uninstal:3,unit:0,unlik:3,updat:3,usag:[0,2],use:[0,1,2,3],used:[1,3],uses:3,using:[1,3],util:1,v100:3,val_fals:1,val_tru:1,valid:1,valu:[1,2,3],valuabl:2,variabl:3,variou:4,vector:[2,4,5],vendor:3,veri:2,verifi:2,version:3,wai:[2,3],want:[0,1,2,3],warp:2,wast:2,wget:3,what:1,when:[1,2,3],where:[2,3],whether:1,which:[1,2,3],whose:[1,2,3],width:3,wise:[1,2],wish:3,without:1,won:2,word:1,work:2,workload:3,would:2,wrapper:3,write:[1,2,3,4,6],written:3,wrote:2,x86_64:3,x_log:1,x_max:2,x_name:[1,2,3],x_val:[1,2,3],xzvf:3,y_line:[1,2,3],y_name:[1,2,3],y_ref:2,y_tri:2,y_val:[1,2,3],ylabel:[1,2,3],you:[0,1,2,3,4],your:[0,6],yourself:[2,3],zero:3,zip:4},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Tutorials","Computation times","Welcome to Triton\u2019s documentation!"],titleterms:{"final":3,"function":3,The:3,addit:1,all:[],arithmet:3,auto:3,autograd:3,automat:[],benchmark:[1,2,3],bind:[1,2,3],cach:3,comput:[1,2,3,5],cutlass:3,document:6,from:0,fuse:2,get:6,hit:[],improv:[],instal:[0,3],kernel:[1,2,3],matrix:3,motiv:[2,3],multipl:3,optim:3,packag:0,perform:3,pointer:3,put:[],python:0,rate:[],result:3,right:[],softmax:2,sourc:0,squar:3,start:6,test:[1,2,3],time:5,togeth:[],torch:[1,2,3],triton:6,tune:3,tutori:4,unit:[1,2,3],vector:1,welcom:6}})