Files
triton/searchindex.js
2021-09-06 00:14:06 +00:00

1 line
30 KiB
JavaScript

Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/04-low-memory-dropout","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index","programming-guide/chapter-1/introduction","programming-guide/chapter-2/related-work","python-api/generated/triton.Config","python-api/generated/triton.autotune","python-api/generated/triton.heuristics","python-api/generated/triton.jit","python-api/generated/triton.language.arange","python-api/generated/triton.language.atomic_add","python-api/generated/triton.language.atomic_cas","python-api/generated/triton.language.atomic_max","python-api/generated/triton.language.atomic_min","python-api/generated/triton.language.atomic_xchg","python-api/generated/triton.language.broadcast_to","python-api/generated/triton.language.cos","python-api/generated/triton.language.dot","python-api/generated/triton.language.exp","python-api/generated/triton.language.load","python-api/generated/triton.language.log","python-api/generated/triton.language.max","python-api/generated/triton.language.maximum","python-api/generated/triton.language.min","python-api/generated/triton.language.minimum","python-api/generated/triton.language.multiple_of","python-api/generated/triton.language.num_programs","python-api/generated/triton.language.program_id","python-api/generated/triton.language.rand","python-api/generated/triton.language.randint","python-api/generated/triton.language.randint4x","python-api/generated/triton.language.randn","python-api/generated/triton.language.ravel","python-api/generated/triton.language.reshape","python-api/generated/triton.language.sigmoid","python-api/generated/triton.language.sin","python-api/generated/triton.language.softmax","python-api/generated/triton.language.sqrt","python-api/generated/triton.language.store","python-api/generated/triton.language.sum","python-api/generated/triton.language.where","python-api/generated/triton.language.zeros","python-api/generated/triton.testing.Benchmark","python-api/generated/triton.testing.do_bench","python-api/generated/triton.testing.perf_report","python-api/triton","python-api/triton.language","python-api/triton.testing"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/04-low-memory-dropout.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst","programming-guide/chapter-1/introduction.rst","programming-guide/chapter-2/related-work.rst","python-api/generated/triton.Config.rst","python-api/generated/triton.autotune.rst","python-api/generated/triton.heuristics.rst","python-api/generated/triton.jit.rst","python-api/generated/triton.language.arange.rst","python-api/generated/triton.language.atomic_add.rst","python-api/generated/triton.language.atomic_cas.rst","python-api/generated/triton.language.atomic_max.rst","python-api/generated/triton.language.atomic_min.rst","python-api/generated/triton.language.atomic_xchg.rst","python-api/generated/triton.language.broadcast_to.rst","python-api/generated/triton.language.cos.rst","python-api/generated/triton.language.dot.rst","python-api/generated/triton.language.exp.rst","python-api/generated/triton.language.load.rst","python-api/generated/triton.language.log.rst","python-api/generated/triton.language.max.rst","python-api/generated/triton.language.maximum.rst","python-api/generated/triton.language.min.rst","python-api/generated/triton.language.minimum.rst","python-api/generated/triton.language.multiple_of.rst","python-api/generated/triton.language.num_programs.rst","python-api/generated/triton.language.program_id.rst","python-api/generated/triton.language.rand.rst","python-api/generated/triton.language.randint.rst","python-api/generated/triton.language.randint4x.rst","python-api/generated/triton.language.randn.rst","python-api/generated/triton.language.ravel.rst","python-api/generated/triton.language.reshape.rst","python-api/generated/triton.language.sigmoid.rst","python-api/generated/triton.language.sin.rst","python-api/generated/triton.language.softmax.rst","python-api/generated/triton.language.sqrt.rst","python-api/generated/triton.language.store.rst","python-api/generated/triton.language.sum.rst","python-api/generated/triton.language.where.rst","python-api/generated/triton.language.zeros.rst","python-api/generated/triton.testing.Benchmark.rst","python-api/generated/triton.testing.do_bench.rst","python-api/generated/triton.testing.perf_report.rst","python-api/triton.rst","python-api/triton.language.rst","python-api/triton.testing.rst"],objects:{"triton.Config":{__init__:[10,1,1,""]},"triton.language":{arange:[14,2,1,""],atomic_add:[15,2,1,""],atomic_cas:[16,2,1,""],atomic_max:[17,2,1,""],atomic_min:[18,2,1,""],atomic_xchg:[19,2,1,""],broadcast_to:[20,2,1,""],cos:[21,2,1,""],dot:[22,2,1,""],exp:[23,2,1,""],load:[24,2,1,""],log:[25,2,1,""],max:[26,2,1,""],maximum:[27,2,1,""],min:[28,2,1,""],minimum:[29,2,1,""],multiple_of:[30,2,1,""],num_programs:[31,2,1,""],program_id:[32,2,1,""],rand:[33,2,1,""],randint4x:[35,2,1,""],randint:[34,2,1,""],randn:[36,2,1,""],ravel:[37,2,1,""],reshape:[38,2,1,""],sigmoid:[39,2,1,""],sin:[40,2,1,""],softmax:[41,2,1,""],sqrt:[42,2,1,""],store:[43,2,1,""],sum:[44,2,1,""],where:[45,2,1,""],zeros:[46,2,1,""]},"triton.testing":{Benchmark:[47,0,1,""],do_bench:[48,2,1,""],perf_report:[49,2,1,""]},"triton.testing.Benchmark":{__init__:[47,1,1,""]},triton:{Config:[10,0,1,""],autotune:[11,2,1,""],heuristics:[12,2,1,""],jit:[13,2,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function"},terms:{"0":[1,2,3,4,6,8,9,31,32,33,36,46,48],"00":6,"0000":3,"000000":[1,2],"000001":2,"000002":2,"004273":1,"007961":2,"01":[1,3,6],"017526":3,"02":[2,6],"025776":3,"028308":3,"03":[3,6],"037087":3,"038365":2,"04":[4,6],"061463":3,"0625":3,"08199":4,"08452":4,"084721":1,"0938":3,"097543":2,"098521":3,"0f":9,"0s":4,"1":[1,2,3,4,7,9,12,31,32,33,36],"10":[1,3,4,6],"100":[2,48],"1024":[1,3,4,11],"1045":3,"1048576":1,"106321":3,"106434":4,"109587":3,"11":[0,1,3],"111113":2,"1152":3,"12":[1,2,3,6],"12160":2,"12288":2,"123":4,"12416":2,"12544":2,"12672":2,"127":1,"128":[1,2,3,11],"1280":3,"13":[1,3,6],"131072":1,"1328":3,"133347":2,"134217728":1,"13686":4,"138541":3,"14":[1,3],"140799":3,"1408":3,"142849":2,"142862":2,"149397":4,"15":[1,3],"153":2,"1536":3,"153853":2,"154":2,"159957":3,"16":[2,3,9,46],"160":2,"162":2,"16384":1,"1664":3,"16777216":1,"17":3,"171410":[1,2],"172588":3,"178":[3,6],"17879":4,"1792":3,"18":3,"181817":2,"1823":2,"186":2,"189":[4,6],"19":[1,3],"190482":1,"192":1,"1920":3,"198":2,"1982":9,"1983":8,"1984":9,"1989":9,"199":2,"1991":[8,9],"1999":9,"1d":[1,2,3],"1e":[1,2,3],"1s":4,"2":[1,2,3,4,7,9,10,12,31,32,48],"20":[3,48],"200000":1,"200001":3,"2004":9,"2006":9,"2011":4,"2012":9,"2013":8,"2014":[4,8],"2016":[8,9],"2017":8,"2018":[8,9],"2019":9,"2021":[8,9],"2048":[2,3],"2097152":1,"21":3,"211821":3,"212868":4,"2141":1,"214186":4,"216187":2,"2176":3,"219":1,"22":3,"220":3,"222812":2,"23":3,"2304":3,"235468":3,"24":3,"2432":3,"244062":3,"245":3,"25":[3,48],"256":[1,2,3,10],"2560":3,"26":3,"260869":3,"262144":1,"264875":2,"265163":3,"2656":3,"267139":3,"2688":3,"27":3,"275679":3,"275764":3,"28":[1,3],"2812":3,"2816":3,"284643":3,"284657":3,"2891":3,"29":3,"293429":4,"2944":3,"296679":3,"298541":2,"298560":3,"298794":4,"2d":[3,22],"2m":2,"2mn":2,"3":[0,1,2,3,4,9],"30":3,"305746":3,"3072":3,"3076":1,"31":3,"3125":3,"32":[3,10],"3200":3,"321474":3,"32768":1,"3281":3,"33":3,"3328":3,"333321":1,"33554432":1,"34":3,"341":1,"34172":4,"3438":3,"3456":3,"3477":3,"3516":3,"355034":3,"3555":3,"3584":3,"359066":2,"36":[3,6],"362445":1,"37":3,"3712":3,"3713":1,"371721":4,"372618":3,"372800":3,"38":1,"380953":3,"384":[2,3],"3840":3,"384000":3,"39":3,"3906":3,"392744":3,"3968":3,"3984":3,"3986":4,"3d":[31,32],"3mn":2,"4":[1,2,3,9,10,11,34],"40":3,"400001":1,"400016":1,"4023":3,"403344":4,"403347":4,"405":2,"4062":3,"408716":4,"4096":[1,2,3],"411":2,"412":2,"415":2,"4194304":1,"420235":3,"42142":4,"428568":1,"428801":3,"429770":[1,2],"430545":3,"431969":4,"44":3,"441243":3,"442822":3,"4492":3,"4531":3,"46":3,"4609":3,"4688":3,"472":1,"485870":3,"49":3,"4940":1,"4m":2,"4x":2,"5":[1,3,4,9],"5000":3,"501144":3,"505492":3,"51":3,"512":[2,3,4],"52":3,"524288":1,"5312":3,"531519":3,"54":3,"540109":3,"541":4,"546":2,"552988":3,"56":3,"560748":3,"563555":3,"564701":3,"566038":2,"566925":3,"568431":4,"577704":1,"584279":3,"585":2,"5859":3,"586":[2,6],"586858":4,"5898":3,"5mn":2,"6":[0,1,3],"600000":1,"600004":2,"602979":3,"6094":3,"614":1,"615390":1,"62":3,"63":3,"630":2,"64":[1,3],"640":[2,3],"643310":3,"645170":2,"65536":1,"656000":3,"656574":1,"660":2,"664":2,"664092":3,"67":3,"67086":4,"67108864":1,"671229":3,"6724":1,"675250":3,"68":3,"683457":3,"69":3,"6953":3,"7":[0,1,3,9],"70":3,"702":[1,2],"7031":3,"7070":3,"707878":4,"71":3,"719258":4,"72":3,"722274":2,"722333":3,"73":3,"730667":3,"738820":3,"74":3,"743443":4,"748617":3,"75":3,"7500":3,"752274":3,"754967":2,"755985":2,"76":[1,3],"768":[2,3],"768000":3,"77":3,"78":3,"780":1,"781":2,"782312":3,"79":3,"79719":4,"8":[1,2,3,9,10,11,46,48],"80":[3,48],"800002":1,"806694":4,"81":3,"810":2,"811":2,"811163":1,"812":[1,2],"813365":3,"818184":3,"8192":1,"82":3,"827088":3,"83":3,"833":1,"833728":3,"838026":4,"8388608":1,"84":3,"840807":2,"842":1,"84284":4,"843":1,"848":1,"85":3,"850":1,"851":1,"855747":3,"86":3,"863938":4,"864488":3,"87":3,"88":3,"880718":3,"8828":3,"885914":3,"8867":3,"888887":3,"8906":3,"8945":3,"896":3,"899428":3,"8mn":2,"9":[0,1,2,3,4],"90":3,"903517":3,"90567":4,"906037":3,"908060":3,"91":3,"92":3,"9219":3,"925276":2,"93":[2,3],"932191":3,"932484":3,"932543":3,"9375":3,"94":2,"949":6,"9492":3,"95":2,"952835":4,"9531":3,"954424":2,"96":2,"9688":3,"97":2,"971025":3,"971190":2,"9733":1,"978909":3,"98":2,"9805":3,"981598":3,"983276":3,"98432":1,"9844":3,"994643":3,"996":[1,6],"999995":1,"abstract":[8,9],"break":9,"byte":2,"case":[1,2,8,9,12,15,16,17,18,19],"class":[2,8,9,10,47],"default":48,"do":[2,3,8,9,24,43],"float":[2,8,9,48],"function":[1,2,3,4,9,11,12,13,47,48,49],"import":[1,2,3,4,8,9],"int":[1,8,9,12,14,20,31,32,38,46,48],"new":[20,38,46],"return":[1,2,3,4,14,15,16,17,18,19,22,24,26,28,31,32,33,34,35,36,37,44,45,46,48,49],"static":[0,8,9],"super":3,"switch":3,"true":[1,2,3,45],"try":[3,10],"var":9,"voil\u00e0":4,"while":[3,8],A:[3,4,8,9],And:[0,3],As:[2,3,4,8,9],At:[4,9],But:4,By:48,For:[3,8,9,10],If:[4,9,34,43,45,47],In:[1,2,3,4,9],It:[1,3,4,5,7,9,13],Of:8,On:9,One:3,The:[1,2,3,4,8,9,15,16,17,18,19,20,22,31,32,33,34,35,36,38,43,45,49],There:1,These:9,To:[1,4,8,9,11],__expf:2,__init__:[10,47],_dropout:4,_matmul:3,_seeded_dropout:4,a100:[3,9],a_ptr:3,ab:1,abl:9,about:[1,2,3,4,7],abov:[1,2,3,4,9,11],academ:8,acc:[3,8,9],acceler:8,access:[1,3,8,9,13],accomod:3,accordingli:9,account:9,accumul:[3,9],accuraci:[3,8],achiev:[3,8,9],across:[2,4,8,9],activ:3,actual:[3,8,9],add:[1,4,6,15],add_kernel:1,addit:[2,5,6,8,48],addition:9,address:[8,24],adopt:9,advanc:[2,3,8],advoc:9,affect:3,affin:9,after:3,against:[0,1,2,3,7],aggress:[8,9],agnost:[8,9],ahead:9,aim:[2,7],al:[8,9],alex:4,algebra:9,algorithm:[3,4,8,9],alia:9,all:[2,3,4,5,8,9,11,26,28,30,44,47],allclos:[2,3],allen1984:9,allen:9,alloc:[1,2,3,8],allow:[1,2,8,9],along:[1,3,26,28,31,32,44,48],also:[1,2,3,4,8,9],altern:4,alwai:[9,45],amd:8,amen:9,amount:8,ampl:9,an:[1,2,3,4,8,9,10,15,16,17,18,19,33,34,35,36],analog:1,analysi:[8,9],analyz:9,ancourt1991:9,ancourt:9,ani:[1,2,3,9,11,12,47],anoth:[2,9],anytim:11,apart:9,api:47,appear:47,appli:[3,4,8,9],applic:[4,9,12],approach:[8,9],appropri:1,approxim:2,ar:[0,1,2,3,4,8,9,11,13,24,30,43,45,47],arang:[1,2,3,4],arbitrari:3,architectur:[3,8],area:9,arg:[1,2,3,12,47],argument:[1,2,3,10,11,12,13,45,47],arrai:[9,46],arrang:3,art:[8,9],artifici:4,arxiv:[8,9],ask:2,aspect:9,asplo:8,assert:[1,3,4],assum:[2,47],asynchron:[1,8],atom:[15,16,17,18,19],auguin1983:8,auguin:8,auto:[2,3,9,10,11,12],autom:8,automat:[2,3,8,9,10],autotun:[3,9],avail:[0,4,8,9],avoid:[2,11,45],awar:8,awkward:4,axi:[1,2,3,4,26,28,31,32,44,47],b:[3,8,9],b_ptr:3,back:[1,2,3,4],backpropag:4,bad:4,baghdadi2021:[8,9],baghdadi:[8,9],balanc:9,bandwidth:2,base:[4,7,8,9],basic:[1,5,9],becom:8,been:[1,8,9],befor:[3,11,15,16,17,18,19],begin:9,behavior:[9,11],being:[2,4],believ:9,below:[4,5,9],bench:0,benchmark:[0,48,49],benefit:[2,8,9],best:[1,8],between:[1,8],bit:4,block:[1,2,3,4,8,9,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,33,34,35,36,37,38,39,40,41,42,43,44,45,46],block_siz:[1,2,4,9,11,12],block_size_k:3,block_size_m:3,block_size_n:3,block_start:[1,4],blue:[1,2,3],boil:9,bool:[45,47],both:[9,45],bound:[1,2,3,9],branch:9,broad:8,broadcast:[20,24,43,45],build:[0,3],built:[1,9],c:[3,8,9],c_mask:3,c_ptr:3,cach:[8,9],call:[1,3,9,13,34],callabl:[1,12,13,48],can:[0,1,2,3,4,8,9,11,49],cannot:[3,8,9],capabl:[7,8],cd:0,cdiv:[1,3,4],ceil:12,certain:12,cgo:[8,9],challeng:4,chang:[3,4,11],chapter:7,characterist:9,cheap:8,check:[3,7],checkpoint:4,chen2018:8,chen:8,chip:2,choic:7,click:[1,2,3,4],clone:0,close:9,cmake:0,cmp:[15,16,17,18,19],coalesc:8,code:[1,2,3,4,5,8,9],col:[3,9],col_offset:2,color:47,column:[2,3],com:0,combin:8,come:[2,3,9],command:0,common:9,commonli:9,compar:[2,3,4,7,9,15,16,17,18,19],compat:22,compil:[2,3,7,8,10,13,30],complet:9,complex:9,compos:[4,8],composit:9,comprehens:[8,9],comput:[4,7,8,9,12,21,23,25,27,29,39,40,41,42],computation:[8,9],concern:9,concis:[1,47],condit:[9,45],config:[3,11],configur:[3,10,11,49],confirm:2,connectom:8,consecut:9,consequ:8,consid:2,consist:4,constraint:[3,9],construct:8,constructor:47,consum:3,contain:[9,15,16,17,18,19,47],contextu:9,contigu:[3,14,37],control:[8,9],conveni:3,convert:[1,3,13],convolut:8,cooper:10,copi:[4,8,15,16,17,18,19],core:[8,9],correct:1,correspond:[1,2,3,47],cosin:21,cost:9,could:[2,9],cours:8,cpython:0,creat:[1,2,3,8],crucial:4,csv:1,cubla:[3,8],cuda:[1,2,3,4,8],cudnn:8,current:32,custom:[1,2,3,7],cut:3,cvpr:8,d:[2,4,11,13],dart:9,darte1999:9,data:[1,3,4,8,9,15,16,17,18,19,24,45,46],data_ptr:13,dataflow:9,david:4,deal:4,decad:8,declar:1,decompos:9,decor:[1,3,11,12,13],decreas:4,dedic:3,deep:[3,4,8,9],def:[1,2,3,4,11,12],defin:[1,2,3,9,24],definit:9,denomin:2,denot:1,dens:9,depend:[0,9,45],deploi:8,describ:[4,9],design:9,desir:[20,38],detail:[3,9],detect:8,develop:[8,9],devic:[1,2,3],dialect:9,dict:12,dictionari:[10,12],diesel:9,differ:[1,2,3,4,8,9,47],difficult:9,difficulti:[3,8],dijkstra82:9,dijkstra:9,dim:[2,9],dimens:[3,22,26,28,44],dimension:[3,9,22],dir:0,direct:3,disjoint:9,disk:1,dissert:9,distribut:[2,4,9],divis:3,dnn:[7,8,9],do_bench:[1,2,3],doc:4,doe:[1,2,3,9],doesn:9,domain:[8,9],don:[1,2,3],done:[3,8,26,28,44],dot:3,doubli:3,doubt:9,down:[3,9],download:[0,1,2,3,4,5],dram:[1,2],dropout:[5,6],dror:4,dsl:[7,8,9],dtype:[1,2,3,15,16,17,18,19,24,43,46],e:[0,2,3,4,8,9,46],each:[1,2,3,4,8,9,10,12],eas:9,easi:[3,4],easier:[1,2,8],easili:3,ed:[1,3],education:2,effect:9,effici:[3,4,8,35],effort:9,either:[1,31,32,45],elango2018:9,elango:9,element:[1,2,3,4,21,23,25,26,27,28,29,39,40,41,42,43,44,45,47],element_s:2,element_ti:[15,16,17,18,19,24,43],elementwis:[2,24],els:3,emerg:8,empti:3,empty_lik:[1,2,4],enabl:9,encod:9,encourag:4,end:[8,9,14],enforc:9,engin:9,enqueu:[1,2],ensur:9,entir:9,entri:35,environ:7,equal:[2,9],error:3,especi:8,et:[4,8,9],euromicro:8,evalu:[3,4,11,45],even:[4,9],evidenc:8,evolv:8,exampl:[1,2,3,4,5,8,9,10],exchang:19,execut:[6,8,9,10,49],exist:[8,9],exp:2,expect:[2,15,16,17,18,19],expens:[8,9,12],explor:[4,8],exponenti:[2,23],express:[8,9],extar:1,extend:[3,4],extract:3,extrem:9,f:[1,2,3,9],facilit:[8,9],fact:9,fairli:3,fals:[24,43,45,47],far:2,fast:[2,8,9],faster:[2,34],fastest:9,feel:3,fetch:8,few:9,field:8,figur:9,file:[1,2,3,6],fill:46,fine:4,first:[1,3,4,7,9,22,27,29],first_pid_m:3,firstli:4,fit:2,fix:47,flag:2,flatten:37,flexibl:8,float16:[3,22,46],float32:[1,2,3,4,22,33,36],flow:[8,9],fly:4,fn:[13,48],focu:[3,9],folder:4,follow:[0,2,3,7,8,9],footprint:4,forc:4,forget:1,formal:9,format:9,found:[15,16,17,18,19],foundat:9,four:35,fp16:3,fp32:3,frac:4,framework:[8,9],free:3,from:[1,2,3,4,8,9,24,45],full:[1,2,3,4],fulli:9,func:9,fundament:9,further:[4,9],fuse:[3,5,6],fusion:[2,9],g:[3,4,8,9,46],galleri:[1,2,3,4,5],gb:[1,2],gbp:[1,2],gener:[1,2,3,4,5,8,9,33,34,35,36,47],geoffrei:4,geq:9,get:[1,2,3,4,6],girbal2006:9,girbal:9,git:0,github:0,give:8,given:[2,3,4,20,31,32,33,34,35,36,38,46],global:9,go:[1,3,9],good:[1,9],gpgpu:8,gpu:[1,2,4,7,8,9,10,13],grad_to_non:48,gradient:48,grammat:9,graphic:8,greater:2,green:[1,2,3],grid:[1,2,3,4,31,32],grid_m:3,grid_n:3,grosser2012:9,grosser:9,group:3,group_id:3,group_m:3,group_size_m:3,grow:9,guard:[1,2],guid:8,ha:[1,3,4,8,9,31,32],had:1,halid:[8,9],hand:9,handl:[1,2,4,9],handwritten:8,hard:3,harder:9,hardwar:[3,7,9],hasn:1,have:[2,4,8,9,13,22,45,47],heavi:8,helper:[1,2],henc:3,here:[1,2,3,4],heurist:2,hierarch:8,hierarchi:9,high:[3,8,9],higher:3,highli:8,highlight:9,hint:9,hinton:4,hit:3,how:[1,2,3,7,8,12],howev:[2,9],html:4,http:[0,4],i:[1,2,3,4,8,9],id:[3,32],idea:8,ideal:2,ident:2,identifi:1,idx:[24,43],ilya:4,imag:[8,9],implement:[1,2,3,4,8,9],implicitli:[1,13,24,43],importantli:9,impos:9,improv:[3,4],incompat:[3,9],incorrect:3,increas:[1,2,3,4],incred:8,increment:9,inde:9,independ:[2,9],index:1,indic:[9,45],induc:9,industri:8,inequ:9,inf:2,inform:9,infrastructur:9,initi:[1,3],inner:[3,22],inplac:3,input:[1,2,3,4,9,12,20,21,22,23,25,26,27,28,29,30,37,38,39,40,41,42,44],input_ptr:2,input_row_strid:2,instal:7,instanc:[1,2,3,4,8,10,31,32],instanti:4,instead:[2,45],instruct:[7,8],int1:[24,43],int32:[4,34,35],integ:9,interchang:9,interest:[8,9],intermedi:9,intern:[2,9],interv:14,intrins:9,introduc:4,introduct:7,invari:[2,9],invoc:4,ipynb:[1,2,3,4],ir:9,irregular:[2,9],is_contigu:[3,4],is_cuda:1,isn:3,issu:[8,9],iter:[3,8,9],its:[1,2,3,9],j:[3,8,9],jit:[1,2,3,4,11,12],jmlr:4,john:4,johnson:4,journal:9,jrk2013:8,jupyt:[1,2,3,4,5],just:[3,9,12],k:[3,4,8,9],kb:8,keep:4,kei:[3,8,11],kellei:8,kernel:[4,7,8,10,11,12],keyword:[1,10],ki:9,kind:2,know:30,known:9,krizhevski:4,label:[1,2,3,47],lam1991:8,lam:8,lambda:[1,2,3,4,12],languag:[1,2,3,4,7,8,13],larg:[8,9],last:3,later:[2,9],latest:0,lattner2004:9,lattner2019:9,lattner:9,launch:[1,2,3,31,32],law:9,layer:[8,9],lead:[4,8,9],leaky_relu:3,leakyrelu:3,learn:[1,2,3,4,7,8,9],least:9,lee2017:8,lee:8,left:9,legal:9,length:1,less:[4,8,9],let:[1,2,4,30],letter:9,level:[3,8,9],li:8,librari:[0,3,8,9],lifelong:9,like:[1,4,8,9,34],limit:[2,4],lindenstrauss:4,line:[1,2,3,4,9,47],line_arg:[1,2,3,47],line_nam:[1,2,3,47],line_v:[1,2,3,47],linear:[8,9],link:0,list:[1,3,11,12,47,48,49],litteratur:9,ll:4,llvm11:0,llvm:[0,9],load:[1,2,3,4,9,45],local:[8,9],locat:[3,15,16,17,18,19,24,43],log2:12,log:47,logarithm:[1,25],look:[4,7,8],loop:[3,9,10],low:[5,6,9],m:[0,2,3,8],machin:[8,9],machineri:[8,9],made:8,mai:[2,9,12],main:[3,8,9],maintain:[2,9],major:[3,9],make:[1,2,8,9],manag:[4,8],mani:[1,8,9],manual:[2,9],manual_se:[1,2,3],map:3,mapl:9,mark:[4,49],markedli:8,mask:[1,2,3,4,15,17,18,19,24,43,45],match:[3,15,16,17,18,19],math:12,mathbb:9,mathbf:9,mathcal:[9,36],mathemat:9,matmul:[3,9],matmul_kernel:3,matric:[2,3],matrix:[2,4,5,6,8,9,10,22],matrix_s:9,matter:[3,8,9],max:[1,2,17],max_m:[1,2,3],maxim:[7,9,35],maximum:[1,2,26],mb:[6,8],mean:[3,9,11],mechan:[2,9],median:48,memori:[1,2,3,5,6,8,9,15,16,17,18,19,24,43,45],mention:3,meta:[1,2,3,4,10,11,12],metaparamet:1,method:[9,10,13,47,49],methodolog:9,micro:8,min:[3,18],min_m:[1,2,3],minimum:28,minut:[1,2,3,4],miss:9,mitig:9,ml:8,mlir:9,mn:2,model:[1,8,9],modern:[3,7,8,9],modular:9,moor:9,mora:4,more:[2,3,4,7,8,9,47],most:[3,9],mostli:10,move:3,movement:4,ms:[1,2,3,48],much:[2,3],mullapudi2016:9,mullapudi:9,multi:[3,8,9],multipl:[1,4,5,6,8,9,10,11,30,34],multipli:[3,4,9,22],must:[2,3,14,22,45],n:[2,3,8,36],n_col:2,n_element:[1,4],n_row:2,naiv:[2,4],naive_softmax:2,name:[1,2,3,11,12,47],nativ:[1,2,3],natur:[2,8,25],nb:8,necessari:2,need:[1,2,3,4,34],nelement:2,nest:[3,9],net:9,network:[4,8,9],neural:[4,8,9],neurosci:8,never:4,next:[2,3],next_power_of_2:2,nightli:0,nip:8,nitish:4,nn:3,non:8,none:[2,3,11,15,17,18,19,24,43,47,48],nonzero:45,norm:4,normal:[2,3],note:[0,1,2,3,4,9,11,13,45],notebook:[1,2,3,4,5],notic:[2,9],notori:[3,8],novel:8,now:[1,3],num_pid_in_group:3,num_pid_m:3,num_pid_n:3,num_stag:[3,10],num_warp:[2,3,10,11],number:[1,2,3,4,9,10,31,33,34,35,36],numel:[1,4],numer:[2,8],nvidia:8,o:[2,4],object:[1,3,8,10,11,13,15,16,17,18,19],obtain:1,obvious:2,occur:9,offer:8,offici:0,offs_am:3,offs_bn:3,offs_cm:3,offs_cn:3,offs_k:3,offset:[1,4,33,34,35,36],often:3,omega:9,onc:[2,8,9],one:[2,3,4,5,8,9,47],onli:[2,3,4,8,9,13],op:[1,2],open:14,openai:0,opencl:8,oper:[1,2,3,4,5,8,15,16,17,18,19,45],opportun:8,opsila:8,optim:[8,9],option:[1,3,24,43,47,48],order:[2,3,5,9],org:4,origin:9,osdi:8,other:[2,3,4,7,9,13,22,24,27,29],otherwis:[4,45],our:[1,2,3,8],out:[1,2,3,4,7,9],outlin:9,output2:4,output3:4,output:[1,2,3,4],output_ptr:[1,2,4],output_row_start_ptr:2,output_row_strid:2,output_torch:1,output_triton:1,over:[2,4,8,9],overfit:4,overflow:2,own:3,p:[4,9],pa:3,packag:13,pact:9,pad:2,par:3,paradigm:[8,9],paragraph:4,parallel:[1,2,3,4,7,8,9,10],paralleliz:8,param:12,paramet:[1,3,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49],parametr:8,part:[3,4,9],particular:[2,3],particularli:[8,9],partit:8,pass:[1,9,10],past:[8,9],path:1,pattern:8,pb:3,peak:9,per:[2,4],percentil:48,perf:3,perf_report:[1,2,3,47],perform:[1,2,4,8,9,15,16,17,18,19,48],persist:4,person:9,perspect:9,phase:9,philosophi:9,philox:[4,35],pid:[1,3,4],pid_m:3,pid_n:3,pip:0,pipelin:[8,9,10],platform:[7,9],pldi:8,plot:[0,1,2,3,47],plot_nam:[1,2,3,47],pmatrix:9,point:[1,9,35],pointer:[1,2,4,13,15,16,17,18,19,24,43],pointerdtyp:[15,16,17,18,19,24,43],polli:9,polyhedr:8,polyhedra:9,popular:9,portabl:[8,9],pose:8,posit:12,possibl:[1,2,3,9,10],power:[2,4,9,12,14],ppopp:9,practic:[1,2,3,8],pragma:8,pre:[0,8],prealloc:1,predict:9,prefer:2,premis:8,present:[0,3],preserv:9,preserve_rng_st:4,prevent:[4,9],primer:9,primit:[8,13],principl:9,print:[1,2,3,4],print_data:[1,2,3],prng:4,probabl:[4,9],problem:1,problemat:9,procedur:9,process:[1,8,9],processor:8,produc:[3,4],product:[7,9,22],program:[1,2,3,4,7,8,31,32],program_id:[1,2,3,4],programm:[8,9],prohibitev:12,project:[4,8],promot:[3,9],properli:2,properti:9,propos:8,proprietari:3,provid:[1,2,3,4,7,9,11,26,28,44,48],prune:4,pseudo:[3,4,35],pseudorandom:4,ptr:3,purpos:[8,9],push:9,put:4,py:[0,1,2,3,4,6],pypi:0,pytest:0,python:[1,2,3,4,5,13],pytorch:[1,2,4],qquad:9,r:2,ragan:8,rand:[1,4],randint4x:34,randn:[2,3,4],random:[4,33,34,35,36],randomli:4,rang:[1,2,3,8,9],rapidli:[8,9],rate:3,rather:8,raw:1,rdom:9,re:[1,3],read:[2,3,5],reader:9,real:8,reason:9,recent:8,recommend:5,recomput:[4,8],rectifi:8,redmon2016:8,redmon:8,reduct:[2,26,28,44],refer:1,regardless:[4,45],regim:4,regrett:8,regular:[4,9],rel:[1,9],relat:7,releas:[0,8],reli:9,relu:3,remain:[8,47],rememb:3,reorder:9,rep:48,repetit:48,repres:[2,3,9,10],requir:[2,4,9],research:[8,9],reset:[11,48],reset_to_zero:11,resolut:9,resourc:8,resp:9,respect:9,restrict:9,result:[0,1,2,8,9],ret:2,retriev:9,reus:3,revisit:8,right:9,rise:9,role:9,ron:4,root:42,roughli:3,row:[2,3,4],row_idx:2,row_minus_max:2,row_start_ptr:2,run:[0,1,2,3,4,7,9,11,13,49],runtim:[9,48],ruslan:4,rvar:9,s:[1,2,4,9,35],said:9,salakhutdinov:4,salmon2011:4,salmon:4,same:[4,8,47],sato2019:9,sato:9,save:[1,2,3],save_path:1,sc:9,scalabl:9,scalar:[4,8,22,33,34,35,36,46],scale:47,scan:9,schedul:8,scienc:9,scientif:9,scop:9,scope:9,script:[0,1,2,3,4],second:[1,2,3,4,9,22,27,29],secondli:4,section:[3,9],see:[1,2,3,4,9],seed:[33,34,35,36],seeded_dropout:4,seem:[1,9],select:[8,9,45],self:[10,47],semant:9,semi:9,sens:[1,8,9],separ:9,sequenc:8,set:[1,4,9],setup:0,sever:[8,9],shall:9,shape:[2,3,4,9,20,24,38,43,45,46],share:8,shaw:4,shift:2,should:[1,3,8,9,10,26,28,44,47],show_plot:[1,2,3],shown:9,side:9,sight:9,signal:8,significantli:2,sigplan:9,simd:8,simpl:[1,2,3,4],simplest:5,simpli:9,simplic:3,simplifi:4,sinc:[1,2,3],sine:40,singl:[2,4,8,34],size:[1,2,4,9],slower:[8,9],slowest:9,sm80:10,sm:9,smaller:[3,4],smallest:[2,12],snemi3d:8,so:[1,2,3,4,9],softmax:[4,5,6],softmax_kernel:2,softmax_output:2,softwar:10,solid:9,solut:3,solv:9,some:3,sometim:9,sourc:[1,2,3,4,5,9],space:[8,9],spars:[4,8,9],spatial:9,speak:3,special:8,specif:[3,8],specifi:[9,12,15,16,17,18,19,43],speed:2,sphinx:[1,2,3,4,5],split:9,spmd:[1,8,9],squar:42,sram:[2,3],srivastava2014:4,srivastava:4,stabil:2,stabl:0,stage:10,standard:9,start:[5,14],started_tutori:6,state:[4,8,9],statement:9,step:9,still:[1,2,3,9],stop:14,store:[1,2,3,4,15,16,17,18,19,45],str:[11,12,47],straightforward:3,strategi:[4,9],stream:34,strength:8,stride:[2,3,4],stride_ak:3,stride_am:3,stride_bk:3,stride_bn:3,stride_cm:3,stride_cn:3,stride_xi:3,stride_xj:3,structur:[8,9],style:[1,2,3,47],subscript:9,substanti:8,substract:2,subtract:2,successfulli:9,suffer:9,suit:8,sum:[1,2],superhuman:8,support:[4,9],sure:2,surprisingli:8,surround:9,suspicion:2,sutskev:[4,8],sutskever2014:8,swap:[15,16,17,18,19],swizzl:8,synchron:[1,8],system:[0,3,8,9],t:[1,2,3,9],t_:9,tabul:4,taco:9,take:[3,4,7,12],taken:9,target:8,techniqu:[3,8,9],temperatur:4,tempor:9,tend:9,tension:8,tensor:[1,2,3,4,8,9,11,13,48],tensorrt:8,test:[0,1,7],text:9,tflop:3,th:48,than:[2,3,8,9,34,47],thei:[3,8,9],them:1,themselv:3,theoret:2,therebi:9,therefor:3,theta:9,theta_:9,thi:[1,2,3,4,8,9,11,12,13,35,47],thing:[1,4],think:2,those:2,though:[8,9],thought:9,thread:[2,8,10],through:[5,9],throughout:[9,47],throughput:7,tile:9,time:[0,1,2,3,4,8,9,11,34,48],tiramisu:[8,9],tl:[1,2,3,4],tmp:0,tog:9,togeth:4,tolist:4,topic:9,torch:[1,2,3,4,13,48],torch_output:3,torch_relu:3,total:[1,2,3,4,6],tradit:[4,8,9],transform:[4,9],travers:9,trend:8,tri:[20,38],trick:2,tricki:4,trigger:[3,11],triton:[0,1,2,3,4,5,8,9],triton_output:3,trivial:8,tune:[2,3,9,11,12],tuner:10,tupl:[1,20,38,46],tutori:[1,2,3,4,7],tutorials_jupyt:5,tutorials_python:5,tvm:[8,9],two:[1,2,3,9,11,12,14,22],type:[12,22,45,46],typecast:[24,43],typic:9,u:[0,33],un:9,uncommon:9,underneath:9,understand:2,undesir:11,unfortun:[3,9],unifi:8,uniformli:4,unint:45,unit:[0,8],univers:9,unrol:9,up:2,updat:[3,9,11],us:[1,2,3,4,8,9,10,11,12,13,34,45,47,49],util:1,v100:9,val:[15,16,17,18,19],valid:1,valu:[1,2,3,4,11,12,14,15,16,17,18,19,21,23,24,25,26,28,30,39,40,41,42,43,44,45,46,47,49],valuabl:2,variabl:[3,10],variant:8,variou:5,vasilach:[8,9],vasilache2018:[8,9],vast:9,vec:9,vector:[4,5,6,8,9],vendor:3,veri:[2,4,9],verif:9,verifi:[2,9],via:9,view:37,visibl:9,vision:8,vs:0,w:9,wa:4,wai:[2,3,4],want:[2,4,45],warmup:48,warp:[2,10],wast:2,we:[1,2,3,4,8,9],well:[4,8,9],whatev:11,wheel:0,when:[2,3,4,8,9,10,11,13,45],where:[1,3,4,9,12,43],whether:[8,47],which:[1,2,3,4,8,9,11,26,28,44,47],whose:[1,2,3,4,9,11,24],wide:9,wise:[1,2,21,23,25,27,29,39,40,41,42,43],wish:[3,9],within:[3,13,14],without:9,wolf:9,wolfe1989:9,won:2,word:9,work:[2,4,7,8],workload:[3,10],wors:[3,8,9],would:[1,2,4],wouldn:9,wrapper:3,write:[1,2,3,4,5,7,9],wrote:2,x:[1,2,3,4,9,21,23,25,27,29,37,39,40,41,42,45,47],x_keep:4,x_keep_ptr:4,x_log:[1,47],x_max:2,x_name:[1,2,3,47],x_ptr:[1,4,11,12],x_size:[11,12],x_val:[1,2,3,47],xi:9,xii:9,xlabel:47,xo:9,y:[1,2,3,9,27,29,45,47],y_log:47,y_name:[1,2],y_ptr:1,y_torch:2,y_triton:2,year:9,yet:[8,9],yi:9,yield:45,yii:9,ylabel:[1,2,3,47],yo:9,you:[0,1,2,3,4,5,8,11,34,45],your:[0,1,7],yourself:[2,3],z:[1,2,9],zero:[3,4,11],zip:5},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Low-Memory Dropout","Tutorials","Computation times","Welcome to Triton\u2019s documentation!","Introduction","Related Work","triton.Config","triton.autotune","triton.heuristics","triton.jit","triton.language.arange","triton.language.atomic_add","triton.language.atomic_cas","triton.language.atomic_max","triton.language.atomic_min","triton.language.atomic_xchg","triton.language.broadcast_to","triton.language.cos","triton.language.dot","triton.language.exp","triton.language.load","triton.language.log","triton.language.max","triton.language.maximum","triton.language.min","triton.language.minimum","triton.language.multiple_of","triton.language.num_programs","triton.language.program_id","triton.language.rand","triton.language.randint","triton.language.randint4x","triton.language.randn","triton.language.ravel","triton.language.reshape","triton.language.sigmoid","triton.language.sin","triton.language.softmax","triton.language.sqrt","triton.language.store","triton.language.sum","triton.language.where","triton.language.zeros","triton.testing.Benchmark","triton.testing.do_bench","triton.testing.perf_report","triton","triton.language","triton.testing"],titleterms:{"final":3,addit:1,advantag:9,algebra:51,api:7,arang:14,arithmet:3,atom:51,atomic_add:15,atomic_ca:16,atomic_max:17,atomic_min:18,atomic_xchg:19,autotun:11,baselin:4,benchmark:[1,2,3,47],binari:0,broadcast_to:20,cach:3,challeng:8,co:21,comparison:51,compil:[9,51],comput:[1,2,3,6],config:10,creation:51,distribut:0,do_bench:48,document:7,dot:22,dropout:4,exercis:4,exp:23,from:0,further:7,fuse:2,gener:51,get:7,go:7,heurist:12,hint:51,index:51,instal:0,introduct:8,jit:13,kernel:[1,2,3],l2:3,languag:[9,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,51],limit:9,linear:51,load:24,log:25,low:4,manipul:51,math:51,matrix:3,max:26,maximum:27,memori:[4,51],min:28,minimum:29,model:51,motiv:[2,3,8],multipl:3,multiple_of:30,num_program:31,number:51,op:51,optim:3,packag:0,perf_report:49,perform:3,pointer:3,polyhedr:9,program:[9,51],program_id:32,python:[0,7],rand:33,randint4x:35,randint:34,randn:36,random:51,ravel:37,reduct:51,refer:[4,8,9],relat:9,represent:9,reshap:38,result:3,s:7,schedul:9,seed:4,shape:51,sigmoid:39,sin:40,softmax:[2,41],sourc:0,sqrt:42,squar:3,start:7,store:43,sum:44,test:[2,3,47,48,49,52],time:6,triton:[7,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52],tutori:5,unit:[2,3],vector:1,welcom:7,where:45,work:9,zero:46}})