Files
triton/master/searchindex.js
2022-06-27 00:48:22 +00:00

1 line
35 KiB
JavaScript

Search.setIndex({docnames:["getting-started/installation","getting-started/tutorials/01-vector-add","getting-started/tutorials/02-fused-softmax","getting-started/tutorials/03-matrix-multiplication","getting-started/tutorials/04-low-memory-dropout","getting-started/tutorials/05-layer-norm","getting-started/tutorials/index","getting-started/tutorials/sg_execution_times","index","programming-guide/chapter-1/introduction","programming-guide/chapter-2/related-work","python-api/generated/triton.Config","python-api/generated/triton.autotune","python-api/generated/triton.heuristics","python-api/generated/triton.jit","python-api/generated/triton.language.arange","python-api/generated/triton.language.atomic_add","python-api/generated/triton.language.atomic_and","python-api/generated/triton.language.atomic_cas","python-api/generated/triton.language.atomic_max","python-api/generated/triton.language.atomic_min","python-api/generated/triton.language.atomic_or","python-api/generated/triton.language.atomic_xchg","python-api/generated/triton.language.atomic_xor","python-api/generated/triton.language.broadcast_to","python-api/generated/triton.language.cos","python-api/generated/triton.language.dot","python-api/generated/triton.language.exp","python-api/generated/triton.language.load","python-api/generated/triton.language.log","python-api/generated/triton.language.max","python-api/generated/triton.language.maximum","python-api/generated/triton.language.min","python-api/generated/triton.language.minimum","python-api/generated/triton.language.multiple_of","python-api/generated/triton.language.num_programs","python-api/generated/triton.language.program_id","python-api/generated/triton.language.rand","python-api/generated/triton.language.randint","python-api/generated/triton.language.randint4x","python-api/generated/triton.language.randn","python-api/generated/triton.language.ravel","python-api/generated/triton.language.reshape","python-api/generated/triton.language.sigmoid","python-api/generated/triton.language.sin","python-api/generated/triton.language.softmax","python-api/generated/triton.language.sqrt","python-api/generated/triton.language.store","python-api/generated/triton.language.sum","python-api/generated/triton.language.where","python-api/generated/triton.language.zeros","python-api/generated/triton.testing.Benchmark","python-api/generated/triton.testing.do_bench","python-api/generated/triton.testing.perf_report","python-api/triton","python-api/triton.language","python-api/triton.testing"],envversion:{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":4,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinx.ext.intersphinx":1,sphinx:56},filenames:["getting-started/installation.rst","getting-started/tutorials/01-vector-add.rst","getting-started/tutorials/02-fused-softmax.rst","getting-started/tutorials/03-matrix-multiplication.rst","getting-started/tutorials/04-low-memory-dropout.rst","getting-started/tutorials/05-layer-norm.rst","getting-started/tutorials/index.rst","getting-started/tutorials/sg_execution_times.rst","index.rst","programming-guide/chapter-1/introduction.rst","programming-guide/chapter-2/related-work.rst","python-api/generated/triton.Config.rst","python-api/generated/triton.autotune.rst","python-api/generated/triton.heuristics.rst","python-api/generated/triton.jit.rst","python-api/generated/triton.language.arange.rst","python-api/generated/triton.language.atomic_add.rst","python-api/generated/triton.language.atomic_and.rst","python-api/generated/triton.language.atomic_cas.rst","python-api/generated/triton.language.atomic_max.rst","python-api/generated/triton.language.atomic_min.rst","python-api/generated/triton.language.atomic_or.rst","python-api/generated/triton.language.atomic_xchg.rst","python-api/generated/triton.language.atomic_xor.rst","python-api/generated/triton.language.broadcast_to.rst","python-api/generated/triton.language.cos.rst","python-api/generated/triton.language.dot.rst","python-api/generated/triton.language.exp.rst","python-api/generated/triton.language.load.rst","python-api/generated/triton.language.log.rst","python-api/generated/triton.language.max.rst","python-api/generated/triton.language.maximum.rst","python-api/generated/triton.language.min.rst","python-api/generated/triton.language.minimum.rst","python-api/generated/triton.language.multiple_of.rst","python-api/generated/triton.language.num_programs.rst","python-api/generated/triton.language.program_id.rst","python-api/generated/triton.language.rand.rst","python-api/generated/triton.language.randint.rst","python-api/generated/triton.language.randint4x.rst","python-api/generated/triton.language.randn.rst","python-api/generated/triton.language.ravel.rst","python-api/generated/triton.language.reshape.rst","python-api/generated/triton.language.sigmoid.rst","python-api/generated/triton.language.sin.rst","python-api/generated/triton.language.softmax.rst","python-api/generated/triton.language.sqrt.rst","python-api/generated/triton.language.store.rst","python-api/generated/triton.language.sum.rst","python-api/generated/triton.language.where.rst","python-api/generated/triton.language.zeros.rst","python-api/generated/triton.testing.Benchmark.rst","python-api/generated/triton.testing.do_bench.rst","python-api/generated/triton.testing.perf_report.rst","python-api/triton.rst","python-api/triton.language.rst","python-api/triton.testing.rst"],objects:{"triton.Config":{__init__:[11,1,1,""]},"triton.language":{arange:[15,2,1,""],atomic_add:[16,2,1,""],atomic_and:[17,2,1,""],atomic_cas:[18,2,1,""],atomic_max:[19,2,1,""],atomic_min:[20,2,1,""],atomic_or:[21,2,1,""],atomic_xchg:[22,2,1,""],atomic_xor:[23,2,1,""],broadcast_to:[24,2,1,""],cos:[25,2,1,""],dot:[26,2,1,""],exp:[27,2,1,""],load:[28,2,1,""],log:[29,2,1,""],max:[30,2,1,""],maximum:[31,2,1,""],min:[32,2,1,""],minimum:[33,2,1,""],multiple_of:[34,2,1,""],num_programs:[35,2,1,""],program_id:[36,2,1,""],rand:[37,2,1,""],randint4x:[39,2,1,""],randint:[38,2,1,""],randn:[40,2,1,""],ravel:[41,2,1,""],reshape:[42,2,1,""],sigmoid:[43,2,1,""],sin:[44,2,1,""],softmax:[45,2,1,""],sqrt:[46,2,1,""],store:[47,2,1,""],sum:[48,2,1,""],where:[49,2,1,""],zeros:[50,2,1,""]},"triton.testing":{Benchmark:[51,0,1,""],do_bench:[52,2,1,""],perf_report:[53,2,1,""]},"triton.testing.Benchmark":{__init__:[51,1,1,""]},triton:{Config:[11,0,1,""],autotune:[12,2,1,""],heuristics:[13,2,1,""],jit:[14,2,1,""]}},objnames:{"0":["py","class","Python class"],"1":["py","method","Python method"],"2":["py","function","Python function"]},objtypes:{"0":"py:class","1":"py:method","2":"py:function"},terms:{"0":[1,2,3,4,5,7,9,10,35,36,37,40,50,52],"00":7,"0000":3,"000000":2,"000001":[1,2],"000002":[2,5],"004273":1,"005597":5,"007961":2,"01":[1,3,7],"02":[2,7],"023256":5,"025776":3,"028308":3,"03":[3,7],"033565":3,"04":[4,7],"05":[5,7],"057651":3,"0625":3,"064941":3,"076":[2,7],"078767":5,"08199":4,"08452":4,"084721":1,"092307":5,"0938":3,"096442":3,"097543":2,"0f":10,"0s":4,"1":[1,2,3,4,5,8,10,13,35,36,37,40],"10":[1,3,4,5,7,37,38,39,40],"100":[2,52],"1024":[1,3,4,5,12],"10240":5,"1045":3,"1048576":1,"106434":4,"10752":5,"11":[0,1,3,5],"11264":5,"114273":5,"1151":5,"1152":3,"11776":5,"12":[1,3,5],"12160":2,"12288":[2,5],"123":4,"12416":2,"12544":2,"12672":2,"126988":5,"127":1,"128":[1,2,3,5,12],"1280":3,"12800":5,"13":[1,3,5],"131072":1,"1328":3,"13312":5,"133347":2,"134217728":1,"134567":5,"13686":4,"13824":5,"138541":3,"138932":3,"139506":3,"14":[1,3,5],"140799":3,"1408":3,"142849":[2,5],"142862":2,"14336":5,"147202":3,"14848":5,"149375":2,"149397":4,"15":[1,3,5],"153":2,"1536":[3,5],"15360":5,"153868":5,"154":2,"154861":3,"155572":3,"15872":5,"16":[2,3,5,7,10,50],"160":2,"161033":3,"162":2,"16384":1,"1664":3,"16777216":1,"17":[3,5],"171410":5,"173427":5,"177767":5,"17879":4,"1792":3,"179335":3,"179533":2,"18":[2,3,5,7],"180725":3,"180982":5,"1823":2,"185232":3,"185964":5,"188":2,"189190":3,"19":[1,3,5],"190482":1,"192":1,"1920":3,"192434":5,"198":2,"1982":10,"1983":9,"1984":10,"1989":10,"199":2,"1991":[9,10],"1999":10,"1d":[1,2,3],"1e":[1,2,3,5],"1s":4,"2":[1,2,3,4,5,8,10,11,13,35,36,52],"20":[3,5,52],"200000":1,"200001":3,"2004":10,"2006":10,"2011":4,"2012":10,"2013":9,"2014":[4,9],"2016":[9,10],"2017":9,"2018":[9,10],"2019":10,"2021":[9,10],"2048":[2,3,5],"206879":2,"2097152":1,"209928":2,"21":[3,5],"212868":4,"2141":1,"214186":4,"214870":5,"216187":2,"2176":3,"219":1,"22":[3,5],"220":3,"222812":2,"225175":5,"23":[3,5],"2304":3,"233074":3,"237267":5,"24":[3,5,7],"242181":3,"2432":3,"245":3,"25":[3,5,52],"251457":5,"254540":5,"256":[1,2,3,5,11],"2560":[3,5],"259080":3,"26":[3,5],"260869":3,"262144":1,"264875":2,"2656":3,"2688":3,"269882":5,"27":[3,5],"277":5,"28":[1,3,5],"2812":3,"2816":3,"2891":3,"29":[3,5],"293429":4,"293714":5,"2944":3,"297068":5,"298541":2,"299883":3,"2d":[3,5,26],"2m":2,"2mn":2,"3":[0,1,2,3,4,5,10],"30":3,"304904":5,"305746":3,"3072":[3,5],"3076":1,"31":3,"3125":3,"313806":5,"315309":5,"32":[3,5,11],"3200":3,"321838":2,"323":5,"323259":3,"32768":1,"3281":3,"33":3,"3328":3,"333321":1,"334":5,"33554432":1,"337026":5,"34":[1,3,7],"341":1,"34172":4,"3438":3,"3456":3,"3477":3,"347810":5,"35":3,"3516":3,"355034":3,"3555":3,"357796":5,"3584":[3,5],"359066":2,"36":3,"360017":2,"360920":5,"362445":1,"363633":5,"365":5,"367358":5,"367588":3,"368435":5,"369452":5,"37":3,"3712":3,"3713":1,"371721":4,"372":5,"372618":3,"372800":3,"373":5,"373605":3,"374":5,"375":5,"376":5,"377":5,"377770":5,"378":5,"379":5,"38":1,"380":5,"380953":3,"380957":5,"381":5,"382":5,"383":5,"384":[1,2,3,5],"3840":3,"387":5,"387087":5,"388":5,"389":5,"389355":5,"389441":3,"389457":5,"39":3,"3906":3,"393":5,"393507":3,"396":5,"396572":3,"3968":3,"397":5,"3984":3,"3986":4,"3d":[35,36],"3mn":2,"4":[1,2,3,5,10,11,12,38],"40":3,"400":5,"400001":1,"400016":[1,2],"402":5,"4023":3,"403344":4,"403347":4,"405":5,"406":[2,5],"4062":3,"407":5,"408":5,"408716":4,"409":5,"4096":[1,2,3,5],"410":5,"411":5,"412":2,"413":5,"414":5,"415":2,"41576":4,"4194304":1,"420235":3,"420822":3,"420828":5,"42142":4,"426":5,"428372":4,"428568":1,"428801":3,"429770":[1,2],"431969":4,"435930":3,"445676":5,"446516":3,"446623":3,"447":5,"448255":1,"4492":3,"45":3,"4531":3,"454":5,"458":5,"4608":5,"4609":3,"461":5,"464755":3,"468":5,"4688":3,"47":3,"470582":5,"471":5,"472":1,"476":[4,7],"48":3,"481028":5,"482":5,"49":3,"492372":5,"492442":3,"494":5,"4940":1,"494120":3,"498981":2,"4m":2,"4x":2,"5":[1,3,4,5,10,52],"500":5,"5000":3,"501":5,"502740":5,"505":5,"509816":5,"51":3,"511":5,"512":[2,3,4,5],"5120":5,"516586":3,"517":5,"518":5,"52":[3,7],"520":5,"523":5,"523664":3,"524288":1,"526831":3,"528664":3,"53":3,"5312":3,"536":5,"54":3,"541":4,"546":2,"548180":3,"549":5,"551037":5,"559798":5,"5632":5,"563555":3,"566":5,"566038":2,"566838":2,"568431":4,"57":3,"575753":5,"578":[3,7],"578556":5,"579673":3,"584279":3,"585":[2,5],"5859":3,"586858":4,"587162":5,"589":5,"5898":3,"59":3,"599":7,"599987":5,"5mn":2,"6":[0,1,3,5],"600000":1,"600004":2,"606":5,"6094":3,"609605":5,"614":[1,2],"6144":5,"615390":1,"62":3,"623009":5,"626943":3,"627":5,"627589":5,"63":3,"630":5,"631610":5,"634072":5,"64":[1,3],"640":[2,3],"641":[5,7],"642":5,"643199":3,"64kb":5,"65":3,"655":2,"65536":[1,5],"656574":1,"664":2,"665176":3,"6656":5,"666643":5,"666652":5,"666656":5,"669909":5,"670":5,"67086":4,"67108864":1,"6724":1,"679014":5,"68":3,"682":5,"684049":3,"690139":3,"694":5,"694907":5,"695":5,"695045":5,"6953":3,"699062":5,"7":[0,1,3,5,10],"70":3,"700":5,"702":5,"7031":3,"704":5,"706":2,"7070":3,"707878":4,"71":3,"712":5,"712490":3,"714281":5,"715711":3,"7168":5,"719258":4,"72":3,"722":[1,2],"725":5,"728":5,"73":3,"730667":3,"737433":3,"737435":1,"743443":4,"7500":3,"750943":3,"751978":5,"754967":2,"758038":3,"758863":2,"76":[1,3],"768":[2,3],"7680":5,"768000":3,"77":3,"772874":5,"773130":3,"776119":3,"78":3,"780":1,"781":2,"783251":3,"784810":5,"79":3,"79719":4,"8":[1,2,3,5,10,11,12,50,52],"80":[3,52],"800002":1,"803739":5,"806497":3,"806694":4,"81":3,"811":2,"812":[1,2],"814809":5,"817432":4,"8192":[1,5],"82":3,"822459":3,"823517":[1,2],"825748":3,"826188":5,"829":[1,7],"829164":3,"83":3,"833":1,"833728":3,"838026":4,"8388608":1,"84":3,"842":1,"84284":4,"844306":5,"846167":5,"847":1,"848":1,"849":1,"85":3,"850":1,"858629":3,"859062":5,"86":3,"87":3,"8704":5,"873439":5,"88":3,"8828":3,"885254":5,"8867":3,"887117":5,"888257":5,"89":3,"8906":3,"892307":3,"8945":3,"896":3,"899428":3,"8mn":2,"9":[0,1,2,3,4,5],"90":3,"901241":3,"908470":3,"91":3,"913500":3,"92":3,"920437":5,"9216":5,"9219":3,"93":[2,3],"932191":3,"934503":5,"936606":2,"9375":3,"94":2,"9492":3,"95":[2,3],"952835":4,"9531":3,"954424":2,"954614":3,"956960":3,"959706":3,"96":2,"967074":5,"967162":3,"9688":3,"969169":5,"97":2,"971025":3,"9728":5,"9733":1,"976995":5,"978909":3,"98":2,"9805":3,"983276":3,"98432":1,"9844":3,"999982":5,"999986":5,"999995":1,"abstract":[9,10],"break":10,"byte":2,"case":[1,2,9,10,13,18],"class":[2,5,9,10,11,51],"default":52,"do":[2,3,9,10,12,16,17,19,20,21,22,23,28,47],"float":[2,9,10,52],"function":[1,2,3,4,5,10,11,12,13,14,51,52,53],"import":[1,2,3,4,5,9,10],"int":[1,9,10,13,15,24,35,36,42,50,52],"new":[24,42,50],"return":[1,2,3,4,5,12,15,16,17,18,19,20,21,22,23,26,28,30,32,35,36,37,38,39,40,41,48,49,50,52,53],"static":[0,9,10],"super":3,"switch":3,"true":[1,2,3,5,26,49],"try":[3,5,11],"var":[5,10],"voil\u00e0":4,"while":[3,9],A:[3,4,5,9,10],And:[0,3],As:[2,3,4,9,10],At:[4,10],But:4,By:52,For:[3,9,10,11],If:[4,10,16,17,19,20,21,22,23,38,47,49,51],In:[1,2,3,4,10],It:[1,3,4,6,8,10,12,14],NOT:5,Of:9,On:10,One:3,The:[1,2,3,4,9,10,16,17,18,19,20,21,22,23,24,26,35,36,37,38,39,40,42,47,49,53],There:1,These:10,To:[1,4,6,9,10,12],_:5,__expf:2,__init__:[11,51],_a:5,_da:5,_dout:5,_dropout:4,_layer_norm_bwd_dwdb:5,_layer_norm_bwd_dx_fus:5,_layer_norm_fwd_fus:5,_matmul:3,_mean1:5,_mean2:5,_mean:5,_seeded_dropout:4,_var:5,a100:[3,10],a_arg:5,a_hat:5,a_ptr:3,ab:1,abl:10,about:[1,2,3,4,8],abov:[1,2,3,4,10,12],academ:9,acc:[3,9,10],acceler:9,access:[1,3,9,10,14],accomod:3,accordingli:10,account:10,accumul:[3,5,10],accuraci:[3,9],achiev:[3,9,10],across:[2,4,9,10],activ:3,actual:[3,9,10],ad:5,add:[1,4,5,7,16],add_kernel:1,addit:[2,6,7,9,52],addition:10,address:[9,28],adopt:10,advanc:[2,3,9],advoc:10,affect:3,affin:10,after:3,against:[0,1,2,3,8],aggress:[9,10],agnost:[9,10],ahead:10,aim:[2,8],al:[9,10],alex:4,algebra:10,algorithm:[3,4,9,10],alia:10,all:[2,3,4,6,9,10,12,30,32,34,48,51],allclos:[2,3],allen1984:10,allen:10,alloc:[1,2,3,5,9],allow:[1,2,9,10],allow_tf32:26,along:[1,3,30,32,35,36,48,52],also:[1,2,3,4,5,9,10],altern:4,alwai:[10,49],amd:9,amen:10,amount:[5,9],ampl:10,an:[1,2,3,4,9,10,11,16,17,18,19,20,21,22,23,37,38,39,40],analog:1,analysi:[9,10],analyz:10,ancourt1991:10,ancourt:10,ani:[1,2,3,10,12,13,51],anoth:[2,10],anytim:12,apart:10,apex:5,apex_layer_norm:5,api:51,appear:51,appli:[3,4,5,9,10,16,17,19,20,21,22,23],applic:[4,10,13],approach:[9,10],appropri:1,approxim:2,ar:[0,1,2,3,4,9,10,11,12,14,28,34,47,49,51],arang:[1,2,3,4,5],arbitrari:3,architectur:[3,9],area:10,arg:[1,2,3,5,11,13,14,51],argument:[1,2,3,11,12,13,14,49,51],arrai:[10,50],arrang:3,art:[9,10],artifici:4,arxiv:[9,10],ask:2,aspect:10,asplo:9,assert:[1,2,3,4,5],assert_almost_equ:5,assum:[2,51],asynchron:[1,9],atom:[16,17,18,19,20,21,22,23],auguin1983:9,auguin:9,auto:[2,3,10,11,12,13],autograd:5,autom:9,automat:[2,3,9,10,11],autotun:[3,10],avail:[0,4,9,10],avoid:[2,12,49],awar:9,awkward:4,axi:[1,2,3,4,5,30,32,35,36,48,51],b:[3,9,10],b_ptr:3,back:[1,2,3,4,5],backpropag:4,backward:5,bad:4,baghdadi2021:[9,10],baghdadi:[9,10],balanc:10,bandwidth:2,base:[4,8,9,10],basic:[1,6,10],becom:9,been:[1,9,10],befor:[3,11,12,16,17,18,19,20,21,22,23],begin:10,behavior:[10,12],being:[2,4],believ:10,below:[4,6,10],bench:[0,12],bench_layer_norm:5,benchmark:[0,5,52,53],benefit:[2,9,10],best:[1,9],between:[1,9],bfloat16:26,bia:5,bit:4,block:[1,2,3,4,9,10,16,17,18,19,20,21,22,23,24,25,26,27,28,29,31,33,37,38,39,40,41,43,44,45,46,47,49],block_siz:[1,2,4,5,10,12,13],block_size_k:3,block_size_m:[3,5],block_size_n:[3,5],block_start:[1,4],blue:[1,2,3,5],boil:10,bool:[49,51],both:[10,49],bound:[1,2,3,10],branch:10,broad:9,broadcast:[24,28,47,49],build:[0,3],built:[1,10],c:[3,9,10],c_mask:3,c_ptr:3,cach:[9,10,28],cache_modifi:28,call:[1,3,10,11,14,38],callabl:[1,13,14,52],can:[0,1,2,3,4,9,10,12,53],cannot:[3,9,10],capabl:[8,9],cd:[0,6],cdiv:[1,3,4,5],ceil:13,certain:13,cgo:[9,10],challeng:4,chang:[3,4,12,28],chapter:8,characterist:10,cheap:9,check:[3,8],checkpoint:4,chen2018:9,chen:9,chip:2,choic:8,click:[1,2,3,4,5],clone:[0,5],close:10,cmake:0,cmp:18,coalesc:9,code:[1,2,3,4,5,6,9,10],col:[3,5,10],col_offset:2,color:51,column:[2,3],com:[0,5],combin:9,come:[2,3,10],command:0,common:10,commonli:10,compar:[2,3,4,5,8,10,18],compat:26,compil:[2,3,8,9,11,14,34],complet:10,complex:10,compos:[4,9],composit:10,comprehens:[9,10],comput:[4,5,8,9,10,13,25,27,29,31,33,43,44,45,46],computation:[9,10],concern:10,concis:[1,51],condit:[10,49],config:[3,5,12],configur:[3,11,12,53],confirm:2,connectom:9,consecut:10,consequ:9,consid:2,consist:4,constexpr:[1,2,3,4,5,37,38,39,40,45],constraint:[3,10],construct:9,constructor:51,consum:3,contain:[10,18,51],contextu:10,contigu:[3,15,41],control:[9,10],conveni:3,convert:[1,3,14],convolut:9,cooper:11,copi:[4,9,18],core:[9,10,37,38,39,40,45],correct:1,correspond:[1,2,3,51],cosin:25,cost:10,could:[2,10],cours:9,cpython:0,creat:[1,2,3,5,9],crucial:4,csv:1,ctx:5,cubla:[3,9],cuda:[1,2,3,4,5,9],cudnn:9,current:36,custom:[1,2,3,8],cut:3,cvpr:9,d:[2,4,12,14],da:5,dart:10,darte1999:10,data:[1,3,4,5,9,10,16,17,18,19,20,21,22,23,28,49,50],data_ptr:14,dataflow:10,david:4,db:5,db_ref:5,db_tri:5,dbia:5,deal:4,decad:9,decim:5,declar:1,decompos:10,decor:[1,3,12,13,14],decreas:4,dedic:3,deep:[3,4,9,10],def:[1,2,3,4,5,12,13],defin:[1,2,3,10,28],definit:10,denomin:2,denot:1,dens:10,depend:[0,6,10,49],deploi:9,describ:[4,10],design:10,desir:[24,42],detail:[3,10],detect:9,develop:[9,10],devic:[1,2,3,5],dg:5,dialect:10,dict:[12,13],dictionari:[11,13],diesel:10,differ:[1,2,3,4,9,10,12,51],difficult:10,difficulti:[3,9],dijkstra82:10,dijkstra:10,dim:[2,10],dimens:[3,26,30,32,48],dimension:[3,10,26],dir:0,direct:3,disjoint:10,disk:1,dissert:10,distribut:[2,4,10],divis:3,dnn:[8,9,10],do_bench:[1,2,3,5],doc:4,doe:[1,2,3,10],doesn:10,domain:[9,10],don:[1,2,3],done:[3,9,30,32,48],dot:3,doubli:3,doubt:10,dout:5,down:[3,10],download:[0,1,2,3,4,5,6],dram:[1,2],dropout:[6,7],dror:4,dsl:[8,9,10],dtype:[1,2,3,5,16,17,18,19,20,21,22,23,28,47,50],dw:5,dw_ref:5,dw_tri:5,dweight:5,dx:5,dx_ref:5,dx_tri:5,dy:5,e:[0,2,3,4,6,9,10,50],each:[1,2,3,4,9,10,11,13],earli:12,early_config_prun:12,eas:10,easi:[3,4],easier:[1,2,9],easili:3,ed:[1,3],education:2,effect:10,effici:[3,4,9,39],effort:10,eg:12,either:[1,35,36,49],elango2018:10,elango:10,element:[1,2,3,4,5,25,27,29,30,31,32,33,43,44,45,46,47,48,49,51],element_s:[2,5],element_ti:[16,17,18,19,20,21,22,23,28,47],elementwis:[2,28],els:[3,5],emerg:9,empti:[3,5],empty_lik:[1,2,4,5],enabl:10,encod:10,encourag:4,end:[9,10,15],enforc:10,engin:10,enqueu:[1,2,5],ensur:10,entir:10,entri:39,environ:8,ep:5,equal:10,error:3,especi:9,et:[4,9,10],euromicro:9,evalu:[3,4,12,49],even:[4,10],evict_first:5,evict_last:5,eviction_polici:[5,28],evidenc:9,evolv:9,exampl:[1,2,3,4,5,6,9,10,11],except:5,exchang:22,execut:[7,9,10,11,53],exist:[9,10],exp:2,expect:[2,18],expens:[9,10,13],explor:[4,9],exponenti:[2,27],express:[9,10],extend:[3,4],extra:1,extras_requir:5,extrem:10,f:[1,2,3,10],facilit:[9,10],fact:10,fairli:3,fals:[5,16,17,19,20,21,22,23,28,45,47,49,51,52],far:2,fast:[2,9,10],faster:[2,38],fastest:10,featur:5,feel:3,fetch:9,few:10,field:[9,12],figur:10,file:[1,2,3,7],fill:50,fine:4,first:[1,3,4,8,10,26,31,33],first_pid_m:3,firstli:4,fit:2,fix:51,flag:2,flatten:41,flexibl:9,float16:[3,5,26,50],float32:[1,2,3,4,5,26,37,40],flow:[9,10],fly:4,fn:[14,52],focu:[3,10],folder:4,follow:[0,2,3,8,9,10],footprint:4,forc:4,forget:1,formal:10,format:10,forward:5,found:18,foundat:10,four:39,fp16:3,fp32:3,frac:4,framework:[9,10],free:3,from:[1,2,3,4,9,10,28,49],full:[1,2,3,4,5],fulli:10,func:10,fundament:10,further:[4,10],fuse:[3,5,6,7],fusedlayernorm:5,fusion:[2,10],g:[3,4,9,10,50],galleri:[1,2,3,4,5,6],gb:[1,2,5],gbp:[1,2,5],gener:[1,2,3,4,5,6,9,10,37,38,39,40,51],geoffrei:4,geq:10,get:[1,2,3,4,7],girbal2006:10,girbal:10,git:0,github:[0,5],give:9,given:[2,3,4,24,35,36,37,38,39,40,42,50],global:10,go:[1,3,10],good:[1,10],gpgpu:9,gpu:[1,2,4,8,9,10,11,14],grad:5,grad_scale_gain_bias_nam:5,grad_scale_nam:5,grad_to_non:[5,52],gradient:52,grammat:10,graphic:9,greater:2,green:[1,2,3,5],grid:[1,2,3,4,5,35,36],grid_m:3,grid_n:3,grosser2012:10,grosser:10,group:3,group_id:3,group_m:3,group_size_m:3,grow:10,guard:[1,2],guid:9,ha:[1,3,4,9,10,35,36],had:1,halid:[9,10],hand:10,handl:[1,2,4,10],handwritten:9,hard:3,harder:10,hardwar:[3,8,10],has_apex:5,hasattr:5,hasn:1,have:[2,4,9,10,14,26,49,51],heavi:9,helper:[1,2],henc:3,here:[1,2,3,4,5],heurist:[2,5],hierarch:9,hierarchi:10,high:[3,9,10],higher:3,highli:9,highlight:10,hint:10,hinton:4,hit:3,how:[1,2,3,8,9,13],howev:[2,10],html:4,http:[0,4,5],i:[1,2,3,4,5,9,10],id:[3,36],idea:9,ideal:2,ident:2,identifi:1,idx:[16,17,19,20,21,22,23,28,47],ieee_round:45,ilya:4,imag:[9,10],implement:[1,2,3,4,9,10],implicitli:[1,14,28,47],importantli:10,impos:10,improv:[3,4],incompat:[3,10],incorrect:3,increas:[1,2,3,4],incred:9,increment:10,inde:10,independ:[2,10],index:1,indic:[10,49],induc:10,industri:9,inequ:10,inf:2,inform:10,infrastructur:10,initi:[1,3],inner:[3,26],inplac:3,input:[1,2,3,4,5,10,12,13,24,25,26,27,29,30,31,32,33,34,41,42,43,44,45,46,48],input_ptr:2,input_row_strid:2,instal:[6,8],instanc:[1,2,3,4,9,11,35,36],instanti:4,instead:[2,49],instruct:[8,9],int1:[16,17,19,20,21,22,23,28,47],int32:[4,38,39],integ:10,interchang:10,interest:[9,10],intermedi:10,intern:[2,10],interv:15,intrins:10,introduc:4,introduct:8,invari:[2,10],invoc:4,ipynb:[1,2,3,4,5],ir:10,irregular:[2,10],is_contigu:[3,4,5],is_cuda:1,isn:3,issu:[9,10],iter:[3,9,10],its:[1,2,3,10,12],j:[3,9,10],jit:[1,2,3,4,5,12,13],jmlr:4,john:4,johnson:4,journal:10,jrk2013:9,jupyt:[1,2,3,4,5,6],just:[3,10,13],k:[3,4,9,10],kb:9,keep:4,kei:[3,9,12],kellei:9,kernel:[4,5,8,9,11,12,13],keyword:[1,11],ki:10,kind:2,know:34,known:10,krizhevski:4,kwarg:[11,14],label:[1,2,3,51],lam1991:9,lam:9,lambda:[1,2,3,4,5,13],languag:[1,2,3,4,5,8,9,14],larg:[9,10],last:3,later:[2,10],latest:0,lattner2004:10,lattner2019:10,lattner:10,launch:[1,2,3,35,36],law:10,layer:[6,7,9,10],layer_norm:5,layernorm:5,lead:[4,9,10],leaky_relu:3,leakyrelu:3,learn:[1,2,3,4,8,9,10],least:10,lee2017:9,lee:9,left:10,legal:10,length:1,less:[4,5,9,10],let:[1,2,4,34],letter:10,level:[3,9,10],li:9,librari:[0,3,9,10],lifelong:10,like:[1,4,9,10,38],limit:[2,4],lindenstrauss:4,line:[1,2,3,4,10,51],line_arg:[1,2,3,5,51],line_nam:[1,2,3,5,51],line_v:[1,2,3,5,51],linear:[9,10],link:0,list:[1,3,12,13,51,52,53],litteratur:10,ll:4,llvm11:0,llvm:[0,10],load:[1,2,3,4,5,10,49],local:[9,10],locat:[3,16,17,18,19,20,21,22,23,28,47],log2:13,log:51,logarithm:[1,29],logic:[17,21,23],look:[4,8,9],loop:[3,10,11],low:[6,7,10],m:[0,2,3,5,9],machin:[9,10],machineri:[9,10],made:9,mai:[2,10,13],main:[3,9,10],maintain:[2,10],major:[3,10],make:[1,2,9,10],manag:[4,9],mani:[9,10],manual:[2,10],manual_se:[1,2,3,5],map:3,mapl:10,mark:[4,53],markedli:9,mask:[1,2,3,4,5,16,17,19,20,21,22,23,28,47,49],match:[3,18],math:13,mathbb:10,mathbf:10,mathcal:[10,40],mathemat:10,matmul:[3,10],matmul_kernel:3,matric:[2,3],matrix:[2,4,6,7,9,10,11,26],matrix_s:10,matter:[3,9,10],max:[1,2,5,19],max_fused_s:5,max_m:[1,2,3,5],maxim:[8,10,39],maximum:[1,2,30],mb:[7,9],mean1:5,mean2:5,mean:[3,5,10,12],mechan:[2,10],median:52,memori:[1,2,3,6,7,9,10,16,17,18,19,20,21,22,23,28,47,49],mention:3,meta:[1,2,3,4,5,11,12,13],metaparamet:1,method:[10,11,14,51,53],methodolog:10,micro:9,min:[3,5,20],min_m:[1,2,3,5],minimum:32,minut:[1,2,3,4,5],miss:10,mitig:10,ml:9,mlir:10,mn:2,mode:5,model:[1,9,10,12],modern:[3,8,9,10],modular:10,modulenotfounderror:5,moor:10,mora:4,more:[2,3,4,8,9,10,51],most:[3,10],mostli:11,move:3,movement:4,ms:[1,2,3,5,52],much:[2,3],mullapudi2016:10,mullapudi:10,multi:[3,9,10],multipl:[1,4,6,7,9,10,11,12,34,38],multipli:[3,4,5,10,26],must:[2,3,15,26,49],n:[2,3,5,9,40],n_col:2,n_element:[1,4],n_round:[37,38,39,40],n_row:2,naiv:[2,4],naive_softmax:2,name:[1,2,3,12,13,51],nativ:[1,2,3],natur:[2,9,29],nb:9,necessari:2,need:[1,2,3,4,38],nelement:2,nest:[3,10],net:10,network:[4,9,10],neural:[4,9,10],neurosci:9,never:4,next:[2,3],next_power_of_2:[2,5],nightli:0,nip:9,nitish:4,nn:[3,5],non:9,none:[2,3,5,11,12,16,17,19,20,21,22,23,28,47,51,52],nonzero:49,norm:[4,5,7],normal:[2,6,7],normalized_shap:5,note:[0,1,2,3,4,10,12,14,49],notebook:[1,2,3,4,5,6],notic:[2,10],notori:[3,9],novel:9,now:[1,3],num_pid_in_group:3,num_pid_m:3,num_pid_n:3,num_stag:[3,11,12],num_warp:[2,3,5,11,12],number:[1,2,3,4,5,10,11,12,35,37,38,39,40],numcol:5,numel:[1,4,5],numer:[2,9],numrow:5,nvidia:[5,9,28],o:[2,4],object:[1,3,9,11,12,14,16,17,18,19,20,21,22,23],obtain:1,obvious:2,occur:10,off:5,offer:9,offici:0,offs_am:3,offs_bn:3,offs_cm:3,offs_cn:3,offs_k:3,offset:[1,4,37,38,39,40],often:3,omega:10,onc:[2,9,10],one:[2,3,4,6,9,10,51],onli:[2,3,4,9,10,14],op:[1,2],open:15,openai:0,opencl:9,oper:[1,2,3,4,6,9,16,17,18,19,20,21,22,23,49],opportun:9,opsila:9,optim:[9,10],option:[3,12,16,17,19,20,21,22,23,28,47,51,52],orang:5,order:[2,3,6,10],org:4,origin:10,osdi:9,other:[2,3,4,5,8,10,14,26,28,31,33],otherwis:[4,49],our:[1,2,3,9],out:[1,2,3,4,5,8,10],outlin:10,output2:4,output3:4,output:[1,2,3,4,5],output_ptr:[1,2,4],output_row_start_ptr:2,output_row_strid:2,output_torch:1,output_triton:1,over:[2,4,9,10],overfit:4,overflow:2,own:3,p:[4,10],pa:3,packag:14,pact:10,pad:2,par:3,paradigm:[9,10],paragraph:4,parallel:[1,2,3,4,5,8,9,10,11],paralleliz:9,param:13,paramet:[1,3,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53],parametr:9,part:[3,4,10],partial:5,particular:[2,3],particularli:[9,10],partit:9,pass:[1,5,10,11],past:[9,10],path:1,pattern:9,pb:3,peak:10,per:[2,4,5],percentil:52,perf:3,perf_model:12,perf_report:[1,2,3,5,51],perform:[1,2,4,9,10,12,16,17,18,19,20,21,22,23,52],persist:4,person:10,perspect:10,phase:10,philosophi:10,philox:[4,39],pid:[1,3,4,5],pid_m:3,pid_n:3,pip:[0,6],pipelin:[9,10,11],platform:[8,10],pldi:9,plot:[0,1,2,3,51],plot_nam:[1,2,3,5,51],pmatrix:10,point:[1,10,39],pointer:[1,2,4,14,16,17,18,19,20,21,22,23,28,47],pointerdtyp:[16,17,18,19,20,21,22,23,28,47],polli:10,polyhedr:9,polyhedra:10,popular:10,portabl:[9,10],pose:9,posit:[5,13],possibl:[1,2,3,10,11],power:[2,4,10,13,15],ppopp:10,practic:[1,2,3,9],pragma:9,pre:[0,9],pre_hook:11,prealloc:1,predic:12,predict:10,prefer:2,premis:9,present:0,preserv:10,preserve_rng_st:4,prevent:[4,10],primer:10,primit:[9,14],principl:10,print:[1,3,4],print_data:[1,2,3,5],prng:4,probabl:[4,10],problem:1,problemat:10,procedur:10,process:[1,5,9,10],processor:9,produc:[3,4],product:[8,10,26],program:[1,2,3,4,5,8,9,35,36],program_id:[1,2,3,4,5],programm:[9,10],prohibitev:13,project:[4,9],promot:[3,10],properli:2,properti:10,propos:9,proprietari:3,provid:[1,2,3,4,5,8,10,12,30,32,48,52],prune:[4,12],prune_configs_bi:12,pseudo:[3,4,39],pseudorandom:4,ptr:3,ptx:28,purpos:[9,10],push:10,put:4,py:[0,1,2,3,4,5,7],pypi:[0,5],pytest:0,python:[1,2,3,4,5,6,14],pytorch:[1,2,4],qquad:10,r:2,ragan:9,rand:[1,4,5],randint4x:38,randn:[2,3,4,5],randn_lik:5,random:[4,37,38,39,40],randomli:4,rang:[1,2,3,5,9,10],rapidli:[9,10],rate:3,rather:9,raw:1,rdom:10,re:[1,3],read:[2,3,6],reader:10,real:9,reason:10,recent:9,recommend:6,recomput:[4,9],record_clock:52,rectifi:9,redmon2016:9,redmon:9,reduct:[2,5,30,32,48],refer:1,regardless:[4,49],regim:4,regrett:9,regular:[4,10],rel:[1,10],relat:8,releas:[0,9],reli:10,relu:3,remain:[9,51],rememb:3,reorder:10,rep:[5,52],repetit:52,repres:[2,3,10,11],requir:[2,4,10],requires_grad:5,requires_grad_:5,research:[9,10],reset:[12,52],reset_to_zero:12,reshap:5,resolut:10,resourc:9,resp:10,respect:10,restrict:10,result:[0,1,2,9,10],ret:2,retain_graph:5,retriev:10,reus:3,revisit:9,right:10,rise:10,role:10,ron:4,root:46,roughli:3,row:[2,3,4,5],row_idx:2,row_minus_max:2,row_start_ptr:2,rstd:5,run:[0,1,2,3,4,5,8,10,12,14,53],runtim:[10,52],ruslan:4,rvar:10,s:[1,2,4,5,10,39],said:10,salakhutdinov:4,salmon2011:4,salmon:4,same:[4,9,51],sato2019:10,sato:10,save:[1,2,3],save_for_backward:5,save_path:[1,5],saved_tensor:5,sc:10,scalabl:10,scalar:[4,9,26,37,38,39,40,50],scale:51,scan:10,schedul:9,scienc:10,scientif:10,scop:10,scope:10,script:[0,1,2,3,4,5],second:[1,2,3,4,5,10,26,31,33],secondli:4,section:[3,10],see:[1,2,3,4,10],seed:[37,38,39,40],seeded_dropout:4,seem:[1,10],select:[9,10,49],self:[11,51],semant:10,semi:10,sens:[1,9,10],separ:[5,10],sequenc:9,set:[1,4,10],setup:[0,5],sever:[9,10],shall:10,shape:[1,2,3,4,5,10,24,28,42,47,49,50],share:9,shaw:4,shift:2,should:[1,3,5,9,10,11,30,32,48,51],show_plot:[1,2,3],shown:10,side:10,sight:10,signal:9,significantli:2,sigplan:10,simd:9,simpl:[1,2,3,4],simplest:6,simpli:10,simplic:3,simplifi:4,sinc:[1,2,3],sine:44,singl:[2,4,9,38],size:[1,2,4,10],slower:[9,10],slowest:10,sm80:11,sm:10,smaller:[3,4],smallest:[2,13],snemi3d:9,so:[1,2,3,4,5,10],softmax:[4,6,7],softmax_kernel:2,softmax_output:2,softwar:11,solid:10,solut:3,solv:10,some:3,sometim:10,sourc:[1,2,3,4,5,6,10],space:[9,10],spars:[4,9,10],spatial:10,speak:3,special:9,specif:[3,9],specifi:[10,13,16,17,18,19,20,21,22,23,47],speed:2,sphinx:[1,2,3,4,5,6],split:10,spmd:[1,9,10],sqrt:5,squar:46,sram:[2,3,5],srivastava2014:4,srivastava:4,stabil:2,stabl:0,stage:11,standard:10,start:[6,15],started_tutori:7,state:[4,9,10],statement:10,staticmethod:5,step:10,still:[1,2,3,10],stop:15,store:[1,2,3,4,5,16,17,18,19,20,21,22,23,49],str:[12,13,28,51],straightforward:3,strategi:[4,10],stream:[5,38],strength:9,stride:[2,3,4,5],stride_ak:3,stride_am:3,stride_bk:3,stride_bn:3,stride_cm:3,stride_cn:3,stride_xi:3,stride_xj:3,structur:[9,10],style:[1,2,3,5,51],subscript:10,substanti:9,substract:2,subtract:2,successfulli:10,suffer:10,suit:9,sum:[1,2,5],sum_db:5,sum_dw:5,superhuman:9,support:[4,10],sure:2,surprisingli:9,surround:10,suspicion:2,sutskev:[4,9],sutskever2014:9,swap:18,swizzl:9,synchron:[1,9],system:[0,3,9,10],t:[1,2,3,10],t_:10,tabul:4,taco:10,take:[3,4,8,12,13],taken:10,target:9,techniqu:[9,10],temperatur:4,tempor:10,tend:10,tension:9,tensor:[1,2,3,4,5,9,10,12,14,24,26,28,30,31,32,33,41,42,47,48,49,50,52],tensorrt:9,test:[0,1,5,8],test_layer_norm:5,text:10,tflop:3,th:52,than:[2,3,5,9,10,38,51],thei:[3,9,10],them:1,themselv:3,theoret:2,therebi:10,therefor:3,theta:10,theta_:10,thi:[1,2,3,4,5,9,10,11,12,13,14,39,51],thing:[1,4],think:2,those:2,though:[9,10],thought:10,thread:[2,9,11],through:[6,10],throughout:[10,51],throughput:8,tile:10,time:[0,1,2,3,4,5,9,10,12,38,52],tiramisu:[9,10],tl:[1,2,3,4,5,50],tmp:0,tog:10,togeth:4,tolist:4,top_k:12,topic:10,torch:[1,2,3,4,5,14,52],torch_output:3,torch_relu:3,total:[1,2,3,4,5,7],tradit:[4,9,10],transform:[4,10],travers:10,trend:9,tri:[24,42],trick:2,tricki:4,trigger:[3,12],triton:[0,1,2,3,4,5,6,9,10],triton_output:3,trivial:9,tune:[2,3,10,12,13],tuner:11,tupl:[1,24,42,50],tutori:[1,2,3,4,8],tutorials_jupyt:6,tutorials_python:6,tvm:[9,10],two:[1,2,3,10,12,13,15,26],type:[13,26,28,49,50],typecast:[28,47],typic:10,u:[0,37],un:10,uncommon:10,underneath:10,understand:2,undesir:12,unfortun:[3,10],unifi:9,uniformli:4,unint:49,unit:[0,9],univers:10,unrol:10,up:2,updat:[3,10,12],us:[1,2,3,4,5,9,10,11,12,13,14,38,49,51,53],util:[1,5],v100:10,val:[16,17,18,19,20,21,22,23],valid:1,valu:[1,2,3,4,12,13,15,16,17,18,19,20,21,22,23,25,27,28,29,30,32,34,43,44,45,46,47,48,49,50,51,53],valuabl:2,variabl:[3,11],varianc:5,variant:9,variou:6,vasilach:[9,10],vasilache2018:[9,10],vast:10,vec:10,vector:[4,6,7,9,10],vendor:3,veri:[2,4,10],verif:10,verifi:[2,10],via:10,view:41,visibl:10,vision:9,volatil:28,vs:0,w:10,w_shape:5,wa:4,wai:[2,3,4],want:[2,4,49],warmup:52,warp:[2,5,11],wast:2,wdout:5,we:[1,2,3,4,9,10],weight:5,well:[4,9,10],whatev:12,wheel:0,when:[2,3,4,9,10,11,12,14,49],where:[1,3,4,5,10,13,47],whether:[9,51],which:[1,2,3,4,9,10,12,30,32,48,51],whose:[1,2,3,4,10,12,28],wide:10,wise:[1,2,25,27,29,31,33,43,44,45,46,47],wish:[3,10],within:[3,14,15],without:10,wolf:10,wolfe1989:10,won:2,word:10,work:[2,4,8,9],workload:[3,11],wors:[3,9,10],would:[1,2,4],wouldn:10,wrapper:3,write:[1,2,3,4,5,6,8,10],wrote:2,x:[1,2,3,4,5,10,25,27,29,31,33,41,43,44,45,46,49,51],x_arg:5,x_keep:4,x_keep_ptr:4,x_log:[1,51],x_max:2,x_name:[1,2,3,5,51],x_ptr:[1,4,12,13],x_shape:5,x_size:[12,13],x_val:[1,2,3,5,51],xi:10,xii:10,xlabel:51,xo:10,xor:23,y:[1,2,3,5,10,31,33,49,51],y_fwd:5,y_log:51,y_name:[1,2],y_ptr:1,y_ref:5,y_torch:2,y_tri:5,y_triton:2,year:10,yet:[9,10],yi:10,yield:49,yii:10,ylabel:[1,2,3,5,51],yo:10,you:[0,1,2,3,4,6,9,12,38,49],your:[0,1,8],yourself:[2,3],z:[1,2,10],zero:[3,4,5,12],zip:6},titles:["Installation","Vector Addition","Fused Softmax","Matrix Multiplication","Low-Memory Dropout","Layer Normalization","Tutorials","Computation times","Welcome to Triton\u2019s documentation!","Introduction","Related Work","triton.Config","triton.autotune","triton.heuristics","triton.jit","triton.language.arange","triton.language.atomic_add","triton.language.atomic_and","triton.language.atomic_cas","triton.language.atomic_max","triton.language.atomic_min","triton.language.atomic_or","triton.language.atomic_xchg","triton.language.atomic_xor","triton.language.broadcast_to","triton.language.cos","triton.language.dot","triton.language.exp","triton.language.load","triton.language.log","triton.language.max","triton.language.maximum","triton.language.min","triton.language.minimum","triton.language.multiple_of","triton.language.num_programs","triton.language.program_id","triton.language.rand","triton.language.randint","triton.language.randint4x","triton.language.randn","triton.language.ravel","triton.language.reshape","triton.language.sigmoid","triton.language.sin","triton.language.softmax","triton.language.sqrt","triton.language.store","triton.language.sum","triton.language.where","triton.language.zeros","triton.testing.Benchmark","triton.testing.do_bench","triton.testing.perf_report","triton","triton.language","triton.testing"],titleterms:{"final":3,addit:1,advantag:10,algebra:55,api:8,arang:15,arithmet:3,atom:55,atomic_add:16,atomic_and:17,atomic_ca:18,atomic_max:19,atomic_min:20,atomic_or:21,atomic_xchg:22,atomic_xor:23,autotun:12,baselin:4,benchmark:[1,2,3,51],binari:0,broadcast_to:24,cach:3,challeng:9,co:25,comparison:55,compil:[10,55],comput:[1,2,3,7],config:11,creation:55,distribut:0,do_bench:52,document:8,dot:26,dropout:4,exercis:4,exp:27,from:0,further:8,fuse:2,gener:55,get:8,go:8,heurist:13,hint:55,index:55,instal:0,introduct:9,jit:14,kernel:[1,2,3],l2:3,languag:[10,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,55],layer:5,limit:10,linear:55,load:28,log:29,low:4,manipul:55,math:55,matrix:3,max:30,maximum:31,memori:[4,55],min:32,minimum:33,model:55,motiv:[2,3,9],multipl:3,multiple_of:34,normal:5,num_program:35,number:55,op:55,optim:3,packag:0,perf_report:53,perform:3,pointer:3,polyhedr:10,program:[10,55],program_id:36,python:[0,8],rand:37,randint4x:39,randint:38,randn:40,random:55,ravel:41,reduct:55,refer:[4,9,10],relat:10,represent:10,reshap:42,result:3,s:8,schedul:10,seed:4,shape:55,sigmoid:43,sin:44,softmax:[2,45],sourc:0,sqrt:46,squar:3,start:8,store:47,sum:48,test:[2,3,51,52,53,56],time:7,triton:[8,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56],tutori:6,unit:[2,3],vector:1,welcom:8,where:49,work:10,zero:50}})