From 612d6a9bdff610b461dde36c7adb8eb5875ffc25 Mon Sep 17 00:00:00 2001 From: Philippe Tillet Date: Thu, 9 Nov 2017 13:11:41 -0500 Subject: [PATCH] Package: Major fall update: -> Major performance improvement of CONV for small batches -> Adopted NCHW image layout -> 3-D convolutions supported -> HFMA2 deprecated -> Multi-threaded auto-tuning -> Tensorflow Wrapper --- CMakeLists.txt | 5 +- documentation/bench/CONV.pdf | Bin 12919 -> 0 bytes documentation/bench/CONV.png | Bin 9443 -> 0 bytes documentation/bench/GEMM.pdf | Bin 16408 -> 0 bytes documentation/bench/GEMM.png | Bin 15463 -> 0 bytes examples/bench.cpp | 241 - examples/dump.cpp | 40 - examples/isaac-tools.cpp | 323 +- examples/search.cpp | 180 - include/isaac/api.h | 58 +- include/isaac/driver/buffer.h | 4 +- include/isaac/driver/context.h | 2 +- include/isaac/driver/cublas.h | 76 +- include/isaac/driver/dispatch.h | 14 + include/isaac/driver/kernel.h | 1 + include/isaac/driver/module.h | 2 + include/isaac/driver/stream.h | 2 +- include/isaac/external/CUDA/crt/host_config.h | 2 +- .../isaac/external/CUDA/crt/host_defines.h | 0 include/isaac/external/CUDA/cublas.h | 2 +- include/isaac/external/CUDA/cublas_api.h | 25 +- include/isaac/external/CUDA/cuda.h | 28 +- include/isaac/external/CUDA/cuda_fp16.h | 50 +- include/isaac/external/CUDA/cuda_fp16.hpp | 71 +- include/isaac/external/CUDA/cuda_runtime.h | 6 +- .../isaac/external/CUDA/cuda_runtime_api.h | 71 +- include/isaac/external/CUDA/cudnn.h | 4 +- include/isaac/external/CUDA/driver_types.h | 16 +- include/isaac/external/CUDA/nvml.h | 51 +- include/isaac/runtime/predict.h | 25 +- include/isaac/scalar.h | 6 - include/isaac/templates/common.hpp | 3 - include/isaac/templates/conv.h | 63 +- include/isaac/templates/gemm.h | 21 +- include/isaac/templates/pool.h | 81 + include/isaac/tools/bench.hpp | 2 +- lib/api.cpp | 117 + lib/driver/buffer.cpp | 13 +- lib/driver/context.cpp | 4 +- lib/driver/dispatch.cpp | 15 + lib/driver/kernel.cpp | 3 + lib/driver/module.cpp | 7 + lib/driver/stream.cpp | 8 +- lib/runtime/database.cpp | 40 +- lib/runtime/database/sm_5_2/conv.hpp | 11262 ++++------------ lib/runtime/database/sm_5_2/pool.hpp | 2400 ++++ lib/runtime/database/sm_6_1/conv.hpp | 8569 ++++-------- lib/runtime/database/sm_6_1/gemm.hpp | 4797 ++++--- lib/runtime/database/sm_7_0/conv.hpp | 2813 ++++ lib/runtime/database/sm_7_0/gemm.hpp | 5215 ++++--- lib/runtime/predict.cpp | 118 +- lib/templates/conv.cpp | 1430 +- lib/templates/gemm.cpp | 417 +- lib/templates/pool.cpp | 379 + python/build.py | 140 + python/examples/benchmark.py | 31 + python/examples/conv.py | 45 - python/examples/gemm.py | 51 - python/isaac/__init__.py | 7 + python/setup.py | 78 +- python/src/bind/common.cpp | 1 - python/src/bind/driver.cpp | 6 +- python/src/bind/pybind11/attr.h | 114 +- python/src/bind/pybind11/cast.h | 471 +- python/src/bind/pybind11/chrono.h | 8 +- python/src/bind/pybind11/class_support.h | 504 + python/src/bind/pybind11/common.h | 202 +- python/src/bind/pybind11/complex.h | 15 +- python/src/bind/pybind11/descr.h | 0 python/src/bind/pybind11/eigen.h | 552 +- python/src/bind/pybind11/eval.h | 0 python/src/bind/pybind11/functional.h | 10 +- python/src/bind/pybind11/numpy.h | 464 +- python/src/bind/pybind11/operators.h | 0 python/src/bind/pybind11/options.h | 0 python/src/bind/pybind11/pybind11.h | 846 +- python/src/bind/pybind11/pytypes.h | 461 +- python/src/bind/pybind11/stl.h | 0 python/src/bind/pybind11/stl_bind.h | 104 +- python/src/bind/pybind11/typeid.h | 0 python/src/bind/templates.cpp | 32 +- python/src/extensions/tensorflow.cpp | 88 + tests/CMakeLists.txt | 2 +- tests/conv.cpp | 106 +- tests/gemm.cpp | 12 +- tests/pool.cpp | 153 + tune/data | 1 - tune/dataset.py | 97 +- tune/export.py | 5 +- tune/main.py | 84 +- tune/operations.py | 501 +- tune/regression.py | 115 +- tune/tools.py | 30 +- 93 files changed, 22429 insertions(+), 21989 deletions(-) delete mode 100755 documentation/bench/CONV.pdf delete mode 100755 documentation/bench/CONV.png delete mode 100755 documentation/bench/GEMM.pdf delete mode 100755 documentation/bench/GEMM.png delete mode 100755 examples/bench.cpp delete mode 100755 examples/dump.cpp delete mode 100755 examples/search.cpp mode change 100755 => 100644 include/isaac/external/CUDA/crt/host_config.h mode change 100755 => 100644 include/isaac/external/CUDA/crt/host_defines.h create mode 100644 include/isaac/templates/pool.h create mode 100755 lib/api.cpp create mode 100644 lib/runtime/database/sm_5_2/pool.hpp create mode 100644 lib/runtime/database/sm_7_0/conv.hpp create mode 100644 lib/templates/pool.cpp create mode 100644 python/build.py create mode 100644 python/examples/benchmark.py delete mode 100755 python/examples/conv.py delete mode 100755 python/examples/gemm.py mode change 100755 => 100644 python/setup.py mode change 100755 => 100644 python/src/bind/pybind11/attr.h mode change 100755 => 100644 python/src/bind/pybind11/cast.h mode change 100755 => 100644 python/src/bind/pybind11/chrono.h create mode 100644 python/src/bind/pybind11/class_support.h mode change 100755 => 100644 python/src/bind/pybind11/common.h mode change 100755 => 100644 python/src/bind/pybind11/complex.h mode change 100755 => 100644 python/src/bind/pybind11/descr.h mode change 100755 => 100644 python/src/bind/pybind11/eigen.h mode change 100755 => 100644 python/src/bind/pybind11/eval.h mode change 100755 => 100644 python/src/bind/pybind11/functional.h mode change 100755 => 100644 python/src/bind/pybind11/numpy.h mode change 100755 => 100644 python/src/bind/pybind11/operators.h mode change 100755 => 100644 python/src/bind/pybind11/options.h mode change 100755 => 100644 python/src/bind/pybind11/pybind11.h mode change 100755 => 100644 python/src/bind/pybind11/pytypes.h mode change 100755 => 100644 python/src/bind/pybind11/stl.h mode change 100755 => 100644 python/src/bind/pybind11/stl_bind.h mode change 100755 => 100644 python/src/bind/pybind11/typeid.h create mode 100644 python/src/extensions/tensorflow.cpp create mode 100755 tests/pool.cpp delete mode 100755 tune/data diff --git a/CMakeLists.txt b/CMakeLists.txt index 8defe7b48..177297432 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,10 +2,6 @@ cmake_minimum_required(VERSION 2.8.7) project(isaac) include(CTest) -#QtCreator: add visibility of headers -file( GLOB_RECURSE MAKE_HEADERS_VISIBLE_SRC *.cpp *.hpp *.h *.py) -add_custom_target( MAKE_HEADERS_VISIBLE SOURCES ${MAKE_HEADERS_VISIBLE_SRC} ) - #Default build type if(NOT CMAKE_BUILD_TYPE) message(STATUS "Default build type: Release") @@ -14,6 +10,7 @@ endif() #Compiler flags include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include/isaac/external/CUDA) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wextra -pedantic -Wno-strict-aliasing") #Source diff --git a/documentation/bench/CONV.pdf b/documentation/bench/CONV.pdf deleted file mode 100755 index 20a71eba58edc5c334eb1d480c8c0ace630702a5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12919 zcmb_@2RPMV_;||RBMOZV+4D|ZqU>~SDnhyTp4Z4KJ3>O4k!%`fWtCZyk(FeWvMM7* zG~|CiSAFYSe*bUJ@A>ID&pDs>ob#S{o%g)w+&6w*6*Y0B1csLX+6YuqLyLqEkgJ6Q z?f(6cq~R$yYe-U=KqNT1+Cq}L1Y2uQ2n7b{LkbGC)-G1?K$%}AsJXfjAv9@#q>+w= zgS90QlHT}K^(Cqs5D7$U2(tmuB@l_$9xf0T{-l*Oup?O6yVycf-+Mh=Ee))RkO^2; zMGcT*?Ms9tHJkwnO5bbc@3jVG_6s>k1VHd_24K2j>OHIhbsOYM>RWrddU;p^eqj2+ z`dZdj_5>wYU$7#KK?Z}Bz+rJ{X)J_AU?rrbQD7BGWxyXm;tAn4CaHV4dbvSAMYZuQ zX#nW;Ah>wC!An}60`mYw#oF86(pq0#2@nfOD!RD10z4-Fq8-NigLF-67h9qogoO2> z4@s)oI{|5d+6jnI#oE%<${J?cljvbhaHjRmsL#+kzv}@*#KH&B&ndg~MG}@zC0#W- z#4Jf$#1~@|qrv5L|^xuf?{TP?UO&;NG zb6A4MyloNu3R6s5&%s%#&NPKN~crF6jLO;ZL( ziPZD&P;uZH9H~fw1lXLc%iObn%n`k<#3yjJS6O5=iopVP;VOFn#k=nbFwd2&g@<#j zLS~SM)FhHLnGTLbufd{nU~U^MmC(PX$fqRz?inRv=cS>?j>ZOtmM)9cJmxhY z`%^V3lEStTI=gMoBVw?xY?|ngD$MOZAWf|~7%@H{(#Fc0HxZH|#?n5NAoH5bYIdV5 zx+PUpI9Vh*_-wkP1Fl)WW&RS~C7#K?u|?0&24ZW1ki^-zXXVRNV5SB=qNM6t08ps| zGiM+6!On+RID!CYcHQfTAL;L#Po`jD@?R=B4-&i*HR1lg%(CkNc{3 zai=;kR%w~LBxqr6ch36>g|Jk1McFT`7AczFWH8g^OM!G1sICG-ggqm?3vr@*NJE$(juN~Cy$M}LzQ4!EW^?@9-1-gX!q_CZs`faH}-ZSi^wCCR~0Z$ zz4`fm)SYv9?Dkh(qYrmx_SGBSZcm$3udeNDH5A4B5f66STZfhft)CLT!r+^fBR7X% zM)Y6jRKUqQAtvUZvf^5A^*j_i&^B7%p4YxZ#8A^XIj?OQQXA6C>~+P{2h|&*O1|#AB=qRKhfje(<7kWs0{`Y{Myh5m@5%T( z?3wfm?L(Ix$ZfqUbX2;A9tt%se5sBE0vGb}`5f<{R`~V~;nnT59`oEUEo(`zUB}n@ z^B)@EF?2KZ@Okste0!M+Uva(fK-*%@$Y_y5F?sgUb5XShA(T1!o^SJmx%?l%3ojH- z<=EEsU#?Az%A`zgy6!7Z8ZV41o;Me4^?7!=25qDyd=fdWKU%1831*=}Z+Ag>Tv{s- z9rc^hFG($OsvobDn7 zcN#{Op5iaqnvhwd^he)j`T&!M3A)}e8qq>q71Qa-kl1Au;)=Vz!LvyF1H58>0&g+5 z=wZ!U5|pPzZ`1q6=lJKT3?y#9bB=w_JRc9vX~pT0hzKC_%~58(SU#$NA%PN3m7QkG zPx9o(TgG#CjneEDiAZ9`pYMs{38iuDVH>nmExE8WAgMwwT=~fP5WQo?p57=2)?(dx zZfwh9P8Yk+@rXCv*vcdX<@LMBokwylN<|0W?jb3&N|!%pTndTv5mf%Hbt=b&-a9E> z&P=H)dJpX~$K|8QosG4`?wc|7uYh40KuZiv&PElUKINB)OD7!NfH&3anopvX85X8~ zvTQdH`4F+POg8vduN?3T`K+XcWR7^gxIbFsm~xb|DdbJ#Tc$WBpDg+5w|KP$j^1M_ zN@z)B>*U36Iy-LbXNbY=!zV^(?tv*d3;7k162c98Gj192KE$A*K})QKU5azsAw*lf zgl&qWNZVCvEmNdIiep+uCR>@KY^r0l^R<dN6S{Kx4I#QF$N4qp3LFm zcPTx!^1?$LTCtZqu$}Ve$IWk}wFEu2Oz4ep31GJKao(fa#f523R{Eo&8#c5KxV6r6tNjGUdyrBfWENAoW}GwXZk)K|>(-b##KqLZ@&JMaLLa26&b z&7my>IdSV#qiJ8SE??D~H0`OhTYGdh34H1Ki9`9Zx=5lX)9q^dsk`fIUkXy5oci!t zm}~4K+wq{gb-oq*ztEntGad^#-4l6Lq^{MeXTQ$UN!PozUj@E-pZXB=sX4uNyoM}w zzT+JIE&NJQb7BpDpmexJRJ~nwcHoEP&-0umi=SuizV&}u@Gm>s@3Ni`5Ru={vvU5hTEV0aAuWAMIiUrmb=K8%9cx_e(x$ij^2Ij z)3jR4f8>^H=z-yt&+&%G#Y+z)$2i~Pm$&P^`x+lA{@T6uNRZcMov-~}jx-v_)U4!F zvK*%^D{<*(AAOCBw`R!~il!UrBcD`#@@dgMFwpe+^MK{)dtaBA?+2dvR%!l*hv5J5 z{oLvGPwzesACtTknWcHK?Z}|#&CUtc?KkIYUk=)wuh)(9-lsfb6?y&!&G|Zq`PwwP zPRpCcIdg2!i9X^BJ60!4)JBT^-jnI|<3O(7}>OYY-NwuQ7y62`Y(W*XJpEQQV-dY&_NS77Xz zk%qyzV?|G7_%YWdzfgt;2V`VJDo10!EI!>G`1tr#>#8+PR-lVx&-2Hn9N2p!vmU1> z%FN`NB+@l(_ucC0ko(-Izg=}%dzziTe(eO+F^`^o6v^BPRTKXGIVQUkP6w=g{j@Oc zR<~AuR-N`y=gs^(-tT7?+um`eW2VkHHD^9;y{uP7zhBZ~@n&qm$7|UKzD`dLo_wyn zKK1^q5!Z}$llxF7xJWdf`Fh(Q@iU299r zQMi3;uvO;Kdardy=f`pcmUShgwj}CPMGw1#U*i>IHJNTn+$-ayyY9t5+nxqxiY{cQ zmY4NBeQEB-C2-;lhqU>OeJmH2I3AKOaZxF_mH%C8fS3QwT^&we2Tl)fau(%><}?m9 z99l+|`?KP;$zHApHP4GrWG8%R&*c8ZoiX=LuEJ$p(7~Ix$AcZ!Ux9hBlbTI+&;C=Y z`ED;Op7;!4So>}0v@em}&^{2uxW%0Jgz=f#8!9+_Z(LrMYu$qtrh{7yDmRfAd@KD; zG%|l78P%9th6V(+ihsU2d60ljme9qD)7;SE5jUM0|2wj*D1$LVL?n48y;y|7B4uih zIrfg4e(dGhoKlyQUcpm-1|`cYR3bXw-tO|9k2se$q3hS%^!FpS-XEIt;>42s4?61D=%Pe-xIb*6 zFz|ZXmfEb7A44~1+Re~b&GN*0XKwn%`!Qkrw0QInWxo+9LMz6T=alH>#O`Vhdi`kM zWx|nt4)kA4@xz0jY+YO!IgXP3xIFb^8IK;e(uq>7)pa(g8giBFXJ&P~H>|vtF*_kW z)>~cCy0m)2v%C($ZnQX6#T~CzwJI(7a#jA-+fQG&73P_6Y$Cu-?$EfuP^?z0B5MOg zy=|e6S4UUn@jB73cl~5ZKVyA^_SPs4)sQ8u10FGd2gUBFE1Z&)+;9Y8O80F3YWp{o zhUVpnKL6X>LdzH3Lwhy8e02BQQnKbpIm*<#i`VE+3Q@m4P<&T08Y%tXD#q&LJ;1IU zzGvVT+?8@KFq2sBFn{lQn4oB>_(3HJN#kC8P(S~0Uh~AUy(|1>N($%a(mM28S2!xR zcZ=Ut?`F9zPD`%GCclajWasVg?w^tMB_u0syUDCA%;8IwxP|iO_(|U+?a&4}Zqyh{ z;sq{dzQ#2CIjuuU@)}=7Czo?CVu(QkqMo1pJ&LwF{$G3F%YVi zi7g7Vsf@FWGc3Wl`qJq7DLNcS!&&RgX=3!wcKo$|ap4V*Gb-{3_XjfvKYXK7HSy2c zL<_&ZbN$c)4)xz!kk`k%Pyj7-3*R}I_fE?rvuJF7^t4HOhhZ_Tb%SJB zO|&@6aA}M1-4ceVl)|Aakzp4Tig))e#^k;;X*7!>=$e=ell$5{W*!>55X*v|>)~`a zW^p_q|2jtxRi($tq-7hhIEvE$&;!u+wvDzc+jc7nn)6L*q)fZ*8_yh8q(d&b-@K&b zw2~rx^ujv|xpuN`^=o4ZvEBYJ#9~Z`XEg=i_1G8eV!M8KkHuQ(0VjOA=>yS7)$N6K zLexQnifrxyMy)GWIVwJtWKZ_74cPHB)3Y{p%^}|V-aj=o#$GwKq+*w&vZp_eMP8`9 zbuxp!wBYNt&az#ZOalI{mFJT>Its(s`@Q`NZ*^Pl?)S<3^7$*7#FlK%O%w`0?EUW1 z|H5%%|MBQ|SXO1utVHQwSQ}RMD z>;YSSG!`@p1gF;rw=VT3Gh0S)q0aX?*FLmrFKQe#@jyZj&wD$7$~o~%bTL(~OGpR* z1O8)YjoYUW#~W29r5a@wF}8PO3y-We>2zpnM)I-X+ce~(OM^~Oty~GJ&;4Lx^)<&b zwAUm-lj1Vh{KcH4r=^BjToZdRqe(}zQoXII#A2nb@&vp-FZaEo!Xxq|S$h=Ewq#9s zM2gV~Q!RSm7(3{DtQHzD5M`26NjraGOrN3zhbF_T^)c=PTJ5@$!AR zwkWRxPMV0&I+7XQR;oX4#s6(*N;=$J9D4DLf3>ySBGWM+L#{K__c|~sVo<@Z=B|(AOj~D| z<|Ie^q1MEU_fwFD_gFglme&qEu249ghA>p$L}2hE`)>k6O8v#LC?Ah!ZHDNTS2C2; z8&*$e5^D~FBZuL#Q|ZpIrG`KY-W(3Wkee-$lzGj3@0{4f{1HPe9$^#YS`p=;eC`Yk zlzC10mZ=L5^SycxU#sg08uG!|TqZ_~t44bBtWPiVj^w=Hdds<=$sF1C>~P*}D&n|x z{*CvAGgW*0#^sq*N?IqEDQ~=crY>D1B!YkQO~nWE^xK8oU-mrTn`$k?w+W-0guK$2 z|K4P1tn?9tZfWHf*8GSm+9|b5w89al2Nljrk&S_@*tO!7GuZKt{a#w+uV01ZbH!}ktXIZwmrHW^PQrIOPf$ce+FY@�-& zc#LaHo(lcMCJS#8O8$+%L1VGUzKbGX?O&p>wzFrnnLK06asMFYA z)-=`9bgtG1CB67!#ABjOpu61!<59*PC+rU2jGpOMT%byQvm@{|4vgM6XRh=m~`vv1EM$nuPk$ETWtcC$Trmq2lH>+|T_G3?wn(F;b!;r)zG zy9*^UN_-Swn)q+)Mn;xfY;Eu~s8v7qINm!ABXtI_9toHNDm zw`-9+`?^7EjslQN| z>UlZVZ9MeK{@U65&YW&IC6yRmf13M|oCcRz4F7?vqOu>`n=LKd?>knO2{v)e>&r3} z6vf<(F0eJb@`!*ka5e2TH7_o0bI~sQyfYyC{p?iO%_EsRo02tY7!Vibsw0;_>t$RD z;H#wzd6N{|-h@!^T{)s!|>s6Q~WB!6ZTZa0Y-(CEJj8T9c9CW>$%KC|_P!?(^;8EFF(--fAYuU!e|V2$UH zX=}mL>&1Gs?mZQwn5?WFxzg|YDdkOJPdhAMmRg$HR^y;6*YaEHRjH6fk{2Ctqo z&r}~`v#~;XK%8g4R4JT(lCFvSN||f#`B_NAk6HZbD3^DDalB%0BG;waiAXQ;(i&>5 z6mCaePFxiV+x;MO9F@6qpW=WRq>*(;hdt&HdXJo~_^J4D-{KvTLG`hNsPje1M<`IY zXr|2NQfUf=c$x2A;j50SwRX7U(;_CiLvk-tB|ardp}jg~s<5Peic~uC9L_>+U`fFCAF6iE1_p zmZbj1jiWJ!2;Ilnt*kZ?0{`+|Ki#;cfdStovUn*JHnfkK^9q?8rOIipEy6n6wKt(- z(?H`d&P}YIh6@PV>fW+{2;TBi;@$&Aow@a!yL)NO&F%0Roh;nkEWS~V-HH?UO`gQ0 zt)@RTTddvBzK7;^h@c+%i3tB88VT2($!jtl0q;&6$0c&rxo32JZwLf5uSn9h22>nf_~Ex8)leWNjj@O~MQm=5PFmZf&eGlK!nc#SUuo zH8KB8pFq~VtMR~e=JHlV0Te^3ID|UtD9?V$g zWKQ*sc&6quPzxKxd=_992pD|;RZ zX7xQI`-by?dc|4+oqgTd!Oy;7zGvlWnmx=~!+RJGjNR(M)WxAjVwbJ&uNIaa)P790 zlb7vSj>hs_G}`d>_<+TYoU5jivb?Mv39m{H?hBADf1qqKoEjv1^Ykpw0^7QX$g~TA zOrm)H;3xgPi~Z+^^xmXppBE8z+~e@<9(65m(yc&KZR?C0f2ztRThU`*0=E@T%Wmd- zH;IanQpmsI^=Cl(PMtCUjz|~oUFXrNI8#t|kkUtxyr_zTt%pL=8DB%EU_#S%-*-mJ z1ucgZq$J6W|i%To7kN z@{|qRWKV1ohM|C}|1${E(#5-|@X!x?Mw`CS;Nv0dQXV)m{cX6c*>FxIV&~~YOs~t2 z?0IWo;T1LeK2IyT9d(Mma8hS@U5|dN%lhLs#md_;mo27PpR_jd?VrT#;}#qg+k0M4 zhubFneoO4D%MzUCSsY|olZ&0fIA>YugEjG7w~H&rtaRyl-zarbB}!xz^dz6fWfqzr z_7SLx=QdRs$lm6Dyq>n&@?4^8%P!;W2Et=a_a`UGo)kVTH*SM8B6zw2bNrKNV+-3) ziLy5_C1$fMM+{t{yjUH6wP-To>+WY;5fry#yk65E4ldr(_*|qQ@E#doteL?kLfa&8 zL`wgKd1%FJF)HyehpuE?-GRvdue%)W2lu~Q}2 zF~t>|<&_u|RNH6PpXasGw~scXEl!u7+l}IBpFHCTYu7@Wz&%4@&UBe1Cc_;U%7XaEf#_jRN|cg7AmES@{QIY6Mk@(Z zr3>=6JR#)w) zgazTb4anAlupk8R1S?~~_OZ6NwIf1saHfE$3+EC>hgUzQLO&@O2WDu5MEv;uQoKm`_a0~L6X^@7Z3L5`K=F?%bb z9bjf7bqqj${OIy)l=4$!zlfeBuHWShNOU6DdO~Q_UrAii(-O{G!=V7tFlzWioa8~2 z32uir#DJicH2Tp5yn}o_drL(ZTPJJSI1Gr^&PU;f7QvUq3a|(a>erOHAAzt5B!xiy zUliK^Egm#d2EqWnV8G*q42Z?0&{DKGECQ$z0inRv5e?q*;erKp!u{}gG&na%HCh@T z|NkqrKl;&NEL>n5KMEF&1jYahKn4Q}K!lTlq=89Efu|)bSX>$|z!*_DEG;O&_@p2# z79ateL>kzwv(FRfU#g*QX7o{f{-B4t6o5-1QGmgXbunOFEZD(l@TLv-VL>eegay~6 zf`Z3Mk%R!(@cJaB!2$#{0D;oNYhuA1l2CpS2iGL~fmg!79|%}PFxfC`a19DcAMkP; zg%X*t3Q!JDht1%Jx_;C^WwbvCAz{FPLjkZ&A|JMcfAGVj zV4g_E1Tewh|6qZMLLg{Kecut_q;SxW0vp1H4ZwEr??RF_unX8Gekmk-fNkPOA=%1z zn}F@%*8_*EFSo%iG5&|rfuvmj#wE?pOPGqCy0s*f;TFngt zY9h6~z%C-Se!Bi&J--?-yA7&;_Wyd8D*NF^-^5EAyql8UJRaTqwK zehx$bkHZiW;IIP&;Tq%^|D@&@75)VNa@+U`{3R^^3H)(9{5-rjGQlPB_EzwbO-i}e z-^h>l0;dy+pYLZR=GXT&Wss~4M*h*2C^VnI)XKX!O0I}wRd$?!h7X&p4t7 z!QRQ*gY+Ln4D9{D|K|YiN#E5K-uEPTso`Sd3eMdh9@CRZ@F0@591`qI94$Y;s*W1% F{{tT2wj%%l diff --git a/documentation/bench/CONV.png b/documentation/bench/CONV.png deleted file mode 100755 index dc9a4ac0a3a825e2326f4cd7f08e9603d782d7ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 9443 zcmdscc{o&k-}qEyDP)O=ES1T=B#yl_);N|S+3&0kVKA~UQQ4EFEFF|+ELk#A_7EaV z8We@GmVMv1-*cz>=X}2BvwuI^nG>N0s?*VO&>|2BI!z6f z0RnLZL?8}rQ6GVKs48YQ5eS-dn5$RyFg7*_gm?er#}0Z4CTz{~%L0ONz zJRh8R!t}zI{G7|b()3hshrc^46d5k1dP4tlosmGvb=2$P3m2@f$b95`_Ug1Tl#dU~*4G{v|V5%xb@@F5bN}EZ(d!Q(3r2b3mIG*~;q2Q9F8yAHX?NS)TBor0)gfcAb>kRH*^S4QP zQM>r-E95G?l{z}zZsB{97Qrd7MMb}Vci(Tm#1IHs`IhtX_DfxNL*MJkJn<=IKWNS* zeOh5Tr{2hq+S6O8)!L)VR=3BCsk^wh9=&oKr!BXR8-9$y%^@ytEqduX=N2XUBjn~5E>ffnDReJAI7)H(5RT^v%U!|G z6cz8OlMh|{;}~b~u?0j85A~}f{wIzw-9^~mH7}-P3Fa%NnxQg!#wr!e|CtsH1+iz$ z4}Gn3#wNi<6S)L8Z-uBIG(fNbS78nADtefqrCE5Pb!hZ z?sEC(J2}*X_w$~~<Dy)CYSHlI@WivL z8bV)UR(RJ>@JfdIqi*U+u`p*xm7ru_?CAL$3OZiQJ7siV?%~ws)y%o)O?rIYY*OqZ zvAtK!p85Q`jFXZ(W)Uo^E|dAa>+9!+UotQw$hE-p%w6eiSsX;gIN z0XE^CPZeOv<$8Dhy*f#TbZvod!)1qShaxbY=gJ?9uc&M80rk=o%DJ$VCPqg`+9yzzvjp3GD+?h88 zH`zBsPw8muMk85y%T!;PzPfr_W9Aa2u09@nVfun>qI)ZQD_3hYU}=685<0_QV^U?h z18szFyrR&l*Q>wEr=NKv(`4l32bPo9dY;MYoz26b$DRB(q%SHgYz-xch3{f=bsnTpk=4N8d_&_PF(90o}J-TMixsJU=U|e znPr;&AZsFPJj*%zZ1%D2?^$8l$}Qq{oVFu&Ypu9eg~zHbj-Py6>|4&nSM$rCRZk)$ zj($4Xn)bv`nBkMtC%;dwiOPIKV#@i?a!6T^GWBw?MmN>sp5Mwpg}?FATi?wz;-zf? zOTOVtoaW?tbbQNAesQ7XYVEj^suF_x1NXG0;H4_sYmK_+b+dKGa|N$FF5t*EecAs) z&;V`VVf@L+N#F0)Lt}^h<{WOld2|Nx4-CbvneJcR$<~?#?^m4Hk2x78yu!Rl$ulYP zu{!pmu{ZLoT#l#l>0arshh2reH7|}#Cr^Et;_PNOziWB3h7Y5<8_8%9562Vwt*V!H z?DL;5lki`2*G;W-ly3wT1!nBA9bpco2t68#d|-H$twEu|rJ**aG-gAsM{V6W-dL{i zM@fm(_-PxiQH{~7O$<7>_~-cLb$qpzo~EYIYMbfc+q>IT+8x`}zTwJ{mB|4OoX#!| zHH|w}o9f5VQMf1~Ymmv5X_J|>8CYOl5NY01?D#{gQLrtqL+e(1Dg7Iz+ajg~cUj;C*aza`(&HS)m?#9Dr5|3)*@KRfY`^@q-nPix`T2?5V{gOBvobv|z$PI(b& zQ)(0S;$zDP%nPEOLn$GD+43i2`r+74r;!NrHgoCnm5O-t0P}qFj7m%;&5w$Kn_g_| z0dp_%dyJ6-FYwb>JgQvSJ~Zcm)8+?W@4f81_1qJU5)=Os! zTg!bBERk}TY!%FtaunSMeAt$v}p^=lnJisKpy7KKg_GtcV2Ha-V450hBCLl zHBx@g(ZJupwZz)}&e9)|pw^X0@g6?Upgawd;4SUYFTF24vx!McjeE!2qq*2^QhHN* zQn*WY2pzW9Y>{^BuAc{sof>Q(+H`cqD}v+D+7O0bgk5Qs#Z@z{P?Lx}d@hA2+Y=xw}ln_^W>6e(`Gx-<$~R-nl3YIJu#; z9>8VSR#IQ$wf%Lz*2Kl!(k~dxGa6i6Uyzw^P~fgRu41I!gMDM+X%^$(UYJ@Q%19MP z-*?8bz$ix~Zx;uwEi}mCD<1mBY%QHlKAWO$DYYT9!ez(p?3m~*?6~7EJej=kZhdi( zpYZaf{pLhG>$~$W?6y!JBPx-zx_918mG!=n=#K>NqS^75drVuOLWzZVzmDS0(sUzb zGa~3DHWMr-jSH8vk-Kkk#Uk{ldB-Zw?9SW$y!GkV%dJ22#WXB0lxw_84su&G=*aH- zB#`7#h_YgzHyK(Ub0qa_S6fye`D$J?@NUj~*O41P|Aj99^iNmQ9|~Sk0r|^ild}&4 z6ZY1Iv;&>E#kqdHv$=zxF5DcLt1}8^3%mWG@CTvRYgcK0Z+5t+A(Ua~i~=VOJEKyx zH?VqPxa(!+gz%#tV$p;0{_<@8>?o zyealktQGl~|LM!Bl!oZ2Zi@6 zcCJs%zV>RG_qA7a$f(x!S5`LOxH8eO7&{Xika1W^r^7+y4>mz1r+~S=ku5u%gqz!< zvS{X`DtCeI$MD$1W20f9gFyK5BM`y&5Qr^!EBF@z;VF(lO#OjC$R#5XY*?s5UjZJp zSoE|FRri1YWoBj?8ykChd6}D=S5{U^OG~3rsDXijl#~=BBcqEKFT#An!^5|?w`XT( z7Z&DscXt^X8Tt75?CtGWR#pZF2Y&vX7#$sHYipaHo?cp7Tw7a{l$5lxvYMNln`oQJ zL}d^;30ci#{1_5aSAyJ{T zLT4rACHW=z&xoE8yC8Pg_$U)>@FAQ5h_2)`DzPR5*4NZgh;T|S4H%%oYM6MySMt3E zLaoI~3BIAiY3is_4O3FFiBOIHC|XA#*rhd5R}8)TX9t}8Mw@(sho7n8c|Jb3IP%I_ zx~x3E$F#TAnGj=xzeBG>lyhJYoJ5xh{ zj3MwcswOi0BO1%M#0rI1Q6l+V>mojy{Tt{4?UAaJ!&~^gByWD$`S>wHC)Js>kKN90 z{q}u@N)x@X7#%l3Dd(p~K-ZZ22#e6MN+8aMDd{b1)VW;OOAqOZcIieLpK)~fC;ULy zO23Ei;upQ`sP0w2xAB^sMQaSD1=-s9%c%_(e8z57i6+z6ib&nP6v$LdKhUe__?ub{ zi`scRA5Y6f86}_eqC6NgTR5Yx*y2siiZvJobl91j_wprNTlET4&;t|NQ;5TCm!}rX zJk57~-^l)GMS1zKc`zBwWw7aUeKaN8BkQ3xp10ZMbe1h@%5jjhY01*dM^t%cGL%_I z)B$XyN)bV?5(Y(uZog{X)(C4DH7j~SLS5`gL*r^qG*jlg>5THugy_9h3u~Z*ZvC(f zT5M!vXs{eR2}QDATRlFH$_t|&lax~$>y1#Gn*dbhQ&3S;5{JtHrK+U+!NI^G~DF=V#N@mZx#gZvp~0jM)#9N zGPDsD1jE2rKv$|K?jbtMrIc1BQnx)<_Bz=b+d6>) zt0Aifflq5iG*3RN{$fwss?8^I{KF!J2**QxUj; z=My+YY>fH(I02ER+xm=xbAMcd@(zSc$Igh@RaxYmeyLE&_F7oB0L9^2vZR={rhMGR zwjo#UK$8Z{+rHXygL9Gs1kV=D$G2O*J{Vv&<#aQBFYxj3IL=L;cdaN-_=BAl^?z~} zxeLH{$%M=c=lhy2gx?42hAt@2itMPtq#pwg^ zfO8!ths1Ij%OUuuK&dnMB-J1qRycK$B$Z;BbDDa99cF0;nyuPD8}Cwci5=WL0TOeo z(x*CtiFy{;q1=(g+|w`bn_thzrfOq3s-t9Qho*{sm@NSvBoH&$2<3U@gG-Jl!2}xc z#m}}}g*g5ehm6==t7n40AD8)o?8-AbqA)Pdpqj{_GV^X;s+uI_o8+N2CK@?sXIfB? zk*ve4yvjJq*jyfcGR?5?L%@?B-Kb!qZ3yvKpF5L5Z)cRA?xT-o?d;f8t+#EHGZ*Dt zgp%;@=k5Uzj8$O=Iylr8c&lyt!vu90r2!_qLs2R{UJ}sZpEWWV5ZL4Zk5J+_v#Pc7 z8Z+Kwo{^+7U=6UTy0AgPK)~|Cf_T5#0$tZ*Xz<6tKsB}&2dG|gOWnQJY1J~>`cxQD z#ipXM9Ew0YZ1Wjtf*Dd{0GBcT;840!{}KGLP@*kI22oe{?J1D$D87XP-xW&yhc2*X z|0M0-IKVEW!f*bU=u_1)x~i@n84P5c!Hd-vhsdIR89*JZV@QGxyr7vO&QN$sC5@ee zmu?DVGrZvW0okMQa;dIG0$y@yz=z~T2kS1Z4Vuax5Kmb`Xu_zJq79aOhn-RcJjFWT zN_GaSVEf5JmNF3KK?_z>;pZAE!>9MF@JnF4xZud^|MWoLL024vz_uCFc^pqJd4?|4V3!Z}wVk z2%5l!bw38$Rk%*i{AO?*#4a&Ht!m>dBk+{fh)6m@*03)JvZTjR2BYU!-wWuJ_zO&- zv64rTK$^42GSE@t?8yW+ktpzl@ji0o72I-%Uey+aQo;x#XseRogaB*^u}|8-kh_&+ z3rmpu?jmqeV17E`fQd-^lDB}dyr=NUM)iEtR?#fk^S(u4+n^7x=0_dRcr&C;Byl}9 zHr8s*Ul#W0JoI7jO1MIiZbM7r$jZfm(arQw^t{i{(hIIhcS`nD0Cu9gL6mNw*iT7t zNSQ?y8HJtd*k|w)Vs~wfr)r&j%8xJVvBs59o}-G7_(iMC%}b?Kn=%(1)&i(uu8T%U~?(^=YT@(0Z8YW zK#8|!gyW;aK5Cc(3HW@s2I^?Q!Lm~zwt^CzlAWt9PT>LrDDf|!{*>w;a>O(lfJqDuayfZMHFkl8F>DCfa2QWCLG${c zQ@`?sE-om#PQVuIl8!TpIM>;BfRKuIJMyL&R*XFq{TV7NPfXe5T?&A`1q;pH1|t%m z$mktFG^^g}#op2W`+ZnRctntG94~_0$qCK}sB%H^=SpbdIAD=R>X(}4#wPgE3PZw8 zD{Yh6tNTn~7dsesZ}BiWM0h=z7I~K&;P;86_*{>YK#6JDT{n3c_g=!)-tE9psKbTs z08;emZ+dELvzTB>fs7)tNZ|)w`tsDP(UQ%$1x9Gky^YiGIIa7qc9M*C*P@Vf7LAkb z7ICY+?^~?30oiUO{@tb!UUeuaW*gVF$^x>&XYyQ0mP@fZXZ~wsFj^jHk40~O5!xo< zzM-ZiGRT${JjJsXYzL+(0&hZzJHTwoD}RY`3t^9C%J(g&Ae9Lg=Zh&wO#^?a+DD8~ z+rSBYMw)2-z^hClby38tH#oNBOduuE3DhJuIoU(Ll52Ub;aQkuNSfIm@y6M5?QJl%ac{WN` z@W3(24BZY*cr7H$UK5IhPj06@I02p!2@SvR9L2%@`8TV{ogwINP(1(g^!0l9eCj7y zw&v4F-EdCLN;oVu4EE)pgW6ycyP-}Oa<#x;Ti=~w)xArMN>Z(`2uAy911Sz+L^4`G zIb*j)+$x0slq~^gwDeyr~D3jS$8rb1#E2i!wbAcf$GBaAqF0%U|pxDA;o&^2_pTev>=l03w@#s#*+ zeGsmB!;`I=hf>pFY-gCHubrFL$M(V_S(RkrhDl18g5Et zdotEBh6X^Z-!kNpx&gYQkoP7VHN z0>+y#DOsz$aFEh~7L5kd;lKOU6kIOh_5ka_MPDFWz;p;nmO&i;`~)rh7E7C+uwBzI z`A+pp?!-BA%blr#d$OdnTR0SN#Nr0GLI6{dj25)v?R7t!M}#96?&CsFjCwG?Bc|ic zi4ZfqFI9)~d`)i9ZAPcydC|%s)kR+h38cI6)3jSr)u&R7P6{9M*$yvI@&$9Kz7^ z1h^nH zX^#Bf6vj*G>40q-1wZ zc{i7A+v*DAvF;ETNF0-zGh+HDd=jn>h6nyZzikefxVC#kazT+5ac~Eza0nyMgwAlr z!Q-p_-v#)Aa|CWmypuimz93v9$>|9AJmmsm7rp(Pf!ZK{|D7cqd|-bM?`}4{S*_OK z(hSzXIZ1M827V#pu(<=Y-zV~Y8i_IQK@Lkb&0tfH;bVTN=?kmEFGx0R^!RdYYd0NH zMKnL*0Q(*hd=hB8dp~G>y;F%2?!74ifzSt6`sO>(hjes&ek>+_r$Lz?zq^(ZIas|t zP#L6TvXew?3=&}rTA#CUFWGR#N9|jR?lo8LD_HCl5mh~hYsFaiw{eR>xTt+I^UsST zc7*+*8-)F}`o5@$monRv$)W9gezgSdtr*<6VbINZXuIv(ovtnB7g=Gd+t+G``---8 zB`e!0b62Beo!6m~Eo&0)7-6W@Xz7VVtT~`M9(qEa25*+Sd%ZCJt%Bd!2Z6ZC3l&M2 z0Lj0-j^ z(OPP}Whn99_gd_#>4ZGd;fd?};!oL=%){NTFq59?_s#AW{w&u7jK>)QC$(C$E0_~s z$*jix2pTC1xC0rn57 zTnDM#J2gMH69|vf8aSXWI*{3=6BFkUbyz20-NCG8dvtFH{-l~Ko1Ihg^F%^YL)yw}~I0}&(Wi>rlZ>JBn@BHzra z37IZPSW-83FlY&BRFZQR^@=4&gaul=yYWoAZtd_WXf<$_$0`nA^v z!0;uR*Oc_nSvQx!xNs{9f1e*jGGGx-1j diff --git a/documentation/bench/GEMM.pdf b/documentation/bench/GEMM.pdf deleted file mode 100755 index 948e4a0ee2b60ec270ee634b83bf0ad29a7b36b2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16408 zcmb_@2RxQv^f+00jf`y7Ym@PAykzg}6@{`RvR77=5m7QS5+So;CPY?7S!I=!Qbxl} zLM8wEyy{zDy}$q0=lA)&KG(g^UFY0$&OPVc`#eX$Kvi8FC4q$r+{!ocRZgEt%vIvBy_a%}2EW9}Qz48y^QacC*63#>dCO%MFf$9$^S$CmVZbx8rchpRHc*cE%1q za0>udRUM$>5a0txXq^NoDF4h=e&$+mt3S{|A;ARyUI2tQguRyoz-|+Lgpq@{yRVlW zSPz6hfUoOd?`)&&9snRhGe~1`5_lXQBZY(Ga2N?J4vj+N03d`4SR$Cm8;;)`r{U%9 z>jD2wt<7hIF+kbN#?9LUf@*gPi~}{Q4t~yd4n`Wv0ADyl$<56j%wzE{^r2aQVXx!h zcHGAaj)GWX1V^Yly8?oN+!at!)xpl)-T_*-w~v>D%}H25W`jl7W$wFlFIUV4(TCGi z!d|vgtUcd4uqbpsQUVrrV!`l)~AKY*PpM=yGbo*<5v?ux8Xjk^3 z8~e=6q>xLZQ4L3K(6JM_*AP&QakktW3TZ2m8g05NUCal;{MwOCc$nII>Kh%dI^c@AymwC z2R^L&)ub`yP6RKOz0%}qSJTTpB2IbPAhVSFglTX^|7$ig<_4Plss30k4@29j!Ps=` z;V1Kw{!~ z{s~!e<~ZM}@1e)pMd?C4aRT=ubQNcLX~yog@CZ5&vP+YTiABX-KOqQOopH#S*KPh5 zg7B|Q=R6A?tKpvoWnCMkuou_gm1+}vkd z&hxFDscA=WoHooc(}#M# z(Ee?%wnMJ|&We{db>$vANx3Kr&(G1@SFC0)T$&&6S6%Xu1(ZdH4U#K3pw~!wwJg8dY2&kQUD5>1WIHFL}y*JVF68Y4v4C*N( zL*7~2vT}$OXCq)w<^4{)j`<2^91pYSaAJ8m3uaU}rdv9V**|zR;#1a*nuPgpZ-jjf zt?g{|cG_xU1@2H}MD}$AR@dw}IaqlPJL~t#oIM{`9ngdGy_43~*BNyC!jAb&?wNuY zQ+$-_m*)C)MuuKbzm>DV=KM%__rTvGz|JOigk^tA@r`n^Q1vly3Z?+Q*51LB!|{uJy2e{>^HDcaGeqe86_g#c9(q4Onfq`?AwHK< z4qdkHq_~290~1MUvZ5`vrkt0*(D!g|?A8y4m<{O{A?!bV?)pnI9b?ROc-S9_I-3``UUo zR?BKI;o~{qy7uDk2wgC`Fmu+WHmi~EM(xbMf`Z+!Lf0BXo-l9~Rb_=&9S(rdjM4vb%4 zoKn{BFgeSjW+EeoysK&Yz2$mu)%}(a^Bb|=;AQ3TpeCeFK`eBo@euKs%uP8%v-RYkTsMkf1E9B4GE4-0)di-4S z@-25Vg(v=shm_2zvfAT9;}%5Qg_h*X_rr9-N|8;`;VA?x~E_0`pak-V5e;t%n=x<&LfN zj;_;SjxqSbb47}5ccEZ9Wy5QjD@${eR~kc?s^9jt9;2KgOKvqo%x!#I`ugLZ+1=K! zPp5h`#Y`JV%P^ztfn&F?(czRI%f~moYI+%#z>LO~A`+jGQqECYk4OvmQ@M%fk-ea6 zI#AbQxUYdwk441jk+B{XX$w;^%lRlQrgm>FRo9}=uc_bS##`3|Z)cxQh1$JW+O?Oa z)tw8^6e2DUHwYd3qF?kr%V#CH@MUuOh+W4W?P}KLNG#5z|HAdw$iQKTuT#_dufE%@ zcZFX4x)}N9WOTuua631w!60Qd=Y{FI%Eh7Or%y|TXgc<@ny;+*UP+?y(ta?vxDqDH zaknHe*z@#8>F788kJ(Z4$9^EY=ZpvW16^|$_C((;X{-wkiJy6>u>VSVC|%5r#`bD={f!5lj9*L z)p)1&D&=Fxgf1St_+ijLlIfA_)RWF98S=ILaZKcwpNv$yk>8MAAZE9WZwMF1uVD?8va zTlH%1{j9~;-;~aVV(cyHyYAkq-eue5A*yt`#7-s;p_XC?EJ!fPI0>+}_1`45mJGri z_fl7Nu~6I3%3VZ_&-V1B4%og7(Qufe)?8^wyOla}+#qQ98U0&r*7yaoXPsZZUXPP` z!n`UgurIje_3~-AS1-8H58cf+$c|T^lZL$IpPt%JznS3HL9QMeD~b8#Ors$8cdP#= z_20f9MzYEP8P>X6MS_GgdB+S9XpkGeWnwh&mH*SL#2Iwasi1b=I}T$l+Px#(KI>3} z?}p3C$Feqz8_MRL$vaOKKkOBGuPra5v#VO7v7DFYmao9fqYQYK=zLCkMfsCwZ>&8y z1&@ZYN?E^ij_1VrjGrlxNKp=N6Ie(O_6>S>U!No31c#R&Iit!$Yw8nqth(k^2eK3N zNZ)LPw$6!9xO($F=WdMfn!2e-jY>=TYz{V=@#s$_C=QRf&Q}HB;##ybCFQXn6U_vk=hI#?SUV7D z(s;b9{{C<#BV0QLmXws(AwctkHg!ybd)m%L;Ub^ET%fgSTm%zo*KX!Km%}pmS~KFm zA1EMIc*E#*+x#xK9g8Mio#%6wd6lJ8ot$vTw9BzDt3xrYp-PL_G;W2gn46fxpUiS= zdBnvVTnh6Gf38W3uVlSfg!pN_SH4C)w4pWNeEgHl^Iwk^OBM6CsNG znFFoAZaiH*_2WAkcXgNv5!U>9&G3^o_-*Vt-FPLYCiqUW`FdV`1J%bHK7suk$8HSL zH#F((h+$Pb^BH$S8#z~lW^pkPNlQ*`GDTX_JfF*b^aHJ>b2Vxp=q}mWibc<}{aW9? zcm|T(SP!IpxvQU>*PIB3&_%&kg^AsUD%Lf;cTyEa`|;Io%hx0yT~1}u7+HOBS9!QI zp#`3U5GfR3{nD;YJ5FV>>;A+gQY@X!ckjyUJcd+jL9F#ncP&A-p?ZvcOH9s%R;4Q@?EbpD#cs3oU^-_H zNma;H!d(LdnBTOMsFIJcCG=S5#g(Tdw0Z3o{QbBxrT~rqkLitmR#PkXZ0-<$c|sb!7du_!<@pm>C^+Q5dCPy_6y&XB%=? zaU*|;-wmWjCaLh z-od`XcQOGssq$o{40?EAwsIpJqK>`az{;5tK)S3(#fTRxaSt|7}C$jI`Y- z02lg1Y83MqbiJ~Q$L3z1wz$%5QUXJeRilzqX^RCEJ|v#Q=!7nNS-mm*)ZlvJ_!Q5a zgk-_0x!19}sApbJGHlEBxgR1rt4z#u4!z7%S&S5B-bEt*I-FwsKKI5VTT%1VvPiGi zAp0HG2M660+f|-$XdYH#wE1?4<=u6IQrGc$VYu1g&Bz=d+jDf6Nks17po>W>8o3r7 zk&;-lr++c-`hrEXRg8^+g~cd&fa7C^k+DnhjF{Oc9G-_6T@>Wr=NY2!7;@~=Js!OH z5^c2f1WeoC`SOv<@jhiCYyK&%v}q5%@vKoL8q{ac(#!g;D`_H!E-g^VJ|ZP+SRYG_ z?+bb@7H2s+qa(EN#JP~0`PTitw(DmVT(z%Qc8Er+?JlYp-WmE-iPFS}GrZ^7hNS(7qhMRO zx|)K$=w13~w2@9xmc@pXT(P9y{$Q=;heXqgmI;mM54&~OPDf~+I!2MhaFOi?ecwoDd7=vc?oT0in$GdLhB4;o&+;*VYiG5UxpX6WLDSfI z5_KN#kP*`6Hz@I99ARyVU(ta_hqYl2%goFUb4@F4H(@SX#Do#?GlxpU0@gy zMiBATrBMIPhe~9%0nz}8MH(ZqQbtHANh2f*XQ+vq*g+khR%?lD=0R)-f+aB4?+Go^ewi8>bl#RZh;N~vvY?ZO&?4!uS!lg&n~8a)Q2lF zU2W0t*3pUPXVmW0l8Y@1JxaB5EwthKl7;>EJiD{~7Ku6(S2^cW@{*sGnPhWL?8UxJ zK9rsA=RhSEFJ+%E==*hf;4PIlG9SV8MCsgv>wwc zqVYGuFhmXlD9LRoO}PYZrdBwu%1WlPM$_u)ET6iAz!jmp>RQGg@wq9)mN$=8=uGK@ zXv+Ln{smW-h#=$$qgTWPxo%X&S$H%7^JnRv#Ii(_}t1f-_XYSir5;9NrBqA6Q zaig)@*!GwT>`#oME)&6sHVkX-Os}IxZ^lWIuSz>X1TYc*Q+hjlB)*x21Vv6UU#v{c zqxregUB7?BS8FB`wdbSGD14&fU>sCDz4|gq35X2M$}}&Sf8NTKh~`Fd!Kjj zyT@amJQcB)JwCfmlBvN_M+7q>KG}9OAqIyt`^lsN^?xzR!O5B4aWd>MYr7()CP$_X z$@S(8Z|gv>ntqbvafTyAP$1$(NN>Y8m%DI}5nOqwKvy11)KMW`#hL4)VG7QVRb=#P zs3mFpp3JuNhD#~UsUgZF+{0iz=lMC-zVx=`9!hU3El~Rqfz`^CGJL0Z{@^s(+t2g? z@^!v@0*RnU%(mkGTa0n>htt< z-6dtigc9UqAIA_+rwRH)^lV3+4wlBg>rx?K?Eft_7%Jm-UdzvczQC_&YAVt zHAVijCz10b!d5?CMUm#4qmE3qgzjPPSV*Kew&PXo-8dF5$JlxElE^`N*F8lNnKuHW zESsOul*e3U&FqqtiWy|F*Hm@3w{UyM6f3D#I5Ds5tt~bA<+|3u$s-(Tfm^Q7t=-V}rD!BZwGPplFp!~5t&1IbQrfw@_kiT}gkHFk zk>wc|hlJVpsd;2fNYq_1ML)CLOabSP{tZml;uLq(@n4ZD|>LxrTI%JA$ZS?Y2!_3RU{P$?id`OOe z)PmILUs2T2y;2w%{{152)Cnbn^r?84%PATzEXLU5Y5Of`&Mb#tlga-|Hn5>n*r4#@ zvm6o64@1QKZu446JJCW3I@!M4ar?xNo>Q3_LlZwnch20r7S76)z$)GOK%3Su-m7ij zsWXc*;f)_BoxjVEMGl@RM7H=jEA+XkYQNx}CadTrRvE_&aD^ zUq{wBI*Xl8X-EvNm0hFH5~qpTD|=l0RKj>b2^%7`A^s`)VsWY|1-hDg%33y^x=@&x zq464jZOlEV55gB5+cQQZqY`;nUdm-=ey(fGRHZmdJuWuPbb%xA5n4!=?%=f(ljLbO z4PPQXGb9hz7!S-^m6yIcE@<;&s;troP1oz`fwt@;K0Ro;pdwBc0ZlA)J?x7Jif1FB8KJZ>c;o6zCMKnsgUx&1rbZc&`WN|{$a2t zNQ1|?9ZZf%nonE-@sHN z4S|X$vBh{}D7^W!Bf*6}9}VW3qC_f%IXt@S@neQZXi1b25{sp%(CLZKu|>u!9@b7! z!W*pPEuBg9O_&fwupwfDw{uj%RAa$Wr3`8O7>AWKK~`W7A*C<|&`|{zZVrwr<>08& z&GZ0`l~S>NpTVK*aLFA0)~m*va=NLzYy2P?$zl~N9}#qjSZ2v>7@wc$V9hqskp!gL zMCT03yIHKvm7Yy!PlN-dEjvdByu3TCJzQX%Hh}~0F#y@Y-m@$NL{K4OiZRG-2#j2! zY$7<7s03X-!`2zMnl!OwFO;(KV^$vh@KBgm42dwpH@%|Z0Nh!SdgICXxUKA@|Ff)~ z?xs7c2<9%4z8uj9FVxm5(&OtYWZFtvkykA~7iFH0(oM>k-mlGZmnN(APR#V>kO#Y@ zutik9@!BnPFNd5C!+Jb?H)I<$M*MLH*sg%jMA$B}8L~{R@#yD9zd!gU_<%5vWe`%G zm1)BGiQ#)4{0_IlOkZs&)x=5mA!yraB1 z;2%a}ql~6(&2F5gm30?GjEHq6chjKN3w5)k13E+FGd`b6nGRSdbzSn7B|>;24hR~! z>D#!*&~gKWum8xh6i)I+qOn6se|DpEPd~M_wUc&c4BV?a!EpVCCT#c*84>XnC9 zi}wz&?4`bYM#zx-XjIS$wS+r+>bi7y@WRm}_#{q!G@NbZ^MY!pb*kdKrWwvbn=8&j zU!x-WE@A8kw}5(w5%k*KE!yKZ; zS7(s|o!l%UAr*OXcp1Hkm}%RRHi^c0n~>%LRBj8m8HN61dxOhz^Mcp*uzh|t6#q&K zd;aJS^d)PXtNc^FhxJy6Tr@fi&-0>$u5(PF8T&JD+ro0F+hk;6`&lSbKX^ZSDZk-l zWuZ%iR>V9t>~@?HgM0BR;No76f^Dbux>!(_zh=s4qufDtk4T;)3W-OU(#|z$)0QW@ zztK=vlA~#4i+qE5(8m_3)EjYKF>{%NAw3}Kxw_jsKabw}R!kgz2V2__kAi{pFP6_H zGO2_ZdQT=V#wpinnXq`8WP9o?mQ?h`KMl{n<61dzX@)00LR(8fFg4V8|3;OF#r}8C z)tJjPC&H6-vEyP2Dv7q;*OCoakasS8_N=(1;rsaENG4u9ePph1RwsCuxnR2Cq{!82 zw7gL9lBHhAT;-l86S?`O-!j+7E?xe_duKFf=)IYMVS4;fVM5J;jkxT^A7mG<9VUFk z`R}*V;QiuX_lqb=%ztl8wS-zpkKrP~!n zLy<37rlLc|Vl+Keru6g-&ph+S{{7Q#Hlz|IbBb$5`xXZ;ju?K($ho**)Mf99=Z!n> z@g`S?Sn4@s)&)^jwHy~U`xZi0G%Z8Kkpt(@zt$?bjXN_8`YvP_v7}5)iIj8t6I!Stq(%fR+<47NvoX2sE zL55{)Hh03SV6Jyf!(;i_vPSt_X3v8r`$_r;$7PYZ;46C!tzO!n?zeLrb~(-RrKGIt zX8vTvXs;FK8u?r?_v|`FvA0YFk(Cm0hR~?(cpwJ%7{O~Ypsi>odV^=B`nvMFew^gb zysj*%%#@s=^DsGD=27BJ5&b>#+P2a4(UWfWk^=kH5J_bME01A|Au&_CS6&WXu2jLM zI^jcUF#Aim_6%rCW_avpuDkZ474wzuqz~Dl=)L6@@W2h1+YBpS$|R>-FR=sMa;C#kzPF$iP%R)K7eh^|5KI=v28r zM2?dqbGbuudn|BG%1OhABW#8_<}Eg#TV-RorsE@7bgU7PZy<0)95ggW^1s!Qu7S3j zDi7_bcdX@WEq)%-UX>x!=^vxzttPYkqu5Vt?s{Kgy7!~8t#8cCr+nSiN9a>5MU(oY z8-}zy+%_I}DplQ$yJ|bdG~Cw0e_#^J$0hVsY~Mv$eJ;ny_6PBAuS#%OXS0&xEK+*H z@h4@Z6zdW=@0L`K*&EREeo*eAN|MMde3E(&pH*ai&|mOQ0+*%yP!5^rkp@_;-T5T< z2i%8qnrt5Hcn%*U9WHuUakvw%6~)sVk{6T=i!XX~N|dE#S5gk+a@5c@%9Ps3+~Uc^ z?|Yu_KvGo4`M#$;7+yls{Azz;NFynKyp=H#nIhs`p|<1j>n7;ZEAud%UCGR4L*^`z zU0iaqe7}CzZ(;4tna|ohDuu0Ndo1*c|dT1|Q`^;^XD*qvB-a1x6usY<`x}I2;TC zbyjuow)1lK@NxHoqo56H1VBFodeJqoV^9V*o-KEa1>v01$Iv2VQppLl)@Zrc!118@ z=d%|99v--`HgG(+54M9N>;V>V6mf+L(k2{gLE5w0Kr0>E{HA;1bezyqlEg2GQHgRKNsY~>+XovY1pZ$QOuaZ}0L4vHwjqXGWV z)X;-CK|86~cxY}?6bVC^|0)8qfk+x>J0-W{t_}cWgt3pq$wN><*Cv3l6d-L_^q;{j zzwDp|ToQ@=zeuqE?K~KiG#m@$6${>*NP~~4Br%dOJn&kiz^8I(APgAr`3{tD08gkL z>W={?n~-Cqp#J~A1pC#F0ezta&G9SYFeo4n5CNpIAc2YS(r_stjgmm#aR9g!lyGP` z8jpj41mso{gzbPyzy_28J3$IuZc8FT8Oot{3`pPuOeruPniDUL2l-|lloJ+%k%B=v zgaouGC`kgrg2v*&!Z+steF0oT8G{9ckR~LY6rPad!OIs(P%nu>g9UEF#R9k>nh3yv z#$n+&kV^w%K{+9TI3tJ{5D}mpf=^%?L_mN8ASeuii34K@MEQjrloR9!f`o-0kPwL= zw4tRzIY zEElBz0bL1Y$PA#MV-M)GX$=S;NdhB7FbaU{geHP*KyPYQf{6f9Kxo)hYG}yVf*AHB&`kwsQUdb7SyG0he*#wlJlL#Lg%e)Mf_~~iqBiTaAmc@tNgK%iW}OZk z5P>kCF4$+AB|V^4HcR?&utf;125`VQLdh8HE2snmMu>153}76BV}HW<(-;9oOBnGl zgM|GF*anUz0EPj>1Wr4Ij`l$JZI*y_{R!9!va^I%XE@-ZQ1tWZc;1EUtoA#5rP@iACRR_CglMKJxq32)iN~k|QgMZ>I5h_Zc z6T;30j)F`ILJ9ppb^|A?-`z0(V>hG(a4JAYC&;M8gtoIiVLPAcP@JSmV4_9{|S7%!| zS^~INVsIxP9}gJ>;&+3DyVr3M7y`Ij_P%x?uJX?@9`=rKTgY#5vljur4>YT@yPGN$ zy(+9KgGQpHkT|3ixEGN|V#JYBLP(^L2;`9b#3{hb!4U>@rW6bqt)D+&lW;(U9pV4f zL7Nr&!QFn=p|E&hOt;oaK{C0eAEd*!*5RT3zNH@?+7?^uz`N#u*MTSC=xwdT0Jm&w z9U8fHJ|r5%$ZqL}!k{4s4Ep_E8wUAz9r&94ud!IP7`VUSf@}J%W04pPq*nd~ z7m2|Fy}Y#!ux)D{7CLor>4(Kg{e=rw68~2n4hejcEn{&wXzxS6e`tWj;UTT^?>b53 zUo=2Uf>$bk^@Dthtz)Hu4ct;E1v$uD>(Iat+FFMpG8XrjynqkXfc@P%RvJ?2(C_!! zr0{>qBvKj;oW(8uq=AtCRfk9Z1p|+S^zOf5;8B0m3QvURc)+B;#!5rl`(N{+kYLCC zRfi=~Ck5V5Z5a#vnXNQMp-_MCRTTO!c>&%2-U}$nzj%&9;UV+)Z=BKSzhU6O=Y+re z`FPnlyE=FgBKVD+108@Kf+LLF-66e6FjZP^j=%~5YHXgFynSrEdCe>R@@?M`Vux006F{f{Z2rfB^;o z(3!9?P$StFDeC|L4!y0kw7RXiIRM}_my~3!4$~*;**_N93YB;!6|OG*k(nVV@u|V9 z@!|rBO}M(OLqt6~%ZCVF*{2#3-?f=4bz~~aU%fJu6zHYREFsq=c%Ddd0w42Un#I4) zXgI&w5VzhFJ^zAF0(d8=_pG@Z$e8!I8NQPeEcspKAf)}mmWe`5P2eETNBl|Vx|j2V zi*s5Tnf$}#mG<%ebp-Y}T2N)O9uJGCWhy=gm#D}cONO!4cgFAz#f+%SR`LCLy(U9)~J<;Dm@?zKc=j zPW_-w=?*JT-U`7X@8s71P|^Xa0y_SK$H&ddi@9WdIE}QAdnxzl&#<-rQp=hT;71jJ z#<$h?R}uia2avcr^i*>!{F>+o0PP*{V5TmKOdf?j#FRjT(qIq;Gmm06e#1^dQ-1xJ zBIxk}pp^!@1jFws#RF|Io1xAb|x`z5aZ}0a0u9i$_Q=dyQ2%qlMtW; zHsd!I8ub0|BAAkT5l_ZwNEL82!=W^E5xDx&c=64XPEsMGbT&^uhuwvVeaBXLw^fc_f`;sHNWo}oZ#^&&Bm;-OUH0=5K#A)B zF5_K1@*}4T3q4@*B1Xd|ctg`4!QL)cAA$8OTIMCX1&?M#BHgJ1%XrMm%k!r%xkLP9 z-l+2u5#~iz$_VA$sQYO#+wl~=&<+K~ZcChI?`3tX(@v7`lCs53OB!Z+Z%ROUL641t z*yRPXe~!(Jc5LFD_&#~q@vWTG>V>t+Q%1Zh0!Xw_gl&gW_Y<>jt|~oNorlFCHSIpH zJIc3ypgS;}VV*tnz!na^4We5~bdeL3H{xW+Ne*Y};ag>233Gwfdp8T3P&x;nf0JJp zSXMs3y>Pmryul1er;&V3P=ek1Ua*~C>OoeLOAM!ymooh`?T8H-4l`C)mWp>?AG|^Y zAum)_)S`jJFKcB> z3`(RO6n6L?sA(WvULjuzCA#&K_EYvp3z`^7e_^3wY}IczxcPDsaZ!S)+OFO%O{!oAWTh*d=p(7n@bJDdI!X5)qIfCPAX8 z$q>VE%TUEo%)p`Wh!k5okF>e5*U4ZLi^5602)n+*3fU zz7NTmcx{VmvopClnPR3`{!`! zYYql7WxI2mVv^J-*yK$st*z|s2Lj^+R6IfIWIa{lvVP|$BW4QE4NO(V^#Z>JWZaQp z5QboeJPZMbX-SiGh;%r0w8d1%T*ytyo$Ds(g39JAEA7|G%_&zER;9b~Rc#pe7==}R zRK(Igr#@0KR7DI;4oVH#4a)t1)&iSS{5vQdovd5CZkjLUA48&`Q4_?0`r8KGhWv)$ zQnS(zMpNZ>^GaRJgGIwiHbd2THDV5I)f%-pRfTolPQ`ZQZ+_Yc_l>P|x`JIyev-3DPyeILo{|CO5a!Mxf$DyO%v(>7D~!|N+iyoN=;Nhy)eDa z=S{OnOHqr77rD=GpYzL6zv*k<>*n*CW41&86~tEZubEY~L;OSMqm0Mp_^)uEKHSD~ zdelrwPfkQW`8*o;4BzQVy76kc)t}FiPhs)z;<4=Y?W<-Mjet$;Zx%vN6HnhDN8V>k zV@Rb*leCMpZ+BRAc>m)2MJtvZ81#p^oj;q^3^dMM$?L}5&s@Z7$2`aEV|oIOR#ovH zRACjUWj<5=^47?4-l$prrsAUOebD<_86z{Bl?Hk{O+QWNN;5Z)qt_q6N|GNqr)b>+ zixifbZB#j*M;FWOJ1k<&hsRuXA=q1zMQ@ZXCgoBtIoX&Wo*G)7h>oBl$L{)?UkO_Gy84&PDUoaK`u0_LFMhG z*~OZrZffgN+A32nHOB6SF@8g3pBq95u)^_Xsq9L%3)qTop@L@z9Uw$wtWV6<5nT#h zs=NvBg}@1=CAFhnq9d!_jUVe)%0d14;UZ&Ie!kV^Mgnm?LyqN@OmAcpa97QvX1jK} zhHLHvI2b~TFu8knH4rjUhVUIOAE{2)7B`##+jHy$f%u^jAahWtv7~XxZ=>N;sG&#Q zpQ3YQ#c}`dVh#n9R}BjFDS@tsn!|as15C-*WiqCu`}#}AYj(?1*DWS37&As+7wY%C zZtbWM`{TI9@)r1^_@Q004>0mqrZ{000f5uA(LT2USH34`*j*H#Ifo? zF)_)=$Sf=@q^727YislH@Ss>lL_`n}5YW=nT3J~Q4h~*iT%4VqU0+{sZEY&14;!{2ggN0|1Iiy-&O#~_1TM|8dy+8RavZG53oqsa4~S?wg3Rm z2t^r5Ew8yLD4#z!XX?PSkmCJgtiXNGgggU~ zYR)Tgva1LBdFOBOhVS6O8}CYWd7#q-uP+HG*mvcadhsp?9MxSjL*PZC3X*SMfZq>y9Uga@AtFyY@sKW?5_;+U?tPU0huoQXn% z?R;?CPy8f_Z~NjyVT7)XmDl#N|n(<^idoljQ>|1$1S4wUldK* zw(F`8@5c}klE8ypdeCjjd*^{#LhdpMeFlzP0Ep37>4pD?7buxZzMw<#Lp>3~=*>hi z2G(;Tu1(DJBp=WvMmKH1BF>cB)EOjvORU+pt5f={1L`62Z z9P7#Itc;N^+2yT*(AxE_#GOm&j@$#u3=swuaak1<=5ICk8zE04--s*@lYglFLNyD6 ze3+!YmS1CfGYjdQ6duJ90Y-wCJ{=4k?_aipPE~GyILe94>zaI+UqEu4ra}HeeBiFv162Eh;M6E8!VeyFA^Mvr) zhysQS#fV;Fy)-+41L`vy!7rn_;`9*W5AjiH&o%uy0j4=Iw)>UC=18S!Y&aNTQNO)b zZ2FZr%IuvC?~oYESo+CSbaL2INf_$oYdIl6sjDu?U)u80_J|RRj*s!5AmJkKr#x_O zy|f4tNlz>vVpIENTEKWnS>8O!EzG7M^N;bATZG;CK($1)hG@x zI!FV>6%(2*0UX8NlnDlB5{yQo!zJkc*PL*;4La?x5jmL9McKa6PFh3m1k7MGANohsMinvOb_YA42Lyk32*qc%s`OEn4(uUlYI)ZE zQI~J9h-`(M?!N~NZ8A(Z zMhKd{q4!m_Q_p^}ncIgv;ej}P6B;Rl3E=9}Gmy&vTP4(mz_WjJKnHb+{>GI6;zEnQ zPat_Id5DutT{ZvdS0U-cXr;O-)YSj2@1>d7yl4Gd?sOQKbhPl{NEw9Y>vC)s#4;0t zYa)#pbsfY9*`nI%roapyiVtn5^!)$@&F>k-)nq3C$d*bG!C|Y}*7Qay-54l7nuxzI z`B!okQVVx8HyF(4g$B?6PX9&l%jag@@vVtuqE)idLXg^5h=`&Om_5Ha7ToaYqQ({% z#S}5aWbw;k`|`FT>i@*^a)pZ^A-F?@?jL3z@UARU5 znw)^%?hxp|?KA;$q(}U|jl($hJHjD<2y>mIdH**WP$s|jZ%=P!%`=fgJVB{DGpe|p z<}~_ZBTxt9FGeOtK;p9w^*W5Kzm7*OJ+8~l90fet3(147xVZszXrqQxD+($TLtsW^ zsyxpn3~mR`cIqIM7e_sr-3nSIp8cpt+kKDuWvEkA&$jqg4p?qMpTs5e0Wivvd% z3`y6YMTiEP0-)0Jz1shNE?>I$2S{MFWlG*7eFOSM!mA>I0DF1B4<3_QWkP@=P2j12 z)`|5D(`#-1e~`UT<@X5xx6hHUkQ6Sq1;F35A<_n}-?KnRdF>dfWr+%T)Y<{SU&(^} zwXaqON4In&5RcvR&eNQ60+$;j!R}XD254Cmr9ohlYIUE{KGMG~hVW&=*xE+>aPCu6 z?Tr)=Se841_;BmBIf*I;3CnUJ-dmE0VstnN?cTKiNP555%pN_4f4Se8ECx8BZui^L z|Aa*8vWR)7X#CiBU^oh)J*s)0-fzEsOen-2-tWEt8PR@{`<2XP5wF0Kv4HzorX+B4En$fT zdE#C(@8`KNIUDj2o(WS9$b#YCV-CIy1@qqnDYQ-kn!N{dEzxw-0^=S?$X}v0-1dM* z?0>0&2&zz_{fA_A-TiYV8nh?{MUHBdg^B&EO$@p{c7D~JX{?F}Qq3ZS+C>>|CN9Nr z+02X=_GrHYj~23__Dq*eOVH*0lA@q{058LaG7s+GqLOj&e%y~vHIvnyziO=3xIfa9 zIwJc=J1|83K;+GK1gcG1=T329(d75E@lP zyI)wS^WURQEsRYl9#nH40shl}QvU3|FXOENkA8suckQ9(@cpgKpmu*_Xmj#!Z8lo? zw>Zm${H@L;5r6Bsci?{tI`}^o-T&Cc>g;b|@}d0BF&DIs2b=5yi-pLJ@({ zLLXGgWsNF#-+@U!z5`Q1w+l`(VLX3>BT7LMh!1kt`kQkw;h7V%xtTC5IEcCB=jJ5! z6MeycRA#=H!W{h1rG+x8R(=!&W;j`&`Eg(3WkS$K`$B^*QFjyfS^!BNmL#*HmKAD> z+c)cN-&@X`)BRc95LDd_O5%>m7Budj#Q{ABz?FXSt5%VqMxVPphTmO~OOahPO)**D zYbiS~g%D>N+Naz~thLvmm?_qxy3}dwLMFx{*6v`fE07x4H^&UMFl_~$5 z?nI3H)ll7g)6e%QPzfI-1kpfzCP8=3hIs2`!65gDFmG#BNn}D?_1aj5a8Y7M`|zMe z-5A?it>pJ>rx10-x>vLabw)rL`CoS%C%57@PuHCLJuy(FCbaU%v1EB!2uk)D{e$y5|eI_p;SrCnli|F@=ClRQ2)K z6{s!hf4442$wYGESJYBKaOY$2r+03#Y@W1();*5lEE3u8?{O!RQSNKlJ_%pa zRB7-Qg)=&Ad!rr0{xupkKHr=U{wT-4tQrfVj; z^Fx7ryBlp|e0RR=0^K)v492S?ySBH*ZT&5SyBmR8^D$f{e_$IkDQRt-yZLr{e`ZV{ z-__k*&-_7DpF}tGg`bY>FuhqCX>rIoIcm66hA=R#UYC@Cw@Ci%|2bwkM2_>dzFJb) zVqP*|YU*~Pa9b^xVNKHl|PQH zN-|nY3MQ(dF(#LCO1iGE&iOr7o=J&&{qci`Z5XcMz-v~#`rv5nBC*4EL4>XKmlcAIrivyeNz%m!DE$cJf zIDU)K*`Ur_oTgslAS@#}6UDmv-b;M?sePNcx*fM+ zdooRi{dqMBW)(3%lpLP_az6lM=@Cbyd`qLM(9d#XHvCX* zg~9BVBXW5j=k6y=F0amUScG(h8DP4rM*uw-6I0ylI9z2VxAuosK}lbphiP~ZUY6xI z>*!6})0LD-vMk2ufLD9-p7XxRsVAw}Aa4u>x7dh0FM6Upb26J4^H^+uAR_$ZR$AV= ziN~U=vekA^nIT%p7vHaO-bD^M{sOA_q739z-K#)kXU_Am05^m%A(Zt#^?A4)?EM2UxP;$>HtkJ8TE)M~qbwmbB)6lQ%ozslw@NgOOO zCwq7_`C4eRSozVJ#V0xSyqB+A-teY!LP9!{{cRc+2fx^w;7(rOw8j6a%5~q4!e5iU z;))9ECN!=pw_0WDl0+5;5usSl*fL{T`Ay^h2&!X?;(Jl`aU`vOth1f(XDd^Q=e2t#MvKKqLE6rose6O}%+J2jqQn6HA6;aT-ck-osi6d4j`DrX+_`N?zU*2v78`Js8m#=eFd|+!Kc3a9wo&>0p7im0 z^M-`;u;AG=O-XgJGrmE|QjOEAf#T;v>Fol1&sr}`JYeZBmlOrHTr+x-z4R-ySmR8j z=iNV_c0uCD+bQr`(zoX5+4L86){VzTUEcLREL=kqrxMP9E6`e1#)3yCGRx-iupac$ zQ$mO4bK>QanSt!&`|t!OC1=Y;1XRl^A$CFqlV7gRLiNO1IpP;Sc;kgKeVT~M*h{b_fNvu0Rh zD-}$j#*4v)XfqzsjmfQ(^WhTl;;@jaF=3PGN}epH2u{roAWnAZ6NF$xyi^JLubT#+ zzi*}#O+p?`j9U6dCU=qdA8qn=$|3|6%%352#jLU)`|FsswDyv+%^uUnTr0X-g=o38 zf4|6ntF5=EPb=4KDZ)A1$-*HX|8T=%1C+m^A%B#=J*tPpf(DPHydx_jXYmcl19r1TRv&h&~sd6vdh?dRUm*fS&NPgv(bBNL73|mMH`)M zJB}uXZzL{stq8w(qp!4zmCHKOsL*mk;Uv)f4lL%(0fR=sK5V8NX?4b*B4$NMS5r8| z+rd*bvSTz!bGEMJ0;)1EF$lKJH~X^y{1k_p)NBtGfwd%i08 zD!oAKnPk98biY!e%}p&3U*hmbb@Ke{;#X*adV(5-`g&}uOxtv#kC?ZT2e-Mlpj?vu zA~~{DMN?1B1b>lInL0#GcF}UErg<2SfD6H*-d?y9X&4u-ueb%260l-VMGgCk$OsP5 zSzhF7CMGtq5xn0jcOiU03E`#z;-UEqmFS4SYqotxY{i)Lvpf2Cjq|XoQ@MM$Tu6c~ z?@+q|XS$TUq@_W2f}%ox=@+&5&nd`o(A33BJc*o0$6;5wav6JnUhmr#GJl{+olKez zWv$hi?2H&3&aEXxfOU49pnZPyetWsgw^=qsYx{BNx|2euzR0DdFf&Lc`EN`F7`@(D zOt!~d4Yu_Q9>?#lz4IZ9&N|felFDy=p}`{5&5K*?D8eA{sK1Fznx8eb*q)m~K+?`G zB_r4BCp}MSSM;gG+|msEqeU%lX;k`@GK6(ux(ZSTp@<}#f{^UXlOz>qO!luORPT^% zDD$@@1J&iDTKyY!hnLz+c+I{e$Ojk-6V z{LHZ@8>X%MsyCNQEUJ9HS8^XLyHu%6gp={M!bB+J-`0DU{#r#N1M*SEBa^~QR;^`o zV+H49orFw@RN}|09Hq4R^S`J5<(+IY*D&r5ycFPe@AVGkP;kpQN$O-Vs9ZBi4-%tp ziBA)!>$H^syB;GP1!LYaHf(d3;N@hJTEQPNDtw&f@6%fO#%Y!@T>H|!;*su`=VLsJ z_7gCYGD~d4$NKz9VSJ*7WS|aix9hnzVx#^^FN6J}0WjHh7&^q>vCaAD^PB0y*YAkn zjyxIyf}|gSa7XjPsC6M5GK`siPD+D)j}v_#YAN1rd=9I+cwg^MhyB^iwR}iTpCNLM zlP1~aEu5lTh_Dg2xZ{;gSY9@XN<3|qqgR;hpYP?74z)3P?4Mj1;A;x0E%f8|)BzTh z@07}rXJLa`89u)7eyla#y6Hlw1bD($&sa{9hZXc(C0>b}2EWS;FU-|QXQY;?Aj+dG z52EFDF;NFPcUb?tFhIB@N!Rw}2u&!Vh8(%ZO_MzO&t!)o&TCAj$aF{ZK?SE;)vqgs zeN86jUr!V@f@fG9+DZ4k^Fy|wbH9F^(`;hjXgiie7GP>wkGHPg8p>XLhV>f+z8@!|O$m0|^# zCRD=bkiB)((R6a65K5)p@jG=X5}a2QJzxccyNa)ypiae-qHu~04Zf9afx7T*gT2io zd;in1la+>(70R}rHiKU$lK~BC9qONCeq3>6ReyzKKAUy+?{IG?VFzw`3*;fcX^LjL{DoD0M74c)81l?_EO9I$1gf*tmeuU^*7%3ur=J(hK(w;dpHW@qfVAAXSSJYH7q8e{!vW zkut)C)!=Yd(WaoaW?~j~4?Lbz-wKAX%MGG8n2Sq`v3F4SUr%>+xK!ciYa)vH-LoMy z8EywJK&ROU0Z_6-#>wa*3$eGdi!^XmDD76HyU$$2&r|oKi;AofAGaP>X(~d$tb|FX zUNLNKp5koPg-xO;J@GK`5gPD38xj}Oxew{}+xXBd2};|P8DB-7R1?&ZZ+6Dvc4S%G z?L^Kti$csfihPz71weg2gQ;bJhaxAwV~X>WJvr8PMpIyGLU^M>+{#1HX-Yv-cX#?` z<_mIPSdo}}4lLp{WRrffjU0ZZGGByZh9aheLw$zHeQ{T(13IG!aHt}z#mLBgkA885 zb(<_}pB1_%$l0{{!{_8>Qi}t3+xh6wi6hg{V`5s62%hBhsP4tv&Y=e%m-SzGeX4@h zYj;3Bj-2*kcJE_*;<5?FG2`~L4I+Xb{2HwK0xwG+Hu7N~0ecb(kW}^G*h>z% z_lEp63pcC6$NHd!H^dfbJZIWv8N(z1^iS^QO-Gx0>rt z{^Nh*{Ox%T`f-i48Lchqu@es%0=blx#TZBwh1IS@j%ztvMeEkbD#gmHj9jOY0^!=a zZd2n=IryV)24cL-+1grG3}+p5P7@l}mjX@qDYonM-kzGYZZKy&ORCQ6|D6W*#VVOf zuiL-wuqz$EY~8ldr58rVxW@hfpcu}RKQ!krZR8Vfr(HsJ#Bd2oTk8(?4Dig9K{Us zd`PmnYRz)AD_z0E4#_L-R4*vsy6Bo?<}=4SaaG=!*4<5+X?J~7?ag7VZC5@|pC|VoLVNtzKs$T^hQi%uzvPuK?09#ypxJS^8e@k2!UIbD%Q*2x7$nV0t-kh$!&2XD znIqgw;{#DM951bfb?(8^PYu6HWB;8gU%MfE{gbuTE-s;($a6ek)}hi1q91FNuFNiG1>5vF$fB3&}bReWsoX zJ6H@;ic~>5X%kLtIf-nCdmYz%^h>b7+Db(!F|p>aYJI-@O{czlY?UEr(1bs45z|ec z6}F2h`unk!zO}O@o-FP!HaM1a(&}z{n%|Yk7$zZHS`3d@ksq8xXELi-6`-HLYUk7{ z!>3Pn8rH9WCc`g~5|wnPf=)8a;Pxl2)nd%>PvNTxM981N%75PjenyAK?bD!LmSUKwvYWd&It9n<8_#2m^v(6ttD=Q9?L-ro`A3;)LBd zY(P9Q2LXKnphN;gmj3K5ge|uk!vuE8JJHdeMRav0gvx`7Aq}zEnk->ZOSq zNmM9yk%>XC64T5|kM|gLl6>H8MZvS|FW@anlQvn;ZzE(#ix&ljU||#dSgiD*2Z7<# zIwgJm+{(|x2CFhixQ zLQNoPt7pIg=#O3^p%7)Nd%wr5oH>W$H>(PT-+kP6ml7`ycOKS+B`7x z+9B<@YQpDun8lyuIm|+y1m?xV@{lU0!-#>IP3KfUU~@&4pCC_(nSXW`C#EG@cDm?t zIX`z5%g6TP+BPX)l%$Y8xPrxp_&UJsLyUR zz#M-~*g%<-R=9=ybI6qyaeJ2)@TX6%UK|T$+>HmMX9Q%=59?6 z(Q+JYYcFkq{raJ5yc(N5g~Cm|MaD?_CWU={fz|k!iK4L(PkraL_{JXuw|e=~&7KO2 zEaSFqFRvXa8;V;RYu6non1wv+jWnJoP-?5tDws+95*;GrC|{SPmnA92&VEs>U{Yjt zV>ln=X~TYw2>*?<|4oq^pV>UEvpwIEyXGiRKgMX^rHht6+}SUrHQu89F( z8tE2&4fjA(wslY80xx^rs{VKxI_YNPl@Py8XBUE=AJ+c-GWnSr*-{{MVxAg%!a@x! z1_Kva3=8pe5Fj?Z@Kdvj^#R1%=0U4%i(SG_uNjC!?31wT0ny7Z9MfC9ROgYzNY@6x zkucinYs!5H1IbNo+S)kXSA4@o=`KMbNe}k%NOtGS(`M5I1`{%@Q=M^Bp%(9kI3CYW zEK~AZ>cEPTBs+XIi(`dEA=*eNiZ14u^Rk_K}ZpuW`e@rOGq?h5Pf`KNF5H0aL z^+Kii#zsg(=gIn`Ew^tHvAt|@Bvnsz(x$Eo+-Rgqpaw?<&7XRz;)ViR?08x${lxLY zqjD69)9X6k_-kUUgVgQQ2V1_pnDO~A-wqXFcteZk3((ovtNN@R$UDNhr-uk>PV^G_ z><%ilCnma7KyxQa5bI&DG!Jh6G~TC@cq(UNlI|QU2d`%!0sg?}n|ho4c$K+$*Vi@j z!pr9CQ9;Amb(GuWuaCMemNnDC2|Tq+oonZ3kTs(GwT?I=l)>|}ic8D23XGG^FyRFY z-a2{jU5)9G%fcMr4_zlKsE?YZ8VJ}ao}|i|{j6!N_5H-Qap$qL-K^F6 zue{VBP^x}4_0%Fyn69{4!1Gibl_LF`ih5=gh86xh*_F+KlB>La^?q?o8AkFhV9Qc}}uXFQIFA2W053SEN z9B8rx#&V@i0+m+s_L0h_@ydW6_c1w?<$rU7$8rY)7p>Ne_ucuivb(G^4`djA=Nr9p z_Zk<{^Cc^Q)=t!h5W!_fiSuLb=y88wxa30`6IH68^}Me;{p_h-ceMALesw?G+?q#h zZSr;kOSgUWhp^st?oSJ!3Lwy9czsacwK{O7CiiyhL3Ux~>=EJCJ;jgMg5z8S!+7J* zUiqX$j47k})HI#NvwTMvOBH!BN-FL=4J;)!M`v-_*f-DA^pqa?2Kc2EzSM#1AWKD% zPF<_Xv7qvF5f^B(s74r~!!`ptHf;DxF=B+=x8W|hkr>joC(nYso6(u1(-Uo#)j?*{9_MMlpCOH?CSG4*@-8!THW`=@|q{ENPh1 zyb};}d8qN(=K(115Z1$R=$0&6U|uEWF+B}yGUz``$f~GQY7w&ES@->^gxERTOezwh z+^QyYREC&4f7A)_W7#Ni+mz2_U2Ex5D&coOdX_OPl^QB5(-;)_}k*e4(nh@7HY|eRtGag|4H=@fyarT(wK}?Sozf(!T%;H!oPT zivzs7i`b-nm(QuV_WDkGHc{lGMN7>#RFHMigWMfs+vk!oJa?MX>PTgcq7U|c_*#vc zJPBv7|9hhxQP!rA=|tYh9=o^5ir&b{<+{M0K2EM%MLd&;mNv#`|@I3&wNF$GN< z%AaBa6rH{~Pz<+*<7RJ3u*Ge?vb)ckux0^>*vPR1;QUuAvTEPY^}NcIq+xQ~bKfP? z9qhznWEJe+H0|gH66uy$ppi|iyZ&nTpPWVjp5Omq0hL^phY*%KN{b;G&1*M@@Kj zFTusa3Z!7GY57*l+1<&)*2!AR*$W`Z%f~LnC*;M)$HUIc$IZgY!dc*$GJ~2!^zS)3 jmTqrtot*&cGIDHO?7TD-@*pqNdw`-WSf)(s^}GKAq6l1% diff --git a/examples/bench.cpp b/examples/bench.cpp deleted file mode 100755 index 6548eb300..000000000 --- a/examples/bench.cpp +++ /dev/null @@ -1,241 +0,0 @@ -#include -#include "opts.hpp" -#include "isaac/driver/backend.h" -#include "isaac/driver/cublas.h" -#include "isaac/driver/context.h" -#include "isaac/driver/buffer.h" -#include "isaac/driver/stream.h" -#include "isaac/tools/bench.hpp" -#include "isaac/api.h" - -namespace sc = isaac; -namespace drv = sc::driver; -using sc::param_t; -using std::make_tuple; - -double geometric_mean(std::vector const&data){ - double logsum = std::accumulate(data.begin(), data.end(), - (double)0, [](double acc, double x){ return acc + std::log(x);}); - return std::exp(logsum/data.size()); -} - -void print_results_header(std::vector sections){ - std::cout << color_stream(ITALIC) << color_stream(BOLD) ; - std::copy(sections.begin(), sections.end(), std::ostream_iterator(std::cout, "\t")); - std::cout << "ISAAC\tcuDNN"; - std::cout << color_stream(RESET) << std::endl; -} - -void print_results(std::vector const & times, std::vector const & prefix, std::function cmp, std::function fn){ - std::copy(prefix.begin(), prefix.end(), std::ostream_iterator(std::cout, "\t")); - std::vector perf; - std::transform(times.begin(), times.end(), std::back_inserter(perf), fn); - auto fastest = perf; - std::sort(fastest.begin(), fastest.end(), cmp); - for(auto x: perf){ - if(std::max(x,fastest[1])/std::min(x, fastest[1]) >= 1.05) - std::cout << color_stream(FG_LIGHT_BLUE) << x << color_stream(RESET); - else - std::cout << x; - std::cout << "\t"; - } - std::cout << std::endl; -} - -struct Metric{ - virtual std::function cmp() const = 0; - virtual double conv(param_t P, param_t Q, param_t K, param_t N, param_t C, param_t R, param_t S, double tsec) const = 0; - virtual double gemm(param_t M, param_t N, param_t K, double tsec) const = 0; -}; - -class FLOPS: public Metric{ -public: - FLOPS(double scale): scale_(scale){} - std::function cmp() const { return std::greater(); } - double conv(param_t P, param_t Q, param_t K, param_t N, param_t C, param_t R, param_t S, double tsec) const - { return sc::templates::Conv::tflops(P,Q,K,N,C,R,S,tsec) * 1e12 / scale_; } - double gemm(param_t M, param_t N, param_t K, double tsec) const - { return sc::templates::GEMM::tflops(M, N, K, tsec) * 1e12 / scale_; } -private: - double scale_; -}; - -class Time: public Metric{ -public: - Time(double scale): scale_(scale){} - std::function cmp() const { return std::less(); } - double conv(param_t, param_t, param_t, param_t, param_t, param_t, param_t, double tsec) const { return tsec*1e-9/scale_; } - double gemm(param_t, param_t, param_t, double tsec) const { return tsec*1e-9/scale_; } -private: - double scale_; -}; - - -int main(int argc, char* argv[]) -{ - std::cout << std::fixed << std::setprecision(2); - - opts::Application program("bench", "benchmarking suite for ISAAC"); - program.add("dtype", "data-type", "float32", {{"float16", sc::HALF_TYPE}, {"float32", sc::FLOAT_TYPE}, {"float64", sc::DOUBLE_TYPE}}); - program.add("conv", "benchmark CONV", true); - program.add("gemm", "benchmark GEMM", true); - program.add>("metric", "performance metric for the results", "tflops", {{"tflops", std::make_shared(1e12)}, {"ms", std::make_shared