// // Generated by LLVM NVPTX Back-End // .version 7.4 .target sm_86 .address_size 64 // .globl _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27 .extern .shared .align 1 .b8 global_smem[]; .visible .entry _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27( .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_0, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_1, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_2, .param .f32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_3, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_4, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_5, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_6, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_7, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_8, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_9, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_10, .param .u64 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_11, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_12, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_13, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_14, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_15, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_16, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_17, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_18, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_19, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_20, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_21, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_22, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_23, .param .u32 _bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_24 ) .maxntid 256, 1, 1 { .reg .pred %p<27>; .reg .b16 %h<257>; .reg .b32 %r<3848>; .reg .b32 %hh<321>; .reg .f32 %f<547>; .reg .b64 %rd<70>; mov.u32 %r1, %tid.x; ld.param.u64 %rd28, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_1]; and.b32 %r447, %r1, 31; ld.param.u64 %rd29, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_2]; shr.u32 %r2, %r1, 5; bfe.u32 %r448, %r1, 3, 2; shr.u32 %r449, %r1, 3; and.b32 %r450, %r449, 124; or.b32 %r3, %r450, %r448; ld.param.u64 %rd31, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_7]; ld.param.u64 %rd32, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_8]; and.b32 %r4, %r1, 7; shl.b32 %r5, %r4, 3; ld.param.u32 %r451, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_12]; shl.b32 %r6, %r2, 4; ld.param.u32 %r452, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_13]; ld.param.u32 %r453, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_14]; bfe.u32 %r8, %r1, 2, 3; shl.b32 %r454, %r1, 1; ld.param.u32 %r455, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_17]; and.b32 %r10, %r454, 6; mov.u32 %r456, %ctaid.x; ld.param.u32 %r458, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_22]; div.s32 %r460, %r456, %r458; mul.lo.s32 %r461, %r460, %r458; sub.s32 %r462, %r456, %r461; ld.param.u32 %r463, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_24]; mul.lo.s32 %r464, %r460, %r451; mad.lo.s32 %r465, %r462, %r452, %r464; mul.wide.s32 %rd33, %r465, 2; add.s64 %rd35, %rd28, %rd33; add.s64 %rd36, %rd29, %rd33; add.s64 %rd1, %rd31, %rd33; add.s64 %rd2, %rd32, %rd33; shl.b32 %r466, %r455, 5; mad.lo.s32 %r467, %r3, %r455, %r5; add.s32 %r468, %r467, %r466; add.s32 %r469, %r468, %r466; add.s32 %r470, %r469, %r466; cvt.s64.s32 %rd3, %r467; mul.wide.s32 %rd38, %r467, 2; add.s64 %rd19, %rd35, %rd38; cvt.s64.s32 %rd4, %r468; mul.wide.s32 %rd39, %r468, 2; add.s64 %rd20, %rd35, %rd39; cvt.s64.s32 %rd5, %r469; mul.wide.s32 %rd40, %r469, 2; add.s64 %rd21, %rd35, %rd40; cvt.s64.s32 %rd6, %r470; mul.wide.s32 %rd41, %r470, 2; add.s64 %rd22, %rd35, %rd41; mov.pred %p19, -1; @%p19 ld.global.v4.b32 { %r95, %r96, %r97, %r98 }, [ %rd19 + 0 ]; @%p19 ld.global.v4.b32 { %r99, %r100, %r101, %r102 }, [ %rd20 + 0 ]; @%p19 ld.global.v4.b32 { %r103, %r104, %r105, %r106 }, [ %rd21 + 0 ]; @%p19 ld.global.v4.b32 { %r107, %r108, %r109, %r110 }, [ %rd22 + 0 ]; shl.b32 %r471, %r453, 5; mad.lo.s32 %r472, %r3, %r453, %r5; add.s32 %r473, %r472, %r471; add.s32 %r474, %r473, %r471; add.s32 %r475, %r474, %r471; cvt.s64.s32 %rd7, %r472; mul.wide.s32 %rd42, %r472, 2; add.s64 %rd23, %rd36, %rd42; cvt.s64.s32 %rd8, %r473; mul.wide.s32 %rd43, %r473, 2; add.s64 %rd24, %rd36, %rd43; cvt.s64.s32 %rd9, %r474; mul.wide.s32 %rd44, %r474, 2; add.s64 %rd25, %rd36, %rd44; cvt.s64.s32 %rd10, %r475; mul.wide.s32 %rd45, %r475, 2; add.s64 %rd26, %rd36, %rd45; @%p19 ld.global.v4.b32 { %r111, %r112, %r113, %r114 }, [ %rd23 + 0 ]; @%p19 ld.global.v4.b32 { %r115, %r116, %r117, %r118 }, [ %rd24 + 0 ]; @%p19 ld.global.v4.b32 { %r119, %r120, %r121, %r122 }, [ %rd25 + 0 ]; @%p19 ld.global.v4.b32 { %r123, %r124, %r125, %r126 }, [ %rd26 + 0 ]; shl.b32 %r11, %r463, 7; mov.b32 {%h1, %h2}, %r95; shl.b32 %r12, %r3, 6; or.b32 %r476, %r12, %r5; shl.b32 %r477, %r476, 1; mov.u32 %r478, global_smem; add.s32 %r479, %r478, %r477; st.shared.b16 [%r479], %h1; st.shared.b16 [%r479+2], %h2; mov.b32 {%h3, %h4}, %r96; st.shared.b16 [%r479+4], %h3; st.shared.b16 [%r479+6], %h4; mov.b32 {%h5, %h6}, %r97; st.shared.b16 [%r479+8], %h5; st.shared.b16 [%r479+10], %h6; mov.b32 {%h7, %h8}, %r98; st.shared.b16 [%r479+12], %h7; st.shared.b16 [%r479+14], %h8; mov.b32 {%h9, %h10}, %r99; add.s32 %r13, %r12, 2048; or.b32 %r480, %r13, %r5; shl.b32 %r481, %r480, 1; add.s32 %r482, %r478, %r481; st.shared.b16 [%r482], %h9; st.shared.b16 [%r482+2], %h10; mov.b32 {%h11, %h12}, %r100; st.shared.b16 [%r482+4], %h11; st.shared.b16 [%r482+6], %h12; mov.b32 {%h13, %h14}, %r101; st.shared.b16 [%r482+8], %h13; st.shared.b16 [%r482+10], %h14; mov.b32 {%h15, %h16}, %r102; st.shared.b16 [%r482+12], %h15; st.shared.b16 [%r482+14], %h16; mov.b32 {%h17, %h18}, %r103; add.s32 %r14, %r12, 4096; or.b32 %r483, %r14, %r5; shl.b32 %r484, %r483, 1; add.s32 %r485, %r478, %r484; st.shared.b16 [%r485], %h17; st.shared.b16 [%r485+2], %h18; mov.b32 {%h19, %h20}, %r104; st.shared.b16 [%r485+4], %h19; st.shared.b16 [%r485+6], %h20; mov.b32 {%h21, %h22}, %r105; st.shared.b16 [%r485+8], %h21; st.shared.b16 [%r485+10], %h22; mov.b32 {%h23, %h24}, %r106; st.shared.b16 [%r485+12], %h23; st.shared.b16 [%r485+14], %h24; mov.b32 {%h25, %h26}, %r107; add.s32 %r15, %r12, 6144; or.b32 %r486, %r15, %r5; shl.b32 %r487, %r486, 1; add.s32 %r488, %r478, %r487; st.shared.b16 [%r488], %h25; st.shared.b16 [%r488+2], %h26; mov.b32 {%h27, %h28}, %r108; st.shared.b16 [%r488+4], %h27; st.shared.b16 [%r488+6], %h28; mov.b32 {%h29, %h30}, %r109; st.shared.b16 [%r488+8], %h29; st.shared.b16 [%r488+10], %h30; mov.b32 {%h31, %h32}, %r110; st.shared.b16 [%r488+12], %h31; st.shared.b16 [%r488+14], %h32; bar.sync 0; mov.b32 {%h33, %h34}, %r111; add.s32 %r489, %r478, 16384; add.s32 %r490, %r489, %r477; st.shared.b16 [%r490], %h33; st.shared.b16 [%r490+2], %h34; mov.b32 {%h35, %h36}, %r112; st.shared.b16 [%r490+4], %h35; st.shared.b16 [%r490+6], %h36; mov.b32 {%h37, %h38}, %r113; st.shared.b16 [%r490+8], %h37; st.shared.b16 [%r490+10], %h38; mov.b32 {%h39, %h40}, %r114; st.shared.b16 [%r490+12], %h39; st.shared.b16 [%r490+14], %h40; mov.b32 {%h41, %h42}, %r115; add.s32 %r491, %r489, %r481; st.shared.b16 [%r491], %h41; st.shared.b16 [%r491+2], %h42; mov.b32 {%h43, %h44}, %r116; st.shared.b16 [%r491+4], %h43; st.shared.b16 [%r491+6], %h44; mov.b32 {%h45, %h46}, %r117; st.shared.b16 [%r491+8], %h45; st.shared.b16 [%r491+10], %h46; mov.b32 {%h47, %h48}, %r118; st.shared.b16 [%r491+12], %h47; st.shared.b16 [%r491+14], %h48; mov.b32 {%h49, %h50}, %r119; add.s32 %r492, %r489, %r484; st.shared.b16 [%r492], %h49; st.shared.b16 [%r492+2], %h50; mov.b32 {%h51, %h52}, %r120; st.shared.b16 [%r492+4], %h51; st.shared.b16 [%r492+6], %h52; mov.b32 {%h53, %h54}, %r121; st.shared.b16 [%r492+8], %h53; st.shared.b16 [%r492+10], %h54; mov.b32 {%h55, %h56}, %r122; st.shared.b16 [%r492+12], %h55; st.shared.b16 [%r492+14], %h56; mov.b32 {%h57, %h58}, %r123; add.s32 %r493, %r489, %r487; st.shared.b16 [%r493], %h57; st.shared.b16 [%r493+2], %h58; mov.b32 {%h59, %h60}, %r124; st.shared.b16 [%r493+4], %h59; st.shared.b16 [%r493+6], %h60; mov.b32 {%h61, %h62}, %r125; st.shared.b16 [%r493+8], %h61; st.shared.b16 [%r493+10], %h62; mov.b32 {%h63, %h64}, %r126; st.shared.b16 [%r493+12], %h63; st.shared.b16 [%r493+14], %h64; bar.sync 0; bfe.u32 %r16, %r447, 3, 1; bfe.u32 %r17, %r1, 4, 1; shl.b32 %r18, %r17, 3; or.b32 %r494, %r18, %r4; shl.b32 %r495, %r494, 6; shl.b32 %r19, %r16, 3; or.b32 %r496, %r495, %r19; shl.b32 %r497, %r496, 1; add.s32 %r131, %r478, %r497; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3591, %r3592, %r3593, %r3594 }, [ %r131 + 0 ]; and.b32 %r498, %r1, 8; or.b32 %r499, %r495, %r498; shl.b32 %r500, %r499, 1; add.s32 %r501, %r478, %r500; add.s32 %r136, %r501, 32; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3623, %r3624, %r3625, %r3626 }, [ %r136 + 0 ]; add.s32 %r141, %r131, 64; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3655, %r3656, %r3657, %r3658 }, [ %r141 + 0 ]; add.s32 %r146, %r501, 96; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3687, %r3688, %r3689, %r3690 }, [ %r146 + 0 ]; add.s32 %r151, %r131, 2048; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3595, %r3596, %r3597, %r3598 }, [ %r151 + 0 ]; add.s32 %r156, %r501, 2080; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3627, %r3628, %r3629, %r3630 }, [ %r156 + 0 ]; add.s32 %r161, %r131, 2112; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3659, %r3660, %r3661, %r3662 }, [ %r161 + 0 ]; add.s32 %r166, %r501, 2144; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3691, %r3692, %r3693, %r3694 }, [ %r166 + 0 ]; add.s32 %r171, %r131, 4096; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3599, %r3600, %r3601, %r3602 }, [ %r171 + 0 ]; add.s32 %r176, %r501, 4128; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3631, %r3632, %r3633, %r3634 }, [ %r176 + 0 ]; add.s32 %r181, %r131, 4160; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3663, %r3664, %r3665, %r3666 }, [ %r181 + 0 ]; add.s32 %r186, %r501, 4192; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3695, %r3696, %r3697, %r3698 }, [ %r186 + 0 ]; add.s32 %r191, %r131, 6144; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3603, %r3604, %r3605, %r3606 }, [ %r191 + 0 ]; add.s32 %r196, %r501, 6176; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3635, %r3636, %r3637, %r3638 }, [ %r196 + 0 ]; add.s32 %r201, %r131, 6208; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3667, %r3668, %r3669, %r3670 }, [ %r201 + 0 ]; add.s32 %r206, %r501, 6240; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3699, %r3700, %r3701, %r3702 }, [ %r206 + 0 ]; add.s32 %r211, %r131, 8192; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3607, %r3608, %r3609, %r3610 }, [ %r211 + 0 ]; add.s32 %r216, %r501, 8224; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3639, %r3640, %r3641, %r3642 }, [ %r216 + 0 ]; add.s32 %r221, %r131, 8256; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3671, %r3672, %r3673, %r3674 }, [ %r221 + 0 ]; add.s32 %r226, %r501, 8288; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3703, %r3704, %r3705, %r3706 }, [ %r226 + 0 ]; add.s32 %r231, %r131, 10240; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3611, %r3612, %r3613, %r3614 }, [ %r231 + 0 ]; add.s32 %r236, %r501, 10272; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3643, %r3644, %r3645, %r3646 }, [ %r236 + 0 ]; add.s32 %r241, %r131, 10304; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3675, %r3676, %r3677, %r3678 }, [ %r241 + 0 ]; add.s32 %r246, %r501, 10336; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3707, %r3708, %r3709, %r3710 }, [ %r246 + 0 ]; add.s32 %r251, %r131, 12288; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3615, %r3616, %r3617, %r3618 }, [ %r251 + 0 ]; add.s32 %r256, %r501, 12320; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3647, %r3648, %r3649, %r3650 }, [ %r256 + 0 ]; add.s32 %r261, %r131, 12352; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3679, %r3680, %r3681, %r3682 }, [ %r261 + 0 ]; add.s32 %r266, %r501, 12384; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3711, %r3712, %r3713, %r3714 }, [ %r266 + 0 ]; add.s32 %r271, %r131, 14336; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3619, %r3620, %r3621, %r3622 }, [ %r271 + 0 ]; add.s32 %r276, %r501, 14368; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3651, %r3652, %r3653, %r3654 }, [ %r276 + 0 ]; add.s32 %r281, %r131, 14400; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3683, %r3684, %r3685, %r3686 }, [ %r281 + 0 ]; add.s32 %r286, %r501, 14432; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3715, %r3716, %r3717, %r3718 }, [ %r286 + 0 ]; add.s32 %r291, %r489, %r497; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3719, %r3720, %r3721, %r3722 }, [ %r291 + 0 ]; add.s32 %r502, %r489, %r500; add.s32 %r296, %r502, 32; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3751, %r3752, %r3753, %r3754 }, [ %r296 + 0 ]; add.s32 %r301, %r291, 64; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3783, %r3784, %r3785, %r3786 }, [ %r301 + 0 ]; add.s32 %r306, %r502, 96; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3815, %r3816, %r3817, %r3818 }, [ %r306 + 0 ]; add.s32 %r311, %r291, 2048; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3723, %r3724, %r3725, %r3726 }, [ %r311 + 0 ]; add.s32 %r316, %r502, 2080; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3755, %r3756, %r3757, %r3758 }, [ %r316 + 0 ]; add.s32 %r321, %r291, 2112; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3787, %r3788, %r3789, %r3790 }, [ %r321 + 0 ]; add.s32 %r326, %r502, 2144; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3819, %r3820, %r3821, %r3822 }, [ %r326 + 0 ]; add.s32 %r331, %r291, 4096; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3727, %r3728, %r3729, %r3730 }, [ %r331 + 0 ]; add.s32 %r336, %r502, 4128; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3759, %r3760, %r3761, %r3762 }, [ %r336 + 0 ]; add.s32 %r341, %r291, 4160; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3791, %r3792, %r3793, %r3794 }, [ %r341 + 0 ]; add.s32 %r346, %r502, 4192; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3823, %r3824, %r3825, %r3826 }, [ %r346 + 0 ]; add.s32 %r351, %r291, 6144; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3731, %r3732, %r3733, %r3734 }, [ %r351 + 0 ]; add.s32 %r356, %r502, 6176; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3763, %r3764, %r3765, %r3766 }, [ %r356 + 0 ]; add.s32 %r361, %r291, 6208; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3795, %r3796, %r3797, %r3798 }, [ %r361 + 0 ]; add.s32 %r366, %r502, 6240; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3827, %r3828, %r3829, %r3830 }, [ %r366 + 0 ]; add.s32 %r371, %r291, 8192; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3735, %r3736, %r3737, %r3738 }, [ %r371 + 0 ]; add.s32 %r376, %r502, 8224; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3767, %r3768, %r3769, %r3770 }, [ %r376 + 0 ]; add.s32 %r381, %r291, 8256; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3799, %r3800, %r3801, %r3802 }, [ %r381 + 0 ]; add.s32 %r386, %r502, 8288; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3831, %r3832, %r3833, %r3834 }, [ %r386 + 0 ]; add.s32 %r391, %r291, 10240; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3739, %r3740, %r3741, %r3742 }, [ %r391 + 0 ]; add.s32 %r396, %r502, 10272; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3771, %r3772, %r3773, %r3774 }, [ %r396 + 0 ]; add.s32 %r401, %r291, 10304; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3803, %r3804, %r3805, %r3806 }, [ %r401 + 0 ]; add.s32 %r406, %r502, 10336; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3835, %r3836, %r3837, %r3838 }, [ %r406 + 0 ]; add.s32 %r411, %r291, 12288; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3743, %r3744, %r3745, %r3746 }, [ %r411 + 0 ]; add.s32 %r416, %r502, 12320; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3775, %r3776, %r3777, %r3778 }, [ %r416 + 0 ]; add.s32 %r421, %r291, 12352; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3807, %r3808, %r3809, %r3810 }, [ %r421 + 0 ]; add.s32 %r426, %r502, 12384; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3839, %r3840, %r3841, %r3842 }, [ %r426 + 0 ]; add.s32 %r431, %r291, 14336; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3747, %r3748, %r3749, %r3750 }, [ %r431 + 0 ]; add.s32 %r436, %r502, 14368; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3779, %r3780, %r3781, %r3782 }, [ %r436 + 0 ]; add.s32 %r441, %r291, 14400; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3811, %r3812, %r3813, %r3814 }, [ %r441 + 0 ]; add.s32 %r446, %r502, 14432; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3843, %r3844, %r3845, %r3846 }, [ %r446 + 0 ]; setp.lt.s32 %p9, %r11, 1; mov.f32 %f129, 0f00000000; mov.f32 %f515, %f129; mov.f32 %f516, %f129; mov.f32 %f517, %f129; mov.f32 %f518, %f129; mov.f32 %f519, %f129; mov.f32 %f520, %f129; mov.f32 %f521, %f129; mov.f32 %f522, %f129; mov.f32 %f523, %f129; mov.f32 %f524, %f129; mov.f32 %f525, %f129; mov.f32 %f526, %f129; mov.f32 %f527, %f129; mov.f32 %f528, %f129; mov.f32 %f529, %f129; mov.f32 %f530, %f129; mov.f32 %f531, %f129; mov.f32 %f532, %f129; mov.f32 %f533, %f129; mov.f32 %f534, %f129; mov.f32 %f535, %f129; mov.f32 %f536, %f129; mov.f32 %f537, %f129; mov.f32 %f538, %f129; mov.f32 %f539, %f129; mov.f32 %f540, %f129; mov.f32 %f541, %f129; mov.f32 %f542, %f129; mov.f32 %f543, %f129; mov.f32 %f544, %f129; mov.f32 %f545, %f129; mov.f32 %f546, %f129; @%p9 bra LBB0_3; ld.param.f32 %f97, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_3]; ld.param.u64 %rd27, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_0]; ld.param.u64 %rd30, [_bwd_kernel_0d1d2d34d5d6d7d8d9d10d11d12d13d14d15c16d17d18d19c20d21d22d23c2425d26d27_param_5]; and.b32 %r7, %r6, 112; or.b32 %r9, %r7, %r8; add.s64 %rd34, %rd27, %rd33; add.s64 %rd37, %rd30, %rd33; add.s64 %rd46, %rd34, %rd42; add.s64 %rd47, %rd34, %rd43; add.s64 %rd48, %rd34, %rd44; add.s64 %rd49, %rd34, %rd45; add.s64 %rd50, %rd37, %rd42; add.s64 %rd51, %rd37, %rd43; add.s64 %rd52, %rd37, %rd44; add.s64 %rd53, %rd37, %rd45; mov.b32 %hh1, %r3591; mov.b32 %hh2, %r3592; mov.b32 %hh3, %r3593; mov.b32 %hh4, %r3594; mov.b32 %hh5, %r3623; mov.b32 %hh6, %r3624; mov.b32 %hh7, %r3625; mov.b32 %hh8, %r3626; mov.b32 %hh9, %r3655; mov.b32 %hh10, %r3656; mov.b32 %hh11, %r3657; mov.b32 %hh12, %r3658; mov.b32 %hh13, %r3687; mov.b32 %hh14, %r3688; mov.b32 %hh15, %r3689; mov.b32 %hh16, %r3690; mov.b32 %hh17, %r3595; mov.b32 %hh18, %r3596; mov.b32 %hh19, %r3597; mov.b32 %hh20, %r3598; mov.b32 %hh21, %r3627; mov.b32 %hh22, %r3628; mov.b32 %hh23, %r3629; mov.b32 %hh24, %r3630; mov.b32 %hh25, %r3659; mov.b32 %hh26, %r3660; mov.b32 %hh27, %r3661; mov.b32 %hh28, %r3662; mov.b32 %hh29, %r3691; mov.b32 %hh30, %r3692; mov.b32 %hh31, %r3693; mov.b32 %hh32, %r3694; mov.b32 %hh33, %r3599; mov.b32 %hh34, %r3600; mov.b32 %hh35, %r3601; mov.b32 %hh36, %r3602; mov.b32 %hh37, %r3631; mov.b32 %hh38, %r3632; mov.b32 %hh39, %r3633; mov.b32 %hh40, %r3634; mov.b32 %hh41, %r3663; mov.b32 %hh42, %r3664; mov.b32 %hh43, %r3665; mov.b32 %hh44, %r3666; mov.b32 %hh45, %r3695; mov.b32 %hh46, %r3696; mov.b32 %hh47, %r3697; mov.b32 %hh48, %r3698; mov.b32 %hh49, %r3603; mov.b32 %hh50, %r3604; mov.b32 %hh51, %r3605; mov.b32 %hh52, %r3606; mov.b32 %hh53, %r3635; mov.b32 %hh54, %r3636; mov.b32 %hh55, %r3637; mov.b32 %hh56, %r3638; mov.b32 %hh57, %r3667; mov.b32 %hh58, %r3668; mov.b32 %hh59, %r3669; mov.b32 %hh60, %r3670; mov.b32 %hh61, %r3699; mov.b32 %hh62, %r3700; mov.b32 %hh63, %r3701; mov.b32 %hh64, %r3702; mov.b32 %hh65, %r3607; mov.b32 %hh66, %r3608; mov.b32 %hh67, %r3609; mov.b32 %hh68, %r3610; mov.b32 %hh69, %r3639; mov.b32 %hh70, %r3640; mov.b32 %hh71, %r3641; mov.b32 %hh72, %r3642; mov.b32 %hh73, %r3671; mov.b32 %hh74, %r3672; mov.b32 %hh75, %r3673; mov.b32 %hh76, %r3674; mov.b32 %hh77, %r3703; mov.b32 %hh78, %r3704; mov.b32 %hh79, %r3705; mov.b32 %hh80, %r3706; mov.b32 %hh81, %r3611; mov.b32 %hh82, %r3612; mov.b32 %hh83, %r3613; mov.b32 %hh84, %r3614; mov.b32 %hh85, %r3643; mov.b32 %hh86, %r3644; mov.b32 %hh87, %r3645; mov.b32 %hh88, %r3646; mov.b32 %hh89, %r3675; mov.b32 %hh90, %r3676; mov.b32 %hh91, %r3677; mov.b32 %hh92, %r3678; mov.b32 %hh93, %r3707; mov.b32 %hh94, %r3708; mov.b32 %hh95, %r3709; mov.b32 %hh96, %r3710; mov.b32 %hh97, %r3615; mov.b32 %hh98, %r3616; mov.b32 %hh99, %r3617; mov.b32 %hh100, %r3618; mov.b32 %hh101, %r3647; mov.b32 %hh102, %r3648; mov.b32 %hh103, %r3649; mov.b32 %hh104, %r3650; mov.b32 %hh105, %r3679; mov.b32 %hh106, %r3680; mov.b32 %hh107, %r3681; mov.b32 %hh108, %r3682; mov.b32 %hh109, %r3711; mov.b32 %hh110, %r3712; mov.b32 %hh111, %r3713; mov.b32 %hh112, %r3714; mov.b32 %hh113, %r3619; mov.b32 %hh114, %r3620; mov.b32 %hh115, %r3621; mov.b32 %hh116, %r3622; mov.b32 %hh117, %r3651; mov.b32 %hh118, %r3652; mov.b32 %hh119, %r3653; mov.b32 %hh120, %r3654; mov.b32 %hh121, %r3683; mov.b32 %hh122, %r3684; mov.b32 %hh123, %r3685; mov.b32 %hh124, %r3686; mov.b32 %hh125, %r3715; mov.b32 %hh126, %r3716; mov.b32 %hh127, %r3717; mov.b32 %hh128, %r3718; mov.b32 %hh129, %r3719; mov.b32 %hh130, %r3720; mov.b32 %hh131, %r3721; mov.b32 %hh132, %r3722; mov.b32 %hh133, %r3751; mov.b32 %hh134, %r3752; mov.b32 %hh135, %r3753; mov.b32 %hh136, %r3754; mov.b32 %hh137, %r3783; mov.b32 %hh138, %r3784; mov.b32 %hh139, %r3785; mov.b32 %hh140, %r3786; mov.b32 %hh141, %r3815; mov.b32 %hh142, %r3816; mov.b32 %hh143, %r3817; mov.b32 %hh144, %r3818; mov.b32 %hh145, %r3723; mov.b32 %hh146, %r3724; mov.b32 %hh147, %r3725; mov.b32 %hh148, %r3726; mov.b32 %hh149, %r3755; mov.b32 %hh150, %r3756; mov.b32 %hh151, %r3757; mov.b32 %hh152, %r3758; mov.b32 %hh153, %r3787; mov.b32 %hh154, %r3788; mov.b32 %hh155, %r3789; mov.b32 %hh156, %r3790; mov.b32 %hh157, %r3819; mov.b32 %hh158, %r3820; mov.b32 %hh159, %r3821; mov.b32 %hh160, %r3822; mov.b32 %hh161, %r3727; mov.b32 %hh162, %r3728; mov.b32 %hh163, %r3729; mov.b32 %hh164, %r3730; mov.b32 %hh165, %r3759; mov.b32 %hh166, %r3760; mov.b32 %hh167, %r3761; mov.b32 %hh168, %r3762; mov.b32 %hh169, %r3791; mov.b32 %hh170, %r3792; mov.b32 %hh171, %r3793; mov.b32 %hh172, %r3794; mov.b32 %hh173, %r3823; mov.b32 %hh174, %r3824; mov.b32 %hh175, %r3825; mov.b32 %hh176, %r3826; mov.b32 %hh177, %r3731; mov.b32 %hh178, %r3732; mov.b32 %hh179, %r3733; mov.b32 %hh180, %r3734; mov.b32 %hh181, %r3763; mov.b32 %hh182, %r3764; mov.b32 %hh183, %r3765; mov.b32 %hh184, %r3766; mov.b32 %hh185, %r3795; mov.b32 %hh186, %r3796; mov.b32 %hh187, %r3797; mov.b32 %hh188, %r3798; mov.b32 %hh189, %r3827; mov.b32 %hh190, %r3828; mov.b32 %hh191, %r3829; mov.b32 %hh192, %r3830; mov.b32 %hh193, %r3735; mov.b32 %hh194, %r3736; mov.b32 %hh195, %r3737; mov.b32 %hh196, %r3738; mov.b32 %hh197, %r3767; mov.b32 %hh198, %r3768; mov.b32 %hh199, %r3769; mov.b32 %hh200, %r3770; mov.b32 %hh201, %r3799; mov.b32 %hh202, %r3800; mov.b32 %hh203, %r3801; mov.b32 %hh204, %r3802; mov.b32 %hh205, %r3831; mov.b32 %hh206, %r3832; mov.b32 %hh207, %r3833; mov.b32 %hh208, %r3834; mov.b32 %hh209, %r3739; mov.b32 %hh210, %r3740; mov.b32 %hh211, %r3741; mov.b32 %hh212, %r3742; mov.b32 %hh213, %r3771; mov.b32 %hh214, %r3772; mov.b32 %hh215, %r3773; mov.b32 %hh216, %r3774; mov.b32 %hh217, %r3803; mov.b32 %hh218, %r3804; mov.b32 %hh219, %r3805; mov.b32 %hh220, %r3806; mov.b32 %hh221, %r3835; mov.b32 %hh222, %r3836; mov.b32 %hh223, %r3837; mov.b32 %hh224, %r3838; mov.b32 %hh225, %r3743; mov.b32 %hh226, %r3744; mov.b32 %hh227, %r3745; mov.b32 %hh228, %r3746; mov.b32 %hh229, %r3775; mov.b32 %hh230, %r3776; mov.b32 %hh231, %r3777; mov.b32 %hh232, %r3778; mov.b32 %hh233, %r3807; mov.b32 %hh234, %r3808; mov.b32 %hh235, %r3809; mov.b32 %hh236, %r3810; mov.b32 %hh237, %r3839; mov.b32 %hh238, %r3840; mov.b32 %hh239, %r3841; mov.b32 %hh240, %r3842; mov.b32 %hh241, %r3747; mov.b32 %hh242, %r3748; mov.b32 %hh243, %r3749; mov.b32 %hh244, %r3750; mov.b32 %hh245, %r3779; mov.b32 %hh246, %r3780; mov.b32 %hh247, %r3781; mov.b32 %hh248, %r3782; mov.b32 %hh249, %r3811; mov.b32 %hh250, %r3812; mov.b32 %hh251, %r3813; mov.b32 %hh252, %r3814; mov.b32 %hh253, %r3843; mov.b32 %hh254, %r3844; mov.b32 %hh255, %r3845; mov.b32 %hh256, %r3846; and.b32 %r504, %r3, 7; xor.b32 %r505, %r504, %r4; shl.b32 %r506, %r505, 3; or.b32 %r507, %r506, %r12; shl.b32 %r508, %r507, 1; add.s32 %r20, %r478, %r508; or.b32 %r510, %r506, %r13; shl.b32 %r511, %r510, 1; add.s32 %r21, %r478, %r511; or.b32 %r512, %r506, %r14; shl.b32 %r513, %r512, 1; add.s32 %r22, %r478, %r513; or.b32 %r514, %r506, %r15; shl.b32 %r515, %r514, 1; add.s32 %r23, %r478, %r515; or.b32 %r516, %r19, %r4; or.b32 %r517, %r516, %r7; xor.b32 %r518, %r17, %r4; shl.b32 %r519, %r518, 4; shl.b32 %r520, %r517, 7; or.b32 %r521, %r520, %r519; add.s32 %r599, %r478, %r521; or.b32 %r522, %r17, 2; xor.b32 %r523, %r522, %r4; shl.b32 %r524, %r523, 4; or.b32 %r525, %r524, %r520; add.s32 %r604, %r478, %r525; or.b32 %r526, %r17, 4; xor.b32 %r527, %r526, %r4; shl.b32 %r528, %r527, 4; or.b32 %r529, %r528, %r520; add.s32 %r609, %r478, %r529; or.b32 %r530, %r17, 6; xor.b32 %r531, %r530, %r4; shl.b32 %r532, %r531, 4; or.b32 %r533, %r532, %r520; add.s32 %r614, %r478, %r533; shl.b32 %r534, %r9, 1; shl.b32 %r535, %r10, 8; or.b32 %r536, %r535, %r534; add.s32 %r28, %r478, %r536; or.b32 %r537, %r536, 256; add.s32 %r30, %r478, %r537; add.s32 %r31, %r28, 2048; or.b32 %r538, %r536, 2304; add.s32 %r32, %r478, %r538; add.s32 %r33, %r28, 4096; or.b32 %r539, %r536, 4352; add.s32 %r34, %r478, %r539; add.s32 %r35, %r28, 6144; or.b32 %r540, %r536, 6400; add.s32 %r36, %r478, %r540; add.s32 %r37, %r28, 8192; or.b32 %r541, %r536, 8448; add.s32 %r38, %r478, %r541; add.s32 %r39, %r28, 10240; or.b32 %r542, %r536, 10496; add.s32 %r40, %r478, %r542; add.s32 %r41, %r28, 12288; or.b32 %r543, %r536, 12544; add.s32 %r42, %r478, %r543; add.s32 %r43, %r28, 14336; or.b32 %r544, %r536, 14592; add.s32 %r44, %r478, %r544; add.s32 %r45, %r28, 16384; or.b32 %r545, %r536, 16640; add.s32 %r46, %r478, %r545; add.s32 %r47, %r28, 18432; or.b32 %r546, %r536, 18688; add.s32 %r48, %r478, %r546; add.s32 %r49, %r28, 20480; or.b32 %r547, %r536, 20736; add.s32 %r50, %r478, %r547; add.s32 %r51, %r28, 22528; or.b32 %r548, %r536, 22784; add.s32 %r52, %r478, %r548; add.s32 %r53, %r28, 24576; or.b32 %r549, %r536, 24832; add.s32 %r54, %r478, %r549; add.s32 %r55, %r28, 26624; or.b32 %r550, %r536, 26880; add.s32 %r56, %r478, %r550; add.s32 %r57, %r28, 28672; or.b32 %r551, %r536, 28928; add.s32 %r58, %r478, %r551; add.s32 %r59, %r28, 30720; or.b32 %r552, %r536, 30976; add.s32 %r60, %r478, %r552; shl.b32 %r553, %r2, 1; and.b32 %r554, %r553, 6; or.b32 %r555, %r554, %r16; shl.b32 %r556, %r555, 10; shl.b32 %r557, %r4, 7; or.b32 %r558, %r556, %r557; or.b32 %r559, %r558, %r18; shl.b32 %r560, %r559, 1; add.s32 %r1531, %r478, %r560; shl.b32 %r561, %r522, 4; shl.b32 %r562, %r558, 1; or.b32 %r563, %r561, %r562; add.s32 %r1536, %r478, %r563; shl.b32 %r564, %r526, 4; or.b32 %r565, %r564, %r562; add.s32 %r1541, %r478, %r565; shl.b32 %r566, %r530, 4; or.b32 %r567, %r566, %r562; add.s32 %r1546, %r478, %r567; add.s32 %r1551, %r1531, 128; add.s32 %r1556, %r1531, 160; add.s32 %r1561, %r1531, 192; add.s32 %r1566, %r1531, 224; add.s32 %r1571, %r1531, 16384; add.s32 %r1576, %r1536, 16384; add.s32 %r1581, %r1541, 16384; add.s32 %r1586, %r1546, 16384; add.s32 %r1591, %r1531, 16512; add.s32 %r1596, %r1531, 16544; add.s32 %r1601, %r1531, 16576; add.s32 %r1606, %r1531, 16608; bfe.u32 %r568, %r1, 7, 1; shl.b32 %r569, %r17, 1; or.b32 %r570, %r569, %r568; xor.b32 %r571, %r570, %r4; shl.b32 %r572, %r571, 4; shl.b32 %r573, %r516, 7; or.b32 %r574, %r572, %r573; add.s32 %r1611, %r478, %r574; add.s32 %r1616, %r1611, 2048; add.s32 %r1621, %r1611, 4096; add.s32 %r1626, %r1611, 6144; add.s32 %r1631, %r1611, 8192; add.s32 %r1636, %r1611, 10240; add.s32 %r1641, %r1611, 12288; add.s32 %r1646, %r1611, 14336; or.b32 %r575, %r570, 4; xor.b32 %r576, %r575, %r4; shl.b32 %r577, %r576, 4; or.b32 %r578, %r577, %r573; add.s32 %r1651, %r478, %r578; add.s32 %r1656, %r1651, 2048; add.s32 %r1661, %r1651, 4096; add.s32 %r1666, %r1651, 6144; add.s32 %r1671, %r1651, 8192; add.s32 %r1676, %r1651, 10240; add.s32 %r1681, %r1651, 12288; add.s32 %r1686, %r1651, 14336; mov.f32 %f515, 0f00000000; mov.u32 %r625, 0; mov.f32 %f516, %f515; mov.f32 %f517, %f515; mov.f32 %f518, %f515; mov.f32 %f519, %f515; mov.f32 %f520, %f515; mov.f32 %f521, %f515; mov.f32 %f522, %f515; mov.f32 %f523, %f515; mov.f32 %f524, %f515; mov.f32 %f525, %f515; mov.f32 %f526, %f515; mov.f32 %f527, %f515; mov.f32 %f528, %f515; mov.f32 %f529, %f515; mov.f32 %f530, %f515; mov.f32 %f531, %f515; mov.f32 %f532, %f515; mov.f32 %f533, %f515; mov.f32 %f534, %f515; mov.f32 %f535, %f515; mov.f32 %f536, %f515; mov.f32 %f537, %f515; mov.f32 %f538, %f515; mov.f32 %f539, %f515; mov.f32 %f540, %f515; mov.f32 %f541, %f515; mov.f32 %f542, %f515; mov.f32 %f543, %f515; mov.f32 %f544, %f515; mov.f32 %f545, %f515; mov.f32 %f546, %f515; mov.u32 %r3847, %r625; LBB0_2: @%p19 ld.global.v4.b32 { %r3499, %r3500, %r3501, %r3502 }, [ %rd46 + 0 ]; mov.b32 %hh257, %r3499; mov.b32 %hh258, %r3500; mov.b32 %hh259, %r3501; mov.b32 %hh260, %r3502; @%p19 ld.global.v4.b32 { %r3503, %r3504, %r3505, %r3506 }, [ %rd47 + 0 ]; mov.b32 %hh261, %r3503; mov.b32 %hh262, %r3504; mov.b32 %hh263, %r3505; mov.b32 %hh264, %r3506; @%p19 ld.global.v4.b32 { %r3507, %r3508, %r3509, %r3510 }, [ %rd48 + 0 ]; mov.b32 %hh265, %r3507; mov.b32 %hh266, %r3508; mov.b32 %hh267, %r3509; mov.b32 %hh268, %r3510; @%p19 ld.global.v4.b32 { %r3511, %r3512, %r3513, %r3514 }, [ %rd49 + 0 ]; mov.b32 %hh269, %r3511; mov.b32 %hh270, %r3512; mov.b32 %hh271, %r3513; mov.b32 %hh272, %r3514; bar.sync 0; st.shared.v4.b32 [%r20], {%r3499, %r3500, %r3501, %r3502}; st.shared.v4.b32 [%r21], {%r3503, %r3504, %r3505, %r3506}; st.shared.v4.b32 [%r22], {%r3507, %r3508, %r3509, %r3510}; st.shared.v4.b32 [%r23], {%r3511, %r3512, %r3513, %r3514}; bar.sync 0; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r619, %r620, %r621, %r622 }, [ %r599 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r843, %r844, %r845, %r846 }, [ %r604 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1067, %r1068, %r1069, %r1070 }, [ %r609 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1291, %r1292, %r1293, %r1294 }, [ %r614 + 0 ]; mov.u32 %r839, %r625; mov.u32 %r840, %r625; mov.u32 %r841, %r625; mov.u32 %r842, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r839, %r840, %r841, %r842 }, { %r619, %r620, %r621, %r622 }, { %r3591, %r3592 }, { %r839, %r840, %r841, %r842 }; mov.u32 %r853, %r625; mov.u32 %r854, %r625; mov.u32 %r855, %r625; mov.u32 %r856, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r853, %r854, %r855, %r856 }, { %r619, %r620, %r621, %r622 }, { %r3593, %r3594 }, { %r853, %r854, %r855, %r856 }; mov.u32 %r867, %r625; mov.u32 %r868, %r625; mov.u32 %r869, %r625; mov.u32 %r870, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r867, %r868, %r869, %r870 }, { %r619, %r620, %r621, %r622 }, { %r3595, %r3596 }, { %r867, %r868, %r869, %r870 }; mov.u32 %r881, %r625; mov.u32 %r882, %r625; mov.u32 %r883, %r625; mov.u32 %r884, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r881, %r882, %r883, %r884 }, { %r619, %r620, %r621, %r622 }, { %r3597, %r3598 }, { %r881, %r882, %r883, %r884 }; mov.u32 %r895, %r625; mov.u32 %r896, %r625; mov.u32 %r897, %r625; mov.u32 %r898, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r895, %r896, %r897, %r898 }, { %r619, %r620, %r621, %r622 }, { %r3599, %r3600 }, { %r895, %r896, %r897, %r898 }; mov.u32 %r909, %r625; mov.u32 %r910, %r625; mov.u32 %r911, %r625; mov.u32 %r912, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r909, %r910, %r911, %r912 }, { %r619, %r620, %r621, %r622 }, { %r3601, %r3602 }, { %r909, %r910, %r911, %r912 }; mov.u32 %r923, %r625; mov.u32 %r924, %r625; mov.u32 %r925, %r625; mov.u32 %r926, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r923, %r924, %r925, %r926 }, { %r619, %r620, %r621, %r622 }, { %r3603, %r3604 }, { %r923, %r924, %r925, %r926 }; mov.u32 %r937, %r625; mov.u32 %r938, %r625; mov.u32 %r939, %r625; mov.u32 %r940, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r937, %r938, %r939, %r940 }, { %r619, %r620, %r621, %r622 }, { %r3605, %r3606 }, { %r937, %r938, %r939, %r940 }; mov.u32 %r951, %r625; mov.u32 %r952, %r625; mov.u32 %r953, %r625; mov.u32 %r954, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r951, %r952, %r953, %r954 }, { %r619, %r620, %r621, %r622 }, { %r3607, %r3608 }, { %r951, %r952, %r953, %r954 }; mov.u32 %r965, %r625; mov.u32 %r966, %r625; mov.u32 %r967, %r625; mov.u32 %r968, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r965, %r966, %r967, %r968 }, { %r619, %r620, %r621, %r622 }, { %r3609, %r3610 }, { %r965, %r966, %r967, %r968 }; mov.u32 %r979, %r625; mov.u32 %r980, %r625; mov.u32 %r981, %r625; mov.u32 %r982, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r979, %r980, %r981, %r982 }, { %r619, %r620, %r621, %r622 }, { %r3611, %r3612 }, { %r979, %r980, %r981, %r982 }; mov.u32 %r993, %r625; mov.u32 %r994, %r625; mov.u32 %r995, %r625; mov.u32 %r996, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r993, %r994, %r995, %r996 }, { %r619, %r620, %r621, %r622 }, { %r3613, %r3614 }, { %r993, %r994, %r995, %r996 }; mov.u32 %r1007, %r625; mov.u32 %r1008, %r625; mov.u32 %r1009, %r625; mov.u32 %r1010, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1007, %r1008, %r1009, %r1010 }, { %r619, %r620, %r621, %r622 }, { %r3615, %r3616 }, { %r1007, %r1008, %r1009, %r1010 }; mov.u32 %r1021, %r625; mov.u32 %r1022, %r625; mov.u32 %r1023, %r625; mov.u32 %r1024, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1021, %r1022, %r1023, %r1024 }, { %r619, %r620, %r621, %r622 }, { %r3617, %r3618 }, { %r1021, %r1022, %r1023, %r1024 }; mov.u32 %r1035, %r625; mov.u32 %r1036, %r625; mov.u32 %r1037, %r625; mov.u32 %r1038, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1035, %r1036, %r1037, %r1038 }, { %r619, %r620, %r621, %r622 }, { %r3619, %r3620 }, { %r1035, %r1036, %r1037, %r1038 }; mov.u32 %r1049, %r625; mov.u32 %r1050, %r625; mov.u32 %r1051, %r625; mov.u32 %r1052, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1049, %r1050, %r1051, %r1052 }, { %r619, %r620, %r621, %r622 }, { %r3621, %r3622 }, { %r1049, %r1050, %r1051, %r1052 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r839, %r840, %r841, %r842 }, { %r843, %r844, %r845, %r846 }, { %r3623, %r3624 }, { %r839, %r840, %r841, %r842 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r853, %r854, %r855, %r856 }, { %r843, %r844, %r845, %r846 }, { %r3625, %r3626 }, { %r853, %r854, %r855, %r856 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r867, %r868, %r869, %r870 }, { %r843, %r844, %r845, %r846 }, { %r3627, %r3628 }, { %r867, %r868, %r869, %r870 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r881, %r882, %r883, %r884 }, { %r843, %r844, %r845, %r846 }, { %r3629, %r3630 }, { %r881, %r882, %r883, %r884 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r895, %r896, %r897, %r898 }, { %r843, %r844, %r845, %r846 }, { %r3631, %r3632 }, { %r895, %r896, %r897, %r898 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r909, %r910, %r911, %r912 }, { %r843, %r844, %r845, %r846 }, { %r3633, %r3634 }, { %r909, %r910, %r911, %r912 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r923, %r924, %r925, %r926 }, { %r843, %r844, %r845, %r846 }, { %r3635, %r3636 }, { %r923, %r924, %r925, %r926 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r937, %r938, %r939, %r940 }, { %r843, %r844, %r845, %r846 }, { %r3637, %r3638 }, { %r937, %r938, %r939, %r940 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r951, %r952, %r953, %r954 }, { %r843, %r844, %r845, %r846 }, { %r3639, %r3640 }, { %r951, %r952, %r953, %r954 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r965, %r966, %r967, %r968 }, { %r843, %r844, %r845, %r846 }, { %r3641, %r3642 }, { %r965, %r966, %r967, %r968 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r979, %r980, %r981, %r982 }, { %r843, %r844, %r845, %r846 }, { %r3643, %r3644 }, { %r979, %r980, %r981, %r982 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r993, %r994, %r995, %r996 }, { %r843, %r844, %r845, %r846 }, { %r3645, %r3646 }, { %r993, %r994, %r995, %r996 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1007, %r1008, %r1009, %r1010 }, { %r843, %r844, %r845, %r846 }, { %r3647, %r3648 }, { %r1007, %r1008, %r1009, %r1010 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1021, %r1022, %r1023, %r1024 }, { %r843, %r844, %r845, %r846 }, { %r3649, %r3650 }, { %r1021, %r1022, %r1023, %r1024 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1035, %r1036, %r1037, %r1038 }, { %r843, %r844, %r845, %r846 }, { %r3651, %r3652 }, { %r1035, %r1036, %r1037, %r1038 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1049, %r1050, %r1051, %r1052 }, { %r843, %r844, %r845, %r846 }, { %r3653, %r3654 }, { %r1049, %r1050, %r1051, %r1052 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r839, %r840, %r841, %r842 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3655, %r3656 }, { %r839, %r840, %r841, %r842 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r853, %r854, %r855, %r856 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3657, %r3658 }, { %r853, %r854, %r855, %r856 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r867, %r868, %r869, %r870 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3659, %r3660 }, { %r867, %r868, %r869, %r870 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r881, %r882, %r883, %r884 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3661, %r3662 }, { %r881, %r882, %r883, %r884 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r895, %r896, %r897, %r898 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3663, %r3664 }, { %r895, %r896, %r897, %r898 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r909, %r910, %r911, %r912 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3665, %r3666 }, { %r909, %r910, %r911, %r912 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r923, %r924, %r925, %r926 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3667, %r3668 }, { %r923, %r924, %r925, %r926 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r937, %r938, %r939, %r940 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3669, %r3670 }, { %r937, %r938, %r939, %r940 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r951, %r952, %r953, %r954 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3671, %r3672 }, { %r951, %r952, %r953, %r954 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r965, %r966, %r967, %r968 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3673, %r3674 }, { %r965, %r966, %r967, %r968 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r979, %r980, %r981, %r982 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3675, %r3676 }, { %r979, %r980, %r981, %r982 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r993, %r994, %r995, %r996 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3677, %r3678 }, { %r993, %r994, %r995, %r996 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1007, %r1008, %r1009, %r1010 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3679, %r3680 }, { %r1007, %r1008, %r1009, %r1010 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1021, %r1022, %r1023, %r1024 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3681, %r3682 }, { %r1021, %r1022, %r1023, %r1024 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1035, %r1036, %r1037, %r1038 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3683, %r3684 }, { %r1035, %r1036, %r1037, %r1038 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1049, %r1050, %r1051, %r1052 }, { %r1067, %r1068, %r1069, %r1070 }, { %r3685, %r3686 }, { %r1049, %r1050, %r1051, %r1052 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r839, %r840, %r841, %r842 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3687, %r3688 }, { %r839, %r840, %r841, %r842 }; mov.b32 %f162, %r842; mov.b32 %f163, %r841; mov.b32 %f164, %r840; mov.b32 %f165, %r839; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r853, %r854, %r855, %r856 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3689, %r3690 }, { %r853, %r854, %r855, %r856 }; mov.b32 %f166, %r856; mov.b32 %f167, %r855; mov.b32 %f168, %r854; mov.b32 %f169, %r853; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r867, %r868, %r869, %r870 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3691, %r3692 }, { %r867, %r868, %r869, %r870 }; mov.b32 %f170, %r870; mov.b32 %f171, %r869; mov.b32 %f172, %r868; mov.b32 %f173, %r867; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r881, %r882, %r883, %r884 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3693, %r3694 }, { %r881, %r882, %r883, %r884 }; mov.b32 %f174, %r884; mov.b32 %f175, %r883; mov.b32 %f176, %r882; mov.b32 %f177, %r881; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r895, %r896, %r897, %r898 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3695, %r3696 }, { %r895, %r896, %r897, %r898 }; mov.b32 %f178, %r898; mov.b32 %f179, %r897; mov.b32 %f180, %r896; mov.b32 %f181, %r895; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r909, %r910, %r911, %r912 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3697, %r3698 }, { %r909, %r910, %r911, %r912 }; mov.b32 %f182, %r912; mov.b32 %f183, %r911; mov.b32 %f184, %r910; mov.b32 %f185, %r909; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r923, %r924, %r925, %r926 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3699, %r3700 }, { %r923, %r924, %r925, %r926 }; mov.b32 %f186, %r926; mov.b32 %f187, %r925; mov.b32 %f188, %r924; mov.b32 %f189, %r923; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r937, %r938, %r939, %r940 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3701, %r3702 }, { %r937, %r938, %r939, %r940 }; mov.b32 %f190, %r940; mov.b32 %f191, %r939; mov.b32 %f192, %r938; mov.b32 %f193, %r937; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r951, %r952, %r953, %r954 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3703, %r3704 }, { %r951, %r952, %r953, %r954 }; mov.b32 %f194, %r954; mov.b32 %f195, %r953; mov.b32 %f196, %r952; mov.b32 %f197, %r951; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r965, %r966, %r967, %r968 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3705, %r3706 }, { %r965, %r966, %r967, %r968 }; mov.b32 %f198, %r968; mov.b32 %f199, %r967; mov.b32 %f200, %r966; mov.b32 %f201, %r965; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r979, %r980, %r981, %r982 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3707, %r3708 }, { %r979, %r980, %r981, %r982 }; mov.b32 %f202, %r982; mov.b32 %f203, %r981; mov.b32 %f204, %r980; mov.b32 %f205, %r979; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r993, %r994, %r995, %r996 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3709, %r3710 }, { %r993, %r994, %r995, %r996 }; mov.b32 %f206, %r996; mov.b32 %f207, %r995; mov.b32 %f208, %r994; mov.b32 %f209, %r993; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1007, %r1008, %r1009, %r1010 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3711, %r3712 }, { %r1007, %r1008, %r1009, %r1010 }; mov.b32 %f210, %r1010; mov.b32 %f211, %r1009; mov.b32 %f212, %r1008; mov.b32 %f213, %r1007; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1021, %r1022, %r1023, %r1024 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3713, %r3714 }, { %r1021, %r1022, %r1023, %r1024 }; mov.b32 %f214, %r1024; mov.b32 %f215, %r1023; mov.b32 %f216, %r1022; mov.b32 %f217, %r1021; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1035, %r1036, %r1037, %r1038 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3715, %r3716 }, { %r1035, %r1036, %r1037, %r1038 }; mov.b32 %f218, %r1038; mov.b32 %f219, %r1037; mov.b32 %f220, %r1036; mov.b32 %f221, %r1035; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1049, %r1050, %r1051, %r1052 }, { %r1291, %r1292, %r1293, %r1294 }, { %r3717, %r3718 }, { %r1049, %r1050, %r1051, %r1052 }; mov.b32 %f222, %r1052; mov.b32 %f223, %r1051; mov.b32 %f224, %r1050; mov.b32 %f225, %r1049; @%p19 ld.global.v4.b32 { %r3531, %r3532, %r3533, %r3534 }, [ %rd50 + 0 ]; mov.b32 %hh273, %r3531; mov.b32 %hh274, %r3532; mov.b32 %hh275, %r3533; mov.b32 %hh276, %r3534; @%p19 ld.global.v4.b32 { %r3535, %r3536, %r3537, %r3538 }, [ %rd51 + 0 ]; mov.b32 %hh277, %r3535; mov.b32 %hh278, %r3536; mov.b32 %hh279, %r3537; mov.b32 %hh280, %r3538; @%p19 ld.global.v4.b32 { %r3539, %r3540, %r3541, %r3542 }, [ %rd52 + 0 ]; mov.b32 %hh281, %r3539; mov.b32 %hh282, %r3540; mov.b32 %hh283, %r3541; mov.b32 %hh284, %r3542; @%p19 ld.global.v4.b32 { %r3543, %r3544, %r3545, %r3546 }, [ %rd53 + 0 ]; mov.b32 %hh285, %r3543; mov.b32 %hh286, %r3544; mov.b32 %hh287, %r3545; mov.b32 %hh288, %r3546; cvt.rn.f16.f32 %h65, %f165; cvt.rn.f16.f32 %h66, %f164; cvt.rn.f16.f32 %h67, %f163; cvt.rn.f16.f32 %h68, %f162; cvt.rn.f16.f32 %h69, %f169; cvt.rn.f16.f32 %h70, %f168; cvt.rn.f16.f32 %h71, %f167; cvt.rn.f16.f32 %h72, %f166; cvt.rn.f16.f32 %h73, %f173; cvt.rn.f16.f32 %h74, %f172; cvt.rn.f16.f32 %h75, %f171; cvt.rn.f16.f32 %h76, %f170; cvt.rn.f16.f32 %h77, %f177; cvt.rn.f16.f32 %h78, %f176; cvt.rn.f16.f32 %h79, %f175; cvt.rn.f16.f32 %h80, %f174; cvt.rn.f16.f32 %h81, %f181; cvt.rn.f16.f32 %h82, %f180; cvt.rn.f16.f32 %h83, %f179; cvt.rn.f16.f32 %h84, %f178; cvt.rn.f16.f32 %h85, %f185; cvt.rn.f16.f32 %h86, %f184; cvt.rn.f16.f32 %h87, %f183; cvt.rn.f16.f32 %h88, %f182; cvt.rn.f16.f32 %h89, %f189; cvt.rn.f16.f32 %h90, %f188; cvt.rn.f16.f32 %h91, %f187; cvt.rn.f16.f32 %h92, %f186; cvt.rn.f16.f32 %h93, %f193; cvt.rn.f16.f32 %h94, %f192; cvt.rn.f16.f32 %h95, %f191; cvt.rn.f16.f32 %h96, %f190; cvt.rn.f16.f32 %h97, %f197; cvt.rn.f16.f32 %h98, %f196; cvt.rn.f16.f32 %h99, %f195; cvt.rn.f16.f32 %h100, %f194; cvt.rn.f16.f32 %h101, %f201; cvt.rn.f16.f32 %h102, %f200; cvt.rn.f16.f32 %h103, %f199; cvt.rn.f16.f32 %h104, %f198; cvt.rn.f16.f32 %h105, %f205; cvt.rn.f16.f32 %h106, %f204; cvt.rn.f16.f32 %h107, %f203; cvt.rn.f16.f32 %h108, %f202; cvt.rn.f16.f32 %h109, %f209; cvt.rn.f16.f32 %h110, %f208; cvt.rn.f16.f32 %h111, %f207; cvt.rn.f16.f32 %h112, %f206; cvt.rn.f16.f32 %h113, %f213; cvt.rn.f16.f32 %h114, %f212; cvt.rn.f16.f32 %h115, %f211; cvt.rn.f16.f32 %h116, %f210; cvt.rn.f16.f32 %h117, %f217; cvt.rn.f16.f32 %h118, %f216; cvt.rn.f16.f32 %h119, %f215; cvt.rn.f16.f32 %h120, %f214; cvt.rn.f16.f32 %h121, %f221; cvt.rn.f16.f32 %h122, %f220; cvt.rn.f16.f32 %h123, %f219; cvt.rn.f16.f32 %h124, %f218; cvt.rn.f16.f32 %h125, %f225; cvt.rn.f16.f32 %h126, %f224; cvt.rn.f16.f32 %h127, %f223; cvt.rn.f16.f32 %h128, %f222; bar.sync 0; st.shared.b16 [%r28], %h65; st.shared.b16 [%r28+256], %h66; st.shared.b16 [%r28+16], %h67; st.shared.b16 [%r30+16], %h68; st.shared.b16 [%r28+2048], %h69; st.shared.b16 [%r28+2304], %h70; st.shared.b16 [%r31+16], %h71; st.shared.b16 [%r32+16], %h72; st.shared.b16 [%r28+4096], %h73; st.shared.b16 [%r28+4352], %h74; st.shared.b16 [%r33+16], %h75; st.shared.b16 [%r34+16], %h76; st.shared.b16 [%r28+6144], %h77; st.shared.b16 [%r28+6400], %h78; st.shared.b16 [%r35+16], %h79; st.shared.b16 [%r36+16], %h80; st.shared.b16 [%r28+8192], %h81; st.shared.b16 [%r28+8448], %h82; st.shared.b16 [%r37+16], %h83; st.shared.b16 [%r38+16], %h84; st.shared.b16 [%r28+10240], %h85; st.shared.b16 [%r28+10496], %h86; st.shared.b16 [%r39+16], %h87; st.shared.b16 [%r40+16], %h88; st.shared.b16 [%r28+12288], %h89; st.shared.b16 [%r28+12544], %h90; st.shared.b16 [%r41+16], %h91; st.shared.b16 [%r42+16], %h92; st.shared.b16 [%r28+14336], %h93; st.shared.b16 [%r28+14592], %h94; st.shared.b16 [%r43+16], %h95; st.shared.b16 [%r44+16], %h96; st.shared.b16 [%r28+16384], %h97; st.shared.b16 [%r28+16640], %h98; st.shared.b16 [%r45+16], %h99; st.shared.b16 [%r46+16], %h100; st.shared.b16 [%r28+18432], %h101; st.shared.b16 [%r28+18688], %h102; st.shared.b16 [%r47+16], %h103; st.shared.b16 [%r48+16], %h104; st.shared.b16 [%r28+20480], %h105; st.shared.b16 [%r28+20736], %h106; st.shared.b16 [%r49+16], %h107; st.shared.b16 [%r50+16], %h108; st.shared.b16 [%r28+22528], %h109; st.shared.b16 [%r28+22784], %h110; st.shared.b16 [%r51+16], %h111; st.shared.b16 [%r52+16], %h112; st.shared.b16 [%r28+24576], %h113; st.shared.b16 [%r28+24832], %h114; st.shared.b16 [%r53+16], %h115; st.shared.b16 [%r54+16], %h116; st.shared.b16 [%r28+26624], %h117; st.shared.b16 [%r28+26880], %h118; st.shared.b16 [%r55+16], %h119; st.shared.b16 [%r56+16], %h120; st.shared.b16 [%r28+28672], %h121; st.shared.b16 [%r28+28928], %h122; st.shared.b16 [%r57+16], %h123; st.shared.b16 [%r58+16], %h124; st.shared.b16 [%r28+30720], %h125; st.shared.b16 [%r28+30976], %h126; st.shared.b16 [%r59+16], %h127; st.shared.b16 [%r60+16], %h128; bar.sync 0; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1691, %r1692, %r1693, %r1694 }, [ %r1531 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1803, %r1804, %r1805, %r1806 }, [ %r1536 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1915, %r1916, %r1917, %r1918 }, [ %r1541 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2027, %r2028, %r2029, %r2030 }, [ %r1546 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2139, %r2140, %r2141, %r2142 }, [ %r1551 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2251, %r2252, %r2253, %r2254 }, [ %r1556 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2363, %r2364, %r2365, %r2366 }, [ %r1561 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2475, %r2476, %r2477, %r2478 }, [ %r1566 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1747, %r1748, %r1749, %r1750 }, [ %r1571 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1859, %r1860, %r1861, %r1862 }, [ %r1576 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r1971, %r1972, %r1973, %r1974 }, [ %r1581 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2083, %r2084, %r2085, %r2086 }, [ %r1586 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2195, %r2196, %r2197, %r2198 }, [ %r1591 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2307, %r2308, %r2309, %r2310 }, [ %r1596 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2419, %r2420, %r2421, %r2422 }, [ %r1601 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2531, %r2532, %r2533, %r2534 }, [ %r1606 + 0 ]; bar.sync 0; st.shared.v4.b32 [%r20], {%r3531, %r3532, %r3533, %r3534}; st.shared.v4.b32 [%r21], {%r3535, %r3536, %r3537, %r3538}; st.shared.v4.b32 [%r22], {%r3539, %r3540, %r3541, %r3542}; st.shared.v4.b32 [%r23], {%r3543, %r3544, %r3545, %r3546}; bar.sync 0; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1695, %r1696, %r1709, %r1710 }, [ %r1611 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1807, %r1808, %r1821, %r1822 }, [ %r1616 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1919, %r1920, %r1933, %r1934 }, [ %r1621 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2031, %r2032, %r2045, %r2046 }, [ %r1626 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2143, %r2144, %r2157, %r2158 }, [ %r1631 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2255, %r2256, %r2269, %r2270 }, [ %r1636 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2367, %r2368, %r2381, %r2382 }, [ %r1641 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2479, %r2480, %r2493, %r2494 }, [ %r1646 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1723, %r1724, %r1737, %r1738 }, [ %r1651 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1835, %r1836, %r1849, %r1850 }, [ %r1656 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r1947, %r1948, %r1961, %r1962 }, [ %r1661 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2059, %r2060, %r2073, %r2074 }, [ %r1666 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2171, %r2172, %r2185, %r2186 }, [ %r1671 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2283, %r2284, %r2297, %r2298 }, [ %r1676 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2395, %r2396, %r2409, %r2410 }, [ %r1681 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.trans.shared.b16 { %r2507, %r2508, %r2521, %r2522 }, [ %r1686 + 0 ]; mov.b32 %r1799, %f515; mov.b32 %r1800, %f516; mov.b32 %r1801, %f517; mov.b32 %r1802, %f518; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r1691, %r1692, %r1693, %r1694 }, { %r1695, %r1696 }, { %r1799, %r1800, %r1801, %r1802 }; mov.b32 %r1813, %f519; mov.b32 %r1814, %f520; mov.b32 %r1815, %f521; mov.b32 %r1816, %f522; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r1691, %r1692, %r1693, %r1694 }, { %r1709, %r1710 }, { %r1813, %r1814, %r1815, %r1816 }; mov.b32 %r1827, %f523; mov.b32 %r1828, %f524; mov.b32 %r1829, %f525; mov.b32 %r1830, %f526; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r1691, %r1692, %r1693, %r1694 }, { %r1723, %r1724 }, { %r1827, %r1828, %r1829, %r1830 }; mov.b32 %r1841, %f527; mov.b32 %r1842, %f528; mov.b32 %r1843, %f529; mov.b32 %r1844, %f530; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r1691, %r1692, %r1693, %r1694 }, { %r1737, %r1738 }, { %r1841, %r1842, %r1843, %r1844 }; mov.b32 %r1855, %f531; mov.b32 %r1856, %f532; mov.b32 %r1857, %f533; mov.b32 %r1858, %f534; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r1747, %r1748, %r1749, %r1750 }, { %r1695, %r1696 }, { %r1855, %r1856, %r1857, %r1858 }; mov.b32 %r1869, %f535; mov.b32 %r1870, %f536; mov.b32 %r1871, %f537; mov.b32 %r1872, %f538; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r1747, %r1748, %r1749, %r1750 }, { %r1709, %r1710 }, { %r1869, %r1870, %r1871, %r1872 }; mov.b32 %r1883, %f539; mov.b32 %r1884, %f540; mov.b32 %r1885, %f541; mov.b32 %r1886, %f542; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r1747, %r1748, %r1749, %r1750 }, { %r1723, %r1724 }, { %r1883, %r1884, %r1885, %r1886 }; mov.b32 %r1897, %f543; mov.b32 %r1898, %f544; mov.b32 %r1899, %f545; mov.b32 %r1900, %f546; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r1747, %r1748, %r1749, %r1750 }, { %r1737, %r1738 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r1803, %r1804, %r1805, %r1806 }, { %r1807, %r1808 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r1803, %r1804, %r1805, %r1806 }, { %r1821, %r1822 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r1803, %r1804, %r1805, %r1806 }, { %r1835, %r1836 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r1803, %r1804, %r1805, %r1806 }, { %r1849, %r1850 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r1859, %r1860, %r1861, %r1862 }, { %r1807, %r1808 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r1859, %r1860, %r1861, %r1862 }, { %r1821, %r1822 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r1859, %r1860, %r1861, %r1862 }, { %r1835, %r1836 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r1859, %r1860, %r1861, %r1862 }, { %r1849, %r1850 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r1915, %r1916, %r1917, %r1918 }, { %r1919, %r1920 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r1915, %r1916, %r1917, %r1918 }, { %r1933, %r1934 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r1915, %r1916, %r1917, %r1918 }, { %r1947, %r1948 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r1915, %r1916, %r1917, %r1918 }, { %r1961, %r1962 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r1971, %r1972, %r1973, %r1974 }, { %r1919, %r1920 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r1971, %r1972, %r1973, %r1974 }, { %r1933, %r1934 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r1971, %r1972, %r1973, %r1974 }, { %r1947, %r1948 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r1971, %r1972, %r1973, %r1974 }, { %r1961, %r1962 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r2027, %r2028, %r2029, %r2030 }, { %r2031, %r2032 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r2027, %r2028, %r2029, %r2030 }, { %r2045, %r2046 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r2027, %r2028, %r2029, %r2030 }, { %r2059, %r2060 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r2027, %r2028, %r2029, %r2030 }, { %r2073, %r2074 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r2083, %r2084, %r2085, %r2086 }, { %r2031, %r2032 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r2083, %r2084, %r2085, %r2086 }, { %r2045, %r2046 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r2083, %r2084, %r2085, %r2086 }, { %r2059, %r2060 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r2083, %r2084, %r2085, %r2086 }, { %r2073, %r2074 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r2139, %r2140, %r2141, %r2142 }, { %r2143, %r2144 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r2139, %r2140, %r2141, %r2142 }, { %r2157, %r2158 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r2139, %r2140, %r2141, %r2142 }, { %r2171, %r2172 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r2139, %r2140, %r2141, %r2142 }, { %r2185, %r2186 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r2195, %r2196, %r2197, %r2198 }, { %r2143, %r2144 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r2195, %r2196, %r2197, %r2198 }, { %r2157, %r2158 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r2195, %r2196, %r2197, %r2198 }, { %r2171, %r2172 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r2195, %r2196, %r2197, %r2198 }, { %r2185, %r2186 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r2251, %r2252, %r2253, %r2254 }, { %r2255, %r2256 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r2251, %r2252, %r2253, %r2254 }, { %r2269, %r2270 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r2251, %r2252, %r2253, %r2254 }, { %r2283, %r2284 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r2251, %r2252, %r2253, %r2254 }, { %r2297, %r2298 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r2307, %r2308, %r2309, %r2310 }, { %r2255, %r2256 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r2307, %r2308, %r2309, %r2310 }, { %r2269, %r2270 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r2307, %r2308, %r2309, %r2310 }, { %r2283, %r2284 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r2307, %r2308, %r2309, %r2310 }, { %r2297, %r2298 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r2363, %r2364, %r2365, %r2366 }, { %r2367, %r2368 }, { %r1799, %r1800, %r1801, %r1802 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r2363, %r2364, %r2365, %r2366 }, { %r2381, %r2382 }, { %r1813, %r1814, %r1815, %r1816 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r2363, %r2364, %r2365, %r2366 }, { %r2395, %r2396 }, { %r1827, %r1828, %r1829, %r1830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r2363, %r2364, %r2365, %r2366 }, { %r2409, %r2410 }, { %r1841, %r1842, %r1843, %r1844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r2419, %r2420, %r2421, %r2422 }, { %r2367, %r2368 }, { %r1855, %r1856, %r1857, %r1858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r2419, %r2420, %r2421, %r2422 }, { %r2381, %r2382 }, { %r1869, %r1870, %r1871, %r1872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r2419, %r2420, %r2421, %r2422 }, { %r2395, %r2396 }, { %r1883, %r1884, %r1885, %r1886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r2419, %r2420, %r2421, %r2422 }, { %r2409, %r2410 }, { %r1897, %r1898, %r1899, %r1900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1799, %r1800, %r1801, %r1802 }, { %r2475, %r2476, %r2477, %r2478 }, { %r2479, %r2480 }, { %r1799, %r1800, %r1801, %r1802 }; mov.b32 %f518, %r1802; mov.b32 %f517, %r1801; mov.b32 %f516, %r1800; mov.b32 %f515, %r1799; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1813, %r1814, %r1815, %r1816 }, { %r2475, %r2476, %r2477, %r2478 }, { %r2493, %r2494 }, { %r1813, %r1814, %r1815, %r1816 }; mov.b32 %f522, %r1816; mov.b32 %f521, %r1815; mov.b32 %f520, %r1814; mov.b32 %f519, %r1813; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1827, %r1828, %r1829, %r1830 }, { %r2475, %r2476, %r2477, %r2478 }, { %r2507, %r2508 }, { %r1827, %r1828, %r1829, %r1830 }; mov.b32 %f526, %r1830; mov.b32 %f525, %r1829; mov.b32 %f524, %r1828; mov.b32 %f523, %r1827; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1841, %r1842, %r1843, %r1844 }, { %r2475, %r2476, %r2477, %r2478 }, { %r2521, %r2522 }, { %r1841, %r1842, %r1843, %r1844 }; mov.b32 %f530, %r1844; mov.b32 %f529, %r1843; mov.b32 %f528, %r1842; mov.b32 %f527, %r1841; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1855, %r1856, %r1857, %r1858 }, { %r2531, %r2532, %r2533, %r2534 }, { %r2479, %r2480 }, { %r1855, %r1856, %r1857, %r1858 }; mov.b32 %f534, %r1858; mov.b32 %f533, %r1857; mov.b32 %f532, %r1856; mov.b32 %f531, %r1855; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1869, %r1870, %r1871, %r1872 }, { %r2531, %r2532, %r2533, %r2534 }, { %r2493, %r2494 }, { %r1869, %r1870, %r1871, %r1872 }; mov.b32 %f538, %r1872; mov.b32 %f537, %r1871; mov.b32 %f536, %r1870; mov.b32 %f535, %r1869; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1883, %r1884, %r1885, %r1886 }, { %r2531, %r2532, %r2533, %r2534 }, { %r2507, %r2508 }, { %r1883, %r1884, %r1885, %r1886 }; mov.b32 %f542, %r1886; mov.b32 %f541, %r1885; mov.b32 %f540, %r1884; mov.b32 %f539, %r1883; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r1897, %r1898, %r1899, %r1900 }, { %r2531, %r2532, %r2533, %r2534 }, { %r2521, %r2522 }, { %r1897, %r1898, %r1899, %r1900 }; mov.b32 %f546, %r1900; mov.b32 %f545, %r1899; mov.b32 %f544, %r1898; mov.b32 %f543, %r1897; bar.sync 0; st.shared.v4.b32 [%r20], {%r3531, %r3532, %r3533, %r3534}; st.shared.v4.b32 [%r21], {%r3535, %r3536, %r3537, %r3538}; st.shared.v4.b32 [%r22], {%r3539, %r3540, %r3541, %r3542}; st.shared.v4.b32 [%r23], {%r3543, %r3544, %r3545, %r3546}; bar.sync 0; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2607, %r2608, %r2609, %r2610 }, [ %r599 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r2831, %r2832, %r2833, %r2834 }, [ %r604 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3055, %r3056, %r3057, %r3058 }, [ %r609 + 0 ]; ldmatrix.sync.aligned.m8n8.x4.shared.b16 { %r3279, %r3280, %r3281, %r3282 }, [ %r614 + 0 ]; mov.u32 %r2827, %r625; mov.u32 %r2828, %r625; mov.u32 %r2829, %r625; mov.u32 %r2830, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2827, %r2828, %r2829, %r2830 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3719, %r3720 }, { %r2827, %r2828, %r2829, %r2830 }; mov.u32 %r2841, %r625; mov.u32 %r2842, %r625; mov.u32 %r2843, %r625; mov.u32 %r2844, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2841, %r2842, %r2843, %r2844 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3721, %r3722 }, { %r2841, %r2842, %r2843, %r2844 }; mov.u32 %r2855, %r625; mov.u32 %r2856, %r625; mov.u32 %r2857, %r625; mov.u32 %r2858, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2855, %r2856, %r2857, %r2858 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3723, %r3724 }, { %r2855, %r2856, %r2857, %r2858 }; mov.u32 %r2869, %r625; mov.u32 %r2870, %r625; mov.u32 %r2871, %r625; mov.u32 %r2872, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2869, %r2870, %r2871, %r2872 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3725, %r3726 }, { %r2869, %r2870, %r2871, %r2872 }; mov.u32 %r2883, %r625; mov.u32 %r2884, %r625; mov.u32 %r2885, %r625; mov.u32 %r2886, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2883, %r2884, %r2885, %r2886 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3727, %r3728 }, { %r2883, %r2884, %r2885, %r2886 }; mov.u32 %r2897, %r625; mov.u32 %r2898, %r625; mov.u32 %r2899, %r625; mov.u32 %r2900, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2897, %r2898, %r2899, %r2900 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3729, %r3730 }, { %r2897, %r2898, %r2899, %r2900 }; mov.u32 %r2911, %r625; mov.u32 %r2912, %r625; mov.u32 %r2913, %r625; mov.u32 %r2914, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2911, %r2912, %r2913, %r2914 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3731, %r3732 }, { %r2911, %r2912, %r2913, %r2914 }; mov.u32 %r2925, %r625; mov.u32 %r2926, %r625; mov.u32 %r2927, %r625; mov.u32 %r2928, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2925, %r2926, %r2927, %r2928 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3733, %r3734 }, { %r2925, %r2926, %r2927, %r2928 }; mov.u32 %r2939, %r625; mov.u32 %r2940, %r625; mov.u32 %r2941, %r625; mov.u32 %r2942, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2939, %r2940, %r2941, %r2942 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3735, %r3736 }, { %r2939, %r2940, %r2941, %r2942 }; mov.u32 %r2953, %r625; mov.u32 %r2954, %r625; mov.u32 %r2955, %r625; mov.u32 %r2956, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2953, %r2954, %r2955, %r2956 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3737, %r3738 }, { %r2953, %r2954, %r2955, %r2956 }; mov.u32 %r2967, %r625; mov.u32 %r2968, %r625; mov.u32 %r2969, %r625; mov.u32 %r2970, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2967, %r2968, %r2969, %r2970 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3739, %r3740 }, { %r2967, %r2968, %r2969, %r2970 }; mov.u32 %r2981, %r625; mov.u32 %r2982, %r625; mov.u32 %r2983, %r625; mov.u32 %r2984, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2981, %r2982, %r2983, %r2984 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3741, %r3742 }, { %r2981, %r2982, %r2983, %r2984 }; mov.u32 %r2995, %r625; mov.u32 %r2996, %r625; mov.u32 %r2997, %r625; mov.u32 %r2998, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2995, %r2996, %r2997, %r2998 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3743, %r3744 }, { %r2995, %r2996, %r2997, %r2998 }; mov.u32 %r3009, %r625; mov.u32 %r3010, %r625; mov.u32 %r3011, %r625; mov.u32 %r3012, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3009, %r3010, %r3011, %r3012 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3745, %r3746 }, { %r3009, %r3010, %r3011, %r3012 }; mov.u32 %r3023, %r625; mov.u32 %r3024, %r625; mov.u32 %r3025, %r625; mov.u32 %r3026, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3023, %r3024, %r3025, %r3026 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3747, %r3748 }, { %r3023, %r3024, %r3025, %r3026 }; mov.u32 %r3040, %r625; mov.u32 %r3037, %r625; mov.u32 %r3038, %r625; mov.u32 %r3039, %r625; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3037, %r3038, %r3039, %r3040 }, { %r2607, %r2608, %r2609, %r2610 }, { %r3749, %r3750 }, { %r3037, %r3038, %r3039, %r3040 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2827, %r2828, %r2829, %r2830 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3751, %r3752 }, { %r2827, %r2828, %r2829, %r2830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2841, %r2842, %r2843, %r2844 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3753, %r3754 }, { %r2841, %r2842, %r2843, %r2844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2855, %r2856, %r2857, %r2858 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3755, %r3756 }, { %r2855, %r2856, %r2857, %r2858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2869, %r2870, %r2871, %r2872 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3757, %r3758 }, { %r2869, %r2870, %r2871, %r2872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2883, %r2884, %r2885, %r2886 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3759, %r3760 }, { %r2883, %r2884, %r2885, %r2886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2897, %r2898, %r2899, %r2900 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3761, %r3762 }, { %r2897, %r2898, %r2899, %r2900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2911, %r2912, %r2913, %r2914 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3763, %r3764 }, { %r2911, %r2912, %r2913, %r2914 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2925, %r2926, %r2927, %r2928 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3765, %r3766 }, { %r2925, %r2926, %r2927, %r2928 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2939, %r2940, %r2941, %r2942 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3767, %r3768 }, { %r2939, %r2940, %r2941, %r2942 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2953, %r2954, %r2955, %r2956 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3769, %r3770 }, { %r2953, %r2954, %r2955, %r2956 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2967, %r2968, %r2969, %r2970 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3771, %r3772 }, { %r2967, %r2968, %r2969, %r2970 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2981, %r2982, %r2983, %r2984 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3773, %r3774 }, { %r2981, %r2982, %r2983, %r2984 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2995, %r2996, %r2997, %r2998 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3775, %r3776 }, { %r2995, %r2996, %r2997, %r2998 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3009, %r3010, %r3011, %r3012 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3777, %r3778 }, { %r3009, %r3010, %r3011, %r3012 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3023, %r3024, %r3025, %r3026 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3779, %r3780 }, { %r3023, %r3024, %r3025, %r3026 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3037, %r3038, %r3039, %r3040 }, { %r2831, %r2832, %r2833, %r2834 }, { %r3781, %r3782 }, { %r3037, %r3038, %r3039, %r3040 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2827, %r2828, %r2829, %r2830 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3783, %r3784 }, { %r2827, %r2828, %r2829, %r2830 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2841, %r2842, %r2843, %r2844 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3785, %r3786 }, { %r2841, %r2842, %r2843, %r2844 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2855, %r2856, %r2857, %r2858 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3787, %r3788 }, { %r2855, %r2856, %r2857, %r2858 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2869, %r2870, %r2871, %r2872 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3789, %r3790 }, { %r2869, %r2870, %r2871, %r2872 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2883, %r2884, %r2885, %r2886 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3791, %r3792 }, { %r2883, %r2884, %r2885, %r2886 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2897, %r2898, %r2899, %r2900 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3793, %r3794 }, { %r2897, %r2898, %r2899, %r2900 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2911, %r2912, %r2913, %r2914 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3795, %r3796 }, { %r2911, %r2912, %r2913, %r2914 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2925, %r2926, %r2927, %r2928 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3797, %r3798 }, { %r2925, %r2926, %r2927, %r2928 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2939, %r2940, %r2941, %r2942 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3799, %r3800 }, { %r2939, %r2940, %r2941, %r2942 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2953, %r2954, %r2955, %r2956 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3801, %r3802 }, { %r2953, %r2954, %r2955, %r2956 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2967, %r2968, %r2969, %r2970 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3803, %r3804 }, { %r2967, %r2968, %r2969, %r2970 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2981, %r2982, %r2983, %r2984 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3805, %r3806 }, { %r2981, %r2982, %r2983, %r2984 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2995, %r2996, %r2997, %r2998 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3807, %r3808 }, { %r2995, %r2996, %r2997, %r2998 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3009, %r3010, %r3011, %r3012 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3809, %r3810 }, { %r3009, %r3010, %r3011, %r3012 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3023, %r3024, %r3025, %r3026 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3811, %r3812 }, { %r3023, %r3024, %r3025, %r3026 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3037, %r3038, %r3039, %r3040 }, { %r3055, %r3056, %r3057, %r3058 }, { %r3813, %r3814 }, { %r3037, %r3038, %r3039, %r3040 }; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2827, %r2828, %r2829, %r2830 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3815, %r3816 }, { %r2827, %r2828, %r2829, %r2830 }; mov.b32 %f226, %r2830; mov.b32 %f227, %r2829; mov.b32 %f228, %r2828; mov.b32 %f229, %r2827; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2841, %r2842, %r2843, %r2844 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3817, %r3818 }, { %r2841, %r2842, %r2843, %r2844 }; mov.b32 %f230, %r2844; mov.b32 %f231, %r2843; mov.b32 %f232, %r2842; mov.b32 %f233, %r2841; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2855, %r2856, %r2857, %r2858 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3819, %r3820 }, { %r2855, %r2856, %r2857, %r2858 }; mov.b32 %f234, %r2858; mov.b32 %f235, %r2857; mov.b32 %f236, %r2856; mov.b32 %f237, %r2855; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2869, %r2870, %r2871, %r2872 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3821, %r3822 }, { %r2869, %r2870, %r2871, %r2872 }; mov.b32 %f238, %r2872; mov.b32 %f239, %r2871; mov.b32 %f240, %r2870; mov.b32 %f241, %r2869; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2883, %r2884, %r2885, %r2886 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3823, %r3824 }, { %r2883, %r2884, %r2885, %r2886 }; mov.b32 %f242, %r2886; mov.b32 %f243, %r2885; mov.b32 %f244, %r2884; mov.b32 %f245, %r2883; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2897, %r2898, %r2899, %r2900 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3825, %r3826 }, { %r2897, %r2898, %r2899, %r2900 }; mov.b32 %f246, %r2900; mov.b32 %f247, %r2899; mov.b32 %f248, %r2898; mov.b32 %f249, %r2897; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2911, %r2912, %r2913, %r2914 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3827, %r3828 }, { %r2911, %r2912, %r2913, %r2914 }; mov.b32 %f250, %r2914; mov.b32 %f251, %r2913; mov.b32 %f252, %r2912; mov.b32 %f253, %r2911; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2925, %r2926, %r2927, %r2928 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3829, %r3830 }, { %r2925, %r2926, %r2927, %r2928 }; mov.b32 %f254, %r2928; mov.b32 %f255, %r2927; mov.b32 %f256, %r2926; mov.b32 %f257, %r2925; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2939, %r2940, %r2941, %r2942 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3831, %r3832 }, { %r2939, %r2940, %r2941, %r2942 }; mov.b32 %f258, %r2942; mov.b32 %f259, %r2941; mov.b32 %f260, %r2940; mov.b32 %f261, %r2939; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2953, %r2954, %r2955, %r2956 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3833, %r3834 }, { %r2953, %r2954, %r2955, %r2956 }; mov.b32 %f262, %r2956; mov.b32 %f263, %r2955; mov.b32 %f264, %r2954; mov.b32 %f265, %r2953; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2967, %r2968, %r2969, %r2970 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3835, %r3836 }, { %r2967, %r2968, %r2969, %r2970 }; mov.b32 %f266, %r2970; mov.b32 %f267, %r2969; mov.b32 %f268, %r2968; mov.b32 %f269, %r2967; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2981, %r2982, %r2983, %r2984 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3837, %r3838 }, { %r2981, %r2982, %r2983, %r2984 }; mov.b32 %f270, %r2984; mov.b32 %f271, %r2983; mov.b32 %f272, %r2982; mov.b32 %f273, %r2981; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r2995, %r2996, %r2997, %r2998 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3839, %r3840 }, { %r2995, %r2996, %r2997, %r2998 }; mov.b32 %f274, %r2998; mov.b32 %f275, %r2997; mov.b32 %f276, %r2996; mov.b32 %f277, %r2995; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3009, %r3010, %r3011, %r3012 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3841, %r3842 }, { %r3009, %r3010, %r3011, %r3012 }; mov.b32 %f278, %r3012; mov.b32 %f279, %r3011; mov.b32 %f280, %r3010; mov.b32 %f281, %r3009; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3023, %r3024, %r3025, %r3026 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3843, %r3844 }, { %r3023, %r3024, %r3025, %r3026 }; mov.b32 %f282, %r3026; mov.b32 %f283, %r3025; mov.b32 %f284, %r3024; mov.b32 %f285, %r3023; mma.sync.aligned.m16n8k16.row.col.f32.f16.f16.f32 { %r3037, %r3038, %r3039, %r3040 }, { %r3279, %r3280, %r3281, %r3282 }, { %r3845, %r3846 }, { %r3037, %r3038, %r3039, %r3040 }; mov.b32 %f286, %r3040; mov.b32 %f287, %r3039; mov.b32 %f288, %r3038; mov.b32 %f289, %r3037; mul.f32 %f290, %f165, %f229; mul.f32 %f291, %f164, %f228; mul.f32 %f292, %f163, %f227; mul.f32 %f293, %f162, %f226; mul.f32 %f294, %f169, %f233; mul.f32 %f295, %f168, %f232; mul.f32 %f296, %f167, %f231; mul.f32 %f297, %f166, %f230; mul.f32 %f298, %f173, %f237; mul.f32 %f299, %f172, %f236; mul.f32 %f300, %f171, %f235; mul.f32 %f301, %f170, %f234; mul.f32 %f302, %f177, %f241; mul.f32 %f303, %f176, %f240; mul.f32 %f304, %f175, %f239; mul.f32 %f305, %f174, %f238; mul.f32 %f306, %f181, %f245; mul.f32 %f307, %f180, %f244; mul.f32 %f308, %f179, %f243; mul.f32 %f309, %f178, %f242; mul.f32 %f310, %f185, %f249; mul.f32 %f311, %f184, %f248; mul.f32 %f312, %f183, %f247; mul.f32 %f313, %f182, %f246; mul.f32 %f314, %f189, %f253; mul.f32 %f315, %f188, %f252; mul.f32 %f316, %f187, %f251; mul.f32 %f317, %f186, %f250; mul.f32 %f318, %f193, %f257; mul.f32 %f319, %f192, %f256; mul.f32 %f320, %f191, %f255; mul.f32 %f321, %f190, %f254; mul.f32 %f322, %f197, %f261; mul.f32 %f323, %f196, %f260; mul.f32 %f324, %f195, %f259; mul.f32 %f325, %f194, %f258; mul.f32 %f326, %f201, %f265; mul.f32 %f327, %f200, %f264; mul.f32 %f328, %f199, %f263; mul.f32 %f329, %f198, %f262; mul.f32 %f330, %f205, %f269; mul.f32 %f331, %f204, %f268; mul.f32 %f332, %f203, %f267; mul.f32 %f333, %f202, %f266; mul.f32 %f334, %f209, %f273; mul.f32 %f335, %f208, %f272; mul.f32 %f336, %f207, %f271; mul.f32 %f337, %f206, %f270; mul.f32 %f338, %f213, %f277; mul.f32 %f339, %f212, %f276; mul.f32 %f340, %f211, %f275; mul.f32 %f341, %f210, %f274; mul.f32 %f342, %f217, %f281; mul.f32 %f343, %f216, %f280; mul.f32 %f344, %f215, %f279; mul.f32 %f345, %f214, %f278; mul.f32 %f346, %f221, %f285; mul.f32 %f347, %f220, %f284; mul.f32 %f348, %f219, %f283; mul.f32 %f349, %f218, %f282; mul.f32 %f350, %f225, %f289; mul.f32 %f351, %f224, %f288; mul.f32 %f352, %f223, %f287; mul.f32 %f353, %f222, %f286; mul.f32 %f354, %f290, %f97; mul.f32 %f355, %f291, %f97; mul.f32 %f356, %f292, %f97; mul.f32 %f357, %f293, %f97; mul.f32 %f358, %f294, %f97; mul.f32 %f359, %f295, %f97; mul.f32 %f360, %f296, %f97; mul.f32 %f361, %f297, %f97; mul.f32 %f362, %f298, %f97; mul.f32 %f363, %f299, %f97; mul.f32 %f364, %f300, %f97; mul.f32 %f365, %f301, %f97; mul.f32 %f366, %f302, %f97; mul.f32 %f367, %f303, %f97; mul.f32 %f368, %f304, %f97; mul.f32 %f369, %f305, %f97; mul.f32 %f370, %f306, %f97; mul.f32 %f371, %f307, %f97; mul.f32 %f372, %f308, %f97; mul.f32 %f373, %f309, %f97; mul.f32 %f374, %f310, %f97; mul.f32 %f375, %f311, %f97; mul.f32 %f376, %f312, %f97; mul.f32 %f377, %f313, %f97; mul.f32 %f378, %f314, %f97; mul.f32 %f379, %f315, %f97; mul.f32 %f380, %f316, %f97; mul.f32 %f381, %f317, %f97; mul.f32 %f382, %f318, %f97; mul.f32 %f383, %f319, %f97; mul.f32 %f384, %f320, %f97; mul.f32 %f385, %f321, %f97; mul.f32 %f386, %f322, %f97; mul.f32 %f387, %f323, %f97; mul.f32 %f388, %f324, %f97; mul.f32 %f389, %f325, %f97; mul.f32 %f390, %f326, %f97; mul.f32 %f391, %f327, %f97; mul.f32 %f392, %f328, %f97; mul.f32 %f393, %f329, %f97; mul.f32 %f394, %f330, %f97; mul.f32 %f395, %f331, %f97; mul.f32 %f396, %f332, %f97; mul.f32 %f397, %f333, %f97; mul.f32 %f398, %f334, %f97; mul.f32 %f399, %f335, %f97; mul.f32 %f400, %f336, %f97; mul.f32 %f401, %f337, %f97; mul.f32 %f402, %f338, %f97; mul.f32 %f403, %f339, %f97; mul.f32 %f404, %f340, %f97; mul.f32 %f405, %f341, %f97; mul.f32 %f406, %f342, %f97; mul.f32 %f407, %f343, %f97; mul.f32 %f408, %f344, %f97; mul.f32 %f409, %f345, %f97; mul.f32 %f410, %f346, %f97; mul.f32 %f411, %f347, %f97; mul.f32 %f412, %f348, %f97; mul.f32 %f413, %f349, %f97; mul.f32 %f414, %f350, %f97; mul.f32 %f415, %f351, %f97; mul.f32 %f416, %f352, %f97; mul.f32 %f417, %f353, %f97; bar.sync 0; st.shared.b32 [%r28], %f254; st.shared.b32 [%r28+256], %f255; st.shared.b32 [%r28+16], %f256; st.shared.b32 [%r28+16], %f257; st.shared.b32 [%r28+2048], %f258; st.shared.b32 [%r28+2304], %f259; st.shared.b32 [%r28+16], %f260; st.shared.b32 [%r28+16], %f261; st.shared.b32 [%r28+4096], %f262; st.shared.b32 [%r28+4352], %f263; st.shared.b32 [%r28+16], %f264; st.shared.b32 [%r28+16], %f265; st.shared.b32 [%r28+6144], %f266; st.shared.b32 [%r28+6400], %f267; st.shared.b32 [%r28+16], %f268; st.shared.b32 [%r28+16], %f269; st.shared.b32 [%r28+8192], %f270; st.shared.b32 [%r28+8448], %f271; st.shared.b32 [%r28+16], %f272; st.shared.b32 [%r28+16], %f273; st.shared.b32 [%r28+10240], %f274; st.shared.b32 [%r28+10496], %f275; st.shared.b32 [%r28+16], %f276; st.shared.b32 [%r28+16], %f277; st.shared.b32 [%r28+12288], %f278; st.shared.b32 [%r28+12544], %f279; st.shared.b32 [%r28+16], %f280; st.shared.b32 [%r28+16], %f281; st.shared.b32 [%r28+14336], %f282; st.shared.b32 [%r28+14592], %f283; st.shared.b32 [%r28+16], %f284; st.shared.b32 [%r28+16], %f285; st.shared.b32 [%r28+16384], %f286; st.shared.b32 [%r28+16640], %f287; st.shared.b32 [%r28+16], %f288; st.shared.b32 [%r28+16], %f289; st.shared.b32 [%r28+18432], %f290; st.shared.b32 [%r28+18688], %f291; st.shared.b32 [%r28+16], %f292; st.shared.b32 [%r28+16], %f293; st.shared.b32 [%r28+20480], %f294; st.shared.b32 [%r28+20736], %f295; st.shared.b32 [%r28+16], %f296; st.shared.b32 [%r28+16], %f297; st.shared.b32 [%r28+22528], %f298; st.shared.b32 [%r28+22784], %f299; st.shared.b32 [%r28+16], %f300; st.shared.b32 [%r28+16], %f301; st.shared.b32 [%r28+24576], %f302; st.shared.b32 [%r28+24832], %f303; st.shared.b32 [%r28+16], %f304; st.shared.b32 [%r28+16], %f305; st.shared.b32 [%r28+26624], %f306; st.shared.b32 [%r28+26880], %f307; st.shared.b32 [%r28+16], %f308; st.shared.b32 [%r28+16], %f309; st.shared.b32 [%r28+28672], %f310; st.shared.b32 [%r28+28928], %f311; st.shared.b32 [%r28+16], %f312; st.shared.b32 [%r28+16], %f313; st.shared.b32 [%r28+30720], %f314; st.shared.b32 [%r28+30976], %f315; st.shared.b32 [%r28+16], %f316; st.shared.b32 [%r28+16], %f317; add.s32 %r3847, %r3847, 128; setp.lt.s32 %p18, %r3847, %r11; @%p18 bra LBB0_2; LBB0_3: bar.sync 0; and.b32 %r3579, %r6, 48; or.b32 %r3580, %r3579, %r8; shr.u32 %r3581, %r1, 4; and.b32 %r3582, %r3581, 56; or.b32 %r3583, %r10, %r3582; mad.lo.s32 %r3584, %r3580, 72, %r3583; shl.b32 %r3585, %r3584, 2; add.s32 %r3587, %r478, %r3585; st.shared.v2.f32 [%r3587], {%f129, %f129}; st.shared.v2.f32 [%r3587+2304], {%f129, %f129}; st.shared.v2.f32 [%r3587+64], {%f129, %f129}; st.shared.v2.f32 [%r3587+2368], {%f129, %f129}; st.shared.v2.f32 [%r3587+128], {%f129, %f129}; st.shared.v2.f32 [%r3587+2432], {%f129, %f129}; st.shared.v2.f32 [%r3587+192], {%f129, %f129}; st.shared.v2.f32 [%r3587+2496], {%f129, %f129}; bar.sync 0; mad.lo.s32 %r3588, %r3, 72, %r5; shl.b32 %r3589, %r3588, 2; add.s32 %r3590, %r478, %r3589; ld.shared.v4.f32 {%f419, %f420, %f421, %f422}, [%r3590]; ld.shared.v4.f32 {%f423, %f424, %f425, %f426}, [%r3590+16]; ld.shared.v4.f32 {%f427, %f428, %f429, %f430}, [%r3590+9216]; ld.shared.v4.f32 {%f431, %f432, %f433, %f434}, [%r3590+9232]; bar.sync 0; st.shared.v2.f32 [%r3587], {%f129, %f129}; st.shared.v2.f32 [%r3587+2304], {%f129, %f129}; st.shared.v2.f32 [%r3587+64], {%f129, %f129}; st.shared.v2.f32 [%r3587+2368], {%f129, %f129}; st.shared.v2.f32 [%r3587+128], {%f129, %f129}; st.shared.v2.f32 [%r3587+2432], {%f129, %f129}; st.shared.v2.f32 [%r3587+192], {%f129, %f129}; st.shared.v2.f32 [%r3587+2496], {%f129, %f129}; bar.sync 0; ld.shared.v4.f32 {%f435, %f436, %f437, %f438}, [%r3590]; ld.shared.v4.f32 {%f439, %f440, %f441, %f442}, [%r3590+16]; ld.shared.v4.f32 {%f443, %f444, %f445, %f446}, [%r3590+9216]; ld.shared.v4.f32 {%f447, %f448, %f449, %f450}, [%r3590+9232]; bar.sync 0; st.shared.v2.f32 [%r3587], {%f515, %f516}; st.shared.v2.f32 [%r3587+2304], {%f517, %f518}; st.shared.v2.f32 [%r3587+64], {%f519, %f520}; st.shared.v2.f32 [%r3587+2368], {%f521, %f522}; st.shared.v2.f32 [%r3587+128], {%f523, %f524}; st.shared.v2.f32 [%r3587+2432], {%f525, %f526}; st.shared.v2.f32 [%r3587+192], {%f527, %f528}; st.shared.v2.f32 [%r3587+2496], {%f529, %f530}; bar.sync 0; ld.shared.v4.f32 {%f451, %f452, %f453, %f454}, [%r3590]; ld.shared.v4.f32 {%f455, %f456, %f457, %f458}, [%r3590+16]; ld.shared.v4.f32 {%f459, %f460, %f461, %f462}, [%r3590+9216]; ld.shared.v4.f32 {%f463, %f464, %f465, %f466}, [%r3590+9232]; bar.sync 0; st.shared.v2.f32 [%r3587], {%f531, %f532}; st.shared.v2.f32 [%r3587+2304], {%f533, %f534}; st.shared.v2.f32 [%r3587+64], {%f535, %f536}; st.shared.v2.f32 [%r3587+2368], {%f537, %f538}; st.shared.v2.f32 [%r3587+128], {%f539, %f540}; st.shared.v2.f32 [%r3587+2432], {%f541, %f542}; st.shared.v2.f32 [%r3587+192], {%f543, %f544}; st.shared.v2.f32 [%r3587+2496], {%f545, %f546}; bar.sync 0; ld.shared.v4.f32 {%f467, %f468, %f469, %f470}, [%r3590]; ld.shared.v4.f32 {%f471, %f472, %f473, %f474}, [%r3590+16]; ld.shared.v4.f32 {%f475, %f476, %f477, %f478}, [%r3590+9216]; ld.shared.v4.f32 {%f479, %f480, %f481, %f482}, [%r3590+9232]; shl.b64 %rd62, %rd7, 1; add.s64 %rd54, %rd2, %rd62; shl.b64 %rd63, %rd8, 1; add.s64 %rd55, %rd2, %rd63; shl.b64 %rd64, %rd9, 1; add.s64 %rd56, %rd2, %rd64; shl.b64 %rd65, %rd10, 1; add.s64 %rd57, %rd2, %rd65; cvt.rn.f16.f32 %h193, %f452; cvt.rn.f16.f32 %h194, %f451; mov.b32 %hh289, {%h194, %h193}; cvt.rn.f16.f32 %h195, %f454; cvt.rn.f16.f32 %h196, %f453; mov.b32 %hh290, {%h196, %h195}; cvt.rn.f16.f32 %h197, %f456; cvt.rn.f16.f32 %h198, %f455; mov.b32 %hh291, {%h198, %h197}; cvt.rn.f16.f32 %h199, %f458; cvt.rn.f16.f32 %h200, %f457; mov.b32 %hh292, {%h200, %h199}; cvt.rn.f16.f32 %h201, %f460; cvt.rn.f16.f32 %h202, %f459; mov.b32 %hh293, {%h202, %h201}; cvt.rn.f16.f32 %h203, %f462; cvt.rn.f16.f32 %h204, %f461; mov.b32 %hh294, {%h204, %h203}; cvt.rn.f16.f32 %h205, %f464; cvt.rn.f16.f32 %h206, %f463; mov.b32 %hh295, {%h206, %h205}; cvt.rn.f16.f32 %h207, %f466; cvt.rn.f16.f32 %h208, %f465; mov.b32 %hh296, {%h208, %h207}; cvt.rn.f16.f32 %h209, %f468; cvt.rn.f16.f32 %h210, %f467; mov.b32 %hh297, {%h210, %h209}; cvt.rn.f16.f32 %h211, %f470; cvt.rn.f16.f32 %h212, %f469; mov.b32 %hh298, {%h212, %h211}; cvt.rn.f16.f32 %h213, %f472; cvt.rn.f16.f32 %h214, %f471; mov.b32 %hh299, {%h214, %h213}; cvt.rn.f16.f32 %h215, %f474; cvt.rn.f16.f32 %h216, %f473; mov.b32 %hh300, {%h216, %h215}; cvt.rn.f16.f32 %h217, %f476; cvt.rn.f16.f32 %h218, %f475; mov.b32 %hh301, {%h218, %h217}; cvt.rn.f16.f32 %h219, %f478; cvt.rn.f16.f32 %h220, %f477; mov.b32 %hh302, {%h220, %h219}; cvt.rn.f16.f32 %h221, %f480; cvt.rn.f16.f32 %h222, %f479; mov.b32 %hh303, {%h222, %h221}; cvt.rn.f16.f32 %h223, %f482; cvt.rn.f16.f32 %h224, %f481; mov.b32 %hh304, {%h224, %h223}; mov.b32 %r3547, %hh289; mov.b32 %r3548, %hh290; mov.b32 %r3549, %hh291; mov.b32 %r3550, %hh292; @%p19 st.global.v4.b32 [ %rd54 + 0 ], { %r3547, %r3548, %r3549, %r3550 }; mov.b32 %r3551, %hh293; mov.b32 %r3552, %hh294; mov.b32 %r3553, %hh295; mov.b32 %r3554, %hh296; @%p19 st.global.v4.b32 [ %rd55 + 0 ], { %r3551, %r3552, %r3553, %r3554 }; mov.b32 %r3555, %hh297; mov.b32 %r3556, %hh298; mov.b32 %r3557, %hh299; mov.b32 %r3558, %hh300; @%p19 st.global.v4.b32 [ %rd56 + 0 ], { %r3555, %r3556, %r3557, %r3558 }; mov.b32 %r3559, %hh301; mov.b32 %r3560, %hh302; mov.b32 %r3561, %hh303; mov.b32 %r3562, %hh304; @%p19 st.global.v4.b32 [ %rd57 + 0 ], { %r3559, %r3560, %r3561, %r3562 }; shl.b64 %rd66, %rd3, 1; add.s64 %rd58, %rd1, %rd66; shl.b64 %rd67, %rd4, 1; add.s64 %rd59, %rd1, %rd67; shl.b64 %rd68, %rd5, 1; add.s64 %rd60, %rd1, %rd68; shl.b64 %rd69, %rd6, 1; add.s64 %rd61, %rd1, %rd69; cvt.rn.f16.f32 %h225, %f420; cvt.rn.f16.f32 %h226, %f419; mov.b32 %hh305, {%h226, %h225}; cvt.rn.f16.f32 %h227, %f422; cvt.rn.f16.f32 %h228, %f421; mov.b32 %hh306, {%h228, %h227}; cvt.rn.f16.f32 %h229, %f424; cvt.rn.f16.f32 %h230, %f423; mov.b32 %hh307, {%h230, %h229}; cvt.rn.f16.f32 %h231, %f426; cvt.rn.f16.f32 %h232, %f425; mov.b32 %hh308, {%h232, %h231}; cvt.rn.f16.f32 %h233, %f428; cvt.rn.f16.f32 %h234, %f427; mov.b32 %hh309, {%h234, %h233}; cvt.rn.f16.f32 %h235, %f430; cvt.rn.f16.f32 %h236, %f429; mov.b32 %hh310, {%h236, %h235}; cvt.rn.f16.f32 %h237, %f432; cvt.rn.f16.f32 %h238, %f431; mov.b32 %hh311, {%h238, %h237}; cvt.rn.f16.f32 %h239, %f434; cvt.rn.f16.f32 %h240, %f433; mov.b32 %hh312, {%h240, %h239}; cvt.rn.f16.f32 %h241, %f436; cvt.rn.f16.f32 %h242, %f435; mov.b32 %hh313, {%h242, %h241}; cvt.rn.f16.f32 %h243, %f438; cvt.rn.f16.f32 %h244, %f437; mov.b32 %hh314, {%h244, %h243}; cvt.rn.f16.f32 %h245, %f440; cvt.rn.f16.f32 %h246, %f439; mov.b32 %hh315, {%h246, %h245}; cvt.rn.f16.f32 %h247, %f442; cvt.rn.f16.f32 %h248, %f441; mov.b32 %hh316, {%h248, %h247}; cvt.rn.f16.f32 %h249, %f444; cvt.rn.f16.f32 %h250, %f443; mov.b32 %hh317, {%h250, %h249}; cvt.rn.f16.f32 %h251, %f446; cvt.rn.f16.f32 %h252, %f445; mov.b32 %hh318, {%h252, %h251}; cvt.rn.f16.f32 %h253, %f448; cvt.rn.f16.f32 %h254, %f447; mov.b32 %hh319, {%h254, %h253}; cvt.rn.f16.f32 %h255, %f450; cvt.rn.f16.f32 %h256, %f449; mov.b32 %hh320, {%h256, %h255}; mov.b32 %r3563, %hh305; mov.b32 %r3564, %hh306; mov.b32 %r3565, %hh307; mov.b32 %r3566, %hh308; @%p19 st.global.v4.b32 [ %rd58 + 0 ], { %r3563, %r3564, %r3565, %r3566 }; mov.b32 %r3567, %hh309; mov.b32 %r3568, %hh310; mov.b32 %r3569, %hh311; mov.b32 %r3570, %hh312; @%p19 st.global.v4.b32 [ %rd59 + 0 ], { %r3567, %r3568, %r3569, %r3570 }; mov.b32 %r3571, %hh313; mov.b32 %r3572, %hh314; mov.b32 %r3573, %hh315; mov.b32 %r3574, %hh316; @%p19 st.global.v4.b32 [ %rd60 + 0 ], { %r3571, %r3572, %r3573, %r3574 }; mov.b32 %r3575, %hh317; mov.b32 %r3576, %hh318; mov.b32 %r3577, %hh319; mov.b32 %r3578, %hh320; @%p19 st.global.v4.b32 [ %rd61 + 0 ], { %r3575, %r3576, %r3577, %r3578 }; ret; }