/aosp_15_r20/external/libaom/aom_dsp/x86/
common_avx2.h
     93  tr0_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4);  in mm256_transpose_16x16()
     95  tr0_2 = _mm256_unpacklo_epi64(tr1_1, tr1_5);  in mm256_transpose_16x16()
     97  tr0_4 = _mm256_unpacklo_epi64(tr1_2, tr1_6);  in mm256_transpose_16x16()
     99  tr0_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7);  in mm256_transpose_16x16()
    102  tr0_8 = _mm256_unpacklo_epi64(tr1_8, tr1_c);  in mm256_transpose_16x16()
    104  tr0_a = _mm256_unpacklo_epi64(tr1_9, tr1_d);  in mm256_transpose_16x16()
    106  tr0_c = _mm256_unpacklo_epi64(tr1_a, tr1_e);  in mm256_transpose_16x16()
    108  tr0_e = _mm256_unpacklo_epi64(tr1_b, tr1_f);  in mm256_transpose_16x16()
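The hits in this index are all instances of one lane-split pattern: within each 128-bit lane of a 256-bit register, _mm256_unpacklo_epi64(a, b) interleaves the low 64-bit element of a with the low 64-bit element of b, yielding {a0, b0, a2, b2}. A minimal standalone sketch of that semantics (not taken from any of the listed files; compile with -mavx2):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
  /* Element order with setr: a = {0xA0, 0xA1, 0xA2, 0xA3}. */
  const __m256i a = _mm256_setr_epi64x(0xA0, 0xA1, 0xA2, 0xA3);
  const __m256i b = _mm256_setr_epi64x(0xB0, 0xB1, 0xB2, 0xB3);

  /* Per 128-bit lane: low 64-bit element of a, then of b.
     Result: {0xA0, 0xB0, 0xA2, 0xB2} -- note the lane boundary
     between elements 1 and 2. */
  const __m256i lo = _mm256_unpacklo_epi64(a, b);

  long long out[4];
  _mm256_storeu_si256((__m256i *)out, lo);
  printf("%llx %llx %llx %llx\n", out[0], out[1], out[2], out[3]);
  return 0;
}

This prints "a0 b0 a2 b2", which is why the transpose routines below always pair unpacklo/unpackhi with a cross-lane permute: the unpack alone never crosses the 128-bit boundary.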
txfm_common_avx2.h
    149  out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]);  in transpose2_8x8_avx2()
    152  out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]);  in transpose2_8x8_avx2()
    218  out[0] = _mm256_unpacklo_epi64(b0, b1);  in transpose_16bit_16x8_avx2()
    220  out[2] = _mm256_unpacklo_epi64(b4, b5);  in transpose_16bit_16x8_avx2()
    222  out[4] = _mm256_unpacklo_epi64(b2, b3);  in transpose_16bit_16x8_avx2()
    224  out[6] = _mm256_unpacklo_epi64(b6, b7);  in transpose_16bit_16x8_avx2()
avg_intrin_avx2.c
     84  in[0] = _mm256_unpacklo_epi64(b0, b1);  in hadamard_col8x2_avx2()
     86  in[2] = _mm256_unpacklo_epi64(b2, b3);  in hadamard_col8x2_avx2()
     88  in[4] = _mm256_unpacklo_epi64(b4, b5);  in hadamard_col8x2_avx2()
     90  in[6] = _mm256_unpacklo_epi64(b6, b7);  in hadamard_col8x2_avx2()
    347  b0 = _mm256_unpacklo_epi64(a0, a1);  in highbd_hadamard_col8_avx2()
    348  b1 = _mm256_unpacklo_epi64(a4, a5);  in highbd_hadamard_col8_avx2()
    351  b4 = _mm256_unpacklo_epi64(a2, a3);  in highbd_hadamard_col8_avx2()
    352  b5 = _mm256_unpacklo_epi64(a6, a7);  in highbd_hadamard_col8_avx2()
intrapred_avx2.c
    194  d[0] = _mm256_unpacklo_epi64(ww0, ww1);  // 00 10 20 30 40 50 60 70  in highbd_transpose4x16_avx2()
    200  d[2] = _mm256_unpacklo_epi64(ww0, ww1);  // 02 12 22 32 42 52 62 72  in highbd_transpose4x16_avx2()
    216  d[0] = _mm256_unpacklo_epi64(ww0, ww1);  // 00 10 20 30 40 50 60 70  in highbd_transpose8x16_16x8_avx2()
    222  d[2] = _mm256_unpacklo_epi64(ww0, ww1);  // 02 12 22 32 42 52 62 72  in highbd_transpose8x16_16x8_avx2()
    233  d[4] = _mm256_unpacklo_epi64(ww0, ww1);  // 04 14 24 34 44 54 64 74  in highbd_transpose8x16_16x8_avx2()
    239  d[6] = _mm256_unpacklo_epi64(ww0, ww1);  // 06 16 26 36 46 56 66 76  in highbd_transpose8x16_16x8_avx2()
    254  dd[0] = _mm256_unpacklo_epi64(ww0, ww1);  in highbd_transpose16x16_avx2()
    260  dd[2] = _mm256_unpacklo_epi64(ww0, ww1);  in highbd_transpose16x16_avx2()
    271  dd[4] = _mm256_unpacklo_epi64(ww0, ww1);  in highbd_transpose16x16_avx2()
    277  dd[6] = _mm256_unpacklo_epi64(ww0, ww1);  in highbd_transpose16x16_avx2()
    [all …]
variance_avx2.c
    622  __m256i tmp0_16x16 = _mm256_unpacklo_epi64(src0_16x16, src1_16x16);  in mse_4xh_quad_16bit_avx2()
    626  __m256i tmp2_16x16 = _mm256_unpacklo_epi64(src2_16x16, src3_16x16);  in mse_4xh_quad_16bit_avx2()
/aosp_15_r20/external/ruy/ruy/
pack_avx2_fma.cc
    179  t0 = _mm256_unpacklo_epi64(r0, r1);
    180  t4 = _mm256_unpacklo_epi64(r4, r5);
    183  t1 = _mm256_unpacklo_epi64(r2, r3);
    184  t5 = _mm256_unpacklo_epi64(r6, r7);
    311  t0 = _mm256_unpacklo_epi64(r0, r1);
    312  t4 = _mm256_unpacklo_epi64(r4, r5);
    315  t1 = _mm256_unpacklo_epi64(r2, r3);
    316  t5 = _mm256_unpacklo_epi64(r6, r7);
    397  t0 = _mm256_unpacklo_epi64(r0, r1);
    398  t4 = _mm256_unpacklo_epi64(r4, r5);
    [all …]
/aosp_15_r20/external/libaom/av1/common/x86/
cdef_block_avx2.c
     58  *x0 = _mm256_unpacklo_epi64(t0, t1);  in hsum4_avx2()
     60  *x2 = _mm256_unpacklo_epi64(t2, t3);  in hsum4_avx2()
    169  res[7] = _mm256_unpacklo_epi64(tr1_0, tr1_1);  in array_reverse_transpose_8x8_avx2()
    171  res[5] = _mm256_unpacklo_epi64(tr1_2, tr1_3);  in array_reverse_transpose_8x8_avx2()
    173  res[3] = _mm256_unpacklo_epi64(tr1_4, tr1_5);  in array_reverse_transpose_8x8_avx2()
    175  res[1] = _mm256_unpacklo_epi64(tr1_6, tr1_7);  in array_reverse_transpose_8x8_avx2()
highbd_warp_affine_avx2.c
    320  __m256i v_c01 = _mm256_unpacklo_epi64(  in av1_highbd_warp_affine_avx2()
    325  _mm256_unpacklo_epi64(v_c0123u, v_c4567u);  // H5H4 ... A5A4  in av1_highbd_warp_affine_avx2()
    449  __m256i v_c01 = _mm256_unpacklo_epi64(  in av1_highbd_warp_affine_avx2()
    454  _mm256_unpacklo_epi64(v_c0123u, v_c4567u);  // H5H4 ... A5A4  in av1_highbd_warp_affine_avx2()
    556  __m256i v_c01 = _mm256_unpacklo_epi64(  in av1_highbd_warp_affine_avx2()
    561  _mm256_unpacklo_epi64(v_c0123u, v_c4567u);  // H5H4 ... A5A4  in av1_highbd_warp_affine_avx2()
warp_plane_avx2.c
    188  coeff[0] = _mm256_unpacklo_epi64(res_0, res_2);  in prepare_horizontal_filter_coeff_avx2()
    190  coeff[2] = _mm256_unpacklo_epi64(res_1, res_3);  in prepare_horizontal_filter_coeff_avx2()
    228  coeff[0] = _mm256_unpacklo_epi64(res_0, res_2);  in prepare_horizontal_filter_coeff_beta0_avx2()
    230  coeff[2] = _mm256_unpacklo_epi64(res_1, res_3);  in prepare_horizontal_filter_coeff_beta0_avx2()
    489  coeffs[0] = _mm256_unpacklo_epi64(res_0, res_1);  in prepare_vertical_filter_coeffs_avx2()
    491  coeffs[2] = _mm256_unpacklo_epi64(res_2, res_3);  in prepare_vertical_filter_coeffs_avx2()
    534  coeffs[4] = _mm256_unpacklo_epi64(res_0, res_1);  in prepare_vertical_filter_coeffs_avx2()
    536  coeffs[6] = _mm256_unpacklo_epi64(res_2, res_3);  in prepare_vertical_filter_coeffs_avx2()
    565  coeffs[0] = _mm256_unpacklo_epi64(res_0, res_1);  in prepare_vertical_filter_coeffs_delta0_avx2()
    567  coeffs[2] = _mm256_unpacklo_epi64(res_2, res_3);  in prepare_vertical_filter_coeffs_delta0_avx2()
    [all …]
convolve_avx2.c
    109  __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi);  in av1_convolve_y_sr_general_avx2()
    212  __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi);  in av1_convolve_y_sr_general_avx2()
    475  __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi);  in av1_convolve_y_sr_general_avx2()
/aosp_15_r20/external/libvpx/vpx_dsp/x86/
avg_intrin_avx2.c
     66  b0 = _mm256_unpacklo_epi64(a0, a1);  in highbd_hadamard_col8_avx2()
     67  b1 = _mm256_unpacklo_epi64(a4, a5);  in highbd_hadamard_col8_avx2()
     70  b4 = _mm256_unpacklo_epi64(a2, a3);  in highbd_hadamard_col8_avx2()
     71  b5 = _mm256_unpacklo_epi64(a6, a7);  in highbd_hadamard_col8_avx2()
    285  in[0] = _mm256_unpacklo_epi64(b0, b1);  in hadamard_col8x2_avx2()
    287  in[2] = _mm256_unpacklo_epi64(b2, b3);  in hadamard_col8x2_avx2()
    289  in[4] = _mm256_unpacklo_epi64(b4, b5);  in hadamard_col8x2_avx2()
    291  in[6] = _mm256_unpacklo_epi64(b6, b7);  in hadamard_col8x2_avx2()
fwd_txfm_avx2.c
     73  out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]);  in transpose2_8x8_avx2()
     76  out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]);  in transpose2_8x8_avx2()
inv_txfm_avx2.c
    229  out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]);  in transpose2_8x8_avx2()
    232  out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]);  in transpose2_8x8_avx2()
/aosp_15_r20/external/libgav1/src/dsp/x86/
cdef_avx2.cc
    303  const __m256i v_src_4_0 = _mm256_unpacklo_epi64(v_src[0], v_src[4]);  in AddPartial()
    304  const __m256i v_src_5_1 = _mm256_unpacklo_epi64(v_src[1], v_src[5]);  in AddPartial()
    305  const __m256i v_src_6_2 = _mm256_unpacklo_epi64(v_src[2], v_src[6]);  in AddPartial()
    306  const __m256i v_src_7_3 = _mm256_unpacklo_epi64(v_src[3], v_src[7]);  in AddPartial()
    316  _mm256_unpacklo_epi64(_mm256_unpacklo_epi32(v_hsum_1_0, v_hsum_3_2),  in AddPartial()
convolve_avx2.cc
    238  StoreUnaligned32(&dest8[x], _mm256_unpacklo_epi64(result, result2));  in FilterHorizontal()
    276  const __m256i packed_result = _mm256_unpacklo_epi64(result, result2);  in FilterHorizontal()
/aosp_15_r20/external/eigen/Eigen/src/Core/arch/AVX512/
PacketMath.h
    1723  __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
    1725  __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
    1727  __m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);
    1729  __m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);
    1731  __m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);
    1733  __m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);
    1735  __m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);
    1737  __m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);
    2240  __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
    2242  __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
    [all …]
/aosp_15_r20/external/mesa3d/src/util/blake3/
blake3_avx2.c
    172  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);  in transpose_vecs()
    174  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);  in transpose_vecs()
    176  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);  in transpose_vecs()
    178  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);  in transpose_vecs()
blake3_avx512.c
    689  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);  in transpose_vecs_256()
    691  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);  in transpose_vecs_256()
    693  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);  in transpose_vecs_256()
    695  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);  in transpose_vecs_256()
/aosp_15_r20/external/swiftshader/third_party/llvm-16.0/llvm/lib/Support/BLAKE3/
blake3_avx2.c
    172  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);  in transpose_vecs()
    174  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);  in transpose_vecs()
    176  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);  in transpose_vecs()
    178  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);  in transpose_vecs()
blake3_avx512.c
    689  __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145);  in transpose_vecs_256()
    691  __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367);  in transpose_vecs_256()
    693  __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145);  in transpose_vecs_256()
    695  __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367);  in transpose_vecs_256()
/aosp_15_r20/external/libaom/av1/encoder/x86/
pickrst_avx2.c
     396  const __m256i t0 = _mm256_unpacklo_epi64(src0, src1);  in add_64bit_lvl_avx2()
     438  const __m256i reg0 = _mm256_unpacklo_epi64(src[0], src[1]);  in transpose_64bit_4x4_avx2()
     439  const __m256i reg1 = _mm256_unpacklo_epi64(src[2], src[3]);  in transpose_64bit_4x4_avx2()
    1739  __m256i c_low = _mm256_unpacklo_epi64(c0, c1);  in calc_proj_params_r0_r1_avx2()
    1745  __m256i h0x_low = _mm256_unpacklo_epi64(h00, h01);  in calc_proj_params_r0_r1_avx2()
    1753  __m256i h1x_low = _mm256_unpacklo_epi64(zero, h11);  in calc_proj_params_r0_r1_avx2()
    1967  __m256i c_low = _mm256_unpacklo_epi64(c0, c1);  in calc_proj_params_r0_r1_high_bd_avx2()
    1973  __m256i h0x_low = _mm256_unpacklo_epi64(h00, h01);  in calc_proj_params_r0_r1_high_bd_avx2()
    1981  __m256i h1x_low = _mm256_unpacklo_epi64(zero, h11);  in calc_proj_params_r0_r1_high_bd_avx2()
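For context on hits like the transpose_64bit_4x4_avx2 ones above: a 4x4 transpose of 64-bit elements takes one unpacklo/unpackhi stage inside the 128-bit lanes plus one cross-lane permute. The sketch below is an illustrative reimplementation of that general pattern under assumed row-major in[4]/out[4] registers; it is not libaom's actual transpose_64bit_4x4_avx2, which may order its operations differently.

#include <immintrin.h>

static void transpose_64bit_4x4(const __m256i in[4], __m256i out[4]) {
  /* Stage 1: interleave 64-bit elements within 128-bit lanes.
     r0 = {in0[0], in1[0], in0[2], in1[2]}, r1 = {in0[1], in1[1], in0[3], in1[3]}, etc. */
  const __m256i r0 = _mm256_unpacklo_epi64(in[0], in[1]);
  const __m256i r1 = _mm256_unpackhi_epi64(in[0], in[1]);
  const __m256i r2 = _mm256_unpacklo_epi64(in[2], in[3]);
  const __m256i r3 = _mm256_unpackhi_epi64(in[2], in[3]);
  /* Stage 2: recombine 128-bit halves across the lane boundary.
     0x20 selects {low(a), low(b)}; 0x31 selects {high(a), high(b)}. */
  out[0] = _mm256_permute2x128_si256(r0, r2, 0x20);  /* column 0 */
  out[1] = _mm256_permute2x128_si256(r1, r3, 0x20);  /* column 1 */
  out[2] = _mm256_permute2x128_si256(r0, r2, 0x31);  /* column 2 */
  out[3] = _mm256_permute2x128_si256(r1, r3, 0x31);  /* column 3 */
}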
/aosp_15_r20/external/libopenapv/src/avx/
oapv_tq_avx.c
    142  O0 = _mm256_unpacklo_epi64(I0, I2); \
    144  O2 = _mm256_unpacklo_epi64(I1, I3); \
/aosp_15_r20/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
MatMatProductAVX2.h
     269  R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
     270  R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
    1183  R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
    1184  R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
    1933  R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
    1934  R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
/aosp_15_r20/external/libaom/aom_dsp/simd/
v256_intrinsics_x86.h
    180  return _mm256_unpacklo_epi64(  in v256_ziplo_64()
    483  _mm256_srl_epi64(_mm256_sll_epi64(_mm256_unpacklo_epi64(rl, rh), c), c));  in v256_ssd_u8()
/aosp_15_r20/external/lzma/C/
Blake2s.c
    1652  s0 = _mm256_unpacklo_epi64(z0, z1); \
    1654  s2 = _mm256_unpacklo_epi64(z2, z3); \
    1698  s0 = _mm256_unpacklo_epi64(z0, z1); \
    1700  s2 = _mm256_unpacklo_epi64(z2, z3); \