Home
last modified time | relevance | path

Searched refs:_mm256_unpacklo_epi64 (Results 1 – 25 of 61) sorted by relevance

123

/aosp_15_r20/external/libaom/aom_dsp/x86/
H A Dcommon_avx2.h93 tr0_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4); in mm256_transpose_16x16()
95 tr0_2 = _mm256_unpacklo_epi64(tr1_1, tr1_5); in mm256_transpose_16x16()
97 tr0_4 = _mm256_unpacklo_epi64(tr1_2, tr1_6); in mm256_transpose_16x16()
99 tr0_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7); in mm256_transpose_16x16()
102 tr0_8 = _mm256_unpacklo_epi64(tr1_8, tr1_c); in mm256_transpose_16x16()
104 tr0_a = _mm256_unpacklo_epi64(tr1_9, tr1_d); in mm256_transpose_16x16()
106 tr0_c = _mm256_unpacklo_epi64(tr1_a, tr1_e); in mm256_transpose_16x16()
108 tr0_e = _mm256_unpacklo_epi64(tr1_b, tr1_f); in mm256_transpose_16x16()
H A Dtxfm_common_avx2.h149 out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]); in transpose2_8x8_avx2()
152 out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]); in transpose2_8x8_avx2()
218 out[0] = _mm256_unpacklo_epi64(b0, b1); in transpose_16bit_16x8_avx2()
220 out[2] = _mm256_unpacklo_epi64(b4, b5); in transpose_16bit_16x8_avx2()
222 out[4] = _mm256_unpacklo_epi64(b2, b3); in transpose_16bit_16x8_avx2()
224 out[6] = _mm256_unpacklo_epi64(b6, b7); in transpose_16bit_16x8_avx2()
H A Davg_intrin_avx2.c84 in[0] = _mm256_unpacklo_epi64(b0, b1); in hadamard_col8x2_avx2()
86 in[2] = _mm256_unpacklo_epi64(b2, b3); in hadamard_col8x2_avx2()
88 in[4] = _mm256_unpacklo_epi64(b4, b5); in hadamard_col8x2_avx2()
90 in[6] = _mm256_unpacklo_epi64(b6, b7); in hadamard_col8x2_avx2()
347 b0 = _mm256_unpacklo_epi64(a0, a1); in highbd_hadamard_col8_avx2()
348 b1 = _mm256_unpacklo_epi64(a4, a5); in highbd_hadamard_col8_avx2()
351 b4 = _mm256_unpacklo_epi64(a2, a3); in highbd_hadamard_col8_avx2()
352 b5 = _mm256_unpacklo_epi64(a6, a7); in highbd_hadamard_col8_avx2()
H A Dintrapred_avx2.c194 d[0] = _mm256_unpacklo_epi64(ww0, ww1); // 00 10 20 30 40 50 60 70 in highbd_transpose4x16_avx2()
200 d[2] = _mm256_unpacklo_epi64(ww0, ww1); // 02 12 22 32 42 52 62 72 in highbd_transpose4x16_avx2()
216 d[0] = _mm256_unpacklo_epi64(ww0, ww1); // 00 10 20 30 40 50 60 70 in highbd_transpose8x16_16x8_avx2()
222 d[2] = _mm256_unpacklo_epi64(ww0, ww1); // 02 12 22 32 42 52 62 72 in highbd_transpose8x16_16x8_avx2()
233 d[4] = _mm256_unpacklo_epi64(ww0, ww1); // 04 14 24 34 44 54 64 74 in highbd_transpose8x16_16x8_avx2()
239 d[6] = _mm256_unpacklo_epi64(ww0, ww1); // 06 16 26 36 46 56 66 76 in highbd_transpose8x16_16x8_avx2()
254 dd[0] = _mm256_unpacklo_epi64(ww0, ww1); in highbd_transpose16x16_avx2()
260 dd[2] = _mm256_unpacklo_epi64(ww0, ww1); in highbd_transpose16x16_avx2()
271 dd[4] = _mm256_unpacklo_epi64(ww0, ww1); in highbd_transpose16x16_avx2()
277 dd[6] = _mm256_unpacklo_epi64(ww0, ww1); in highbd_transpose16x16_avx2()
[all …]
H A Dvariance_avx2.c622 __m256i tmp0_16x16 = _mm256_unpacklo_epi64(src0_16x16, src1_16x16); in mse_4xh_quad_16bit_avx2()
626 __m256i tmp2_16x16 = _mm256_unpacklo_epi64(src2_16x16, src3_16x16); in mse_4xh_quad_16bit_avx2()
/aosp_15_r20/external/ruy/ruy/
H A Dpack_avx2_fma.cc179 t0 = _mm256_unpacklo_epi64(r0, r1);
180 t4 = _mm256_unpacklo_epi64(r4, r5);
183 t1 = _mm256_unpacklo_epi64(r2, r3);
184 t5 = _mm256_unpacklo_epi64(r6, r7);
311 t0 = _mm256_unpacklo_epi64(r0, r1);
312 t4 = _mm256_unpacklo_epi64(r4, r5);
315 t1 = _mm256_unpacklo_epi64(r2, r3);
316 t5 = _mm256_unpacklo_epi64(r6, r7);
397 t0 = _mm256_unpacklo_epi64(r0, r1);
398 t4 = _mm256_unpacklo_epi64(r4, r5);
[all …]
/aosp_15_r20/external/libaom/av1/common/x86/
H A Dcdef_block_avx2.c58 *x0 = _mm256_unpacklo_epi64(t0, t1); in hsum4_avx2()
60 *x2 = _mm256_unpacklo_epi64(t2, t3); in hsum4_avx2()
169 res[7] = _mm256_unpacklo_epi64(tr1_0, tr1_1); in array_reverse_transpose_8x8_avx2()
171 res[5] = _mm256_unpacklo_epi64(tr1_2, tr1_3); in array_reverse_transpose_8x8_avx2()
173 res[3] = _mm256_unpacklo_epi64(tr1_4, tr1_5); in array_reverse_transpose_8x8_avx2()
175 res[1] = _mm256_unpacklo_epi64(tr1_6, tr1_7); in array_reverse_transpose_8x8_avx2()
H A Dhighbd_warp_affine_avx2.c320 __m256i v_c01 = _mm256_unpacklo_epi64( in av1_highbd_warp_affine_avx2()
325 _mm256_unpacklo_epi64(v_c0123u, v_c4567u); // H5H4 ... A5A4 in av1_highbd_warp_affine_avx2()
449 __m256i v_c01 = _mm256_unpacklo_epi64( in av1_highbd_warp_affine_avx2()
454 _mm256_unpacklo_epi64(v_c0123u, v_c4567u); // H5H4 ... A5A4 in av1_highbd_warp_affine_avx2()
556 __m256i v_c01 = _mm256_unpacklo_epi64( in av1_highbd_warp_affine_avx2()
561 _mm256_unpacklo_epi64(v_c0123u, v_c4567u); // H5H4 ... A5A4 in av1_highbd_warp_affine_avx2()
H A Dwarp_plane_avx2.c188 coeff[0] = _mm256_unpacklo_epi64(res_0, res_2); in prepare_horizontal_filter_coeff_avx2()
190 coeff[2] = _mm256_unpacklo_epi64(res_1, res_3); in prepare_horizontal_filter_coeff_avx2()
228 coeff[0] = _mm256_unpacklo_epi64(res_0, res_2); in prepare_horizontal_filter_coeff_beta0_avx2()
230 coeff[2] = _mm256_unpacklo_epi64(res_1, res_3); in prepare_horizontal_filter_coeff_beta0_avx2()
489 coeffs[0] = _mm256_unpacklo_epi64(res_0, res_1); in prepare_vertical_filter_coeffs_avx2()
491 coeffs[2] = _mm256_unpacklo_epi64(res_2, res_3); in prepare_vertical_filter_coeffs_avx2()
534 coeffs[4] = _mm256_unpacklo_epi64(res_0, res_1); in prepare_vertical_filter_coeffs_avx2()
536 coeffs[6] = _mm256_unpacklo_epi64(res_2, res_3); in prepare_vertical_filter_coeffs_avx2()
565 coeffs[0] = _mm256_unpacklo_epi64(res_0, res_1); in prepare_vertical_filter_coeffs_delta0_avx2()
567 coeffs[2] = _mm256_unpacklo_epi64(res_2, res_3); in prepare_vertical_filter_coeffs_delta0_avx2()
[all …]
H A Dconvolve_avx2.c109 __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi); in av1_convolve_y_sr_general_avx2()
212 __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi); in av1_convolve_y_sr_general_avx2()
475 __m256i res_a = _mm256_unpacklo_epi64(res_8b_lo, res_8b_hi); in av1_convolve_y_sr_general_avx2()
/aosp_15_r20/external/libvpx/vpx_dsp/x86/
H A Davg_intrin_avx2.c66 b0 = _mm256_unpacklo_epi64(a0, a1); in highbd_hadamard_col8_avx2()
67 b1 = _mm256_unpacklo_epi64(a4, a5); in highbd_hadamard_col8_avx2()
70 b4 = _mm256_unpacklo_epi64(a2, a3); in highbd_hadamard_col8_avx2()
71 b5 = _mm256_unpacklo_epi64(a6, a7); in highbd_hadamard_col8_avx2()
285 in[0] = _mm256_unpacklo_epi64(b0, b1); in hadamard_col8x2_avx2()
287 in[2] = _mm256_unpacklo_epi64(b2, b3); in hadamard_col8x2_avx2()
289 in[4] = _mm256_unpacklo_epi64(b4, b5); in hadamard_col8x2_avx2()
291 in[6] = _mm256_unpacklo_epi64(b6, b7); in hadamard_col8x2_avx2()
H A Dfwd_txfm_avx2.c73 out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]); in transpose2_8x8_avx2()
76 out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]); in transpose2_8x8_avx2()
H A Dinv_txfm_avx2.c229 out[2 * i] = _mm256_unpacklo_epi64(u[2 * i], u[2 * i + 4]); in transpose2_8x8_avx2()
232 out[2 * i + 4] = _mm256_unpacklo_epi64(u[2 * i + 1], u[2 * i + 5]); in transpose2_8x8_avx2()
/aosp_15_r20/external/libgav1/src/dsp/x86/
H A Dcdef_avx2.cc303 const __m256i v_src_4_0 = _mm256_unpacklo_epi64(v_src[0], v_src[4]); in AddPartial()
304 const __m256i v_src_5_1 = _mm256_unpacklo_epi64(v_src[1], v_src[5]); in AddPartial()
305 const __m256i v_src_6_2 = _mm256_unpacklo_epi64(v_src[2], v_src[6]); in AddPartial()
306 const __m256i v_src_7_3 = _mm256_unpacklo_epi64(v_src[3], v_src[7]); in AddPartial()
316 _mm256_unpacklo_epi64(_mm256_unpacklo_epi32(v_hsum_1_0, v_hsum_3_2), in AddPartial()
H A Dconvolve_avx2.cc238 StoreUnaligned32(&dest8[x], _mm256_unpacklo_epi64(result, result2)); in FilterHorizontal()
276 const __m256i packed_result = _mm256_unpacklo_epi64(result, result2); in FilterHorizontal()
/aosp_15_r20/external/eigen/Eigen/src/Core/arch/AVX512/
H A DPacketMath.h1723 __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
1725 __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
1727 __m256i abcdefgh_45 = _mm256_unpacklo_epi64(abcd_47, efgh_47);
1729 __m256i ijklmnop_45 = _mm256_unpacklo_epi64(ijkl_47, mnop_47);
1731 __m256i abcdefgh_89 = _mm256_unpacklo_epi64(abcd_8b, efgh_8b);
1733 __m256i ijklmnop_89 = _mm256_unpacklo_epi64(ijkl_8b, mnop_8b);
1735 __m256i abcdefgh_cd = _mm256_unpacklo_epi64(abcd_cf, efgh_cf);
1737 __m256i ijklmnop_cd = _mm256_unpacklo_epi64(ijkl_cf, mnop_cf);
2240 __m256i abcdefgh_01 = _mm256_unpacklo_epi64(abcd_03, efgh_03);
2242 __m256i ijklmnop_01 = _mm256_unpacklo_epi64(ijkl_03, mnop_03);
[all …]
/aosp_15_r20/external/mesa3d/src/util/blake3/
H A Dblake3_avx2.c172 __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145); in transpose_vecs()
174 __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367); in transpose_vecs()
176 __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145); in transpose_vecs()
178 __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367); in transpose_vecs()
H A Dblake3_avx512.c689 __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145); in transpose_vecs_256()
691 __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367); in transpose_vecs_256()
693 __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145); in transpose_vecs_256()
695 __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367); in transpose_vecs_256()
/aosp_15_r20/external/swiftshader/third_party/llvm-16.0/llvm/lib/Support/BLAKE3/
H A Dblake3_avx2.c172 __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145); in transpose_vecs()
174 __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367); in transpose_vecs()
176 __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145); in transpose_vecs()
178 __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367); in transpose_vecs()
H A Dblake3_avx512.c689 __m256i abcd_04 = _mm256_unpacklo_epi64(ab_0145, cd_0145); in transpose_vecs_256()
691 __m256i abcd_26 = _mm256_unpacklo_epi64(ab_2367, cd_2367); in transpose_vecs_256()
693 __m256i efgh_04 = _mm256_unpacklo_epi64(ef_0145, gh_0145); in transpose_vecs_256()
695 __m256i efgh_26 = _mm256_unpacklo_epi64(ef_2367, gh_2367); in transpose_vecs_256()
/aosp_15_r20/external/libaom/av1/encoder/x86/
H A Dpickrst_avx2.c396 const __m256i t0 = _mm256_unpacklo_epi64(src0, src1); in add_64bit_lvl_avx2()
438 const __m256i reg0 = _mm256_unpacklo_epi64(src[0], src[1]); in transpose_64bit_4x4_avx2()
439 const __m256i reg1 = _mm256_unpacklo_epi64(src[2], src[3]); in transpose_64bit_4x4_avx2()
1739 __m256i c_low = _mm256_unpacklo_epi64(c0, c1); in calc_proj_params_r0_r1_avx2()
1745 __m256i h0x_low = _mm256_unpacklo_epi64(h00, h01); in calc_proj_params_r0_r1_avx2()
1753 __m256i h1x_low = _mm256_unpacklo_epi64(zero, h11); in calc_proj_params_r0_r1_avx2()
1967 __m256i c_low = _mm256_unpacklo_epi64(c0, c1); in calc_proj_params_r0_r1_high_bd_avx2()
1973 __m256i h0x_low = _mm256_unpacklo_epi64(h00, h01); in calc_proj_params_r0_r1_high_bd_avx2()
1981 __m256i h1x_low = _mm256_unpacklo_epi64(zero, h11); in calc_proj_params_r0_r1_high_bd_avx2()
/aosp_15_r20/external/libopenapv/src/avx/
H A Doapv_tq_avx.c142 O0 = _mm256_unpacklo_epi64(I0, I2); \
144 O2 = _mm256_unpacklo_epi64(I1, I3); \
/aosp_15_r20/external/tensorflow/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/
H A DMatMatProductAVX2.h269 R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
270 R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
1183 R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
1184 R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
1933 R_AB_L = _mm256_unpacklo_epi64(R_A, R_B); \
1934 R_CD_L = _mm256_unpacklo_epi64(R_C, R_D); \
/aosp_15_r20/external/libaom/aom_dsp/simd/
H A Dv256_intrinsics_x86.h180 return _mm256_unpacklo_epi64( in v256_ziplo_64()
483 _mm256_srl_epi64(_mm256_sll_epi64(_mm256_unpacklo_epi64(rl, rh), c), c)); in v256_ssd_u8()
/aosp_15_r20/external/lzma/C/
H A DBlake2s.c1652 s0 = _mm256_unpacklo_epi64(z0, z1); \
1654 s2 = _mm256_unpacklo_epi64(z2, z3); \
1698 s0 = _mm256_unpacklo_epi64(z0, z1); \
1700 s2 = _mm256_unpacklo_epi64(z2, z3); \

123