Searched refs:ven0 (Results 1 – 25 of 52) sorted by relevance

/aosp_15_r20/external/XNNPACK/src/f32-velu/gen/

velu-avx-rr2-lut4-p4-perm-x16.c  (in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x16)
   52  __m256 ven0 = _mm256_andnot_ps(vindex_mask, vn0);  [local]
   54  …128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven0)), 21));
   60  …8 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0, 1)), 21));
   65  ven0 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven0_lo), ven0_hi, 1);
   70  __m256 vs0 = _mm256_mul_ps(vl0, ven0);

velu-avx-rr2-lut4-p4-perm-x24.c  (in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x24)
   55  __m256 ven0 = _mm256_andnot_ps(vindex_mask, vn0);  [local]
   57  …128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven0)), 21));
   66  …8 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0, 1)), 21));
   73  ven0 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven0_lo), ven0_hi, 1);
   80  __m256 vs0 = _mm256_mul_ps(vl0, ven0);

velu-avx-rr2-lut4-p4-perm-x32.c  (in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x32)
   58  __m256 ven0 = _mm256_andnot_ps(vindex_mask, vn0);  [local]
   60  …128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven0)), 21));
   72  …8 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0, 1)), 21));
   81  ven0 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven0_lo), ven0_hi, 1);
   90  __m256 vs0 = _mm256_mul_ps(vl0, ven0);

velu-avx-rr2-lut4-p4-perm-x40.c  (in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x40)
   61  __m256 ven0 = _mm256_andnot_ps(vindex_mask, vn0);  [local]
   63  …128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven0)), 21));
   78  …8 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0, 1)), 21));
   89  ven0 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven0_lo), ven0_hi, 1);
  100  __m256 vs0 = _mm256_mul_ps(vl0, ven0);

velu-avx-rr2-lut4-p4-perm-x48.c  (in xnn_f32_velu_ukernel__avx_rr2_lut4_p4_perm_x48)
   64  __m256 ven0 = _mm256_andnot_ps(vindex_mask, vn0);  [local]
   66  …128 ven0_lo = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_castps256_ps128(ven0)), 21));
   84  …8 ven0_hi = _mm_castsi128_ps(_mm_slli_epi32(_mm_castps_si128(_mm256_extractf128_ps(ven0, 1)), 21));
   97  ven0 = _mm256_insertf128_ps(_mm256_castps128_ps256(ven0_lo), ven0_hi, 1);
  110  __m256 vs0 = _mm256_mul_ps(vl0, ven0);

velu-wasm-rr2-lut16-p3-x2.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x2)
   52  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   60  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-scalar-rr2-lut16-p3-x2.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x2)
   52  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   60  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-wasm-rr2-lut16-p3-x3.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x3)
   55  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   66  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-scalar-rr2-lut16-p3-x3.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x3)
   55  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   66  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-scalar-rr2-lut16-p3-x4.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x4)
   58  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   72  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-wasm-rr2-lut16-p3-x4.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x4)
   58  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   72  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-avx512f-rr1-lut16-p3-perm-x32.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x32)
   50  const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19);  [local]
   55  __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0));

velu-wasm-rr2-lut16-p3-x5.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x5)
   61  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   78  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-scalar-rr2-lut16-p3-x5.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x5)
   61  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   78  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-avx2-rr1-lut16-p3-gather-x16.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x16)
   55  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 19);  [local]
   60  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-avx2-rr1-lut4-p4-perm-x16.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x16)
   49  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 21);  [local]
   56  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-avx2-rr1-lut8-p4-perm-x16.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x16)
   49  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 20);  [local]
   56  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-wasm-rr2-lut16-p3-x6.c  (in xnn_f32_velu_ukernel__wasm_rr2_lut16_p3_x6)
   64  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   84  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-scalar-rr2-lut16-p3-x6.c  (in xnn_f32_velu_ukernel__scalar_rr2_lut16_p3_x6)
   64  const uint32_t ven0 = float_as_uint32(vn0) << 19;  [local]
   84  float vs0 = uint32_as_float(xnn_table_exp2minus_k_over_16[vidx0] + ven0);

velu-avx2-rr1-lut8-p4-perm-x24.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x24)
   52  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 20);  [local]
   62  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-avx2-rr1-lut16-p3-gather-x24.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut16_p3_gather_x24)
   60  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 19);  [local]
   67  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-avx2-rr1-lut4-p4-perm-x24.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut4_p4_perm_x24)
   52  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 21);  [local]
   62  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));

velu-avx512f-rr1-lut16-p3-perm-x48.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x48)
   53  const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19);  [local]
   60  __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0));

velu-avx512f-rr1-lut16-p3-perm-x64.c  (in xnn_f32_velu_ukernel__avx512f_rr1_lut16_p3_perm_x64)
   56  const __m512i ven0 = _mm512_slli_epi32(_mm512_castps_si512(vn0), 19);  [local]
   65  __m512 vs0 = _mm512_castsi512_ps(_mm512_add_epi32(vl0, ven0));

velu-avx2-rr1-lut8-p4-perm-x32.c  (in xnn_f32_velu_ukernel__avx2_rr1_lut8_p4_perm_x32)
   55  const __m256i ven0 = _mm256_slli_epi32(_mm256_castps_si256(vn0), 20);  [local]
   68  __m256 vs0 = _mm256_castsi256_ps(_mm256_add_epi32(vl0, ven0));
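
Every hit above is the same step of these ELU microkernels: ven0 holds exponent bits derived from the float representation of vn0 (shifted left by 23 minus the number of LUT index bits: 19 for 16-entry, 20 for 8-entry, 21 for 4-entry tables) and is combined with a table value vl0, by integer addition or float multiplication, to reconstruct the power-of-two scale vs0. The standalone sketch below illustrates only the underlying bit trick, adding an integer into the IEEE-754 exponent field to multiply by a power of two. It is not XNNPACK code: the helper names mirror the kernels' float_as_uint32/uint32_as_float but are defined locally, and the kernels' magic-bias rounding and table layout involve extra bookkeeping not reproduced here.

    /*
     * Standalone sketch (not XNNPACK code): multiplying by 2^n via an
     * integer add into the IEEE-754 exponent field, the trick ven0 implements.
     */
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Local stand-ins for the kernels' bit-cast helpers. */
    static uint32_t float_as_uint32(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }
    static float uint32_as_float(uint32_t u) { float f; memcpy(&f, &u, sizeof f); return f; }

    int main(void) {
      const float vl = exp2f(-3.0f / 16.0f);  /* stands in for one LUT entry, 2^(-3/16) */
      const int32_t n = -5;                   /* integer part of the target exponent */

      /* Pre-shift n into the exponent field (bits 23..30); this plays the role of
       * ven0. In the kernels the shift is 23 minus the LUT index width because the
       * index bits still occupy the low bits of vn0 at that point. */
      const uint32_t ven = (uint32_t)n << 23;

      /* Integer addition in the exponent field == multiplication by 2^n,
       * provided the result stays a normal (nonzero, finite) float. */
      const float vs = uint32_as_float(float_as_uint32(vl) + ven);

      printf("reconstructed 2^(-5 - 3/16) = %.8f\n", vs);
      printf("libm exp2f    2^(-5 - 3/16) = %.8f\n", exp2f(-5.0f - 3.0f / 16.0f));
      return 0;
    }

The two printed values agree up to float rounding. The same addition happens vectorized in the _mm256_add_epi32/_mm512_add_epi32 hits above, while the AVX rr2-lut4 variants instead mask the index bits out of vn0 (_mm256_andnot_ps), shift the halves by 21, and apply the scale with a float multiply (_mm256_mul_ps).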
