/aosp_15_r20/external/libvpx/vp8/common/arm/neon/
sixtappredict_neon.c
    51   vreinterpret_u32_u8(vget_high_u8(a)));  in filter_add_accumulate()
    53   vreinterpret_u32_u8(vget_high_u8(b)));  in filter_add_accumulate()
    62   vreinterpret_u32_u8(vget_high_u8(a)));  in filter_sub_accumulate()
    64   vreinterpret_u32_u8(vget_high_u8(b)));  in filter_sub_accumulate()
    216  s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5);  in vp8_sixtap_predict4x4_neon()
    217  s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5);  in vp8_sixtap_predict4x4_neon()
    218  s2_f5 = vext_u8(vget_low_u8(s2), vget_high_u8(s2), 5);  in vp8_sixtap_predict4x4_neon()
    219  s3_f5 = vext_u8(vget_low_u8(s3), vget_high_u8(s3), 5);  in vp8_sixtap_predict4x4_neon()
    288  s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5);  in vp8_sixtap_predict4x4_neon()
    289  s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5);  in vp8_sixtap_predict4x4_neon()
    [all …]
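These sixtappredict references show the standard NEON idiom for horizontal filtering: split one 16-byte load with vget_low_u8/vget_high_u8, then slide an 8-byte window across the pair with vext_u8 to produce the shifted source vector each filter tap consumes. A minimal sketch of that idiom (the function and variable names below are illustrative, not libvpx's):

    #include <arm_neon.h>

    /* Build the six shifted 8-byte windows a six-tap filter reads from
     * one 16-byte load; window k covers source pixels [k .. k+7]. */
    static void sixtap_windows(uint8x16_t s, uint8x8_t w[6]) {
      uint8x8_t lo = vget_low_u8(s);
      uint8x8_t hi = vget_high_u8(s);
      w[0] = lo;
      w[1] = vext_u8(lo, hi, 1);
      w[2] = vext_u8(lo, hi, 2);
      w[3] = vext_u8(lo, hi, 3);
      w[4] = vext_u8(lo, hi, 4);
      w[5] = vext_u8(lo, hi, 5); /* the s*_f5 vectors in the listing above */
    }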
bilinearpredict_neon.c
    76   vreinterpret_u32_u8(vget_high_u8(a01)));  in vp8_bilinear_predict4x4_neon()
    78   vreinterpret_u32_u8(vget_high_u8(a23)));  in vp8_bilinear_predict4x4_neon()
    80   vreinterpret_u32_u8(vget_high_u8(b01)));  in vp8_bilinear_predict4x4_neon()
    82   vreinterpret_u32_u8(vget_high_u8(b23)));  in vp8_bilinear_predict4x4_neon()
    161  d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);  in vp8_bilinear_predict8x4_neon()
    162  d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1);  in vp8_bilinear_predict8x4_neon()
    163  d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1);  in vp8_bilinear_predict8x4_neon()
    164  d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1);  in vp8_bilinear_predict8x4_neon()
    165  d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1);  in vp8_bilinear_predict8x4_neon()
    265  d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1);  in vp8_bilinear_predict8x8_neon()
    [all …]
mbloopfilter_neon.c
    266  vst1_u8(v, vget_high_u8(q4));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    270  vst1_u8(v, vget_high_u8(q5));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    274  vst1_u8(v, vget_high_u8(q6));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    278  vst1_u8(v, vget_high_u8(q7));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    282  vst1_u8(v, vget_high_u8(q8));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    285  vst1_u8(v, vget_high_u8(q9));  in vp8_mbloop_filter_horizontal_edge_uv_neon()
    420  vst1_u8(s2, vget_high_u8(q3));  in vp8_mbloop_filter_vertical_edge_y_neon()
    424  vst1_u8(s2, vget_high_u8(q4));  in vp8_mbloop_filter_vertical_edge_y_neon()
    428  vst1_u8(s2, vget_high_u8(q5));  in vp8_mbloop_filter_vertical_edge_y_neon()
    432  vst1_u8(s2, vget_high_u8(q6));  in vp8_mbloop_filter_vertical_edge_y_neon()
    [all …]
/aosp_15_r20/external/libjpeg-turbo/simd/arm/
jdsample-neon.c
    90   vmlal_u8(vmovl_u8(vget_high_u8(s1)), vget_high_u8(s0), three_u8);  in jsimd_h2v1_fancy_upsample_neon()
    94   vmlal_u8(vmovl_u8(vget_high_u8(s0)), vget_high_u8(s1), three_u8);  in jsimd_h2v1_fancy_upsample_neon()
    129  vmlal_u8(vmovl_u8(vget_high_u8(s1)), vget_high_u8(s0), three_u8);  in jsimd_h2v1_fancy_upsample_neon()
    133  vmlal_u8(vmovl_u8(vget_high_u8(s0)), vget_high_u8(s1), three_u8);  in jsimd_h2v1_fancy_upsample_neon()
    249  uint16x8_t s0colsum0_h = vmlal_u8(vmovl_u8(vget_high_u8(s0A)),  in jsimd_h2v2_fancy_upsample_neon()
    250      vget_high_u8(s0B), three_u8);  in jsimd_h2v2_fancy_upsample_neon()
    253  uint16x8_t s0colsum1_h = vmlal_u8(vmovl_u8(vget_high_u8(s0C)),  in jsimd_h2v2_fancy_upsample_neon()
    254      vget_high_u8(s0B), three_u8);  in jsimd_h2v2_fancy_upsample_neon()
    261  uint16x8_t s1colsum0_h = vmlal_u8(vmovl_u8(vget_high_u8(s1A)),  in jsimd_h2v2_fancy_upsample_neon()
    262      vget_high_u8(s1B), three_u8);  in jsimd_h2v2_fancy_upsample_neon()
    [all …]
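The jdsample references all share one arithmetic shape: vmovl_u8 widens the farther neighbor to 16 bits and vmlal_u8 accumulates three times the nearer one, giving the 3:1 weighting of libjpeg-turbo's fancy (triangular) upsampling. A hedged sketch of that step for the high half of two 16-byte rows (the helper name is hypothetical):

    #include <arm_neon.h>

    /* 16-bit sum 3*near + far for the high eight lanes;
     * 3*255 + 255 = 1020 fits comfortably in uint16. */
    static uint16x8_t triangle_sum_high(uint8x16_t s_near, uint8x16_t s_far) {
      const uint8x8_t three_u8 = vdup_n_u8(3);
      return vmlal_u8(vmovl_u8(vget_high_u8(s_far)),
                      vget_high_u8(s_near), three_u8);
    }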
jcsample-neon.c
    112  uint8x8x2_t table = { { vget_low_u8(pixels), vget_high_u8(pixels) } };  in jsimd_h2v1_downsample_neon()
    114  vtbl2_u8(table, vget_high_u8(expand_mask)));  in jsimd_h2v1_downsample_neon()
    176  { { vget_low_u8(pixels_r0), vget_high_u8(pixels_r0) } };  in jsimd_h2v2_downsample_neon()
    178  { { vget_low_u8(pixels_r1), vget_high_u8(pixels_r1) } };  in jsimd_h2v2_downsample_neon()
    180  vtbl2_u8(table_r0, vget_high_u8(expand_mask)));  in jsimd_h2v2_downsample_neon()
    182  vtbl2_u8(table_r1, vget_high_u8(expand_mask)));  in jsimd_h2v2_downsample_neon()
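jcsample splits a 16-byte register into a uint8x8x2_t so vtbl2_u8 can index all 16 lanes with one 8-lane index vector (AArch32 vtbl tables are 8-byte registers). A sketch of the pattern, assuming the idx lanes hold values in 0..15 and with an illustrative helper name:

    #include <arm_neon.h>

    /* Select 8 arbitrary bytes out of 16: the two halves of `pixels`
     * form a two-register lookup table; out-of-range indices yield 0. */
    static uint8x8_t permute_16(uint8x16_t pixels, uint8x8_t idx) {
      uint8x8x2_t table = { { vget_low_u8(pixels), vget_high_u8(pixels) } };
      return vtbl2_u8(table, idx);
    }

On AArch64 the same selection is a single vqtbl1q_u8 on the whole 16-byte register.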
/aosp_15_r20/external/pytorch/aten/src/ATen/native/quantized/cpu/qnnpack/src/q8gemm/
4x8c2-xzp-neon.c
    89   vmull_u8(va0x01234567, vget_high_u8(vb01234567x01))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    96   vmull_u8(va1x01234567, vget_high_u8(vb01234567x01))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    103  vmull_u8(va2x01234567, vget_high_u8(vb01234567x01))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    110  vmull_u8(va3x01234567, vget_high_u8(vb01234567x01))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    126  vmull_u8(va0x01234567, vget_high_u8(vb01234567x23))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    133  vmull_u8(va1x01234567, vget_high_u8(vb01234567x23))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    140  vmull_u8(va2x01234567, vget_high_u8(vb01234567x23))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    147  vmull_u8(va3x01234567, vget_high_u8(vb01234567x23))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    163  vmull_u8(va0x01234567, vget_high_u8(vb01234567x45))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    170  vmull_u8(va1x01234567, vget_high_u8(vb01234567x45))));  in pytorch_q8gemm_xzp_ukernel_4x8c2__neon()
    [all …]
/aosp_15_r20/external/libaom/av1/encoder/arm/
temporal_filter_neon.c
    51   vmull_u8(vget_high_u8(abs_diff), vget_high_u8(abs_diff));  in get_squared_error()
    338  vaddl_u8(vget_high_u8(mat[0][0]), vget_high_u8(mat[2][0]));  in av1_estimate_noise_from_single_plane_neon()
    342  vaddl_u8(vget_high_u8(mat[0][2]), vget_high_u8(mat[2][2]));  in av1_estimate_noise_from_single_plane_neon()
    346  gxa_hi, vaddl_u8(vget_high_u8(mat[1][0]), vget_high_u8(mat[1][0])));  in av1_estimate_noise_from_single_plane_neon()
    350  gxb_hi, vaddl_u8(vget_high_u8(mat[1][2]), vget_high_u8(mat[1][2])));  in av1_estimate_noise_from_single_plane_neon()
    355  vaddl_u8(vget_high_u8(mat[0][0]), vget_high_u8(mat[0][2]));  in av1_estimate_noise_from_single_plane_neon()
    359  vaddl_u8(vget_high_u8(mat[2][0]), vget_high_u8(mat[2][2]));  in av1_estimate_noise_from_single_plane_neon()
    363  gya_hi, vaddl_u8(vget_high_u8(mat[0][1]), vget_high_u8(mat[0][1])));  in av1_estimate_noise_from_single_plane_neon()
    367  gyb_hi, vaddl_u8(vget_high_u8(mat[2][1]), vget_high_u8(mat[2][1])));  in av1_estimate_noise_from_single_plane_neon()
    379  uint16x8_t center_hi = vshll_n_u8(vget_high_u8(mat[1][1]), 2);  in av1_estimate_noise_from_single_plane_neon()
    [all …]
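get_squared_error relies on the fact that |a - b| of two bytes still fits in a byte, so vmull_u8 of the absolute difference with itself yields an exact 16-bit square (255*255 = 65025 < 65536). A minimal sketch of that step, with hypothetical names:

    #include <arm_neon.h>

    /* Exact per-pixel squared error for 16 bytes, widened to u16. */
    static void squared_error_16(uint8x16_t a, uint8x16_t b,
                                 uint16x8_t *lo, uint16x8_t *hi) {
      uint8x16_t d = vabdq_u8(a, b); /* |a - b|, still 8 bits */
      *lo = vmull_u8(vget_low_u8(d), vget_low_u8(d));
      *hi = vmull_u8(vget_high_u8(d), vget_high_u8(d));
    }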
/aosp_15_r20/external/libgav1/src/dsp/arm/
intra_edge_neon.cc
    81   uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2));  in IntraEdgeFilter_NEON()
    83   sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
    105  uint16x8_t sum_hi = vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_2));  in IntraEdgeFilter_NEON()
    107  sum_hi = vmlal_u8(sum_hi, vget_high_u8(src_1), krn1);  in IntraEdgeFilter_NEON()
    165  vshlq_n_u16(vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_4)), 1);  in IntraEdgeFilter_NEON()
    167  vaddw_u8(vaddl_u8(vget_high_u8(src_1), vget_high_u8(src_2)),  in IntraEdgeFilter_NEON()
    168      vget_high_u8(src_3));  in IntraEdgeFilter_NEON()
    195  vshlq_n_u16(vaddl_u8(vget_high_u8(src_0), vget_high_u8(src_4)), 1);  in IntraEdgeFilter_NEON()
    197  vaddw_u8(vaddl_u8(vget_high_u8(src_1), vget_high_u8(src_2)),  in IntraEdgeFilter_NEON()
    198      vget_high_u8(src_3));  in IntraEdgeFilter_NEON()
    [all …]
super_res_neon.cc
    73   uint16x8_t res = vmull_u8(src[1], vget_high_u8(f[0]));  in SuperRes()
    74   res = vmlal_u8(res, src[3], vget_high_u8(f[1]));  in SuperRes()
    79   temp = vmlal_u8(temp, src[5], vget_high_u8(f[2]));  in SuperRes()
    80   temp = vmlal_u8(temp, src[7], vget_high_u8(f[3]));  in SuperRes()
    135  sr[0] = vget_high_u8(s[0]);  in SuperRes_NEON()
    136  sr[1] = vget_high_u8(s[1]);  in SuperRes_NEON()
    137  sr[2] = vget_high_u8(s[2]);  in SuperRes_NEON()
    138  sr[3] = vget_high_u8(s[3]);  in SuperRes_NEON()
    139  sr[4] = vget_high_u8(s[4]);  in SuperRes_NEON()
    140  sr[5] = vget_high_u8(s[5]);  in SuperRes_NEON()
    [all …]
/aosp_15_r20/external/libhevc/encoder/arm/
ihevce_sad_compute_neon.c
    65   abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));  in ihevce_4x4_sad_computer_neon()
    117  abs_1 = vabal_u8(abs_1, vget_high_u8(src), vget_high_u8(pred));  in ihevce_16xn_sad_computer_neon()
    149  abs_0 = vabal_u8(abs_0, vget_high_u8(src_0), vget_high_u8(pred_0));  in ihevce_32xn_sad_computer_neon()
    151  abs_1 = vabal_u8(abs_1, vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_32xn_sad_computer_neon()
    187  abs_0 = vabal_u8(abs_0, vget_high_u8(src_0), vget_high_u8(pred_0));  in ihevce_64xn_sad_computer_neon()
    189  abs_0 = vabal_u8(abs_0, vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_64xn_sad_computer_neon()
    191  abs_1 = vabal_u8(abs_1, vget_high_u8(src_2), vget_high_u8(pred_2));  in ihevce_64xn_sad_computer_neon()
    193  abs_1 = vabal_u8(abs_1, vget_high_u8(src_3), vget_high_u8(pred_3));  in ihevce_64xn_sad_computer_neon()
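Every variant here is the same reduction: vabal_u8 adds the widened absolute difference of an 8-lane pair into a uint16x8_t accumulator, folding in the low and high halves of each 16-byte load separately. A sketch of one row (names hypothetical):

    #include <arm_neon.h>

    /* Accumulate the SAD of one 16-pixel row into a u16 accumulator. */
    static uint16x8_t sad_row_16(uint16x8_t acc, uint8x16_t src,
                                 uint8x16_t pred) {
      acc = vabal_u8(acc, vget_low_u8(src), vget_low_u8(pred));
      return vabal_u8(acc, vget_high_u8(src), vget_high_u8(pred));
    }

The 32xn/64xn variants in the listing split the work across abs_0 and abs_1 so no single set of 16-bit lanes is driven toward overflow.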
ihevce_ssd_calculator_neon.c
    76   sqabs_high = vmull_u8(vget_high_u8(abs), vget_high_u8(abs));  in ihevce_4x4_ssd_computer_neon()
    128  sqabs_high = vmull_u8(vget_high_u8(abs), vget_high_u8(abs));  in ihevce_1x16_ssd_computer_neon()
    160  sqabs_1 = vmull_u8(vget_high_u8(abs_0), vget_high_u8(abs_0));  in ihevce_1x32_ssd_computer_neon()
    162  sqabs_3 = vmull_u8(vget_high_u8(abs_1), vget_high_u8(abs_1));  in ihevce_1x32_ssd_computer_neon()
    212  sqabs_1 = vmull_u8(vget_high_u8(abs_0), vget_high_u8(abs_0));  in ihevce_1x64_ssd_computer_neon()
    214  sqabs_3 = vmull_u8(vget_high_u8(abs_1), vget_high_u8(abs_1));  in ihevce_1x64_ssd_computer_neon()
    216  sqabs_5 = vmull_u8(vget_high_u8(abs_2), vget_high_u8(abs_2));  in ihevce_1x64_ssd_computer_neon()
    218  sqabs_7 = vmull_u8(vget_high_u8(abs_3), vget_high_u8(abs_3));  in ihevce_1x64_ssd_computer_neon()
ihevce_ssd_and_sad_calculator_neon.c
    70   const uint8x8_t abs_h = vabd_u8(vget_high_u8(src_u8), vget_high_u8(ref_u8));  in ihevce_ssd_and_sad_calculator_neon()
    142  abs_h = vabdl_u8(vget_high_u8(src), vget_high_u8(pred));  in ihevce_ssd_and_sad_calculator_neon()
    191  abs_h = vabd_u8(vget_high_u8(src_0), vget_high_u8(pred_0));  in ihevce_ssd_and_sad_calculator_neon()
    202  abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
    254  abs_h = vabd_u8(vget_high_u8(src_0), vget_high_u8(pred_0));  in ihevce_ssd_and_sad_calculator_neon()
    265  abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));  in ihevce_ssd_and_sad_calculator_neon()
    276  abs_h = vabd_u8(vget_high_u8(src_2), vget_high_u8(pred_2));  in ihevce_ssd_and_sad_calculator_neon()
    287  abs_h = vabd_u8(vget_high_u8(src_3), vget_high_u8(pred_3));  in ihevce_ssd_and_sad_calculator_neon()
ihevce_common_utils_neon.c
    125  a3 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a0)));  in ihevce_wt_avg_2d_16x1_neon()
    127  a5 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a1)));  in ihevce_wt_avg_2d_16x1_neon()
    251  a1 = vmovl_u8(vget_high_u8(src0_u8));  in ihevce_wt_avg_2d_4xn_neon()
    253  a3 = vmovl_u8(vget_high_u8(src1_u8));  in ihevce_wt_avg_2d_4xn_neon()
    543  vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    559  vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    565  vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    728  vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    743  vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    749  vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));  in ihevce_get_luma_eo_sao_params_neon()
    [all …]
ihevce_scale_by_2_neon.c
    84   c = vext_u8(vget_low_u8(src.val[1]), vget_high_u8(src.val[1]), 1);  in ihevce_horz_scale_neon_w16()
    85   l0 = vext_u8(vget_low_u8(src.val[0]), vget_high_u8(src.val[0]), 1);  in ihevce_horz_scale_neon_w16()
    86   r0 = vext_u8(vget_low_u8(src.val[0]), vget_high_u8(src.val[0]), 2);  in ihevce_horz_scale_neon_w16()
    87   r3 = vext_u8(vget_low_u8(src.val[0]), vget_high_u8(src.val[0]), 3);  in ihevce_horz_scale_neon_w16()
    180  p = vreinterpretq_s16_u16(vmull_u8(vget_high_u8(src[c]), wt_0));  in ihevce_vert_scale_neon_w16()
    182  q = vreinterpretq_s16_u16(vaddl_u8(vget_high_u8(src[t1]), vget_high_u8(src[b1])));  in ihevce_vert_scale_neon_w16()
    185  r = vreinterpretq_s16_u16(vaddl_u8(vget_high_u8(src[t2]), vget_high_u8(src[b2])));  in ihevce_vert_scale_neon_w16()
/aosp_15_r20/external/libaom/aom_dsp/arm/
blend_neon.h
    24   uint16x8_t blend_u16_hi = vmull_u8(vget_high_u8(m), vget_high_u8(a));  in alpha_blend_a64_u8x16()
    27   blend_u16_hi = vmlal_u8(blend_u16_hi, vget_high_u8(m_inv), vget_high_u8(b));  in alpha_blend_a64_u8x16()
    94   uint8x8_t sum_pairwise_a = vpadd_u8(vget_low_u8(a), vget_high_u8(a));  in avg_blend_pairwise_u8x16()
    95   uint8x8_t sum_pairwise_b = vpadd_u8(vget_low_u8(b), vget_high_u8(b));  in avg_blend_pairwise_u8x16()
    115  uint8x8_t sum_pairwise_a = vpadd_u8(vget_low_u8(a), vget_high_u8(a));  in avg_blend_pairwise_u8x16_4()
    116  uint8x8_t sum_pairwise_b = vpadd_u8(vget_low_u8(b), vget_high_u8(b));  in avg_blend_pairwise_u8x16_4()
    117  uint8x8_t sum_pairwise_c = vpadd_u8(vget_low_u8(c), vget_high_u8(c));  in avg_blend_pairwise_u8x16_4()
    118  uint8x8_t sum_pairwise_d = vpadd_u8(vget_low_u8(d), vget_high_u8(d));  in avg_blend_pairwise_u8x16_4()
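The alpha-blend helper computes m*a + (64 - m)*b in 16 bits and narrows back with a rounding shift by 6, since the two weights always sum to 64 (libaom's A64 alpha convention). A hedged sketch for the high halves, with an illustrative function name:

    #include <arm_neon.h>

    /* Blend the high eight lanes: (m*a + (64-m)*b + 32) >> 6.
     * Max intermediate is 64*255 = 16320, well inside uint16. */
    static uint8x8_t alpha_blend_hi(uint8x16_t m, uint8x16_t a,
                                    uint8x16_t b) {
      uint8x16_t m_inv = vsubq_u8(vdupq_n_u8(64), m);
      uint16x8_t sum = vmull_u8(vget_high_u8(m), vget_high_u8(a));
      sum = vmlal_u8(sum, vget_high_u8(m_inv), vget_high_u8(b));
      return vrshrn_n_u16(sum, 6); /* rounding narrow back to u8 */
    }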
avg_neon.c
    107  sum_hi[0] = vaddl_u8(vget_high_u8(r0), vget_high_u8(r1));  in aom_int_pro_row_neon()
    109  sum_hi[1] = vaddl_u8(vget_high_u8(r2), vget_high_u8(r3));  in aom_int_pro_row_neon()
    120  uint16x8_t tmp0_hi = vaddl_u8(vget_high_u8(r0), vget_high_u8(r1));  in aom_int_pro_row_neon()
    122  uint16x8_t tmp1_hi = vaddl_u8(vget_high_u8(r2), vget_high_u8(r3));  in aom_int_pro_row_neon()
    292  uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));  in aom_minmax_8x8_neon()
    293  uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));  in aom_minmax_8x8_neon()
intrapred_neon.c
    903   h_store_16x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_16x16_neon()
    912   h_store_32x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_32x32_neon()
    914   h_store_32x8(dst + 24 * stride, stride, vget_high_u8(d1));  in aom_h_predictor_32x32_neon()
    930   h_store_4x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_4x16_neon()
    949   h_store_8x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_8x16_neon()
    959   h_store_8x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_8x32_neon()
    961   h_store_8x8(dst + 24 * stride, stride, vget_high_u8(d1));  in aom_h_predictor_8x32_neon()
    988   h_store_16x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_16x32_neon()
    990   h_store_16x8(dst + 24 * stride, stride, vget_high_u8(d1));  in aom_h_predictor_16x32_neon()
    1002  h_store_16x8(dst + 8 * stride, stride, vget_high_u8(d0));  in aom_h_predictor_16x64_neon()
    [all …]
/aosp_15_r20/external/libvpx/vpx_dsp/arm/
avg_neon.c
    24   const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b));  in vpx_avg_4x4_neon()
    89   sum_hi[0] = vaddl_u8(vget_high_u8(r0), vget_high_u8(r1));  in vpx_int_pro_row_neon()
    91   sum_hi[1] = vaddl_u8(vget_high_u8(r2), vget_high_u8(r3));  in vpx_int_pro_row_neon()
    102  tmp_hi[0] = vaddl_u8(vget_high_u8(r0), vget_high_u8(r1));  in vpx_int_pro_row_neon()
    104  tmp_hi[1] = vaddl_u8(vget_high_u8(r2), vget_high_u8(r3));  in vpx_int_pro_row_neon()
    220  uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max));  in vpx_minmax_8x8_neon()
    221  uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min));  in vpx_minmax_8x8_neon()
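The minmax lines show the start of the usual horizontal reduction: fold the high half into the low half with vmax_u8/vmin_u8, then pairwise-reduce down to one lane. A sketch of the max side (helper name hypothetical):

    #include <arm_neon.h>

    /* Reduce a 16-lane vector to its maximum byte. */
    static uint8_t max_lane_16(uint8x16_t v) {
      uint8x8_t m = vmax_u8(vget_low_u8(v), vget_high_u8(v));
      m = vpmax_u8(m, m); /* 8 -> 4 candidates */
      m = vpmax_u8(m, m); /* 4 -> 2 */
      m = vpmax_u8(m, m); /* 2 -> 1 */
      return vget_lane_u8(m, 0);
    }

On AArch64, vmaxvq_u8 performs the whole reduction in one instruction.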
loopfilter_neon.c
    188  *sum1 = vsubw_u8(*sum1, vget_high_u8(sub0));  in filter_update_16()
    190  *sum1 = vsubw_u8(*sum1, vget_high_u8(sub1));  in filter_update_16()
    192  *sum1 = vaddw_u8(*sum1, vget_high_u8(add0));  in filter_update_16()
    194  *sum1 = vaddw_u8(*sum1, vget_high_u8(add1));  in filter_update_16()
    262  sum1 = vaddl_u8(vget_high_u8(p3), vget_high_u8(p3));  // 2*p3  in calc_7_tap_filter_16()
    264  sum1 = vaddw_u8(sum1, vget_high_u8(p3));  // 3*p3  in calc_7_tap_filter_16()
    266  sum1 = vaddw_u8(sum1, vget_high_u8(p2));  // 3*p3+p2  in calc_7_tap_filter_16()
    268  sum1 = vaddw_u8(sum1, vget_high_u8(p2));  // 3*p3+2*p2  in calc_7_tap_filter_16()
    270  sum1 = vaddw_u8(sum1, vget_high_u8(p1));  // 3*p3+2*p2+p1  in calc_7_tap_filter_16()
    272  sum1 = vaddw_u8(sum1, vget_high_u8(p0));  // 3*p3+2*p2+p1+p0  in calc_7_tap_filter_16()
    [all …]
subtract_neon.c
    33   const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0));  in vpx_subtract_block_neon()
    35   const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1));  in vpx_subtract_block_neon()
    50   const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p));  in vpx_subtract_block_neon()
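vpx_subtract_block_neon exploits modular arithmetic: vsubl_u8 widens the unsigned difference to 16 bits, and reinterpreting the result as int16 yields the correct signed residual even when pred > src. A minimal sketch of one 16-pixel row that mirrors the lines above (names and the contiguous-output assumption are mine):

    #include <arm_neon.h>
    #include <stdint.h>

    /* Signed residual src - pred for 16 pixels, stored as int16. */
    static void subtract_row_16(int16_t *diff, const uint8_t *src,
                                const uint8_t *pred) {
      const uint8x16_t s = vld1q_u8(src);
      const uint8x16_t p = vld1q_u8(pred);
      vst1q_s16(diff + 0,
                vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(s), vget_low_u8(p))));
      vst1q_s16(diff + 8,
                vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(s), vget_high_u8(p))));
    }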
/aosp_15_r20/external/XNNPACK/src/x8-transposec/gen/
16x16-reuse-switch-zip-neon.c
    300  v0_low = vget_high_u8(v0_0.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    301  v1_low = vget_high_u8(v0_0.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    302  v2_low = vget_high_u8(v0_1.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    303  v3_low = vget_high_u8(v0_1.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    304  v4_low = vget_high_u8(v0_2.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    305  v5_low = vget_high_u8(v0_2.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    306  v6_low = vget_high_u8(v0_3.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    307  v7_low = vget_high_u8(v0_3.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    308  v8_low = vget_high_u8(v0_4.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    309  v9_low = vget_high_u8(v0_4.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_switch_zip_neon()
    [all …]
16x16-reuse-dec-zip-neon.c
    350  v0_low = vget_high_u8(v0_0.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    351  v1_low = vget_high_u8(v0_0.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    352  v2_low = vget_high_u8(v0_1.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    353  v3_low = vget_high_u8(v0_1.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    354  v4_low = vget_high_u8(v0_2.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    355  v5_low = vget_high_u8(v0_2.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    356  v6_low = vget_high_u8(v0_3.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    357  v7_low = vget_high_u8(v0_3.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    358  v8_low = vget_high_u8(v0_4.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    359  v9_low = vget_high_u8(v0_4.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_dec_zip_neon()
    [all …]
16x16-reuse-mov-zip-neon.c
    380  v0_low = vget_high_u8(v0_0.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    381  v1_low = vget_high_u8(v0_0.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    382  v2_low = vget_high_u8(v0_1.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    383  v3_low = vget_high_u8(v0_1.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    384  v4_low = vget_high_u8(v0_2.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    385  v5_low = vget_high_u8(v0_2.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    386  v6_low = vget_high_u8(v0_3.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    387  v7_low = vget_high_u8(v0_3.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    388  v8_low = vget_high_u8(v0_4.val[0]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    389  v9_low = vget_high_u8(v0_4.val[1]);  in xnn_x8_transposec_ukernel__16x16_reuse_mov_zip_neon()
    [all …]
/aosp_15_r20/external/ComputeLibrary/src/cpu/kernels/
CpuGemmLowpMatrixMultiplyKernel.cpp
    99   vget_low_u16(vmovl_u8(vget_high_u8(b00_u8))),  in vector_matrix_multiply_u8()
    100  vget_high_u16(vmovl_u8(vget_high_u8(b00_u8)))  in vector_matrix_multiply_u8()
    109  vget_low_u16(vmovl_u8(vget_high_u8(b10_u8))),  in vector_matrix_multiply_u8()
    110  vget_high_u16(vmovl_u8(vget_high_u8(b10_u8)))  in vector_matrix_multiply_u8()
    119  vget_low_u16(vmovl_u8(vget_high_u8(b20_u8))),  in vector_matrix_multiply_u8()
    120  vget_high_u16(vmovl_u8(vget_high_u8(b20_u8)))  in vector_matrix_multiply_u8()
    129  vget_low_u16(vmovl_u8(vget_high_u8(b30_u8))),  in vector_matrix_multiply_u8()
    130  vget_high_u16(vmovl_u8(vget_high_u8(b30_u8)))  in vector_matrix_multiply_u8()
    139  vget_low_u16(vmovl_u8(vget_high_u8(b40_u8))),  in vector_matrix_multiply_u8()
    140  vget_high_u16(vmovl_u8(vget_high_u8(b40_u8)))  in vector_matrix_multiply_u8()
    [all …]
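These lines expand each 16-byte slice of the B matrix into four uint16x4_t pieces: vmovl_u8 widens each half, and vget_low_u16/vget_high_u16 split the results into the 4-lane vectors that widening multiply-accumulates such as vmlal_u16 consume. A sketch of just the expansion, with an illustrative name:

    #include <arm_neon.h>

    /* Widen 16 u8 values into four uint16x4_t pieces, lanes in order. */
    static void expand_u8x16(uint8x16_t b, uint16x4_t out[4]) {
      const uint16x8_t lo = vmovl_u8(vget_low_u8(b));
      const uint16x8_t hi = vmovl_u8(vget_high_u8(b));
      out[0] = vget_low_u16(lo);
      out[1] = vget_high_u16(lo);
      out[2] = vget_low_u16(hi);
      out[3] = vget_high_u16(hi);
    }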
/aosp_15_r20/external/libavc/decoder/arm/svc/
isvcd_residual_resamp_neon.c
    805  u1_incr_8x8x2_t.val[1] = vget_high_u8(u1_incr_8x16_r0_0);  in isvcd_interpolate_residual_neonintr()
    807  u1_incr_8x8_t1 = vtbl2_u8(u1_incr_8x8x2_t, vget_high_u8(x_ref_pos_mask_r0_0));  in isvcd_interpolate_residual_neonintr()
    811  u1_incr_8x8x2_t.val[1] = vget_high_u8(u1_incr_8x16_r1_0);  in isvcd_interpolate_residual_neonintr()
    813  u1_incr_8x8_t1 = vtbl2_u8(u1_incr_8x8x2_t, vget_high_u8(x_ref_pos_mask_r0_0));  in isvcd_interpolate_residual_neonintr()
    866  u1_temp_8x8_t1 = vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(u1_incr_not_8x16_r0_0));  in isvcd_interpolate_residual_neonintr()
    875  u1_temp_8x8_t1 = vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(u1_incr_not_8x16_r1_0));  in isvcd_interpolate_residual_neonintr()
    886  vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(x_ref_pos_mask_temp_r0_0));  in isvcd_interpolate_residual_neonintr()
    897  vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(x_ref_pos_mask_temp_r1_0));  in isvcd_interpolate_residual_neonintr()
    906  u1_temp_8x8_t1 = vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(u1_incr_not_8x16_r0_1));  in isvcd_interpolate_residual_neonintr()
    915  u1_temp_8x8_t1 = vtbl2_u8(u1_temp_8x8x2_t, vget_high_u8(u1_incr_not_8x16_r1_1));  in isvcd_interpolate_residual_neonintr()
    [all …]