/aosp_15_r20/external/XNNPACK/src/qs8-vmulc/gen/ |
H A D | minmax-fp32-sse41-mul16-ld64-x8.c | 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 45 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 46 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 79 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 80 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
H A D | minmax-fp32-sse2-mul16-ld64-x8.c | 43 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 46 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 47 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 78 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 81 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 82 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x8.c | 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 45 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 46 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 79 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 80 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x16.c | 44 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 49 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 50 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 90 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 93 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 94 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16()
|
H A D | minmax-fp32-sse41-mul16-ld64-x16.c | 44 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 49 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 50 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 90 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 93 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 94 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16()
|
H A D | minmax-fp32-sse2-mul16-ld64-x16.c | 46 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 51 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 52 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 95 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 98 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 99 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
/aosp_15_r20/external/XNNPACK/src/qu8-vmulc/gen/ |
H A D | minmax-fp32-sse41-mul16-ld64-x8.c | 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 45 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 46 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 79 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 80 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x8.c | 42 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 45 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 46 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 76 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 79 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8() 80 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
H A D | minmax-fp32-sse2-mul16-ld64-x8.c | 44 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 47 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 48 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 80 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 83 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 84 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x16.c | 44 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 49 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 50 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 90 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 93 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16() 94 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__avx_mul16_ld64_x16()
|
H A D | minmax-fp32-sse2-mul16-ld64-x16.c | 47 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 52 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 53 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 95 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 98 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 99 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
H A D | minmax-fp32-sse41-mul16-ld64-x16.c | 44 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 49 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 50 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 90 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 93 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 94 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16()
|
/aosp_15_r20/external/XNNPACK/src/qs8-vmul/gen/ |
H A D | minmax-fp32-sse41-mul16-ld64-x8.c | 43 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 46 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 47 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 79 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 82 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 83 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x8.c | 43 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 46 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 47 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 79 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 82 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 83 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
H A D | minmax-fp32-sse2-mul16-ld64-x8.c | 45 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 48 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 49 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 83 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 86 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 87 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x16.c | 47 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 52 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 53 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 96 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 99 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 100 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16()
|
H A D | minmax-fp32-sse41-mul16-ld64-x16.c | 47 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 52 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 53 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 96 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 99 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 100 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16()
|
H A D | minmax-fp32-sse2-mul16-ld64-x16.c | 51 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 56 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 57 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 104 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 107 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 108 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
/aosp_15_r20/external/XNNPACK/src/qu8-vmul/gen/ |
H A D | minmax-fp32-sse41-mul16-ld64-x8.c | 43 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 46 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 47 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 79 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() local 82 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8() 83 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x8()
|
H A D | minmax-fp32-avx-mul16-ld64-x8.c | 43 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 46 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 47 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 79 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() local 82 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8() 83 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x8()
|
H A D | minmax-fp32-sse2-mul16-ld64-x8.c | 46 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 49 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 50 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 85 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() local 88 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8() 89 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x8()
|
H A D | minmax-fp32-sse41-mul16-ld64-x16.c | 47 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 52 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 53 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 96 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 99 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 100 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16()
|
H A D | minmax-fp32-avx-mul16-ld64-x16.c | 47 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 52 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 53 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 96 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() local 99 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16() 100 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__avx_mul16_ld64_x16()
|
H A D | minmax-fp32-sse2-mul16-ld64-x16.c | 52 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 57 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 58 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 104 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() local 107 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16() 108 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qu8_vmul_minmax_fp32_ukernel__sse2_mul16_ld64_x16()
|
/aosp_15_r20/external/XNNPACK/src/amalgam/ |
H A D | sse41.c | 5476 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 5481 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5482 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5525 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb01234567); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 5528 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5529 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmul_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5596 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local 5601 const __m128i vprod0123 = _mm_unpacklo_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5602 const __m128i vprod4567 = _mm_unpackhi_epi16(vprod01234567lo, vprod01234567hi); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() 5642 const __m128i vprod01234567lo = _mm_mullo_epi16(vxa01234567, vxb); in xnn_qs8_vmulc_minmax_fp32_ukernel__sse41_mul16_ld64_x16() local [all …]
|