Lines Matching +full:0 +full:- +full:9

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
5 # Copyright 2023- IBM Corp. All rights reserved
10 # Poly1305 - this version mainly uses vector/VSX/scalar instructions
11 # - 26-bit limbs
12 # - handles multiple 64-byte blocks.
16 # clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF
17 # p = 2^130 - 5
25 # 07/22/21 - this revision is based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, …
26 # to 9 vectors for multiplications.
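
For reference, a minimal C sketch of the clamp and prime described above (an editorial addition, not part of the file; the function name is illustrative and a little-endian host is assumed):

    #include <stdint.h>
    #include <string.h>

    /* clamp r: mask the top 4 bits of each 32-bit word and the low 2
     * bits of the upper three words, per the masks in the comment above */
    static void poly1305_clamp(uint64_t r[2], const uint8_t key[16])
    {
            memcpy(&r[0], key, 8);          /* low 64 bits */
            memcpy(&r[1], key + 8, 8);      /* high 64 bits */
            r[0] &= 0x0ffffffc0fffffffULL;
            r[1] &= 0x0ffffffc0ffffffcULL;
    }
    /* every multiply below is then reduced mod p = 2^130 - 5 */
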
56 #include <asm/asm-offsets.h>
57 #include <asm/asm-compat.h>
93 mflr 0 # move LR to r0
94 std 0, 16(1) # save LR in the caller's frame (ELFv2 LR save slot)
95 stdu 1,-752(1) # allocate a 752-byte stack frame
116 addi 9, 1, 256 # r9 = base of the vector/VSX save area
117 SAVE_VRS 20, 0, 9
118 SAVE_VRS 21, 16, 9
119 SAVE_VRS 22, 32, 9
120 SAVE_VRS 23, 48, 9
121 SAVE_VRS 24, 64, 9
122 SAVE_VRS 25, 80, 9
123 SAVE_VRS 26, 96, 9
124 SAVE_VRS 27, 112, 9
125 SAVE_VRS 28, 128, 9
126 SAVE_VRS 29, 144, 9
127 SAVE_VRS 30, 160, 9
128 SAVE_VRS 31, 176, 9
130 SAVE_VSX 14, 192, 9
131 SAVE_VSX 15, 208, 9
132 SAVE_VSX 16, 224, 9
133 SAVE_VSX 17, 240, 9
134 SAVE_VSX 18, 256, 9
135 SAVE_VSX 19, 272, 9
136 SAVE_VSX 20, 288, 9
137 SAVE_VSX 21, 304, 9
138 SAVE_VSX 22, 320, 9
139 SAVE_VSX 23, 336, 9
140 SAVE_VSX 24, 352, 9
141 SAVE_VSX 25, 368, 9
142 SAVE_VSX 26, 384, 9
143 SAVE_VSX 27, 400, 9
144 SAVE_VSX 28, 416, 9
145 SAVE_VSX 29, 432, 9
146 SAVE_VSX 30, 448, 9
147 SAVE_VSX 31, 464, 9
151 addi 9, 1, 256
152 RESTORE_VRS 20, 0, 9
153 RESTORE_VRS 21, 16, 9
154 RESTORE_VRS 22, 32, 9
155 RESTORE_VRS 23, 48, 9
156 RESTORE_VRS 24, 64, 9
157 RESTORE_VRS 25, 80, 9
158 RESTORE_VRS 26, 96, 9
159 RESTORE_VRS 27, 112, 9
160 RESTORE_VRS 28, 128, 9
161 RESTORE_VRS 29, 144, 9
162 RESTORE_VRS 30, 160, 9
163 RESTORE_VRS 31, 176, 9
165 RESTORE_VSX 14, 192, 9
166 RESTORE_VSX 15, 208, 9
167 RESTORE_VSX 16, 224, 9
168 RESTORE_VSX 17, 240, 9
169 RESTORE_VSX 18, 256, 9
170 RESTORE_VSX 19, 272, 9
171 RESTORE_VSX 20, 288, 9
172 RESTORE_VSX 21, 304, 9
173 RESTORE_VSX 22, 320, 9
174 RESTORE_VSX 23, 336, 9
175 RESTORE_VSX 24, 352, 9
176 RESTORE_VSX 25, 368, 9
177 RESTORE_VSX 26, 384, 9
178 RESTORE_VSX 27, 400, 9
179 RESTORE_VSX 28, 416, 9
180 RESTORE_VSX 29, 432, 9
181 RESTORE_VSX 30, 448, 9
182 RESTORE_VSX 31, 464, 9
204 ld 0, 16(1) # reload the saved LR
205 mtlr 0 # restore the link register
209 # p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5;
224 vmulouw 13, 8, 0
268 vmuleuw 9, 4, 26
272 vmuleuw 13, 8, 0
273 vaddudm 14, 14, 9
279 vmuleuw 9, 4, 27
284 vaddudm 15, 15, 9
290 vmuleuw 9, 4, 28
295 vaddudm 16, 16, 9
301 vmuleuw 9, 4, 29
306 vaddudm 17, 17, 9
312 vmuleuw 9, 4, 30
317 vaddudm 18, 18, 9
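
The vmulouw/vmuleuw/vaddudm run above accumulates the sum of products from the p[0] comment, one partial product per instruction pair. A hedged scalar C sketch of the same 26-bit limb multiply (editorial; names are illustrative, and the s values are the r_i * 5 premultiplies the header mentions):

    /* one 5x5 limb product mod 2^130 - 5; terms that would land at
     * 2^130 and above fold back down via 2^130 == 5 (mod p), which
     * is where the r[i] * 5 factors come from */
    static void mul_mod_p(uint64_t p[5], const uint32_t a[5],
                          const uint32_t r[5])
    {
            uint64_t s1 = (uint64_t)r[1] * 5, s2 = (uint64_t)r[2] * 5,
                     s3 = (uint64_t)r[3] * 5, s4 = (uint64_t)r[4] * 5;

            p[0] = (uint64_t)a[0] * r[0] + a[1] * s4 + a[2] * s3
                 + a[3] * s2 + a[4] * s1;
            p[1] = (uint64_t)a[0] * r[1] + (uint64_t)a[1] * r[0]
                 + a[2] * s4 + a[3] * s3 + a[4] * s2;
            p[2] = (uint64_t)a[0] * r[2] + (uint64_t)a[1] * r[1]
                 + (uint64_t)a[2] * r[0] + a[3] * s4 + a[4] * s3;
            p[3] = (uint64_t)a[0] * r[3] + (uint64_t)a[1] * r[2]
                 + (uint64_t)a[2] * r[1] + (uint64_t)a[3] * r[0]
                 + a[4] * s4;
            p[4] = (uint64_t)a[0] * r[4] + (uint64_t)a[1] * r[3]
                 + (uint64_t)a[2] * r[2] + (uint64_t)a[3] * r[1]
                 + (uint64_t)a[4] * r[0];
    }

Each p[i] then still needs a carry pass to bring the limbs back under 26 bits.
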
364 xxpermdi 58, 58, 36, 0x3 # r0
365 xxpermdi 59, 59, 37, 0x3 # r1
366 xxpermdi 60, 60, 38, 0x3 # r2
367 xxpermdi 61, 61, 39, 0x3 # r3
368 xxpermdi 62, 62, 40, 0x3 # r4
369 xxpermdi 36, 36, 36, 0x3
370 xxpermdi 37, 37, 37, 0x3
371 xxpermdi 38, 38, 38, 0x3
372 xxpermdi 39, 39, 39, 0x3
373 xxpermdi 40, 40, 40, 0x3
375 vsld 9, 27, 13
379 vaddudm 0, 9, 27
391 vsld 9, 27, 13
395 vaddudm 0, 9, 27
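
The vsld/vaddudm pair is the * 5 precompute: shift a limb vector left by 2 (the splatted constant) and add it back, giving r_i * 5 without a multiply. A one-line editorial C equivalent (illustrative name):

    static inline uint64_t times5(uint64_t r)
    {
            return (r << 2) + r;    /* vsld by 2, then vaddudm */
    }
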
401 xxlor 0, 58, 58
411 vspltw 9, 26, 3
413 vmrgow 26, 10, 9
414 vspltw 9, 27, 3
416 vmrgow 27, 10, 9
417 vspltw 9, 28, 3
419 vmrgow 28, 10, 9
420 vspltw 9, 29, 3
422 vmrgow 29, 10, 9
423 vspltw 9, 30, 3
425 vmrgow 30, 10, 9
427 vsld 9, 27, 13
431 vaddudm 0, 9, 27
442 vspltisb 9, 2
455 vsld 10, 12, 9
479 ld 11, 0(10)
486 lvx 25, 0, 10 # v25 - 26-bit limb mask
494 ld 9, 24(3)
496 and. 9, 9, 11
500 extrdi 14, 9, 26, 38
501 extrdi 15, 9, 26, 12
502 extrdi 16, 9, 12, 0
503 mtvsrdd 58, 0, 14
505 mtvsrdd 59, 0, 15
507 mtvsrdd 60, 0, 16
508 extrdi 18, 10, 24, 0
509 mtvsrdd 61, 0, 17
510 mtvsrdd 62, 0, 18
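
extrdi rt, rs, n, b extracts the n-bit field starting at bit b (MSB-0 numbering) of rs into the low bits of rt, so the extrdi lines above pull 26 + 26 + 12 bits out of the low doubleword and the top 24 bits out of the high one; the middle limb straddles the two halves and is stitched together in lines this filtered listing does not show. An editorial C sketch of the full radix-2^26 split (little-endian halves assumed):

    /* split a 128-bit value (two LE 64-bit halves) into 26-bit limbs */
    static void to_limbs26(uint32_t l[5], uint64_t lo, uint64_t hi)
    {
            l[0] = lo & 0x3ffffff;                          /* bits   0..25  */
            l[1] = (lo >> 26) & 0x3ffffff;                  /* bits  26..51  */
            l[2] = ((lo >> 52) | (hi << 12)) & 0x3ffffff;   /* bits  52..77  */
            l[3] = (hi >> 14) & 0x3ffffff;                  /* bits  78..103 */
            l[4] = (uint32_t)(hi >> 40);                    /* bits 104..127 */
    }
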
513 li 9, 5
514 mtvsrdd 36, 0, 9
515 vmulouw 0, 27, 4 # v0 = rr0
537 li 21, 0 # byte offset into the message
543 ld 9, 0(3)
547 mtvsrdd 41, 0, 19
548 extrdi 14, 9, 26, 38
549 extrdi 15, 9, 26, 12
550 extrdi 16, 9, 12, 0
551 mtvsrdd 36, 0, 14
553 mtvsrdd 37, 0, 15
555 mtvsrdd 38, 0, 16
556 extrdi 18, 10, 24, 0
557 mtvsrdd 39, 0, 17
558 mtvsrdd 40, 0, 18
559 vor 8, 8, 9
565 lxvw4x 43, 0, 20
567 lxvw4x 44, 0, 17
570 vand 9, 14, 25 # a0
584 vaddudm 20, 4, 9
592 lxvw4x 43, 0, 17
594 lxvw4x 44, 0, 17
597 vand 9, 14, 25 # a0
612 vmrgow 4, 9, 20
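
Loading a block (lxvw4x), masking limbs out of it (vand with the 26-bit mask in v25), and adding into the accumulator (vaddudm) is the absorb step. A hedged C sketch, reusing to_limbs26 and the includes from the sketches above (editorial; the pad-bit placement matches the 0x01000000 constant in the table at the end of the file):

    /* absorb one 16-byte block into the five-limb accumulator h */
    static void absorb(uint32_t h[5], const uint8_t blk[16], int highbit)
    {
            uint64_t lo, hi;
            uint32_t m[5];
            int i;

            memcpy(&lo, blk, 8);            /* little-endian load assumed */
            memcpy(&hi, blk + 8, 8);
            to_limbs26(m, lo, hi);
            if (highbit)
                    m[4] |= 1u << 24;       /* 2^128 = bit 24 of limb 4 */
            for (i = 0; i < 5; i++)
                    h[i] += m[i];           /* the vaddudm above */
    }
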
619 addi 5, 5, -64 # len -= 64
622 li 9, 64
623 divdu 31, 5, 9 # r31 = number of 64-byte blocks left
625 cmpdi 31, 0
633 # h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r^4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h…
635 # h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 -->
636 # h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r
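
Spelled out, the regrouping in these comments absorbs four blocks per iteration as

    h' = (h + m1) * r^4 + m2 * r^3 + m3 * r^2 + m4 * r    (mod 2^130 - 5)

The four products share no data, so they map onto parallel vector lanes; only the final additions are serial, which is why r^4, r^3, r^2, r and their * 5 companions are set up once before the loop.
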
644 vspltisb 9, 2
657 vsld 10, 12, 9
676 lxvw4x 43, 0, 20
678 lxvw4x 44, 0, 17
682 lxvw4x 43, 0, 17
684 lxvw4x 44, 0, 17
689 vand 9, 17, 25 # a0
722 vmrgow 4, 9, 4
729 addi 5, 5, -64 # len -= 64
735 xxlor 58, 0, 0
750 xxpermdi 41, 31, 46, 0
751 xxpermdi 42, 31, 47, 0
752 vaddudm 4, 14, 9
756 xxpermdi 43, 31, 48, 0
759 xxpermdi 44, 31, 49, 0
762 xxpermdi 45, 31, 50, 0
767 vspltisb 9, 2
780 vsld 10, 12, 9
821 std 17, 0(3)
826 li 3, 0
833 li 3, 0
842 # mask 0x0FFFFFFC0FFFFFFC
843 # mask 0x0FFFFFFC0FFFFFFF
846 ld 11, 0(10)
851 ld 9, 24(3)
853 and. 9, 9, 11 # clamp mask r0
857 add 19, 21, 10 # s1: r19 = (r1 >> 2) * 5
860 li 25, 0
861 mtvsrdd 32+0, 9, 19 # r0, s1
862 mtvsrdd 32+1, 10, 9 # r1, r0
864 mtvsrdd 32+3, 9, 25 # r0
878 vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1
881 vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0
885 vmsumudm 11, 8, 3, 9 # d2 = h2 * r0
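
Each vmsumudm sums two 64 x 64 products into a 128-bit accumulator, which is why this single-block path can use doubleword limbs. An editorial C sketch of the scalar equivalent (illustrative name; unsigned __int128 assumed available; the h2 * s1 term and carry chain sit in lines this filtered listing omits, but are required for correctness):

    typedef unsigned __int128 u128;

    /* one block step in radix 2^64: h = h2:h1:h0, r = r1:r0 */
    static void mul_64bit_radix(uint64_t h[3], uint64_t r0, uint64_t r1)
    {
            uint64_t s1 = (r1 >> 2) * 5;    /* as set up at line 857 */
            uint64_t c;
            u128 d0, d1;

            d0 = (u128)h[0] * r0 + (u128)h[1] * s1;
            d1 = (u128)h[0] * r1 + (u128)h[1] * r0 + h[2] * s1;
            h[2] = h[2] * r0;               /* d2 = h2 * r0 */

            h[0] = (uint64_t)d0;
            d1 += (uint64_t)(d0 >> 64);
            h[1] = (uint64_t)d1;
            h[2] += (uint64_t)(d1 >> 64);

            /* partial reduction, as in the srdi/sldi/andi. below */
            c = (h[2] >> 2) * 5;
            h[2] &= 3;
            h[0] += c;
            c = (h[0] < c);
            h[1] += c;
            h[2] += (h[1] < c);
    }
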
905 srdi 22, 29, 0x2 # carry = h2 >> 2 (multiples of 2^130)
906 sldi 23, 22, 0x2 # carry * 4 (carry*5 = carry*4 + carry)
910 andi. 29, 29, 0x3 # h2 &= 3
923 # - no highbit if final leftover block (highbit = 0)
926 cmpdi 5, 0
929 mflr 0
930 std 0, 16(1)
931 stdu 1,-400(1)
955 li 25, 0 # offset to inp and outp
961 ld 27, 0(3)
973 vxor 9, 9, 9
975 ld 20, 0(11)
983 li 22, 0
993 std 27, 0(3)
997 li 3, 0
1019 ld 0, 16(1)
1020 mtlr 0
1025 li 3, 0
1034 ld 10, 0(3)
1039 # h + 5 + (-p)
1046 srdi 9, 8, 2 # overflow?
1047 cmpdi 9, 0
1054 ld 6, 0(4)
1060 std 10, 0(5)
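
The "# h + 5 + (-p)" trick: since p = 2^130 - 5, h >= p exactly when h + 5 carries into bit 130, which is what the "srdi 9, 8, 2 # overflow?" test checks; in that case h + 5, taken mod 2^128, already equals h - p. An editorial C sketch (illustrative name; a production version would replace the branch with a constant-time select):

    /* canonicalize h mod p, then add the key half s mod 2^128 */
    static void poly1305_finish(uint64_t h[3], const uint64_t s[2])
    {
            uint64_t g0, g1, g2, c;

            g0 = h[0] + 5; c = (g0 < 5);    /* g = h + 5 */
            g1 = h[1] + c; c = (g1 < c);
            g2 = h[2] + c;

            if (g2 >> 2) {                  /* h + 5 >= 2^130, i.e. h >= p */
                    h[0] = g0;
                    h[1] = g1;
            }
            h[0] += s[0];                   /* tag = (h + s) mod 2^128 */
            h[1] += s[1] + (h[0] < s[0]);
    }
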
1068 .byte 0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f # r clamp mask, little endian
1070 .long 0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000 # 26-bit limb mask
1071 .long 0x1a, 0x00, 0x1a, 0x00 # shift count 26
1072 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 # 2^24 pad bit (2^128 in the top limb)
1073 .long 0x00010203, 0x04050607, 0x10111213, 0x14151617 # vperm byte indices
1074 .long 0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f # vperm byte indices