/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax: GNU as, AT&T operand order (op src, dst), run through cpp.
 * Target: 32-bit x86 with SSE2; all arguments are read from the stack.
 *
 * Entry points defined here:
 *   memset(dst, chr, len)                 -> returns dst in EAX
 *   __memset_chk(dst, chr, len, dst_len)  -> same, after checking
 *                                            len <= dst_len (unsigned)
 *
 * Register roles inside memset once L(1byteormore) has run:
 *   EAX  - fill byte replicated into all four bytes
 *   ECX  - number of bytes still to write
 *   EDX  - destination cursor
 *   XMM0 - fill byte replicated into all sixteen bytes (len >= 16 only)
 *   EBX  - scratch; saved by ENTRANCE and restored by RETURN/RETURN_END
 */

#include <private/bionic_asm.h>

#define FOR_SILVERMONT

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

#define CFI_PUSH(REG)						\
  .cfi_adjust_cfa_offset 4;					\
  .cfi_rel_offset REG, 0

#define CFI_POP(REG)						\
  .cfi_adjust_cfa_offset -4;					\
  .cfi_restore REG

#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments after ENTRANCE has pushed EBX.  */
#define PARMS 8		/* Preserve EBX.  */
#define DST PARMS
#define CHR (DST+4)
#define LEN (CHR+4)
#define CHK_DST_LEN (LEN+4)
#define SETRTNVAL	movl DST(%esp), %eax

# define ENTRANCE	PUSH(%ebx);
# define RETURN_END	POP(%ebx); ret
/* RETURN re-applies the CFI of the push so that the code that follows the
   `ret` in address order is still described with EBX saved on the stack.  */
# define RETURN		RETURN_END; CFI_PUSH(%ebx)
# define JMPTBL(I, B)	I - B

#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index; it is also
   added to EDX, so the target sees EDX pointing one past the last byte
   to write.  Clobbers EBX and the flags.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX.  Go.  */	\
    jmp		*%ebx

/*
 * __memset_chk(dst, chr, len, dst_len)
 *
 * Loads len into ECX and compares it (unsigned) with dst_len.  When
 * len <= dst_len it continues inside memset at L(memset_length_loaded)
 * with EBX already pushed; otherwise it undoes the push and tail-jumps
 * to __memset_chk_fail.
 */
ENTRY(__memset_chk)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)

	POP(%ebx) // Undo ENTRANCE without returning.
	jmp	__memset_chk_fail
END(__memset_chk)

/*
 * memset(dst, chr, len)
 *
 * In:    DST(%esp) = dst, CHR(%esp) = chr (low byte used), LEN(%esp) = len
 * Out:   EAX = dst
 * Uses:  ECX, EDX, XMM0, flags; EBX is saved on entry and restored on exit.
 *
 * Size classes:
 *   0            return immediately
 *   1            one byte store
 *   2..3         two overlapping 2-byte stores
 *   4..15        two or four overlapping 4-byte stores
 *   16..64       two or four overlapping unaligned 16-byte stores
 *   > 64         align EDX to 16, then 128-byte store loops selected by
 *                comparing the length against __x86_data_cache_size and
 *                __x86_shared_cache_size, then a jump-table tail for the
 *                last 0..127 bytes
 */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset)
	ENTRANCE

	movl	LEN(%esp), %ecx
L(memset_length_loaded):
	/* len == 0: nothing to write.  */
	test	%ecx, %ecx
	jnz	L(1byteormore)
	SETRTNVAL
	RETURN

L(1byteormore):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$1, %ecx
	je	L(1byte)
	cmp	$16, %ecx
	jae	L(16bytesormore)

	cmp	$4, %ecx
	jb	L(4bytesless)
	/* 4..15 bytes: stores anchored at both ends overlap in the middle.  */
	movl	%eax, (%edx)
	movl	%eax, -4(%edx, %ecx)
	cmp	$8, %ecx
	jb	L(8bytesless)
	movl	%eax, 4(%edx)
	movl	%eax, -8(%edx, %ecx)
L(8bytesless):
	SETRTNVAL
	RETURN

L(4bytesless):
	/* 2..3 bytes: first and last word, overlapping when len == 3.  */
	movw	%ax, (%edx)
	movw	%ax, -2(%edx, %ecx)
	SETRTNVAL
	RETURN

L(1byte):
	movb	%al, (%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(16bytesormore):
	/* Broadcast the 4-byte pattern in EAX to all of XMM0.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0

	cmp	$64, %ecx
	ja	L(64bytesmore)
	/* 16..64 bytes: unaligned stores anchored at both ends.  */
	movdqu	%xmm0, (%edx)
	movdqu	%xmm0, -16(%edx, %ecx)
	cmp	$32, %ecx
	jbe	L(32bytesless)
	movdqu	%xmm0, 16(%edx)
	movdqu	%xmm0, -32(%edx, %ecx)
L(32bytesless):
	SETRTNVAL
	RETURN

L(64bytesmore):
	testl	$0xf, %edx
	jz	L(aligned_16)
L(not_aligned_16):
	/* Cover the unaligned head with one 16-byte store, round EDX up to
	   the next 16-byte boundary and shrink ECX by the bytes skipped.
	   EAX is used as a temporary and reloaded from XMM0 afterwards.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	movd	%xmm0, %eax

	ALIGN(4)
L(aligned_16):
	/* From here on EDX is 16-byte aligned.  */
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
L(128bytesormore):
	/* EBX was saved by ENTRANCE and is reloaded by RETURN, and nothing
	   below reads the stack before the tail stubs run, so EBX is used
	   as scratch without a second push/pop.  This keeps the CFI state,
	   which is applied in address order, correct for every region that
	   follows.  */
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* EBX now holds the cache size, so the GOT base is recomputed.  */
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size@GOTOFF(%ebx), %ebx

	cmp	%ebx, %ecx
	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the borrow from each `sub` below means
	   "fewer than 128 bytes remain after this chunk".  The SSE stores
	   and `lea` leave the flags untouched between `sub` and the jump.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the -128 bias; ECX is now the 0..127 byte remainder.  */
	lea	128(%ecx), %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
L(128bytes_L2_normal):
	/* Length >= __x86_data_cache_size: same aligned stores, with
	   non-temporal prefetch hints ahead of the cursor.  */
	prefetchnta	0x380(%edx)
	prefetchnta	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

L(128bytesormore_nt_start):
	/* Length >= __x86_shared_cache_size (held in EBX).  The first EBX
	   bytes are written with ordinary stores, the rest (ECX) with
	   non-temporal stores.
	   NOTE(review): the first loop consumes EBX in 128-byte steps, so it
	   looks like it relies on __x86_shared_cache_size being a non-zero
	   multiple of 128 -- confirm against the definition of that
	   symbol.  */
	sub	%ebx, %ecx
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetchnta	0x3c0(%edx)
	prefetchnta	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Fence after the non-temporal stores, before the ordinary tail
	   stores and the return.  */
	sfence
L(shared_cache_loop_end):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


	/* Jump table indexed by the number of bytes left (0..127).  Each
	   entry is the offset of the matching tail stub from the table.  */
	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

	/* Tail stubs.  BRANCH_TO_JMPTBL_ENTRY has added ECX to EDX, so EDX
	   points one past the end of the region and stub N starts writing
	   at EDX - N, which is the 16-byte-aligned cursor from before the
	   dispatch.  Each chain falls through in 16-byte steps and finishes
	   with the 0..15 byte remainder taken from XMM0/EAX/AX/AL.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN_END

END(memset)