/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

/* Fallback definitions in case bionic_asm.h does not provide them:
   L(x) produces a file-local (.L-prefixed) label; ALIGN(n) aligns to 2^n. */
#ifndef L
# define L(label) .L##label
#endif

#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif

/*-----------------------------------------------------------------------
 * void *__memset_chk_generic(void *dst, int byte, size_t n, size_t dst_len)
 *
 * _FORTIFY_SOURCE entry point: aborts (via __memset_chk_fail, defined
 * elsewhere) when the requested fill size n exceeds the known buffer
 * size dst_len; otherwise falls straight through into memset_generic
 * below (the two must stay adjacent for the fall-through to work).
 * ABI: SysV AMD64. Unsigned compare (ja) because sizes are size_t.
 *---------------------------------------------------------------------*/
ENTRY(__memset_chk_generic)
        # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
        cmp     %rcx, %rdx
        ja      __memset_chk_fail
        // Fall through to memset...
END(__memset_chk_generic)


        .section .text.sse2,"ax",@progbits

/*-----------------------------------------------------------------------
 * void *memset_generic(void *dst, int byte, size_t n)
 *
 * SSE2 memset.
 * ABI:   SysV AMD64
 * In:    %rdi = dst, %rsi = fill byte, %rdx = n
 * Out:   %rax = dst (saved up front, never touched again)
 * Clobb: %rcx, %r8, %rsi, %rdx (within caller-saved set), %xmm0, flags
 *
 * Strategy:
 *   n < 16   : scalar stores, sized by the bits of n; each size class
 *              writes the first and last chunk so the two stores overlap
 *              and cover the whole range without a loop.
 *   16..128  : unaligned 16-byte SSE stores pinned to the start and the
 *              end of the buffer (again overlapping in the middle).
 *   > 128    : same head/tail stores, then a 64-byte-aligned inner loop;
 *              non-temporal (movntdq) stores are used when n is larger
 *              than the shared cache size to avoid polluting the cache.
 *---------------------------------------------------------------------*/
ENTRY(memset_generic)
        movq    %rdi, %rax                      # return value = dst
        and     $0xff, %rsi                     # use only the low byte of 'byte'
        mov     $0x0101010101010101, %rcx
        imul    %rsi, %rcx                      # %rcx = fill byte replicated into all 8 bytes
        cmpq    $16, %rdx
        jae     L(16bytesormore)
        # n < 16: dispatch on the set bits of n (8, 4, 2, 1); each class
        # below stores at offset 0 and at offset n-size, so the pair of
        # (possibly overlapping) stores covers exactly n bytes.
        testb   $8, %dl
        jnz     L(8_15bytes)
        testb   $4, %dl
        jnz     L(4_7bytes)
        testb   $2, %dl
        jnz     L(2_3bytes)
        testb   $1, %dl
        jz      L(return)                       # n == 0: nothing to write
        movb    %cl, (%rdi)                     # n == 1
L(return):
        ret

L(8_15bytes):
        movq    %rcx, (%rdi)                    # first 8 bytes
        movq    %rcx, -8(%rdi, %rdx)            # last 8 bytes (overlaps when n < 16)
        ret

L(4_7bytes):
        movl    %ecx, (%rdi)
        movl    %ecx, -4(%rdi, %rdx)
        ret

L(2_3bytes):
        movw    %cx, (%rdi)
        movw    %cx, -2(%rdi, %rdx)
        ret

        ALIGN (4)
L(16bytesormore):
        # Broadcast the replicated qword into all of %xmm0. Note: movd
        # with a 64-bit source register assembles as movq, so the full
        # 8 replicated bytes land in the low qword before the shuffle.
        movd    %rcx, %xmm0
        pshufd  $0, %xmm0, %xmm0
        # Unaligned stores pinned to both ends; for 16 <= n <= 32 these
        # two alone cover the buffer.
        movdqu  %xmm0, (%rdi)
        movdqu  %xmm0, -16(%rdi, %rdx)
        cmpq    $32, %rdx
        jbe     L(32bytesless)
        movdqu  %xmm0, 16(%rdi)
        movdqu  %xmm0, -32(%rdi, %rdx)
        cmpq    $64, %rdx
        jbe     L(64bytesless)
        movdqu  %xmm0, 32(%rdi)
        movdqu  %xmm0, 48(%rdi)
        movdqu  %xmm0, -64(%rdi, %rdx)
        movdqu  %xmm0, -48(%rdi, %rdx)
        cmpq    $128, %rdx
        ja      L(128bytesmore)
L(32bytesless):
L(64bytesless):
        ret

        ALIGN (4)
L(128bytesmore):
        # The 4 head stores and 4 tail stores above already cover the
        # first and last 64 bytes. Fill the 64-byte-aligned middle:
        #   %rcx = first 64-aligned address strictly above dst
        #   %rdx = last 64-aligned boundary at or below dst + n
        leaq    64(%rdi), %rcx
        andq    $-64, %rcx
        movq    %rdx, %r8                       # keep original n for the cache-size test
        addq    %rdi, %rdx
        andq    $-64, %rdx
        cmpq    %rcx, %rdx
        je      L(return)                       # aligned middle is empty

        # Use non-temporal stores when n exceeds the shared cache size,
        # so a huge memset does not evict the whole cache.
        cmp     __x86_shared_cache_size(%rip), %r8

        ja      L(128bytesmore_nt)

        ALIGN (4)
L(128bytesmore_normal):
        # 64 bytes per iteration through the cache.
        # NOTE(review): the movdqa/movaps mix is harmless — both are
        # aligned 16-byte stores; only the encoding differs.
        movdqa  %xmm0, (%rcx)
        movaps  %xmm0, 0x10(%rcx)
        movaps  %xmm0, 0x20(%rcx)
        movaps  %xmm0, 0x30(%rcx)
        addq    $64, %rcx
        cmpq    %rcx, %rdx
        jne     L(128bytesmore_normal)
        ret

        ALIGN (4)
L(128bytesmore_nt):
        # Non-temporal 64-byte iterations; sfence afterwards makes the
        # weakly-ordered movntdq stores visible before returning.
        movntdq %xmm0, (%rcx)
        movntdq %xmm0, 0x10(%rcx)
        movntdq %xmm0, 0x20(%rcx)
        movntdq %xmm0, 0x30(%rcx)
        leaq    64(%rcx), %rcx
        cmpq    %rcx, %rdx
        jne     L(128bytesmore_nt)
        sfence
        ret

END(memset_generic)