This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 1/2] Improve strcpy: Rename strcpy-sse2-unaligned.S.


Why do you need to move the file? Can't you just add your version as
sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-new.S and build
strcpy-sse2-unaligned-new.S in the Makefile instead of
strcpy-sse2-unaligned.S?

--
Liubov
Intel Corporation

On Mon, Sep 9, 2013 at 7:30 PM, Ondřej Bílka <neleai@seznam.cz> wrote:
> Hi,
>
> This is the first part of improving strcpy with unaligned loads. As I do not
> have code for strncpy/stpncpy/strncat, this patch just moves
> strcpy-sse2-unaligned.S to make the subsequent patch smaller.
>
>         * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Move to ...
>         * sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-old.S: Here.
>         * sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S: Update
>         include location.
>         * sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S: Likewise.
>         * sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: Likewise.
>         * sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S: Likewise.
>
> ---
>  sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S   |    2 +-
>  sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S  |    2 +-
>  sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S   |    2 +-
>  .../x86_64/multiarch/strcpy-sse2-unaligned-old.S   | 1887 ++++++++++++++++++++
>  sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S   | 1887 --------------------
>  sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S  |    2 +-
>  6 files changed, 1891 insertions(+), 1891 deletions(-)
>  create mode 100644 sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-old.S
>  delete mode 100644 sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
>
> diff --git a/sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S
> index 34231f8..8f863e5 100644
> --- a/sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/stpcpy-sse2-unaligned.S
> @@ -1,3 +1,3 @@
>  #define USE_AS_STPCPY
>  #define STRCPY __stpcpy_sse2_unaligned
> -#include "strcpy-sse2-unaligned.S"
> +#include "strcpy-sse2-unaligned-old.S"
> diff --git a/sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S
> index 658520f..90d1533 100644
> --- a/sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/stpncpy-sse2-unaligned.S
> @@ -1,4 +1,4 @@
>  #define USE_AS_STPCPY
>  #define USE_AS_STRNCPY
>  #define STRCPY __stpncpy_sse2_unaligned
> -#include "strcpy-sse2-unaligned.S"
> +#include "strcpy-sse2-unaligned-old.S"
> diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> index 028c6d3..6c38882 100644
> --- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
> @@ -275,5 +275,5 @@ L(StartStrcpyPart):
>  #  define USE_AS_STRNCPY
>  # endif
>
> -# include "strcpy-sse2-unaligned.S"
> +# include "strcpy-sse2-unaligned-old.S"
>  #endif
> diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-old.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-old.S
> new file mode 100644
> index 0000000..7710173
> --- /dev/null
> +++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned-old.S
> @@ -0,0 +1,1887 @@
> +/* strcpy with SSE2 and unaligned load
> +   Copyright (C) 2011-2013 Free Software Foundation, Inc.
> +   Contributed by Intel Corporation.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef NOT_IN_libc
> +
> +# ifndef USE_AS_STRCAT
> +#  include <sysdep.h>
> +
> +#  ifndef STRCPY
> +#   define STRCPY  __strcpy_sse2_unaligned
> +#  endif
> +
> +# endif
> +
> +# define JMPTBL(I, B)  I - B
> +# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)             \
> +       lea     TABLE(%rip), %r11;                              \
> +       movslq  (%r11, INDEX, SCALE), %rcx;                     \
> +       lea     (%r11, %rcx), %rcx;                             \
> +       jmp     *%rcx
> +
> +# ifndef USE_AS_STRCAT
> +
> +.text
> +ENTRY (STRCPY)
> +#  ifdef USE_AS_STRNCPY
> +       mov     %rdx, %r8
> +       test    %r8, %r8
> +       jz      L(ExitZero)
> +#  endif
> +       mov     %rsi, %rcx
> +#  ifndef USE_AS_STPCPY
> +       mov     %rdi, %rax      /* save result */
> +#  endif
> +
> +# endif
> +
> +       and     $63, %rcx
> +       cmp     $32, %rcx
> +       jbe     L(SourceStringAlignmentLess32)
> +
> +       and     $-16, %rsi
> +       and     $15, %rcx
> +       pxor    %xmm0, %xmm0
> +       pxor    %xmm1, %xmm1
> +
> +       pcmpeqb (%rsi), %xmm1
> +       pmovmskb %xmm1, %rdx
> +       shr     %cl, %rdx
> +
> +# ifdef USE_AS_STRNCPY
> +#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> +       mov     $16, %r10
> +       sub     %rcx, %r10
> +       cmp     %r10, %r8
> +#  else
> +       mov     $17, %r10
> +       sub     %rcx, %r10
> +       cmp     %r10, %r8
> +#  endif
> +       jbe     L(CopyFrom1To16BytesTailCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesTail)
> +
> +       pcmpeqb 16(%rsi), %xmm0
> +       pmovmskb %xmm0, %rdx
> +
> +# ifdef USE_AS_STRNCPY
> +       add     $16, %r10
> +       cmp     %r10, %r8
> +       jbe     L(CopyFrom1To32BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To32Bytes)
> +
> +       movdqu  (%rsi, %rcx), %xmm1   /* copy 16 bytes */
> +       movdqu  %xmm1, (%rdi)
> +
> +/* If source address alignment != destination address alignment */
> +       .p2align 4
> +L(Unalign16Both):
> +       sub     %rcx, %rdi
> +# ifdef USE_AS_STRNCPY
> +       add     %rcx, %r8
> +# endif
> +       mov     $16, %rcx
> +       movdqa  (%rsi, %rcx), %xmm1
> +       movaps  16(%rsi, %rcx), %xmm2
> +       movdqu  %xmm1, (%rdi, %rcx)
> +       pcmpeqb %xmm2, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $48, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm2)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movaps  16(%rsi, %rcx), %xmm3
> +       movdqu  %xmm2, (%rdi, %rcx)
> +       pcmpeqb %xmm3, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm3)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movaps  16(%rsi, %rcx), %xmm4
> +       movdqu  %xmm3, (%rdi, %rcx)
> +       pcmpeqb %xmm4, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm4)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movaps  16(%rsi, %rcx), %xmm1
> +       movdqu  %xmm4, (%rdi, %rcx)
> +       pcmpeqb %xmm1, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm1)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movaps  16(%rsi, %rcx), %xmm2
> +       movdqu  %xmm1, (%rdi, %rcx)
> +       pcmpeqb %xmm2, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm2)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movaps  16(%rsi, %rcx), %xmm3
> +       movdqu  %xmm2, (%rdi, %rcx)
> +       pcmpeqb %xmm3, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $16, %rcx
> +# ifdef USE_AS_STRNCPY
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm3)
> +# else
> +       jnz     L(CopyFrom1To16Bytes)
> +# endif
> +
> +       movdqu  %xmm3, (%rdi, %rcx)
> +       mov     %rsi, %rdx
> +       lea     16(%rsi, %rcx), %rsi
> +       and     $-0x40, %rsi
> +       sub     %rsi, %rdx
> +       sub     %rdx, %rdi
> +# ifdef USE_AS_STRNCPY
> +       lea     128(%r8, %rdx), %r8
> +# endif
> +L(Unaligned64Loop):
> +       movaps  (%rsi), %xmm2
> +       movaps  %xmm2, %xmm4
> +       movaps  16(%rsi), %xmm5
> +       movaps  32(%rsi), %xmm3
> +       movaps  %xmm3, %xmm6
> +       movaps  48(%rsi), %xmm7
> +       pminub  %xmm5, %xmm2
> +       pminub  %xmm7, %xmm3
> +       pminub  %xmm2, %xmm3
> +       pcmpeqb %xmm0, %xmm3
> +       pmovmskb %xmm3, %rdx
> +# ifdef USE_AS_STRNCPY
> +       sub     $64, %r8
> +       jbe     L(UnalignedLeaveCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jnz     L(Unaligned64Leave)
> +
> +L(Unaligned64Loop_start):
> +       add     $64, %rdi
> +       add     $64, %rsi
> +       movdqu  %xmm4, -64(%rdi)
> +       movaps  (%rsi), %xmm2
> +       movdqa  %xmm2, %xmm4
> +       movdqu  %xmm5, -48(%rdi)
> +       movaps  16(%rsi), %xmm5
> +       pminub  %xmm5, %xmm2
> +       movaps  32(%rsi), %xmm3
> +       movdqu  %xmm6, -32(%rdi)
> +       movaps  %xmm3, %xmm6
> +       movdqu  %xmm7, -16(%rdi)
> +       movaps  48(%rsi), %xmm7
> +       pminub  %xmm7, %xmm3
> +       pminub  %xmm2, %xmm3
> +       pcmpeqb %xmm0, %xmm3
> +       pmovmskb %xmm3, %rdx
> +# ifdef USE_AS_STRNCPY
> +       sub     $64, %r8
> +       jbe     L(UnalignedLeaveCase2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jz      L(Unaligned64Loop_start)
> +
> +L(Unaligned64Leave):
> +       pxor    %xmm1, %xmm1
> +
> +       pcmpeqb %xmm4, %xmm0
> +       pcmpeqb %xmm5, %xmm1
> +       pmovmskb %xmm0, %rdx
> +       pmovmskb %xmm1, %rcx
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesUnaligned_0)
> +       test    %rcx, %rcx
> +       jnz     L(CopyFrom1To16BytesUnaligned_16)
> +
> +       pcmpeqb %xmm6, %xmm0
> +       pcmpeqb %xmm7, %xmm1
> +       pmovmskb %xmm0, %rdx
> +       pmovmskb %xmm1, %rcx
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesUnaligned_32)
> +
> +       bsf     %rcx, %rdx
> +       movdqu  %xmm4, (%rdi)
> +       movdqu  %xmm5, 16(%rdi)
> +       movdqu  %xmm6, 32(%rdi)
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +# ifdef USE_AS_STPCPY
> +       lea     48(%rdi, %rdx), %rax
> +# endif
> +       movdqu  %xmm7, 48(%rdi)
> +       add     $15, %r8
> +       sub     %rdx, %r8
> +       lea     49(%rdi, %rdx), %rdi
> +       jmp     L(StrncpyFillTailWithZero)
> +# else
> +       add     $48, %rsi
> +       add     $48, %rdi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +# endif
> +
> +/* If source address alignment == destination address alignment */
> +
> +L(SourceStringAlignmentLess32):
> +       pxor    %xmm0, %xmm0
> +       movdqu  (%rsi), %xmm1
> +       movdqu  16(%rsi), %xmm2
> +       pcmpeqb %xmm1, %xmm0
> +       pmovmskb %xmm0, %rdx
> +
> +# ifdef USE_AS_STRNCPY
> +#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> +       cmp     $16, %r8
> +#  else
> +       cmp     $17, %r8
> +#  endif
> +       jbe     L(CopyFrom1To16BytesTail1Case2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesTail1)
> +
> +       pcmpeqb %xmm2, %xmm0
> +       movdqu  %xmm1, (%rdi)
> +       pmovmskb %xmm0, %rdx
> +
> +# ifdef USE_AS_STRNCPY
> +#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> +       cmp     $32, %r8
> +#  else
> +       cmp     $33, %r8
> +#  endif
> +       jbe     L(CopyFrom1To32Bytes1Case2OrCase3)
> +# endif
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To32Bytes1)
> +
> +       and     $-16, %rsi
> +       and     $15, %rcx
> +       jmp     L(Unalign16Both)
> +
> +/*------End of main part with loops---------------------*/
> +
> +/* Case1 */
> +
> +# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
> +       .p2align 4
> +L(CopyFrom1To16Bytes):
> +       add     %rcx, %rdi
> +       add     %rcx, %rsi
> +       bsf     %rdx, %rdx
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +# endif
> +       .p2align 4
> +L(CopyFrom1To16BytesTail):
> +       add     %rcx, %rsi
> +       bsf     %rdx, %rdx
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To32Bytes1):
> +       add     $16, %rsi
> +       add     $16, %rdi
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $16, %r8
> +# endif
> +L(CopyFrom1To16BytesTail1):
> +       bsf     %rdx, %rdx
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To32Bytes):
> +       bsf     %rdx, %rdx
> +       add     %rcx, %rsi
> +       add     $16, %rdx
> +       sub     %rcx, %rdx
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnaligned_0):
> +       bsf     %rdx, %rdx
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +# ifdef USE_AS_STPCPY
> +       lea     (%rdi, %rdx), %rax
> +# endif
> +       movdqu  %xmm4, (%rdi)
> +       add     $63, %r8
> +       sub     %rdx, %r8
> +       lea     1(%rdi, %rdx), %rdi
> +       jmp     L(StrncpyFillTailWithZero)
> +# else
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +# endif
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnaligned_16):
> +       bsf     %rcx, %rdx
> +       movdqu  %xmm4, (%rdi)
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +# ifdef USE_AS_STPCPY
> +       lea     16(%rdi, %rdx), %rax
> +# endif
> +       movdqu  %xmm5, 16(%rdi)
> +       add     $47, %r8
> +       sub     %rdx, %r8
> +       lea     17(%rdi, %rdx), %rdi
> +       jmp     L(StrncpyFillTailWithZero)
> +# else
> +       add     $16, %rsi
> +       add     $16, %rdi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +# endif
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnaligned_32):
> +       bsf     %rdx, %rdx
> +       movdqu  %xmm4, (%rdi)
> +       movdqu  %xmm5, 16(%rdi)
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +# ifdef USE_AS_STPCPY
> +       lea     32(%rdi, %rdx), %rax
> +# endif
> +       movdqu  %xmm6, 32(%rdi)
> +       add     $31, %r8
> +       sub     %rdx, %r8
> +       lea     33(%rdi, %rdx), %rdi
> +       jmp     L(StrncpyFillTailWithZero)
> +# else
> +       add     $32, %rsi
> +       add     $32, %rdi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +# endif
> +
> +# ifdef USE_AS_STRNCPY
> +#  ifndef USE_AS_STRCAT
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm6):
> +       movdqu  %xmm6, (%rdi, %rcx)
> +       jmp     L(CopyFrom1To16BytesXmmExit)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm5):
> +       movdqu  %xmm5, (%rdi, %rcx)
> +       jmp     L(CopyFrom1To16BytesXmmExit)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm4):
> +       movdqu  %xmm4, (%rdi, %rcx)
> +       jmp     L(CopyFrom1To16BytesXmmExit)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm3):
> +       movdqu  %xmm3, (%rdi, %rcx)
> +       jmp     L(CopyFrom1To16BytesXmmExit)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm1):
> +       movdqu  %xmm1, (%rdi, %rcx)
> +       jmp     L(CopyFrom1To16BytesXmmExit)
> +#  endif
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesExit):
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> +
> +/* Case2 */
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesCase2):
> +       add     $16, %r8
> +       add     %rcx, %rdi
> +       add     %rcx, %rsi
> +       bsf     %rdx, %rdx
> +       cmp     %r8, %rdx
> +       jb      L(CopyFrom1To16BytesExit)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To32BytesCase2):
> +       add     %rcx, %rsi
> +       bsf     %rdx, %rdx
> +       add     $16, %rdx
> +       sub     %rcx, %rdx
> +       cmp     %r8, %rdx
> +       jb      L(CopyFrom1To16BytesExit)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +L(CopyFrom1To16BytesTailCase2):
> +       add     %rcx, %rsi
> +       bsf     %rdx, %rdx
> +       cmp     %r8, %rdx
> +       jb      L(CopyFrom1To16BytesExit)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +L(CopyFrom1To16BytesTail1Case2):
> +       bsf     %rdx, %rdx
> +       cmp     %r8, %rdx
> +       jb      L(CopyFrom1To16BytesExit)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +/* Case2 or Case3,  Case3 */
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesCase2OrCase3):
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesCase2)
> +L(CopyFrom1To16BytesCase3):
> +       add     $16, %r8
> +       add     %rcx, %rdi
> +       add     %rcx, %rsi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To32BytesCase2OrCase3):
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To32BytesCase2)
> +       add     %rcx, %rsi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesTailCase2OrCase3):
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesTailCase2)
> +       add     %rcx, %rsi
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +       .p2align 4
> +L(CopyFrom1To32Bytes1Case2OrCase3):
> +       add     $16, %rdi
> +       add     $16, %rsi
> +       sub     $16, %r8
> +L(CopyFrom1To16BytesTail1Case2OrCase3):
> +       test    %rdx, %rdx
> +       jnz     L(CopyFrom1To16BytesTail1Case2)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +# endif
> +
> +/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/
> +
> +       .p2align 4
> +L(Exit1):
> +       mov     %dh, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     (%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $1, %r8
> +       lea     1(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit2):
> +       mov     (%rsi), %dx
> +       mov     %dx, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     1(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $2, %r8
> +       lea     2(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit3):
> +       mov     (%rsi), %cx
> +       mov     %cx, (%rdi)
> +       mov     %dh, 2(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     2(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $3, %r8
> +       lea     3(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit4):
> +       mov     (%rsi), %edx
> +       mov     %edx, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     3(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $4, %r8
> +       lea     4(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit5):
> +       mov     (%rsi), %ecx
> +       mov     %dh, 4(%rdi)
> +       mov     %ecx, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     4(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $5, %r8
> +       lea     5(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit6):
> +       mov     (%rsi), %ecx
> +       mov     4(%rsi), %dx
> +       mov     %ecx, (%rdi)
> +       mov     %dx, 4(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     5(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $6, %r8
> +       lea     6(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit7):
> +       mov     (%rsi), %ecx
> +       mov     3(%rsi), %edx
> +       mov     %ecx, (%rdi)
> +       mov     %edx, 3(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     6(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $7, %r8
> +       lea     7(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit8):
> +       mov     (%rsi), %rdx
> +       mov     %rdx, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     7(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $8, %r8
> +       lea     8(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit9):
> +       mov     (%rsi), %rcx
> +       mov     %dh, 8(%rdi)
> +       mov     %rcx, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     8(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $9, %r8
> +       lea     9(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit10):
> +       mov     (%rsi), %rcx
> +       mov     8(%rsi), %dx
> +       mov     %rcx, (%rdi)
> +       mov     %dx, 8(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     9(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $10, %r8
> +       lea     10(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit11):
> +       mov     (%rsi), %rcx
> +       mov     7(%rsi), %edx
> +       mov     %rcx, (%rdi)
> +       mov     %edx, 7(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     10(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $11, %r8
> +       lea     11(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit12):
> +       mov     (%rsi), %rcx
> +       mov     8(%rsi), %edx
> +       mov     %rcx, (%rdi)
> +       mov     %edx, 8(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     11(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $12, %r8
> +       lea     12(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit13):
> +       mov     (%rsi), %rcx
> +       mov     5(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 5(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     12(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $13, %r8
> +       lea     13(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit14):
> +       mov     (%rsi), %rcx
> +       mov     6(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 6(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     13(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $14, %r8
> +       lea     14(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit15):
> +       mov     (%rsi), %rcx
> +       mov     7(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 7(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     14(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $15, %r8
> +       lea     15(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit16):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  %xmm0, (%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     15(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $16, %r8
> +       lea     16(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit17):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  %xmm0, (%rdi)
> +       mov     %dh, 16(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     16(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $17, %r8
> +       lea     17(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit18):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %cx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %cx, 16(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     17(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $18, %r8
> +       lea     18(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit19):
> +       movdqu  (%rsi), %xmm0
> +       mov     15(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 15(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     18(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $19, %r8
> +       lea     19(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit20):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 16(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     19(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $20, %r8
> +       lea     20(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit21):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 16(%rdi)
> +       mov     %dh, 20(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     20(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $21, %r8
> +       lea     21(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit22):
> +       movdqu  (%rsi), %xmm0
> +       mov     14(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 14(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     21(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $22, %r8
> +       lea     22(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit23):
> +       movdqu  (%rsi), %xmm0
> +       mov     15(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 15(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     22(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $23, %r8
> +       lea     23(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit24):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 16(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     23(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $24, %r8
> +       lea     24(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit25):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 16(%rdi)
> +       mov     %dh, 24(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     24(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $25, %r8
> +       lea     25(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit26):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     24(%rsi), %cx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %cx, 24(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     25(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $26, %r8
> +       lea     26(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit27):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     23(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %ecx, 23(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     26(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $27, %r8
> +       lea     27(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit28):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     24(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %ecx, 24(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     27(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $28, %r8
> +       lea     28(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit29):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  13(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 13(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     28(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $29, %r8
> +       lea     29(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit30):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  14(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 14(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     29(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $30, %r8
> +       lea     30(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit31):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  15(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 15(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     30(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $31, %r8
> +       lea     31(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +       .p2align 4
> +L(Exit32):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  16(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 16(%rdi)
> +# ifdef USE_AS_STPCPY
> +       lea     31(%rdi), %rax
> +# endif
> +# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> +       sub     $32, %r8
> +       lea     32(%rdi), %rdi
> +       jnz     L(StrncpyFillTailWithZero)
> +# endif
> +       ret
> +
> +# ifdef USE_AS_STRNCPY
> +
> +       .p2align 4
> +L(StrncpyExit0):
> +#  ifdef USE_AS_STPCPY
> +       mov     %rdi, %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, (%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit1):
> +       mov     (%rsi), %dl
> +       mov     %dl, (%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     1(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 1(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit2):
> +       mov     (%rsi), %dx
> +       mov     %dx, (%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     2(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 2(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit3):
> +       mov     (%rsi), %cx
> +       mov     2(%rsi), %dl
> +       mov     %cx, (%rdi)
> +       mov     %dl, 2(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     3(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 3(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit4):
> +       mov     (%rsi), %edx
> +       mov     %edx, (%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     4(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 4(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit5):
> +       mov     (%rsi), %ecx
> +       mov     4(%rsi), %dl
> +       mov     %ecx, (%rdi)
> +       mov     %dl, 4(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     5(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 5(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit6):
> +       mov     (%rsi), %ecx
> +       mov     4(%rsi), %dx
> +       mov     %ecx, (%rdi)
> +       mov     %dx, 4(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     6(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 6(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit7):
> +       mov     (%rsi), %ecx
> +       mov     3(%rsi), %edx
> +       mov     %ecx, (%rdi)
> +       mov     %edx, 3(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     7(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 7(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit8):
> +       mov     (%rsi), %rdx
> +       mov     %rdx, (%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     8(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 8(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit9):
> +       mov     (%rsi), %rcx
> +       mov     8(%rsi), %dl
> +       mov     %rcx, (%rdi)
> +       mov     %dl, 8(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     9(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 9(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit10):
> +       mov     (%rsi), %rcx
> +       mov     8(%rsi), %dx
> +       mov     %rcx, (%rdi)
> +       mov     %dx, 8(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     10(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 10(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit11):
> +       mov     (%rsi), %rcx
> +       mov     7(%rsi), %edx
> +       mov     %rcx, (%rdi)
> +       mov     %edx, 7(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     11(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 11(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit12):
> +       mov     (%rsi), %rcx
> +       mov     8(%rsi), %edx
> +       mov     %rcx, (%rdi)
> +       mov     %edx, 8(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     12(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 12(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit13):
> +       mov     (%rsi), %rcx
> +       mov     5(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 5(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     13(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 13(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit14):
> +       mov     (%rsi), %rcx
> +       mov     6(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 6(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     14(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 14(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit15):
> +       mov     (%rsi), %rcx
> +       mov     7(%rsi), %rdx
> +       mov     %rcx, (%rdi)
> +       mov     %rdx, 7(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     15(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 15(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit16):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  %xmm0, (%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     16(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 16(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit17):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %cl
> +       movdqu  %xmm0, (%rdi)
> +       mov     %cl, 16(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     17(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 17(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit18):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %cx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %cx, 16(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     18(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 18(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit19):
> +       movdqu  (%rsi), %xmm0
> +       mov     15(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 15(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     19(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 19(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit20):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 16(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     20(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 20(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit21):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %ecx
> +       mov     20(%rsi), %dl
> +       movdqu  %xmm0, (%rdi)
> +       mov     %ecx, 16(%rdi)
> +       mov     %dl, 20(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     21(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 21(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit22):
> +       movdqu  (%rsi), %xmm0
> +       mov     14(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 14(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     22(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 22(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit23):
> +       movdqu  (%rsi), %xmm0
> +       mov     15(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 15(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     23(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 23(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit24):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rcx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rcx, 16(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     24(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 24(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit25):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     24(%rsi), %cl
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %cl, 24(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     25(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 25(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit26):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     24(%rsi), %cx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %cx, 24(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     26(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 26(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit27):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     23(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %ecx, 23(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     27(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 27(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit28):
> +       movdqu  (%rsi), %xmm0
> +       mov     16(%rsi), %rdx
> +       mov     24(%rsi), %ecx
> +       movdqu  %xmm0, (%rdi)
> +       mov     %rdx, 16(%rdi)
> +       mov     %ecx, 24(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     28(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 28(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit29):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  13(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 13(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     29(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 29(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit30):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  14(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 14(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     30(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 30(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit31):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  15(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 15(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     31(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 31(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit32):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  16(%rsi), %xmm2
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 16(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     32(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 32(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(StrncpyExit33):
> +       movdqu  (%rsi), %xmm0
> +       movdqu  16(%rsi), %xmm2
> +       mov     32(%rsi), %cl
> +       movdqu  %xmm0, (%rdi)
> +       movdqu  %xmm2, 16(%rdi)
> +       mov     %cl, 32(%rdi)
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 33(%rdi)
> +#  endif
> +       ret
> +
> +#  ifndef USE_AS_STRCAT
> +
> +       .p2align 4
> +L(Fill0):
> +       ret
> +
> +       .p2align 4
> +L(Fill1):
> +       mov     %dl, (%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill2):
> +       mov     %dx, (%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill3):
> +       mov     %edx, -1(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill4):
> +       mov     %edx, (%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill5):
> +       mov     %edx, (%rdi)
> +       mov     %dl, 4(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill6):
> +       mov     %edx, (%rdi)
> +       mov     %dx, 4(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill7):
> +       mov     %rdx, -1(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill8):
> +       mov     %rdx, (%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill9):
> +       mov     %rdx, (%rdi)
> +       mov     %dl, 8(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill10):
> +       mov     %rdx, (%rdi)
> +       mov     %dx, 8(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill11):
> +       mov     %rdx, (%rdi)
> +       mov     %edx, 7(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill12):
> +       mov     %rdx, (%rdi)
> +       mov     %edx, 8(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill13):
> +       mov     %rdx, (%rdi)
> +       mov     %rdx, 5(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill14):
> +       mov     %rdx, (%rdi)
> +       mov     %rdx, 6(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill15):
> +       movdqu  %xmm0, -1(%rdi)
> +       ret
> +
> +       .p2align 4
> +L(Fill16):
> +       movdqu  %xmm0, (%rdi)
> +       ret
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesUnalignedXmm2):
> +       movdqu  %xmm2, (%rdi, %rcx)
> +
> +       .p2align 4
> +L(CopyFrom1To16BytesXmmExit):
> +       bsf     %rdx, %rdx
> +       add     $15, %r8
> +       add     %rcx, %rdi
> +#   ifdef USE_AS_STPCPY
> +       lea     (%rdi, %rdx), %rax
> +#   endif
> +       sub     %rdx, %r8
> +       lea     1(%rdi, %rdx), %rdi
> +
> +       .p2align 4
> +L(StrncpyFillTailWithZero):
> +       pxor    %xmm0, %xmm0
> +       xor     %rdx, %rdx
> +       sub     $16, %r8
> +       jbe     L(StrncpyFillExit)
> +
> +       movdqu  %xmm0, (%rdi)
> +       add     $16, %rdi
> +
> +       mov     %rdi, %rsi
> +       and     $0xf, %rsi
> +       sub     %rsi, %rdi
> +       add     %rsi, %r8
> +       sub     $64, %r8
> +       jb      L(StrncpyFillLess64)
> +
> +L(StrncpyFillLoopMovdqa):
> +       movdqa  %xmm0, (%rdi)
> +       movdqa  %xmm0, 16(%rdi)
> +       movdqa  %xmm0, 32(%rdi)
> +       movdqa  %xmm0, 48(%rdi)
> +       add     $64, %rdi
> +       sub     $64, %r8
> +       jae     L(StrncpyFillLoopMovdqa)
> +
> +L(StrncpyFillLess64):
> +       add     $32, %r8
> +       jl      L(StrncpyFillLess32)
> +       movdqa  %xmm0, (%rdi)
> +       movdqa  %xmm0, 16(%rdi)
> +       add     $32, %rdi
> +       sub     $16, %r8
> +       jl      L(StrncpyFillExit)
> +       movdqa  %xmm0, (%rdi)
> +       add     $16, %rdi
> +       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> +
> +L(StrncpyFillLess32):
> +       add     $16, %r8
> +       jl      L(StrncpyFillExit)
> +       movdqa  %xmm0, (%rdi)
> +       add     $16, %rdi
> +       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> +
> +L(StrncpyFillExit):
> +       add     $16, %r8
> +       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> +
> +/* end of ifndef USE_AS_STRCAT */
> +#  endif
> +
> +       .p2align 4
> +L(UnalignedLeaveCase2OrCase3):
> +       test    %rdx, %rdx
> +       jnz     L(Unaligned64LeaveCase2)
> +L(Unaligned64LeaveCase3):
> +       lea     64(%r8), %rcx
> +       and     $-16, %rcx
> +       add     $48, %r8
> +       jl      L(CopyFrom1To16BytesCase3)
> +       movdqu  %xmm4, (%rdi)
> +       sub     $16, %r8
> +       jb      L(CopyFrom1To16BytesCase3)
> +       movdqu  %xmm5, 16(%rdi)
> +       sub     $16, %r8
> +       jb      L(CopyFrom1To16BytesCase3)
> +       movdqu  %xmm6, 32(%rdi)
> +       sub     $16, %r8
> +       jb      L(CopyFrom1To16BytesCase3)
> +       movdqu  %xmm7, 48(%rdi)
> +#  ifdef USE_AS_STPCPY
> +       lea     64(%rdi), %rax
> +#  endif
> +#  ifdef USE_AS_STRCAT
> +       xor     %ch, %ch
> +       movb    %ch, 64(%rdi)
> +#  endif
> +       ret
> +
> +       .p2align 4
> +L(Unaligned64LeaveCase2):
> +       xor     %rcx, %rcx
> +       pcmpeqb %xmm4, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       add     $48, %r8
> +       jle     L(CopyFrom1To16BytesCase2OrCase3)
> +       test    %rdx, %rdx
> +#  ifndef USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm4)
> +#  else
> +       jnz     L(CopyFrom1To16Bytes)
> +#  endif
> +       pcmpeqb %xmm5, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       movdqu  %xmm4, (%rdi)
> +       add     $16, %rcx
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +       test    %rdx, %rdx
> +#  ifndef USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm5)
> +#  else
> +       jnz     L(CopyFrom1To16Bytes)
> +#  endif
> +
> +       pcmpeqb %xmm6, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       movdqu  %xmm5, 16(%rdi)
> +       add     $16, %rcx
> +       sub     $16, %r8
> +       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> +       test    %rdx, %rdx
> +#  ifndef USE_AS_STRCAT
> +       jnz     L(CopyFrom1To16BytesUnalignedXmm6)
> +#  else
> +       jnz     L(CopyFrom1To16Bytes)
> +#  endif
> +
> +       pcmpeqb %xmm7, %xmm0
> +       pmovmskb %xmm0, %rdx
> +       movdqu  %xmm6, 32(%rdi)
> +       lea     16(%rdi, %rcx), %rdi
> +       lea     16(%rsi, %rcx), %rsi
> +       bsf     %rdx, %rdx
> +       cmp     %r8, %rdx
> +       jb      L(CopyFrom1To16BytesExit)
> +       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> +
> +       .p2align 4
> +L(ExitZero):
> +#  ifndef USE_AS_STRCAT
> +       mov     %rdi, %rax
> +#  endif
> +       ret
> +
> +# endif
> +
> +# ifndef USE_AS_STRCAT
> +END (STRCPY)
> +# else
> +END (STRCAT)
> +# endif
> +       .p2align 4
> +       .section .rodata
> +L(ExitTable):
> +       .int    JMPTBL(L(Exit1), L(ExitTable))
> +       .int    JMPTBL(L(Exit2), L(ExitTable))
> +       .int    JMPTBL(L(Exit3), L(ExitTable))
> +       .int    JMPTBL(L(Exit4), L(ExitTable))
> +       .int    JMPTBL(L(Exit5), L(ExitTable))
> +       .int    JMPTBL(L(Exit6), L(ExitTable))
> +       .int    JMPTBL(L(Exit7), L(ExitTable))
> +       .int    JMPTBL(L(Exit8), L(ExitTable))
> +       .int    JMPTBL(L(Exit9), L(ExitTable))
> +       .int    JMPTBL(L(Exit10), L(ExitTable))
> +       .int    JMPTBL(L(Exit11), L(ExitTable))
> +       .int    JMPTBL(L(Exit12), L(ExitTable))
> +       .int    JMPTBL(L(Exit13), L(ExitTable))
> +       .int    JMPTBL(L(Exit14), L(ExitTable))
> +       .int    JMPTBL(L(Exit15), L(ExitTable))
> +       .int    JMPTBL(L(Exit16), L(ExitTable))
> +       .int    JMPTBL(L(Exit17), L(ExitTable))
> +       .int    JMPTBL(L(Exit18), L(ExitTable))
> +       .int    JMPTBL(L(Exit19), L(ExitTable))
> +       .int    JMPTBL(L(Exit20), L(ExitTable))
> +       .int    JMPTBL(L(Exit21), L(ExitTable))
> +       .int    JMPTBL(L(Exit22), L(ExitTable))
> +       .int    JMPTBL(L(Exit23), L(ExitTable))
> +       .int    JMPTBL(L(Exit24), L(ExitTable))
> +       .int    JMPTBL(L(Exit25), L(ExitTable))
> +       .int    JMPTBL(L(Exit26), L(ExitTable))
> +       .int    JMPTBL(L(Exit27), L(ExitTable))
> +       .int    JMPTBL(L(Exit28), L(ExitTable))
> +       .int    JMPTBL(L(Exit29), L(ExitTable))
> +       .int    JMPTBL(L(Exit30), L(ExitTable))
> +       .int    JMPTBL(L(Exit31), L(ExitTable))
> +       .int    JMPTBL(L(Exit32), L(ExitTable))
> +# ifdef USE_AS_STRNCPY
> +L(ExitStrncpyTable):
> +       .int    JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
> +       .int    JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
> +#  ifndef USE_AS_STRCAT
> +       .p2align 4
> +L(FillTable):
> +       .int    JMPTBL(L(Fill0), L(FillTable))
> +       .int    JMPTBL(L(Fill1), L(FillTable))
> +       .int    JMPTBL(L(Fill2), L(FillTable))
> +       .int    JMPTBL(L(Fill3), L(FillTable))
> +       .int    JMPTBL(L(Fill4), L(FillTable))
> +       .int    JMPTBL(L(Fill5), L(FillTable))
> +       .int    JMPTBL(L(Fill6), L(FillTable))
> +       .int    JMPTBL(L(Fill7), L(FillTable))
> +       .int    JMPTBL(L(Fill8), L(FillTable))
> +       .int    JMPTBL(L(Fill9), L(FillTable))
> +       .int    JMPTBL(L(Fill10), L(FillTable))
> +       .int    JMPTBL(L(Fill11), L(FillTable))
> +       .int    JMPTBL(L(Fill12), L(FillTable))
> +       .int    JMPTBL(L(Fill13), L(FillTable))
> +       .int    JMPTBL(L(Fill14), L(FillTable))
> +       .int    JMPTBL(L(Fill15), L(FillTable))
> +       .int    JMPTBL(L(Fill16), L(FillTable))
> +#  endif
> +# endif
> +#endif
> diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
> deleted file mode 100644
> index 7710173..0000000
> --- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
> +++ /dev/null
> @@ -1,1887 +0,0 @@
> -/* strcpy with SSE2 and unaligned load
> -   Copyright (C) 2011-2013 Free Software Foundation, Inc.
> -   Contributed by Intel Corporation.
> -   This file is part of the GNU C Library.
> -
> -   The GNU C Library is free software; you can redistribute it and/or
> -   modify it under the terms of the GNU Lesser General Public
> -   License as published by the Free Software Foundation; either
> -   version 2.1 of the License, or (at your option) any later version.
> -
> -   The GNU C Library is distributed in the hope that it will be useful,
> -   but WITHOUT ANY WARRANTY; without even the implied warranty of
> -   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> -   Lesser General Public License for more details.
> -
> -   You should have received a copy of the GNU Lesser General Public
> -   License along with the GNU C Library; if not, see
> -   <http://www.gnu.org/licenses/>.  */
> -
> -#ifndef NOT_IN_libc
> -
> -# ifndef USE_AS_STRCAT
> -#  include <sysdep.h>
> -
> -#  ifndef STRCPY
> -#   define STRCPY  __strcpy_sse2_unaligned
> -#  endif
> -
> -# endif
> -
> -# define JMPTBL(I, B)  I - B
> -# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)             \
> -       lea     TABLE(%rip), %r11;                              \
> -       movslq  (%r11, INDEX, SCALE), %rcx;                     \
> -       lea     (%r11, %rcx), %rcx;                             \
> -       jmp     *%rcx
> -
> -# ifndef USE_AS_STRCAT
> -
> -.text
> -ENTRY (STRCPY)
> -#  ifdef USE_AS_STRNCPY
> -       mov     %rdx, %r8
> -       test    %r8, %r8
> -       jz      L(ExitZero)
> -#  endif
> -       mov     %rsi, %rcx
> -#  ifndef USE_AS_STPCPY
> -       mov     %rdi, %rax      /* save result */
> -#  endif
> -
> -# endif
> -
> -       and     $63, %rcx
> -       cmp     $32, %rcx
> -       jbe     L(SourceStringAlignmentLess32)
> -
> -       and     $-16, %rsi
> -       and     $15, %rcx
> -       pxor    %xmm0, %xmm0
> -       pxor    %xmm1, %xmm1
> -
> -       pcmpeqb (%rsi), %xmm1
> -       pmovmskb %xmm1, %rdx
> -       shr     %cl, %rdx
> -
> -# ifdef USE_AS_STRNCPY
> -#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> -       mov     $16, %r10
> -       sub     %rcx, %r10
> -       cmp     %r10, %r8
> -#  else
> -       mov     $17, %r10
> -       sub     %rcx, %r10
> -       cmp     %r10, %r8
> -#  endif
> -       jbe     L(CopyFrom1To16BytesTailCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesTail)
> -
> -       pcmpeqb 16(%rsi), %xmm0
> -       pmovmskb %xmm0, %rdx
> -
> -# ifdef USE_AS_STRNCPY
> -       add     $16, %r10
> -       cmp     %r10, %r8
> -       jbe     L(CopyFrom1To32BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To32Bytes)
> -
> -       movdqu  (%rsi, %rcx), %xmm1   /* copy 16 bytes */
> -       movdqu  %xmm1, (%rdi)
> -
> -/* If source address alignment != destination address alignment */
> -       .p2align 4
> -L(Unalign16Both):
> -       sub     %rcx, %rdi
> -# ifdef USE_AS_STRNCPY
> -       add     %rcx, %r8
> -# endif
> -       mov     $16, %rcx
> -       movdqa  (%rsi, %rcx), %xmm1
> -       movaps  16(%rsi, %rcx), %xmm2
> -       movdqu  %xmm1, (%rdi, %rcx)
> -       pcmpeqb %xmm2, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $48, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm2)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movaps  16(%rsi, %rcx), %xmm3
> -       movdqu  %xmm2, (%rdi, %rcx)
> -       pcmpeqb %xmm3, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm3)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movaps  16(%rsi, %rcx), %xmm4
> -       movdqu  %xmm3, (%rdi, %rcx)
> -       pcmpeqb %xmm4, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm4)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movaps  16(%rsi, %rcx), %xmm1
> -       movdqu  %xmm4, (%rdi, %rcx)
> -       pcmpeqb %xmm1, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm1)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movaps  16(%rsi, %rcx), %xmm2
> -       movdqu  %xmm1, (%rdi, %rcx)
> -       pcmpeqb %xmm2, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm2)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movaps  16(%rsi, %rcx), %xmm3
> -       movdqu  %xmm2, (%rdi, %rcx)
> -       pcmpeqb %xmm3, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $16, %rcx
> -# ifdef USE_AS_STRNCPY
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm3)
> -# else
> -       jnz     L(CopyFrom1To16Bytes)
> -# endif
> -
> -       movdqu  %xmm3, (%rdi, %rcx)
> -       mov     %rsi, %rdx
> -       lea     16(%rsi, %rcx), %rsi
> -       and     $-0x40, %rsi
> -       sub     %rsi, %rdx
> -       sub     %rdx, %rdi
> -# ifdef USE_AS_STRNCPY
> -       lea     128(%r8, %rdx), %r8
> -# endif
> -L(Unaligned64Loop):
> -       movaps  (%rsi), %xmm2
> -       movaps  %xmm2, %xmm4
> -       movaps  16(%rsi), %xmm5
> -       movaps  32(%rsi), %xmm3
> -       movaps  %xmm3, %xmm6
> -       movaps  48(%rsi), %xmm7
> -       pminub  %xmm5, %xmm2
> -       pminub  %xmm7, %xmm3
> -       pminub  %xmm2, %xmm3
> -       pcmpeqb %xmm0, %xmm3
> -       pmovmskb %xmm3, %rdx
> -# ifdef USE_AS_STRNCPY
> -       sub     $64, %r8
> -       jbe     L(UnalignedLeaveCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jnz     L(Unaligned64Leave)
> -
> -L(Unaligned64Loop_start):
> -       add     $64, %rdi
> -       add     $64, %rsi
> -       movdqu  %xmm4, -64(%rdi)
> -       movaps  (%rsi), %xmm2
> -       movdqa  %xmm2, %xmm4
> -       movdqu  %xmm5, -48(%rdi)
> -       movaps  16(%rsi), %xmm5
> -       pminub  %xmm5, %xmm2
> -       movaps  32(%rsi), %xmm3
> -       movdqu  %xmm6, -32(%rdi)
> -       movaps  %xmm3, %xmm6
> -       movdqu  %xmm7, -16(%rdi)
> -       movaps  48(%rsi), %xmm7
> -       pminub  %xmm7, %xmm3
> -       pminub  %xmm2, %xmm3
> -       pcmpeqb %xmm0, %xmm3
> -       pmovmskb %xmm3, %rdx
> -# ifdef USE_AS_STRNCPY
> -       sub     $64, %r8
> -       jbe     L(UnalignedLeaveCase2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jz      L(Unaligned64Loop_start)
> -
> -L(Unaligned64Leave):
> -       pxor    %xmm1, %xmm1
> -
> -       pcmpeqb %xmm4, %xmm0
> -       pcmpeqb %xmm5, %xmm1
> -       pmovmskb %xmm0, %rdx
> -       pmovmskb %xmm1, %rcx
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesUnaligned_0)
> -       test    %rcx, %rcx
> -       jnz     L(CopyFrom1To16BytesUnaligned_16)
> -
> -       pcmpeqb %xmm6, %xmm0
> -       pcmpeqb %xmm7, %xmm1
> -       pmovmskb %xmm0, %rdx
> -       pmovmskb %xmm1, %rcx
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesUnaligned_32)
> -
> -       bsf     %rcx, %rdx
> -       movdqu  %xmm4, (%rdi)
> -       movdqu  %xmm5, 16(%rdi)
> -       movdqu  %xmm6, 32(%rdi)
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -# ifdef USE_AS_STPCPY
> -       lea     48(%rdi, %rdx), %rax
> -# endif
> -       movdqu  %xmm7, 48(%rdi)
> -       add     $15, %r8
> -       sub     %rdx, %r8
> -       lea     49(%rdi, %rdx), %rdi
> -       jmp     L(StrncpyFillTailWithZero)
> -# else
> -       add     $48, %rsi
> -       add     $48, %rdi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -# endif
> -
> -/* If source address alignment == destination address alignment */
> -
> -L(SourceStringAlignmentLess32):
> -       pxor    %xmm0, %xmm0
> -       movdqu  (%rsi), %xmm1
> -       movdqu  16(%rsi), %xmm2
> -       pcmpeqb %xmm1, %xmm0
> -       pmovmskb %xmm0, %rdx
> -
> -# ifdef USE_AS_STRNCPY
> -#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> -       cmp     $16, %r8
> -#  else
> -       cmp     $17, %r8
> -#  endif
> -       jbe     L(CopyFrom1To16BytesTail1Case2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesTail1)
> -
> -       pcmpeqb %xmm2, %xmm0
> -       movdqu  %xmm1, (%rdi)
> -       pmovmskb %xmm0, %rdx
> -
> -# ifdef USE_AS_STRNCPY
> -#  if defined USE_AS_STPCPY || defined USE_AS_STRCAT
> -       cmp     $32, %r8
> -#  else
> -       cmp     $33, %r8
> -#  endif
> -       jbe     L(CopyFrom1To32Bytes1Case2OrCase3)
> -# endif
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To32Bytes1)
> -
> -       and     $-16, %rsi
> -       and     $15, %rcx
> -       jmp     L(Unalign16Both)
> -
> -/*------End of main part with loops---------------------*/
> -
> -/* Case1 */
> -
> -# if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
> -       .p2align 4
> -L(CopyFrom1To16Bytes):
> -       add     %rcx, %rdi
> -       add     %rcx, %rsi
> -       bsf     %rdx, %rdx
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -# endif
> -       .p2align 4
> -L(CopyFrom1To16BytesTail):
> -       add     %rcx, %rsi
> -       bsf     %rdx, %rdx
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To32Bytes1):
> -       add     $16, %rsi
> -       add     $16, %rdi
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $16, %r8
> -# endif
> -L(CopyFrom1To16BytesTail1):
> -       bsf     %rdx, %rdx
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To32Bytes):
> -       bsf     %rdx, %rdx
> -       add     %rcx, %rsi
> -       add     $16, %rdx
> -       sub     %rcx, %rdx
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnaligned_0):
> -       bsf     %rdx, %rdx
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -# ifdef USE_AS_STPCPY
> -       lea     (%rdi, %rdx), %rax
> -# endif
> -       movdqu  %xmm4, (%rdi)
> -       add     $63, %r8
> -       sub     %rdx, %r8
> -       lea     1(%rdi, %rdx), %rdi
> -       jmp     L(StrncpyFillTailWithZero)
> -# else
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -# endif
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnaligned_16):
> -       bsf     %rcx, %rdx
> -       movdqu  %xmm4, (%rdi)
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -# ifdef USE_AS_STPCPY
> -       lea     16(%rdi, %rdx), %rax
> -# endif
> -       movdqu  %xmm5, 16(%rdi)
> -       add     $47, %r8
> -       sub     %rdx, %r8
> -       lea     17(%rdi, %rdx), %rdi
> -       jmp     L(StrncpyFillTailWithZero)
> -# else
> -       add     $16, %rsi
> -       add     $16, %rdi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -# endif
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnaligned_32):
> -       bsf     %rdx, %rdx
> -       movdqu  %xmm4, (%rdi)
> -       movdqu  %xmm5, 16(%rdi)
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -# ifdef USE_AS_STPCPY
> -       lea     32(%rdi, %rdx), %rax
> -# endif
> -       movdqu  %xmm6, 32(%rdi)
> -       add     $31, %r8
> -       sub     %rdx, %r8
> -       lea     33(%rdi, %rdx), %rdi
> -       jmp     L(StrncpyFillTailWithZero)
> -# else
> -       add     $32, %rsi
> -       add     $32, %rdi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -# endif
> -
> -# ifdef USE_AS_STRNCPY
> -#  ifndef USE_AS_STRCAT
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm6):
> -       movdqu  %xmm6, (%rdi, %rcx)
> -       jmp     L(CopyFrom1To16BytesXmmExit)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm5):
> -       movdqu  %xmm5, (%rdi, %rcx)
> -       jmp     L(CopyFrom1To16BytesXmmExit)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm4):
> -       movdqu  %xmm4, (%rdi, %rcx)
> -       jmp     L(CopyFrom1To16BytesXmmExit)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm3):
> -       movdqu  %xmm3, (%rdi, %rcx)
> -       jmp     L(CopyFrom1To16BytesXmmExit)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm1):
> -       movdqu  %xmm1, (%rdi, %rcx)
> -       jmp     L(CopyFrom1To16BytesXmmExit)
> -#  endif
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesExit):
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %rdx, 4)
> -
> -/* Case2 */
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesCase2):
> -       add     $16, %r8
> -       add     %rcx, %rdi
> -       add     %rcx, %rsi
> -       bsf     %rdx, %rdx
> -       cmp     %r8, %rdx
> -       jb      L(CopyFrom1To16BytesExit)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To32BytesCase2):
> -       add     %rcx, %rsi
> -       bsf     %rdx, %rdx
> -       add     $16, %rdx
> -       sub     %rcx, %rdx
> -       cmp     %r8, %rdx
> -       jb      L(CopyFrom1To16BytesExit)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -L(CopyFrom1To16BytesTailCase2):
> -       add     %rcx, %rsi
> -       bsf     %rdx, %rdx
> -       cmp     %r8, %rdx
> -       jb      L(CopyFrom1To16BytesExit)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -L(CopyFrom1To16BytesTail1Case2):
> -       bsf     %rdx, %rdx
> -       cmp     %r8, %rdx
> -       jb      L(CopyFrom1To16BytesExit)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -/* Case2 or Case3,  Case3 */
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesCase2OrCase3):
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesCase2)
> -L(CopyFrom1To16BytesCase3):
> -       add     $16, %r8
> -       add     %rcx, %rdi
> -       add     %rcx, %rsi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To32BytesCase2OrCase3):
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To32BytesCase2)
> -       add     %rcx, %rsi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesTailCase2OrCase3):
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesTailCase2)
> -       add     %rcx, %rsi
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -       .p2align 4
> -L(CopyFrom1To32Bytes1Case2OrCase3):
> -       add     $16, %rdi
> -       add     $16, %rsi
> -       sub     $16, %r8
> -L(CopyFrom1To16BytesTail1Case2OrCase3):
> -       test    %rdx, %rdx
> -       jnz     L(CopyFrom1To16BytesTail1Case2)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -# endif
> -
> -/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/
> -
> -       .p2align 4
> -L(Exit1):
> -       mov     %dh, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     (%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $1, %r8
> -       lea     1(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit2):
> -       mov     (%rsi), %dx
> -       mov     %dx, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     1(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $2, %r8
> -       lea     2(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit3):
> -       mov     (%rsi), %cx
> -       mov     %cx, (%rdi)
> -       mov     %dh, 2(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     2(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $3, %r8
> -       lea     3(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit4):
> -       mov     (%rsi), %edx
> -       mov     %edx, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     3(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $4, %r8
> -       lea     4(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit5):
> -       mov     (%rsi), %ecx
> -       mov     %dh, 4(%rdi)
> -       mov     %ecx, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     4(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $5, %r8
> -       lea     5(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit6):
> -       mov     (%rsi), %ecx
> -       mov     4(%rsi), %dx
> -       mov     %ecx, (%rdi)
> -       mov     %dx, 4(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     5(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $6, %r8
> -       lea     6(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit7):
> -       mov     (%rsi), %ecx
> -       mov     3(%rsi), %edx
> -       mov     %ecx, (%rdi)
> -       mov     %edx, 3(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     6(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $7, %r8
> -       lea     7(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit8):
> -       mov     (%rsi), %rdx
> -       mov     %rdx, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     7(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $8, %r8
> -       lea     8(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit9):
> -       mov     (%rsi), %rcx
> -       mov     %dh, 8(%rdi)
> -       mov     %rcx, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     8(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $9, %r8
> -       lea     9(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit10):
> -       mov     (%rsi), %rcx
> -       mov     8(%rsi), %dx
> -       mov     %rcx, (%rdi)
> -       mov     %dx, 8(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     9(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $10, %r8
> -       lea     10(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit11):
> -       mov     (%rsi), %rcx
> -       mov     7(%rsi), %edx
> -       mov     %rcx, (%rdi)
> -       mov     %edx, 7(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     10(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $11, %r8
> -       lea     11(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit12):
> -       mov     (%rsi), %rcx
> -       mov     8(%rsi), %edx
> -       mov     %rcx, (%rdi)
> -       mov     %edx, 8(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     11(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $12, %r8
> -       lea     12(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit13):
> -       mov     (%rsi), %rcx
> -       mov     5(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 5(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     12(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $13, %r8
> -       lea     13(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit14):
> -       mov     (%rsi), %rcx
> -       mov     6(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 6(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     13(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $14, %r8
> -       lea     14(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit15):
> -       mov     (%rsi), %rcx
> -       mov     7(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 7(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     14(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $15, %r8
> -       lea     15(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit16):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  %xmm0, (%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     15(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $16, %r8
> -       lea     16(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit17):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  %xmm0, (%rdi)
> -       mov     %dh, 16(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     16(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $17, %r8
> -       lea     17(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit18):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %cx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %cx, 16(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     17(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $18, %r8
> -       lea     18(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit19):
> -       movdqu  (%rsi), %xmm0
> -       mov     15(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 15(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     18(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $19, %r8
> -       lea     19(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit20):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 16(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     19(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $20, %r8
> -       lea     20(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit21):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 16(%rdi)
> -       mov     %dh, 20(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     20(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $21, %r8
> -       lea     21(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit22):
> -       movdqu  (%rsi), %xmm0
> -       mov     14(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 14(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     21(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $22, %r8
> -       lea     22(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit23):
> -       movdqu  (%rsi), %xmm0
> -       mov     15(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 15(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     22(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $23, %r8
> -       lea     23(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit24):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 16(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     23(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $24, %r8
> -       lea     24(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit25):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 16(%rdi)
> -       mov     %dh, 24(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     24(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $25, %r8
> -       lea     25(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit26):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     24(%rsi), %cx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %cx, 24(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     25(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $26, %r8
> -       lea     26(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit27):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     23(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %ecx, 23(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     26(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $27, %r8
> -       lea     27(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit28):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     24(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %ecx, 24(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     27(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $28, %r8
> -       lea     28(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit29):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  13(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 13(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     28(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $29, %r8
> -       lea     29(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit30):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  14(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 14(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     29(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $30, %r8
> -       lea     30(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit31):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  15(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 15(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     30(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $31, %r8
> -       lea     31(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -       .p2align 4
> -L(Exit32):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  16(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 16(%rdi)
> -# ifdef USE_AS_STPCPY
> -       lea     31(%rdi), %rax
> -# endif
> -# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
> -       sub     $32, %r8
> -       lea     32(%rdi), %rdi
> -       jnz     L(StrncpyFillTailWithZero)
> -# endif
> -       ret
> -
> -# ifdef USE_AS_STRNCPY
> -
> -       .p2align 4
> -L(StrncpyExit0):
> -#  ifdef USE_AS_STPCPY
> -       mov     %rdi, %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, (%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit1):
> -       mov     (%rsi), %dl
> -       mov     %dl, (%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     1(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 1(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit2):
> -       mov     (%rsi), %dx
> -       mov     %dx, (%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     2(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 2(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit3):
> -       mov     (%rsi), %cx
> -       mov     2(%rsi), %dl
> -       mov     %cx, (%rdi)
> -       mov     %dl, 2(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     3(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 3(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit4):
> -       mov     (%rsi), %edx
> -       mov     %edx, (%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     4(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 4(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit5):
> -       mov     (%rsi), %ecx
> -       mov     4(%rsi), %dl
> -       mov     %ecx, (%rdi)
> -       mov     %dl, 4(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     5(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 5(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit6):
> -       mov     (%rsi), %ecx
> -       mov     4(%rsi), %dx
> -       mov     %ecx, (%rdi)
> -       mov     %dx, 4(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     6(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 6(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit7):
> -       mov     (%rsi), %ecx
> -       mov     3(%rsi), %edx
> -       mov     %ecx, (%rdi)
> -       mov     %edx, 3(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     7(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 7(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit8):
> -       mov     (%rsi), %rdx
> -       mov     %rdx, (%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     8(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 8(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit9):
> -       mov     (%rsi), %rcx
> -       mov     8(%rsi), %dl
> -       mov     %rcx, (%rdi)
> -       mov     %dl, 8(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     9(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 9(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit10):
> -       mov     (%rsi), %rcx
> -       mov     8(%rsi), %dx
> -       mov     %rcx, (%rdi)
> -       mov     %dx, 8(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     10(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 10(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit11):
> -       mov     (%rsi), %rcx
> -       mov     7(%rsi), %edx
> -       mov     %rcx, (%rdi)
> -       mov     %edx, 7(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     11(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 11(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit12):
> -       mov     (%rsi), %rcx
> -       mov     8(%rsi), %edx
> -       mov     %rcx, (%rdi)
> -       mov     %edx, 8(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     12(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 12(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit13):
> -       mov     (%rsi), %rcx
> -       mov     5(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 5(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     13(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 13(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit14):
> -       mov     (%rsi), %rcx
> -       mov     6(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 6(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     14(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 14(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit15):
> -       mov     (%rsi), %rcx
> -       mov     7(%rsi), %rdx
> -       mov     %rcx, (%rdi)
> -       mov     %rdx, 7(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     15(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 15(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit16):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  %xmm0, (%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     16(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 16(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit17):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %cl
> -       movdqu  %xmm0, (%rdi)
> -       mov     %cl, 16(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     17(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 17(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit18):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %cx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %cx, 16(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     18(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 18(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit19):
> -       movdqu  (%rsi), %xmm0
> -       mov     15(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 15(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     19(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 19(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit20):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 16(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     20(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 20(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit21):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %ecx
> -       mov     20(%rsi), %dl
> -       movdqu  %xmm0, (%rdi)
> -       mov     %ecx, 16(%rdi)
> -       mov     %dl, 20(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     21(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 21(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit22):
> -       movdqu  (%rsi), %xmm0
> -       mov     14(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 14(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     22(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 22(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit23):
> -       movdqu  (%rsi), %xmm0
> -       mov     15(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 15(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     23(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 23(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit24):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rcx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rcx, 16(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     24(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 24(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit25):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     24(%rsi), %cl
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %cl, 24(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     25(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 25(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit26):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     24(%rsi), %cx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %cx, 24(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     26(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 26(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit27):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     23(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %ecx, 23(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     27(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 27(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit28):
> -       movdqu  (%rsi), %xmm0
> -       mov     16(%rsi), %rdx
> -       mov     24(%rsi), %ecx
> -       movdqu  %xmm0, (%rdi)
> -       mov     %rdx, 16(%rdi)
> -       mov     %ecx, 24(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     28(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 28(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit29):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  13(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 13(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     29(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 29(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit30):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  14(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 14(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     30(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 30(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit31):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  15(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 15(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     31(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 31(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit32):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  16(%rsi), %xmm2
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 16(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     32(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 32(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(StrncpyExit33):
> -       movdqu  (%rsi), %xmm0
> -       movdqu  16(%rsi), %xmm2
> -       mov     32(%rsi), %cl
> -       movdqu  %xmm0, (%rdi)
> -       movdqu  %xmm2, 16(%rdi)
> -       mov     %cl, 32(%rdi)
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 33(%rdi)
> -#  endif
> -       ret
> -
> -#  ifndef USE_AS_STRCAT
> -
> -       .p2align 4
> -L(Fill0):
> -       ret
> -
> -       .p2align 4
> -L(Fill1):
> -       mov     %dl, (%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill2):
> -       mov     %dx, (%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill3):
> -       mov     %edx, -1(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill4):
> -       mov     %edx, (%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill5):
> -       mov     %edx, (%rdi)
> -       mov     %dl, 4(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill6):
> -       mov     %edx, (%rdi)
> -       mov     %dx, 4(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill7):
> -       mov     %rdx, -1(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill8):
> -       mov     %rdx, (%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill9):
> -       mov     %rdx, (%rdi)
> -       mov     %dl, 8(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill10):
> -       mov     %rdx, (%rdi)
> -       mov     %dx, 8(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill11):
> -       mov     %rdx, (%rdi)
> -       mov     %edx, 7(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill12):
> -       mov     %rdx, (%rdi)
> -       mov     %edx, 8(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill13):
> -       mov     %rdx, (%rdi)
> -       mov     %rdx, 5(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill14):
> -       mov     %rdx, (%rdi)
> -       mov     %rdx, 6(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill15):
> -       movdqu  %xmm0, -1(%rdi)
> -       ret
> -
> -       .p2align 4
> -L(Fill16):
> -       movdqu  %xmm0, (%rdi)
> -       ret
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesUnalignedXmm2):
> -       movdqu  %xmm2, (%rdi, %rcx)
> -
> -       .p2align 4
> -L(CopyFrom1To16BytesXmmExit):
> -       bsf     %rdx, %rdx
> -       add     $15, %r8
> -       add     %rcx, %rdi
> -#   ifdef USE_AS_STPCPY
> -       lea     (%rdi, %rdx), %rax
> -#   endif
> -       sub     %rdx, %r8
> -       lea     1(%rdi, %rdx), %rdi
> -
> -       .p2align 4
> -L(StrncpyFillTailWithZero):
> -       pxor    %xmm0, %xmm0
> -       xor     %rdx, %rdx
> -       sub     $16, %r8
> -       jbe     L(StrncpyFillExit)
> -
> -       movdqu  %xmm0, (%rdi)
> -       add     $16, %rdi
> -
> -       mov     %rdi, %rsi
> -       and     $0xf, %rsi
> -       sub     %rsi, %rdi
> -       add     %rsi, %r8
> -       sub     $64, %r8
> -       jb      L(StrncpyFillLess64)
> -
> -L(StrncpyFillLoopMovdqa):
> -       movdqa  %xmm0, (%rdi)
> -       movdqa  %xmm0, 16(%rdi)
> -       movdqa  %xmm0, 32(%rdi)
> -       movdqa  %xmm0, 48(%rdi)
> -       add     $64, %rdi
> -       sub     $64, %r8
> -       jae     L(StrncpyFillLoopMovdqa)
> -
> -L(StrncpyFillLess64):
> -       add     $32, %r8
> -       jl      L(StrncpyFillLess32)
> -       movdqa  %xmm0, (%rdi)
> -       movdqa  %xmm0, 16(%rdi)
> -       add     $32, %rdi
> -       sub     $16, %r8
> -       jl      L(StrncpyFillExit)
> -       movdqa  %xmm0, (%rdi)
> -       add     $16, %rdi
> -       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> -
> -L(StrncpyFillLess32):
> -       add     $16, %r8
> -       jl      L(StrncpyFillExit)
> -       movdqa  %xmm0, (%rdi)
> -       add     $16, %rdi
> -       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> -
> -L(StrncpyFillExit):
> -       add     $16, %r8
> -       BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
> -
> -/* end of ifndef USE_AS_STRCAT */
> -#  endif
> -
> -       .p2align 4
> -L(UnalignedLeaveCase2OrCase3):
> -       test    %rdx, %rdx
> -       jnz     L(Unaligned64LeaveCase2)
> -L(Unaligned64LeaveCase3):
> -       lea     64(%r8), %rcx
> -       and     $-16, %rcx
> -       add     $48, %r8
> -       jl      L(CopyFrom1To16BytesCase3)
> -       movdqu  %xmm4, (%rdi)
> -       sub     $16, %r8
> -       jb      L(CopyFrom1To16BytesCase3)
> -       movdqu  %xmm5, 16(%rdi)
> -       sub     $16, %r8
> -       jb      L(CopyFrom1To16BytesCase3)
> -       movdqu  %xmm6, 32(%rdi)
> -       sub     $16, %r8
> -       jb      L(CopyFrom1To16BytesCase3)
> -       movdqu  %xmm7, 48(%rdi)
> -#  ifdef USE_AS_STPCPY
> -       lea     64(%rdi), %rax
> -#  endif
> -#  ifdef USE_AS_STRCAT
> -       xor     %ch, %ch
> -       movb    %ch, 64(%rdi)
> -#  endif
> -       ret
> -
> -       .p2align 4
> -L(Unaligned64LeaveCase2):
> -       xor     %rcx, %rcx
> -       pcmpeqb %xmm4, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       add     $48, %r8
> -       jle     L(CopyFrom1To16BytesCase2OrCase3)
> -       test    %rdx, %rdx
> -#  ifndef USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm4)
> -#  else
> -       jnz     L(CopyFrom1To16Bytes)
> -#  endif
> -       pcmpeqb %xmm5, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       movdqu  %xmm4, (%rdi)
> -       add     $16, %rcx
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -       test    %rdx, %rdx
> -#  ifndef USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm5)
> -#  else
> -       jnz     L(CopyFrom1To16Bytes)
> -#  endif
> -
> -       pcmpeqb %xmm6, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       movdqu  %xmm5, 16(%rdi)
> -       add     $16, %rcx
> -       sub     $16, %r8
> -       jbe     L(CopyFrom1To16BytesCase2OrCase3)
> -       test    %rdx, %rdx
> -#  ifndef USE_AS_STRCAT
> -       jnz     L(CopyFrom1To16BytesUnalignedXmm6)
> -#  else
> -       jnz     L(CopyFrom1To16Bytes)
> -#  endif
> -
> -       pcmpeqb %xmm7, %xmm0
> -       pmovmskb %xmm0, %rdx
> -       movdqu  %xmm6, 32(%rdi)
> -       lea     16(%rdi, %rcx), %rdi
> -       lea     16(%rsi, %rcx), %rsi
> -       bsf     %rdx, %rdx
> -       cmp     %r8, %rdx
> -       jb      L(CopyFrom1To16BytesExit)
> -       BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %r8, 4)
> -
> -       .p2align 4
> -L(ExitZero):
> -#  ifndef USE_AS_STRCAT
> -       mov     %rdi, %rax
> -#  endif
> -       ret
> -
> -# endif
> -
> -# ifndef USE_AS_STRCAT
> -END (STRCPY)
> -# else
> -END (STRCAT)
> -# endif
> -       .p2align 4
> -       .section .rodata
> -L(ExitTable):
> -       .int    JMPTBL(L(Exit1), L(ExitTable))
> -       .int    JMPTBL(L(Exit2), L(ExitTable))
> -       .int    JMPTBL(L(Exit3), L(ExitTable))
> -       .int    JMPTBL(L(Exit4), L(ExitTable))
> -       .int    JMPTBL(L(Exit5), L(ExitTable))
> -       .int    JMPTBL(L(Exit6), L(ExitTable))
> -       .int    JMPTBL(L(Exit7), L(ExitTable))
> -       .int    JMPTBL(L(Exit8), L(ExitTable))
> -       .int    JMPTBL(L(Exit9), L(ExitTable))
> -       .int    JMPTBL(L(Exit10), L(ExitTable))
> -       .int    JMPTBL(L(Exit11), L(ExitTable))
> -       .int    JMPTBL(L(Exit12), L(ExitTable))
> -       .int    JMPTBL(L(Exit13), L(ExitTable))
> -       .int    JMPTBL(L(Exit14), L(ExitTable))
> -       .int    JMPTBL(L(Exit15), L(ExitTable))
> -       .int    JMPTBL(L(Exit16), L(ExitTable))
> -       .int    JMPTBL(L(Exit17), L(ExitTable))
> -       .int    JMPTBL(L(Exit18), L(ExitTable))
> -       .int    JMPTBL(L(Exit19), L(ExitTable))
> -       .int    JMPTBL(L(Exit20), L(ExitTable))
> -       .int    JMPTBL(L(Exit21), L(ExitTable))
> -       .int    JMPTBL(L(Exit22), L(ExitTable))
> -       .int    JMPTBL(L(Exit23), L(ExitTable))
> -       .int    JMPTBL(L(Exit24), L(ExitTable))
> -       .int    JMPTBL(L(Exit25), L(ExitTable))
> -       .int    JMPTBL(L(Exit26), L(ExitTable))
> -       .int    JMPTBL(L(Exit27), L(ExitTable))
> -       .int    JMPTBL(L(Exit28), L(ExitTable))
> -       .int    JMPTBL(L(Exit29), L(ExitTable))
> -       .int    JMPTBL(L(Exit30), L(ExitTable))
> -       .int    JMPTBL(L(Exit31), L(ExitTable))
> -       .int    JMPTBL(L(Exit32), L(ExitTable))
> -# ifdef USE_AS_STRNCPY
> -L(ExitStrncpyTable):
> -       .int    JMPTBL(L(StrncpyExit0), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
> -       .int    JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
> -#  ifndef USE_AS_STRCAT
> -       .p2align 4
> -L(FillTable):
> -       .int    JMPTBL(L(Fill0), L(FillTable))
> -       .int    JMPTBL(L(Fill1), L(FillTable))
> -       .int    JMPTBL(L(Fill2), L(FillTable))
> -       .int    JMPTBL(L(Fill3), L(FillTable))
> -       .int    JMPTBL(L(Fill4), L(FillTable))
> -       .int    JMPTBL(L(Fill5), L(FillTable))
> -       .int    JMPTBL(L(Fill6), L(FillTable))
> -       .int    JMPTBL(L(Fill7), L(FillTable))
> -       .int    JMPTBL(L(Fill8), L(FillTable))
> -       .int    JMPTBL(L(Fill9), L(FillTable))
> -       .int    JMPTBL(L(Fill10), L(FillTable))
> -       .int    JMPTBL(L(Fill11), L(FillTable))
> -       .int    JMPTBL(L(Fill12), L(FillTable))
> -       .int    JMPTBL(L(Fill13), L(FillTable))
> -       .int    JMPTBL(L(Fill14), L(FillTable))
> -       .int    JMPTBL(L(Fill15), L(FillTable))
> -       .int    JMPTBL(L(Fill16), L(FillTable))
> -#  endif
> -# endif
> -#endif
> diff --git a/sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S
> index fcc23a7..43c1f97 100644
> --- a/sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S
> +++ b/sysdeps/x86_64/multiarch/strncpy-sse2-unaligned.S
> @@ -1,3 +1,3 @@
>  #define USE_AS_STRNCPY
>  #define STRCPY __strncpy_sse2_unaligned
> -#include "strcpy-sse2-unaligned.S"
> +#include "strcpy-sse2-unaligned-old.S"
> --
> 1.8.3.2
>


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]