This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch ldmitrie/intel_mpx updated. glibc-2.18-87-ga6d6ba9
- From: ldmitrie at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 4 Sep 2013 09:01:21 -0000
- Subject: GNU C Library master sources branch ldmitrie/intel_mpx updated. glibc-2.18-87-ga6d6ba9
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, ldmitrie/intel_mpx has been updated
via a6d6ba90509ca637ff5e24e167f480e60b9a09c9 (commit)
via e745179c54860d69ef8cb7f516d0f256b502f89d (commit)
via c1057ab824007e18036efff12d9f080d16a37f2c (commit)
via 5ea96e116e1a6bc7039d4db564fad35ff4a692a9 (commit)
via 2f16fa64a350b1977034d27f8546c2b4c822d035 (commit)
via 774ecb15b08e64ff458e924007ded47de6ec87fd (commit)
via 87372602ca6c23df5784f3f200131e862a236eec (commit)
via 53b31dab5b9733bde88d1d2bc7c2a29182af72e2 (commit)
via cccbfcfeed3c5da5c2be2110fc43ee3accd41c55 (commit)
via ba4e1c2b51e9ea57f00f5082353793f23af85747 (commit)
via 9bbe2c25421b4f3b06c6053d98ed8a4379aa6fb4 (commit)
via bc2adc60820301b8780d021795e86030033d99d6 (commit)
via 9abc3d74ab5958f675378c35f325e1511d2c1c15 (commit)
via 8b1885f652bc90514b36117c07312415fd7b09a7 (commit)
via 2c42c71e641f5b841d222dfd3e4a2ba1d8131ea7 (commit)
via 5c77173eec2808c6940abb2bddd57551dec6ccfe (commit)
via abfb12dc5f16cca6fc52083814710a6111e9f6a0 (commit)
via 4a9acc37ff2f154d0c3957af830242bac5bda851 (commit)
via af1d2d1ffb534abeadb2a82365f0b6ef6fc96e3a (commit)
via fcad0ab9dc3deded2cfd8e7bc467e204d8b55a63 (commit)
via 0a6a6f8037cc93a6d165d925d6c029fe42998acc (commit)
via 01d45f7e37130cb47d0ef788283e99bc07564f34 (commit)
via 00ae469c06aeed1e7bd988875d241cc5a6339d01 (commit)
via 4cd77a6b091db5450ec634eeaeab8e36ea3bb1dd (commit)
via 5d92ac866acfc532a4d49af0b1c2b69c260c0ce3 (commit)
via 022ff6731b1e0938048df6904d40e9bf876625b1 (commit)
via 558bf1c0479495b1b7759bbe58b5f0b455fe7b46 (commit)
via 152d00ad923c702a8753499421baba027d0beffc (commit)
via 3e938a73037cfad920442148c39208d321248485 (commit)
via b7835e3223de60d3f5bf1fed66468b5052cd595f (commit)
via 8220f4f00803cfb26520a8bf2ca493493dde4f60 (commit)
via b0350db9747d602158377af3cf8ceb70764ea840 (commit)
via 21bfcecf7a4ac3c150430a1a312268781c145414 (commit)
via f24a6d086b96a65a73c68ecf349b03321cb03720 (commit)
via 382466e04e347d3b3fa221897efb8f4cdc239105 (commit)
via 6b06ac56cdfc9293908724e51e827534e97819aa (commit)
via 2601bc1860c33653f2d402b871f1211d460aa350 (commit)
via 0186c6e97ecd58e91a45cbb74e5643325777fd6f (commit)
via 99541e80777c82fb0cea14944209877ef961aa9f (commit)
via d4e16af1aea18abe082bd7bf213b85b0c432d304 (commit)
via 883359805325a82f4f291ff85624f141f6c93636 (commit)
via ed479b02c2b04291d07708baf50bc6d2929f8b94 (commit)
via d821f07c9e9575b47df87cc38f26c31e09b3ce0d (commit)
via c54290cf086940e3859b236d0fb880bacab2288b (commit)
via 0007fc9bdd1d9efcd52d07837f2cd085b5a8f58b (commit)
via f1cc4c8654b6bc431273286d3562942c50975caf (commit)
via 6f65e668957a50e153cba6822f747d13ad1b40ad (commit)
via d0721e703d222c01a9e8c329311c4fb01dac6972 (commit)
via 3a3092398356c90788479fa75524a491e9d860cd (commit)
via 3b813b29653a3ebfd78c2529127a9967d8f2857d (commit)
via 7f9d00341057eb80b43fa83956b8d7798b4dabea (commit)
via fb431262c12a8e2630225518300a4e1e0c4e918b (commit)
via bd81123a8ba99ac30bcf227be5103cf2863e27cb (commit)
via c4e42566cfecc43f9ef3ddf9c7af57c6744e2ceb (commit)
via f890a59b10b6a4b8723b86545e8b38ccc690021c (commit)
via 5d1f0a09f3634b29d9cbbd7c7337d546502363cb (commit)
via 6fcf28b352c435ee3d1a975619c5ae31923c44eb (commit)
via e041fb8b6557882b6710a655a97bbf3541b56b54 (commit)
via 8cc3269f95fa7faa8f448d741f68cbc40efbf4ee (commit)
via 45b8acccaf43ec06d31413c75a8f1737ae3ff0e2 (commit)
via 5d228a436a8257f082e84671bf8c89b79a2c3853 (commit)
via 53234cc3a368c1b61be017d87b71cfe2affbba68 (commit)
via 59d112a4c7b6bd3ab220abbb68d8127f316a109c (commit)
via 595aba70a4c676f7efaf6a012f54cd22aa189c5b (commit)
via ffabd3937aea799d2779e8b1b0a5385eebadd452 (commit)
via e0a69f5cb8147bc9d746de14ec66d060c583d863 (commit)
via 8c12f01d07fd6a0eea9f012b5547abc6b0fc8b91 (commit)
via 8fe89494e6516048759425ec30d8878a6233e00f (commit)
via 936241e4b2ec90bbb97d1b37bc78726372ec966f (commit)
via 75b181d6154a5fc71f5701522db6e14075d96f0c (commit)
via b72ca61b71abd3e2d5b6cdb0680d7179f95be222 (commit)
via d1141ff6c875bc53c5ef6cd62b1bbfe91bdccd21 (commit)
via acd06bb11f6d6436e15d0c7608fc7ea6008c224f (commit)
via c0c3f78afb6070721848574e2e5dff5cfa20e28d (commit)
via 46ed103824ff42668ddfc36c1b3fdb9219d48eee (commit)
via d400dcac5e66047f86291d1a4b90fffb6327dc43 (commit)
via c980f2f4fe0f5d301f706017a1f7e4e942193ec0 (commit)
via 0716c4fc9fff70870e28f84d18ddbce1db162101 (commit)
via 42c8fdd8f1d3245a0abf957fc9f0a6eb35145643 (commit)
via b31469d0d40ac858687657c72baaafd7a63041d9 (commit)
via 4e39af6eca18a984c346cb3396940a9dcbed02ab (commit)
via f315524e034cfc644157cb4af5ecc99f645dd067 (commit)
via 1326ba1af22068db9488c2328bdaf852b8a93dcf (commit)
via 3f8b479fe0d50e8c7d759b71d1db69d6f0aec0c0 (commit)
via 91ce40854d0b7f865cf5024ef95a8026b76096f3 (commit)
via ca0a6bc4c5c53aa6c4a735c36336408a06b8cd89 (commit)
via 6c1fd795711bb510cffaab5ad2ab2739bb8db210 (commit)
via eefa3be8e4c2c721a9f277d8ea2e11180231829f (commit)
via 85891acadf1b5207df93c9078b0d1ff5b28838de (commit)
via 31ad488dff6354bbe0aef38eeca682c2ac5336b7 (commit)
via db24e6842d7bfca7cdd7a01e205282eb0b8e0aa5 (commit)
via f723285b1390a6949d93a2781e97d5167457a1ed (commit)
via 30bbc0ccfa0a6526519a752960acd1a04660711b (commit)
via 3cbc52dbbc382f6d88ae4b267bad69f3408a1155 (commit)
via f1a7f492590214ec8321b7158e8992943b110a60 (commit)
via 90e62a4aae5d9adebb56c2d7575d0437566aeb4d (commit)
via d492e6d080919d470d22f6dbe33758b1c78169ea (commit)
via 641aa7b45991b6564a8fa825c681ad6ad1c7721f (commit)
via d4baa62d45cf6c2cf18c902e0b43a3a90e72f25b (commit)
via f8b4877a75765b14432a6f83ead11dcecc5b1985 (commit)
via 61a9dcb9d6c719cd0ec2b8fd871701e4ae69919e (commit)
via be063fa4580d9529e72c014d9510a9dedc6c39f8 (commit)
from cb821cb1756f9948d6232f49edfaa7ee01225d00 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a6d6ba90509ca637ff5e24e167f480e60b9a09c9
commit a6d6ba90509ca637ff5e24e167f480e60b9a09c9
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Fri Aug 30 18:37:28 2013 +0400
Implemented bound check support for string/memory routines for x86_64.
TODO: Fix bound check support in strcmp-sse2 and implement in strspn, strstr and strcspn.
diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile
index 08db331..db6838d 100644
--- a/sysdeps/x86_64/Makefile
+++ b/sysdeps/x86_64/Makefile
@@ -18,6 +18,9 @@ endif
ifeq ($(subdir),string)
sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
gen-as-const-headers += locale-defines.sym
+ifeq ($(enable-mpx), yes)
+sysdep_routines += strcpy_chk-c stpcpy_chk-c
+endif
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/x86_64/Versions b/sysdeps/x86_64/Versions
index a437f85..083770a 100644
--- a/sysdeps/x86_64/Versions
+++ b/sysdeps/x86_64/Versions
@@ -2,6 +2,13 @@ libc {
GLIBC_2.14 {
memcpy;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ mpx_memset_nobnd;
+ mpx_memset_nochk;
+ mpx_memset_nobnd_nochk;
+ }
+%endif
}
libm {
GLIBC_2.1 {
diff --git a/sysdeps/x86_64/memchr.S b/sysdeps/x86_64/memchr.S
index 891ee70..205345b 100644
--- a/sysdeps/x86_64/memchr.S
+++ b/sysdeps/x86_64/memchr.S
@@ -20,8 +20,17 @@
/* fast SSE2 version with using pmaxub and 64 byte loop */
+# ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
.text
ENTRY(memchr)
+
movd %rsi, %xmm1
mov %rdi, %rcx
@@ -33,6 +42,10 @@ ENTRY(memchr)
and $63, %rcx
pshufd $0, %xmm1, %xmm1
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmp $48, %rcx
ja L(crosscache)
@@ -72,7 +85,7 @@ L(crosscache):
jbe L(return_null)
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -85,24 +98,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -116,24 +141,36 @@ L(loop_prolog):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -151,6 +188,9 @@ L(loop_prolog):
L(align64_loop):
sub $64, %rdx
jbe L(exit_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -192,25 +232,34 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(exit_loop):
add $32, %rdx
jle L(exit_loop_32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -219,6 +268,9 @@ L(exit_loop):
sub $16, %rdx
jle L(return_null)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
pcmpeqb 48(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -229,6 +281,9 @@ L(exit_loop):
.p2align 4
L(exit_loop_32):
add $32, %rdx
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
@@ -237,6 +292,9 @@ L(exit_loop_32):
sub $16, %rdx
jbe L(return_null)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
pcmpeqb 16(%rdi), %xmm1
pmovmskb %xmm1, %eax
test %eax, %eax
@@ -248,25 +306,25 @@ L(exit_loop_32):
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches_1):
@@ -274,7 +332,7 @@ L(matches_1):
sub %rax, %rdx
jbe L(return_null)
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16_1):
@@ -282,7 +340,7 @@ L(matches16_1):
sub %rax, %rdx
jbe L(return_null)
lea 16(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches32_1):
@@ -290,7 +348,7 @@ L(matches32_1):
sub %rax, %rdx
jbe L(return_null)
lea 32(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches48_1):
@@ -298,7 +356,7 @@ L(matches48_1):
sub %rax, %rdx
jbe L(return_null)
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
diff --git a/sysdeps/x86_64/memcmp.S b/sysdeps/x86_64/memcmp.S
index d5c072c..77a7bca 100644
--- a/sysdeps/x86_64/memcmp.S
+++ b/sysdeps/x86_64/memcmp.S
@@ -23,6 +23,11 @@
ENTRY (memcmp)
test %rdx, %rdx
jz L(finz)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+ pxor %xmm0, %xmm0
cmpq $1, %rdx
jle L(finr1b)
subq %rdi, %rsi
@@ -86,6 +91,10 @@ L(s16b):
.p2align 4,, 4
L(finr1b):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
movzbl (%rdi), %eax
movzbl (%rsi), %edx
L(finz1):
@@ -132,6 +141,10 @@ L(gt32):
andq $15, %r8
jz L(16am)
/* Both pointers may be misaligned. */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi), %xmm1
movdqu (%rdi, %rsi), %xmm0
pcmpeqb %xmm0, %xmm1
@@ -146,6 +159,10 @@ L(16am):
jz L(ATR)
testq $16, %rdi
jz L(A32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi, %rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -160,6 +177,10 @@ L(A32):
/* Pre-unroll to be ready for unrolled 64B loop. */
testq $32, %rdi
jz L(A64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -167,6 +188,10 @@ L(A32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -181,6 +206,10 @@ L(A64):
jge L(mt32)
L(A64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -188,6 +217,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -195,6 +228,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -202,6 +239,10 @@ L(A64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -219,6 +260,10 @@ L(mt32):
jge L(mt16)
L(A32main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -226,6 +271,10 @@ L(A32main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqu (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -258,6 +307,10 @@ L(ATR):
testq $16, %rdi
jz L(ATR32)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -273,6 +326,10 @@ L(ATR32):
testq $32, %rdi
jz L(ATR64)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -280,6 +337,10 @@ L(ATR32):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -292,6 +353,10 @@ L(ATR64):
je L(mt32)
L(ATR64main):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -299,6 +364,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -306,6 +375,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -313,6 +386,10 @@ L(ATR64main):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -328,6 +405,10 @@ L(ATR64main):
jge L(mt16)
L(ATR32res):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
@@ -335,6 +416,10 @@ L(ATR32res):
jnz L(neq)
addq $16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rdi, %rsi), %bnd1
+#endif
movdqa (%rdi,%rsi), %xmm0
pcmpeqb (%rdi), %xmm0
pmovmskb %xmm0, %edx
diff --git a/sysdeps/x86_64/memrchr.S b/sysdeps/x86_64/memrchr.S
index 5a659fe..3afa97c 100644
--- a/sysdeps/x86_64/memrchr.S
+++ b/sysdeps/x86_64/memrchr.S
@@ -27,6 +27,11 @@ ENTRY (memrchr)
sub $16, %rdx
jbe L(length_less16)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -284,6 +289,10 @@ L(length_less16_offset0):
test %edx, %edx
jz L(return_null)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %dl, %cl
pcmpeqb (%rdi), %xmm1
@@ -314,6 +323,10 @@ L(length_less16):
and $15, %rcx
jz L(length_less16_offset0)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+#endif
mov %rdi, %rcx
and $15, %rcx
mov %cl, %dh
diff --git a/sysdeps/x86_64/memset.S b/sysdeps/x86_64/memset.S
index 6c69f4b..4e1bb84 100644
--- a/sysdeps/x86_64/memset.S
+++ b/sysdeps/x86_64/memset.S
@@ -26,6 +26,15 @@
.text
#if !defined NOT_IN_libc
ENTRY(__bzero)
+ testq %rsi, %rsi
+ jz L(only_return)
+
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rsi), %bnd0
+# endif
+
+ mov %rdi, %rax
movq %rdi, %rax /* Set return value. */
movq %rsi, %rdx /* Set n. */
pxor %xmm8, %xmm8
@@ -53,7 +62,20 @@ ENTRY_CHK (__memset_chk)
END_CHK (__memset_chk)
#endif
+#ifdef __CHKP__
+ENTRY (mpx_memset_nochk)
+ jmp L(entry_from_mpx_memset_nochk)
+END (mpx_memset_nochk)
+#endif
+
ENTRY (memset)
+ testq %rdx, %rdx
+ jz L(only_return)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+L(entry_from_mpx_memset_nochk):
+#endif
movd %esi, %xmm8
movq %rdi, %rax
punpcklbw %xmm8, %xmm8
@@ -71,6 +93,9 @@ L(entry_from_bzero):
L(return):
rep
ret
+L(only_return):
+ movq %rdi, %rax
+ ret
ALIGN (4)
L(between_32_64_bytes):
movdqu %xmm8, 16(%rdi)
@@ -129,6 +154,11 @@ L(between8_16bytes):
END (memset)
libc_hidden_builtin_def (memset)
+#ifdef __CHKP__
+weak_alias (memset, mpx_memset_nobnd)
+weak_alias (mpx_memset_nochk, mpx_memset_nobnd_nochk)
+#endif
+
#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
.section .gnu.warning.__memset_zero_constant_len_parameter
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 203d16e..490950e 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -26,6 +26,15 @@ CFLAGS-strstr.c += -msse4
CFLAGS-strcasestr.c += -msse4
CFLAGS-strcasestr-nonascii.c += -msse4
endif
+
+ifeq ($(enable-mpx), yes)
+sysdep_routines += memcpy-ssse3-back-1 mempcpy-ssse3-back-1 memmove-ssse3-back-1 \
+ memcpy-c memmove-c mempcpy-c memcpy_chk-c mempcpy_chk-c memmove_chk-c
+ASFLAGS-memcpy-ssse3-back-1.S += -fno-mpx
+ASFLAGS-mempcpy-ssse3-back-1.S += -fno-mpx
+ASFLAGS-memmove-ssse3-back-1.S += -fno-mpx
+endif
+
endif
ifeq ($(subdir),wcsmbs)
diff --git a/sysdeps/x86_64/multiarch/Versions b/sysdeps/x86_64/multiarch/Versions
index 59b185a..fa1cf0a 100644
--- a/sysdeps/x86_64/multiarch/Versions
+++ b/sysdeps/x86_64/multiarch/Versions
@@ -2,4 +2,17 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.17 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ mpx_memcpy_nobnd_nochk;
+ mpx_memmove_nobnd_nochk;
+ mpx_mempcpy_nobnd_nochk;
+ mpx_memcpy_nochk;
+ mpx_memmove_nochk;
+ mpx_mempcpy_nochk;
+ }
+%endif
}
diff --git a/sysdeps/x86_64/multiarch/bcopy.S b/sysdeps/x86_64/multiarch/bcopy.S
index 639f02b..9809d47 100644
--- a/sysdeps/x86_64/multiarch/bcopy.S
+++ b/sysdeps/x86_64/multiarch/bcopy.S
@@ -3,5 +3,10 @@
.text
ENTRY(bcopy)
xchg %rdi, %rsi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
jmp __libc_memmove /* Branch to IFUNC memmove. */
END(bcopy)
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index d0992e1..e3a4163 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -44,6 +44,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, HAS_SSSE3, __memcmp_ssse3)
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_sse2))
+#ifndef __CHKP__
+ /* We use specific version for MPX glibc */
/* Support sysdeps/x86_64/multiarch/memmove_chk.S. */
IFUNC_IMPL (i, name, __memmove_chk,
IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
@@ -60,6 +62,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
__memmove_ssse3)
IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/stpncpy.S. */
IFUNC_IMPL (i, name, stpncpy,
@@ -207,6 +210,8 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, wmemcmp, 1, __wmemcmp_sse2))
#ifdef SHARED
+#ifndef __CHKP__
+ /* We use specific version of memcpy, memcpy_chk, mempcpy if Intel MPX is enabled. */
/* Support sysdeps/x86_64/multiarch/memcpy_chk.S. */
IFUNC_IMPL (i, name, __memcpy_chk,
IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
@@ -240,6 +245,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
__mempcpy_ssse3)
IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_sse2))
+#endif
/* Support sysdeps/x86_64/multiarch/strncmp.S. */
IFUNC_IMPL (i, name, strncmp,
diff --git a/sysdeps/x86_64/multiarch/memcmp-sse4.S b/sysdeps/x86_64/multiarch/memcmp-sse4.S
index 1ed4200..b5c6675 100644
--- a/sysdeps/x86_64/multiarch/memcmp-sse4.S
+++ b/sysdeps/x86_64/multiarch/memcmp-sse4.S
@@ -48,6 +48,13 @@ ENTRY (MEMCMP)
# ifdef USE_AS_WMEMCMP
shl $2, %rdx
# endif
+# ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+L(NoEntryCheck):
+# endif
pxor %xmm0, %xmm0
cmp $79, %rdx
ja L(79bytesormore)
@@ -70,6 +77,10 @@ L(firstbyte):
ALIGN (4)
L(79bytesormore):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rsi), %xmm1
movdqu (%rdi), %xmm2
pxor %xmm1, %xmm2
@@ -90,21 +101,37 @@ L(79bytesormore):
L(less128bytes):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -112,11 +139,19 @@ L(less128bytes):
cmp $32, %rdx
jb L(less32bytesin64)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -139,41 +174,73 @@ L(128bytesormore):
L(less256bytes):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -188,11 +255,19 @@ L(less256bytes):
cmp $32, %rdx
jb L(less32bytesin128)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -207,81 +282,145 @@ L(less32bytesin128):
L(less512bytes):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqu 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqu 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqu 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqu 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqu 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqu 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqu 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqu 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqu 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqu 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqu 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqu 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -299,11 +438,19 @@ L(less512bytes):
cmp $32, %rdx
jb L(less32bytesin256)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -331,18 +478,34 @@ L(512bytesormore):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loop):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -365,18 +528,34 @@ L(L2_L3_cache_unaglined):
L(L2_L3_unaligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqu 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqu 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -403,21 +582,37 @@ L(2aligned):
L(less128bytesin2aligned):
sub $64, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -425,11 +620,19 @@ L(less128bytesin2aligned):
cmp $32, %rdx
jb L(less32bytesin64in2alinged)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -453,41 +656,73 @@ L(128bytesormorein2aligned):
L(less256bytesin2alinged):
sub $128, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -502,11 +737,19 @@ L(less256bytesin2alinged):
cmp $32, %rdx
jb L(less32bytesin128in2aligned)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqu (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqu 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -523,81 +766,145 @@ L(less32bytesin128in2aligned):
L(256bytesormorein2aligned):
sub $256, %rdx
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(16bytesin256)
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm2
pxor 16(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(32bytesin256)
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm2
pxor 32(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(48bytesin256)
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm2
pxor 48(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(64bytesin256)
+# ifdef __CHKP__
+ bndcu 64(%rdi), %bnd0
+ bndcu 64(%rsi), %bnd1
+# endif
movdqa 64(%rdi), %xmm2
pxor 64(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(80bytesin256)
+# ifdef __CHKP__
+ bndcu 80(%rdi), %bnd0
+ bndcu 80(%rsi), %bnd1
+# endif
movdqa 80(%rdi), %xmm2
pxor 80(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(96bytesin256)
+# ifdef __CHKP__
+ bndcu 96(%rdi), %bnd0
+ bndcu 96(%rsi), %bnd1
+# endif
movdqa 96(%rdi), %xmm2
pxor 96(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(112bytesin256)
+# ifdef __CHKP__
+ bndcu 112(%rdi), %bnd0
+ bndcu 112(%rsi), %bnd1
+# endif
movdqa 112(%rdi), %xmm2
pxor 112(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(128bytesin256)
+# ifdef __CHKP__
+ bndcu 128(%rdi), %bnd0
+ bndcu 128(%rsi), %bnd1
+# endif
movdqa 128(%rdi), %xmm2
pxor 128(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(144bytesin256)
+# ifdef __CHKP__
+ bndcu 144(%rdi), %bnd0
+ bndcu 144(%rsi), %bnd1
+# endif
movdqa 144(%rdi), %xmm2
pxor 144(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(160bytesin256)
+# ifdef __CHKP__
+ bndcu 160(%rdi), %bnd0
+ bndcu 160(%rsi), %bnd1
+# endif
movdqa 160(%rdi), %xmm2
pxor 160(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(176bytesin256)
+# ifdef __CHKP__
+ bndcu 176(%rdi), %bnd0
+ bndcu 176(%rsi), %bnd1
+# endif
movdqa 176(%rdi), %xmm2
pxor 176(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(192bytesin256)
+# ifdef __CHKP__
+ bndcu 192(%rdi), %bnd0
+ bndcu 192(%rsi), %bnd1
+# endif
movdqa 192(%rdi), %xmm2
pxor 192(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(208bytesin256)
+# ifdef __CHKP__
+ bndcu 208(%rdi), %bnd0
+ bndcu 208(%rsi), %bnd1
+# endif
movdqa 208(%rdi), %xmm2
pxor 208(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(224bytesin256)
+# ifdef __CHKP__
+ bndcu 224(%rdi), %bnd0
+ bndcu 224(%rsi), %bnd1
+# endif
movdqa 224(%rdi), %xmm2
pxor 224(%rsi), %xmm2
ptest %xmm2, %xmm0
jnc L(240bytesin256)
+# ifdef __CHKP__
+ bndcu 240(%rdi), %bnd0
+ bndcu 240(%rsi), %bnd1
+# endif
movdqa 240(%rdi), %xmm2
pxor 240(%rsi), %xmm2
ptest %xmm2, %xmm0
@@ -648,18 +955,34 @@ L(512bytesormorein2aligned):
sub $64, %rdx
ALIGN (4)
L(64bytesormore_loopin2aligned):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
@@ -682,18 +1005,34 @@ L(L2_L3_cache_aglined):
L(L2_L3_aligned_128bytes_loop):
prefetchnta 0x1c0(%rdi)
prefetchnta 0x1c0(%rsi)
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+# endif
movdqa (%rdi), %xmm2
pxor (%rsi), %xmm2
movdqa %xmm2, %xmm1
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+ bndcu 16(%rsi), %bnd1
+# endif
movdqa 16(%rdi), %xmm3
pxor 16(%rsi), %xmm3
por %xmm3, %xmm1
+# ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+ bndcu 32(%rsi), %bnd1
+# endif
movdqa 32(%rdi), %xmm4
pxor 32(%rsi), %xmm4
por %xmm4, %xmm1
+# ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+ bndcu 48(%rsi), %bnd1
+# endif
movdqa 48(%rdi), %xmm5
pxor 48(%rsi), %xmm5
por %xmm5, %xmm1
diff --git a/sysdeps/x86_64/multiarch/memcpy-c.c b/sysdeps/x86_64/multiarch/memcpy-c.c
new file mode 100644
index 0000000..7076d4a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-c.c
@@ -0,0 +1,70 @@
+/* C version of memcpy for use when Intel MPX is enabled,
+ in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+
+# if defined SHARED && !defined NOT_IN_libc && !defined IA32
+# include <shlib-compat.h>
+versioned_symbol (libc, __memcpy, memcpy, GLIBC_2_14);
+# else
+weak_alias (__memcpy, memcpy)
+# endif
+
+weak_alias (__memcpy, mpx_memcpy_nochk)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
new file mode 100644
index 0000000..e0c179a
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
@@ -0,0 +1,4 @@
+#ifdef __CHKP__
+# define MEMCPY mpx_memcpy_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
index fc9fcef..5731b9d 100644
--- a/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
@@ -27,7 +27,11 @@
#include "asm-syntax.h"
#ifndef MEMCPY
-# define MEMCPY __memcpy_ssse3_back
+# ifdef __CHKP__
+# define MEMCPY mpx_memcpy_nobnd
+# else
+# define MEMCPY __memcpy_ssse3_back
+# endif
# define MEMCPY_CHK __memcpy_chk_ssse3_back
#endif
@@ -48,7 +52,7 @@
ud2
.section .text.ssse3,"ax",@progbits
-#if !defined USE_AS_BCOPY
+#if !defined USE_AS_BCOPY && defined MEMCPY_CHK
ENTRY (MEMCPY_CHK)
cmpq %rdx, %rcx
jb HIDDEN_JUMPTARGET (__chk_fail)
@@ -56,6 +60,15 @@ END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
+#ifdef __CHKP__
+ testq %rdx, %rdx
+ jz L(NoEntryCheck)
+ bndcl (%rdi), %bnd0
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
+
mov %rdi, %rax
#ifdef USE_AS_MEMPCPY
add %rdx, %rax
@@ -87,6 +100,15 @@ L(bk_write):
BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
+#ifdef __CHKP__
+L(NoEntryCheck):
+ mov %rdi, %rax
+# ifdef USE_AS_MEMPCPY
+ add %rdx, %rax
+# endif
+ ret
+#endif
+
ALIGN (4)
L(144bytesormore):
diff --git a/sysdeps/x86_64/multiarch/memcpy.S b/sysdeps/x86_64/multiarch/memcpy.S
index a1e5031..34987b8 100644
--- a/sysdeps/x86_64/multiarch/memcpy.S
+++ b/sysdeps/x86_64/multiarch/memcpy.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <shlib-compat.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <shlib-compat.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need memcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
.text
ENTRY(__new_memcpy)
.type __new_memcpy, @gnu_indirect_function
@@ -43,37 +44,39 @@ ENTRY(__new_memcpy)
3: ret
END(__new_memcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __memcpy_sse2, @function; \
.globl __memcpy_sse2; \
.hidden __memcpy_sse2; \
.p2align 4; \
__memcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __memcpy_chk_sse2, @function; \
.globl __memcpy_chk_sse2; \
.p2align 4; \
__memcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
-# undef libc_hidden_builtin_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
versioned_symbol (libc, __new_memcpy, memcpy, GLIBC_2_14);
-#endif
+# endif
+
+# include "../memcpy.S"
-#include "../memcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk-c.c b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
new file mode 100644
index 0000000..3bca281
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/memcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/multiarch/memcpy_chk.S b/sysdeps/x86_64/multiarch/memcpy_chk.S
index ad01d8c..5b03f20 100644
--- a/sysdeps/x86_64/multiarch/memcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/memcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch memcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__memcpy_chk)
.type __memcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__memcpy_chk)
leaq __memcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__memcpy_chk)
-# else
-# include "../memcpy_chk.S"
+# else
+# include "../memcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-c.c b/sysdeps/x86_64/multiarch/memmove-c.c
new file mode 100644
index 0000000..63d779e
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-c.c
@@ -0,0 +1,108 @@
+/* C version of memmove for use when Intel MPX is enabled,
+ in order to process a buffer of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ /* forward copying */
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+ offset_src = (offset_src + (size_t)src) & (sizeof(size_t) - 1);
+ /* backward copying */
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ /* forward copying */
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __libc_memmove)
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
+
+# if defined SHARED && !defined NOT_IN_libc
+# include <shlib-compat.h>
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
+# endif
+
+weak_alias (__memmove, mpx_memmove_nochk)
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
new file mode 100644
index 0000000..45a8209
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
@@ -0,0 +1,5 @@
+#ifdef __CHKP__
+# define USE_AS_MEMMOVE
+# define MEMCPY mpx_memmove_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
index f9a4e9a..53e90e7 100644
--- a/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/memmove-ssse3-back.S
@@ -1,4 +1,10 @@
#define USE_AS_MEMMOVE
-#define MEMCPY __memmove_ssse3_back
+#ifdef __CHKP__
+/* Version of memmove that does not support copying bounds
+ when there are pointers in the source buffer. */
+# define MEMCPY mpx_memmove_nobnd
+# else
+# define MEMCPY __memmove_ssse3_back
+#endif
#define MEMCPY_CHK __memmove_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/memmove.c b/sysdeps/x86_64/multiarch/memmove.c
index 8149c48..0d2c6f0 100644
--- a/sysdeps/x86_64/multiarch/memmove.c
+++ b/sysdeps/x86_64/multiarch/memmove.c
@@ -17,31 +17,32 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#ifndef NOT_IN_libc
-# define MEMMOVE __memmove_sse2
-# ifdef SHARED
-# undef libc_hidden_builtin_def
-# define libc_hidden_builtin_def(name) \
+#ifndef __CHKP__
+# ifndef NOT_IN_libc
+# define MEMMOVE __memmove_sse2
+# ifdef SHARED
+# undef libc_hidden_builtin_def
+# define libc_hidden_builtin_def(name) \
__hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
-# endif
+# endif
/* Redefine memmove so that the compiler won't complain about the type
mismatch with the IFUNC selector in strong_alias, below. */
-# undef memmove
-# define memmove __redirect_memmove
-# include <string.h>
-# undef memmove
+# undef memmove
+# define memmove __redirect_memmove
+# include <string.h>
+# undef memmove
extern __typeof (__redirect_memmove) __memmove_sse2 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3 attribute_hidden;
extern __typeof (__redirect_memmove) __memmove_ssse3_back attribute_hidden;
-#endif
+# endif
-#include "string/memmove.c"
+# include "string/memmove.c"
-#ifndef NOT_IN_libc
-# include <shlib-compat.h>
-# include "init-arch.h"
+# ifndef NOT_IN_libc
+# include <shlib-compat.h>
+# include "init-arch.h"
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
ifunc symbol properly. */
@@ -54,7 +55,8 @@ libc_ifunc (__libc_memmove,
strong_alias (__libc_memmove, memmove)
-# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
+# if SHLIB_COMPAT (libc, GLIBC_2_2_5, GLIBC_2_14)
compat_symbol (libc, memmove, memcpy, GLIBC_2_2_5);
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/memmove_chk-c.c b/sysdeps/x86_64/multiarch/memmove_chk-c.c
new file mode 100644
index 0000000..bbf53d0
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/memmove_chk-c.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/x86_64/multiarch/memmove_chk.c b/sysdeps/x86_64/multiarch/memmove_chk.c
index 17ed460..c1b0b93 100644
--- a/sysdeps/x86_64/multiarch/memmove_chk.c
+++ b/sysdeps/x86_64/multiarch/memmove_chk.c
@@ -17,19 +17,21 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <string.h>
-#include "init-arch.h"
+#ifndef __CHKP__
+# include <string.h>
+# include "init-arch.h"
-#define MEMMOVE_CHK __memmove_chk_sse2
+# define MEMMOVE_CHK __memmove_chk_sse2
extern __typeof (__memmove_chk) __memmove_chk_sse2 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3 attribute_hidden;
extern __typeof (__memmove_chk) __memmove_chk_ssse3_back attribute_hidden;
-#include "debug/memmove_chk.c"
+# include "debug/memmove_chk.c"
libc_ifunc (__memmove_chk,
HAS_SSSE3
? (HAS_FAST_COPY_BACKWARD
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
: __memmove_chk_sse2);
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-c.c b/sysdeps/x86_64/multiarch/mempcpy-c.c
new file mode 100644
index 0000000..b9fcb11
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-c.c
@@ -0,0 +1,64 @@
+/* C version of mempcpy for use when Intel MPX is enabled,
+ in order to process an array of pointers correctly.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifdef __CHKP__
+
+# include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst + n;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __GI___mempcpy)
+weak_alias (mempcpy, __mempcpy)
+weak_alias (mempcpy, mpx_mempcpy_nochk)
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
new file mode 100644
index 0000000..8fa99b5
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
@@ -0,0 +1,6 @@
+#ifdef __CHKP__
+# define USE_AS_MEMPCPY
+/* The version of mempcpy without any checks or bounds copying. */
+# define MEMCPY mpx_mempcpy_nobnd_nochk
+# include "memcpy-ssse3-back.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
index 82ffacb..2aa5313 100644
--- a/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
+++ b/sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
@@ -1,4 +1,12 @@
#define USE_AS_MEMPCPY
-#define MEMCPY __mempcpy_ssse3_back
-#define MEMCPY_CHK __mempcpy_chk_ssse3_back
+
+#ifdef __CHKP__
+/* Version of mempcpy that does not support copying bounds
+ when there are pointers in the source buffer. */
+# define MEMCPY mpx_mempcpy_nobnd
+#else
+# define MEMCPY __mempcpy_ssse3_back
+#endif
+
+#define MEMCPY_CHK __mempcpy_chk_ssse3_back
#include "memcpy-ssse3-back.S"
diff --git a/sysdeps/x86_64/multiarch/mempcpy.S b/sysdeps/x86_64/multiarch/mempcpy.S
index b8b7fcd..b4bfbdc 100644
--- a/sysdeps/x86_64/multiarch/mempcpy.S
+++ b/sysdeps/x86_64/multiarch/mempcpy.S
@@ -18,13 +18,14 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. In static binaries we need mempcpy before the initialization
happened. */
-#if defined SHARED && !defined NOT_IN_libc
+# if defined SHARED && !defined NOT_IN_libc
ENTRY(__mempcpy)
.type __mempcpy, @gnu_indirect_function
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
@@ -40,38 +41,40 @@ ENTRY(__mempcpy)
2: ret
END(__mempcpy)
-# undef ENTRY
-# define ENTRY(name) \
+# undef ENTRY
+# define ENTRY(name) \
.type __mempcpy_sse2, @function; \
.p2align 4; \
.globl __mempcpy_sse2; \
.hidden __mempcpy_sse2; \
__mempcpy_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END
-# define END(name) \
+# undef END
+# define END(name) \
cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
-# undef ENTRY_CHK
-# define ENTRY_CHK(name) \
+# undef ENTRY_CHK
+# define ENTRY_CHK(name) \
.type __mempcpy_chk_sse2, @function; \
.globl __mempcpy_chk_sse2; \
.p2align 4; \
__mempcpy_chk_sse2: cfi_startproc; \
CALL_MCOUNT
-# undef END_CHK
-# define END_CHK(name) \
+# undef END_CHK
+# define END_CHK(name) \
cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
-# undef libc_hidden_def
-# undef libc_hidden_builtin_def
+# undef libc_hidden_def
+# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
The speedup we get from using SSSE3 instruction is likely eaten away
by the indirect call in the PLT. */
-# define libc_hidden_def(name) \
+# define libc_hidden_def(name) \
.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
-# define libc_hidden_builtin_def(name) \
+# define libc_hidden_builtin_def(name) \
.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
-#endif
+# endif
+
+# include "../mempcpy.S"
-#include "../mempcpy.S"
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk-c.c b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
new file mode 100644
index 0000000..40ae725
--- /dev/null
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/mempcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/multiarch/mempcpy_chk.S b/sysdeps/x86_64/multiarch/mempcpy_chk.S
index 3801db3..10653c5 100644
--- a/sysdeps/x86_64/multiarch/mempcpy_chk.S
+++ b/sysdeps/x86_64/multiarch/mempcpy_chk.S
@@ -18,14 +18,15 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
-#include <sysdep.h>
-#include <init-arch.h>
+#ifndef __CHKP__
+# include <sysdep.h>
+# include <init-arch.h>
/* Define multiple versions only for the definition in lib and for
DSO. There are no multiarch mempcpy functions for static binaries.
*/
-#ifndef NOT_IN_libc
-# ifdef SHARED
+# ifndef NOT_IN_libc
+# ifdef SHARED
.text
ENTRY(__mempcpy_chk)
.type __mempcpy_chk, @gnu_indirect_function
@@ -41,7 +42,8 @@ ENTRY(__mempcpy_chk)
leaq __mempcpy_chk_ssse3_back(%rip), %rax
2: ret
END(__mempcpy_chk)
-# else
-# include "../mempcpy_chk.S"
+# else
+# include "../mempcpy_chk.S"
+# endif
# endif
#endif
diff --git a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
index 028c6d3..a3535ad 100644
--- a/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S
@@ -25,6 +25,14 @@
# define STRCAT __strcat_sse2_unaligned
# endif
+# ifdef __CHKP__
+# define RETURN \
+ bndcu -1(%rdi, %rax), %bnd0; \
+ ret
+# else
+# define RETURN ret
+# endif
+
# define USE_AS_STRCAT
.text
@@ -37,6 +45,10 @@ ENTRY (STRCAT)
/* Inline corresponding strlen file, temporary until new strcpy
implementation gets merged. */
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %rax, %rax
mov %edi, %ecx
and $0x3f, %ecx
@@ -67,84 +79,132 @@ L(align16_start):
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%rax), %bnd0
+# endif
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%rax), %bnd0
+# endif
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -153,6 +213,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 80(%rax), %bnd0
+# endif
pcmpeqb 80(%rax), %xmm0
add $80, %rax
pmovmskb %xmm0, %edx
@@ -162,6 +225,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm1
add $16, %rax
pmovmskb %xmm1, %edx
@@ -171,6 +237,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm2
add $16, %rax
pmovmskb %xmm2, %edx
@@ -180,6 +249,9 @@ L(align16_start):
test $0x3f, %rax
jz L(align64_loop)
+# ifdef __CHKP__
+ bndcu 16(%rax), %bnd0
+# endif
pcmpeqb 16(%rax), %xmm3
add $16, %rax
pmovmskb %xmm3, %edx
@@ -187,8 +259,12 @@ L(align16_start):
jnz L(exit)
add $16, %rax
+
.p2align 4
L(align64_loop):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movaps (%rax), %xmm4
pminub 16(%rax), %xmm4
movaps 32(%rax), %xmm5
diff --git a/sysdeps/x86_64/multiarch/strchr.S b/sysdeps/x86_64/multiarch/strchr.S
index f170238..4311e86 100644
--- a/sysdeps/x86_64/multiarch/strchr.S
+++ b/sysdeps/x86_64/multiarch/strchr.S
@@ -91,6 +91,10 @@ __strchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
pxor %xmm2, %xmm2
movd %esi, %xmm1
movl %edi, %ecx
@@ -124,6 +128,9 @@ __strchr_sse42:
ja L(return_null)
L(unaligned_match):
addq %rdi, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
.p2align 4
@@ -135,15 +142,27 @@ L(unaligned_no_match):
L(loop):
addq $16, %r8
L(aligned_start):
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
pcmpistri $0x2, (%r8), %xmm1
jbe L(wrap)
jmp L(loop)
@@ -159,6 +178,9 @@ L(return_null):
.p2align 4
L(loop_exit):
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
ret
cfi_endproc
.size __strchr_sse42, .-__strchr_sse42
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
index c84f1c2..edfa915 100644
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
@@ -127,6 +127,14 @@ STRCMP_SSE42:
je LABEL(Byte0)
mov %rdx, %r11
#endif
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -210,6 +218,10 @@ LABEL(touppermask):
#endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte
@@ -231,6 +243,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
movdqa (%rdi), %xmm2
movdqa (%rsi), %xmm1
@@ -280,6 +297,10 @@ LABEL(ashr_0):
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
.p2align 4
LABEL(ashr_0_use):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -295,6 +316,10 @@ LABEL(ashr_0_use):
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rdx), %bnd0
+ bndcu -1(%rsi, %rdx), %bnd1
+#endif
movdqa (%rdi,%rdx), %xmm0
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
@@ -320,6 +345,10 @@ LABEL(ashr_0_exit_use):
jbe LABEL(strcmp_exitz)
#endif
lea -16(%rdx, %rcx), %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movzbl (%rdi, %rcx), %eax
movzbl (%rsi, %rcx), %edx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -362,6 +391,15 @@ LABEL(ashr_1):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_1_use)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_1_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_1_use):
@@ -416,7 +454,11 @@ LABEL(nibble_ashr_1_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $14, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+#else
ja LABEL(nibble_ashr_1_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -450,6 +492,15 @@ LABEL(ashr_2):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_2_use)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_2_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_2_use):
@@ -504,7 +555,11 @@ LABEL(nibble_ashr_2_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $13, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+#else
ja LABEL(nibble_ashr_2_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -539,6 +594,15 @@ LABEL(ashr_3):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_3_use)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_3_restart_use)
+#endif
LABEL(loop_ashr_3_use):
add $16, %r10
@@ -592,7 +656,11 @@ LABEL(nibble_ashr_3_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $12, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+#else
ja LABEL(nibble_ashr_3_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -627,6 +695,15 @@ LABEL(ashr_4):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_4_use)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_4_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_4_use):
@@ -681,7 +758,11 @@ LABEL(nibble_ashr_4_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $11, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+#else
ja LABEL(nibble_ashr_4_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -716,6 +797,15 @@ LABEL(ashr_5):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_5_use)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_5_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_5_use):
@@ -771,7 +861,11 @@ LABEL(nibble_ashr_5_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $10, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+#else
ja LABEL(nibble_ashr_5_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -806,6 +900,15 @@ LABEL(ashr_6):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_6_use)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_6_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_6_use):
@@ -860,7 +963,11 @@ LABEL(nibble_ashr_6_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $9, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+#else
ja LABEL(nibble_ashr_6_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -895,6 +1002,15 @@ LABEL(ashr_7):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_7_use)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_7_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_7_use):
@@ -949,7 +1065,11 @@ LABEL(nibble_ashr_7_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $8, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+#else
ja LABEL(nibble_ashr_7_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -984,6 +1104,15 @@ LABEL(ashr_8):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_8_use)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_8_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_8_use):
@@ -1038,7 +1167,11 @@ LABEL(nibble_ashr_8_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $7, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+#else
ja LABEL(nibble_ashr_8_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1073,6 +1206,15 @@ LABEL(ashr_9):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_9_use)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_9_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_9_use):
@@ -1128,7 +1270,11 @@ LABEL(nibble_ashr_9_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $6, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+#else
ja LABEL(nibble_ashr_9_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1163,6 +1309,15 @@ LABEL(ashr_10):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_10_use)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_10_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_10_use):
@@ -1217,7 +1372,11 @@ LABEL(nibble_ashr_10_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $5, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+#else
ja LABEL(nibble_ashr_10_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1252,6 +1411,15 @@ LABEL(ashr_11):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_11_use)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_11_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_11_use):
@@ -1306,7 +1474,11 @@ LABEL(nibble_ashr_11_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $4, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+#else
ja LABEL(nibble_ashr_11_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1341,6 +1513,15 @@ LABEL(ashr_12):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_12_use)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_12_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_12_use):
@@ -1395,7 +1576,11 @@ LABEL(nibble_ashr_12_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $3, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+#else
ja LABEL(nibble_ashr_12_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1431,6 +1616,15 @@ LABEL(ashr_13):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_13_use)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_13_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_13_use):
@@ -1485,7 +1679,11 @@ LABEL(nibble_ashr_13_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $2, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+#else
ja LABEL(nibble_ashr_13_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1521,6 +1719,15 @@ LABEL(ashr_14):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_14_use)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_14_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_14_use):
@@ -1575,7 +1782,11 @@ LABEL(nibble_ashr_14_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $1, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+#else
ja LABEL(nibble_ashr_14_restart_use)
+#endif
jmp LABEL(nibble_ashr_exit_use)
@@ -1613,6 +1824,15 @@ LABEL(ashr_15):
sub $0x1000, %r10 /* subtract 4K pagesize */
mov %rcx, %rdx /* only for offset of sse4 instruction loop*/
+#ifdef __CHKP__
+ bndcu -16(%rdi, %rdx), %bnd0
+ bndcu -16(%rsi, %rdx), %bnd1
+ jmp LABEL(loop_ashr_15_use)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+ jmp LABEL(nibble_ashr_15_restart_use)
+#endif
.p2align 4
LABEL(loop_ashr_15_use):
@@ -1667,7 +1887,11 @@ LABEL(nibble_ashr_15_use):
jae LABEL(nibble_ashr_exit_use)
#endif
cmp $0, %ecx
+#ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+#else
ja LABEL(nibble_ashr_15_restart_use)
+#endif
LABEL(nibble_ashr_exit_use):
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
@@ -1691,6 +1915,11 @@ LABEL(exit_use):
test %r8d, %r8d
jz LABEL(ret_use)
xchg %eax, %edx
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(ret_use):
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
@@ -1707,6 +1936,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -1717,6 +1951,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
#endif
+#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
diff --git a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
index 7710173..e6baee9 100644
--- a/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
+++ b/sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S
@@ -33,7 +33,7 @@
lea TABLE(%rip), %r11; \
movslq (%r11, INDEX, SCALE), %rcx; \
lea (%r11, %rcx), %rcx; \
- jmp *%rcx
+ jmp *%rcx
# ifndef USE_AS_STRCAT
@@ -51,6 +51,16 @@ ENTRY (STRCPY)
# endif
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
+ bndcu -1(%rdi, %rdx), %bnd0
+# endif
+# endif
+
and $63, %rcx
cmp $32, %rcx
jbe L(SourceStringAlignmentLess32)
@@ -79,6 +89,9 @@ ENTRY (STRCPY)
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+# endif
pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx
@@ -91,6 +104,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%rsi, %rcx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm1, (%rdi)
/* If source address alignment != destination address alignment */
@@ -101,6 +117,10 @@ L(Unalign16Both):
add %rcx, %r8
# endif
mov $16, %rcx
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqa (%rsi, %rcx), %xmm1
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
@@ -118,6 +138,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -134,6 +158,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm4
movdqu %xmm3, (%rdi, %rcx)
pcmpeqb %xmm4, %xmm0
@@ -150,6 +178,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm1
movdqu %xmm4, (%rdi, %rcx)
pcmpeqb %xmm1, %xmm0
@@ -166,6 +198,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm2
movdqu %xmm1, (%rdi, %rcx)
pcmpeqb %xmm2, %xmm0
@@ -182,6 +218,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movaps 16(%rsi, %rcx), %xmm3
movdqu %xmm2, (%rdi, %rcx)
pcmpeqb %xmm3, %xmm0
@@ -198,6 +238,10 @@ L(Unalign16Both):
jnz L(CopyFrom1To16Bytes)
# endif
+# ifdef __CHKP__
+ bndcu 16(%rsi, %rcx), %bnd1
+ bndcu 15(%rdi, %rcx), %bnd0
+# endif
movdqu %xmm3, (%rdi, %rcx)
mov %rsi, %rdx
lea 16(%rsi, %rcx), %rsi
@@ -208,6 +252,9 @@ L(Unalign16Both):
lea 128(%r8, %rdx), %r8
# endif
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%rsi), %bnd1
+# endif
movaps (%rsi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rsi), %xmm5
@@ -229,6 +276,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %rdi
add $64, %rsi
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+ bndcu (%rdi), %bnd0
+# endif
movdqu %xmm4, -64(%rdi)
movaps (%rsi), %xmm2
movdqa %xmm2, %xmm4
@@ -271,16 +322,28 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%rdi, %rdx), %bnd0
+# endif
lea 48(%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 63(%rdi), %bnd0
+# endif
movdqu %xmm7, 48(%rdi)
add $15, %r8
sub %rdx, %r8
+# ifdef __CHKP__
+ bndcu 49(%rdi, %rdx), %bnd0
+# endif
lea 49(%rdi, %rdx), %rdi
jmp L(StrncpyFillTailWithZero)
# else
@@ -309,6 +372,10 @@ L(SourceStringAlignmentLess32):
test %rdx, %rdx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 16(%rsi), %bnd1
+ bndcu 15(%rdi), %bnd0
+# endif
pcmpeqb %xmm2, %xmm0
movdqu %xmm1, (%rdi)
pmovmskb %xmm0, %rdx
@@ -372,6 +439,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax
# endif
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
add $63, %r8
sub %rdx, %r8
@@ -384,6 +454,9 @@ L(CopyFrom1To16BytesUnaligned_0):
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY
@@ -403,6 +476,9 @@ L(CopyFrom1To16BytesUnaligned_16):
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx
+# ifdef __CHKP__
+ bndcu 47(%rdi), %bnd0
+# endif
movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
@@ -529,6 +605,9 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
mov %dh, (%rdi)
# ifdef USE_AS_STPCPY
lea (%rdi), %rax
@@ -543,6 +622,9 @@ L(Exit1):
.p2align 4
L(Exit2):
mov (%rsi), %dx
+# ifdef __CHKP__
+ bndcu 1(%rdi), %bnd0
+# endif
mov %dx, (%rdi)
# ifdef USE_AS_STPCPY
lea 1(%rdi), %rax
@@ -557,6 +639,9 @@ L(Exit2):
.p2align 4
L(Exit3):
mov (%rsi), %cx
+# ifdef __CHKP__
+ bndcu 2(%rdi), %bnd0
+# endif
mov %cx, (%rdi)
mov %dh, 2(%rdi)
# ifdef USE_AS_STPCPY
@@ -572,6 +657,9 @@ L(Exit3):
.p2align 4
L(Exit4):
mov (%rsi), %edx
+# ifdef __CHKP__
+ bndcu 3(%rdi), %bnd0
+# endif
mov %edx, (%rdi)
# ifdef USE_AS_STPCPY
lea 3(%rdi), %rax
@@ -586,6 +674,9 @@ L(Exit4):
.p2align 4
L(Exit5):
mov (%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+# endif
mov %dh, 4(%rdi)
mov %ecx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -602,6 +693,9 @@ L(Exit5):
L(Exit6):
mov (%rsi), %ecx
mov 4(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 5(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %dx, 4(%rdi)
# ifdef USE_AS_STPCPY
@@ -618,6 +712,9 @@ L(Exit6):
L(Exit7):
mov (%rsi), %ecx
mov 3(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 6(%rdi), %bnd0
+# endif
mov %ecx, (%rdi)
mov %edx, 3(%rdi)
# ifdef USE_AS_STPCPY
@@ -633,6 +730,9 @@ L(Exit7):
.p2align 4
L(Exit8):
mov (%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 7(%rdi), %bnd0
+# endif
mov %rdx, (%rdi)
# ifdef USE_AS_STPCPY
lea 7(%rdi), %rax
@@ -647,6 +747,9 @@ L(Exit8):
.p2align 4
L(Exit9):
mov (%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+# endif
mov %dh, 8(%rdi)
mov %rcx, (%rdi)
# ifdef USE_AS_STPCPY
@@ -663,6 +766,9 @@ L(Exit9):
L(Exit10):
mov (%rsi), %rcx
mov 8(%rsi), %dx
+# ifdef __CHKP__
+ bndcu 9(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %dx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -679,6 +785,9 @@ L(Exit10):
L(Exit11):
mov (%rsi), %rcx
mov 7(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 10(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -695,6 +804,9 @@ L(Exit11):
L(Exit12):
mov (%rsi), %rcx
mov 8(%rsi), %edx
+# ifdef __CHKP__
+ bndcu 11(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %edx, 8(%rdi)
# ifdef USE_AS_STPCPY
@@ -711,6 +823,9 @@ L(Exit12):
L(Exit13):
mov (%rsi), %rcx
mov 5(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 5(%rdi)
# ifdef USE_AS_STPCPY
@@ -727,6 +842,9 @@ L(Exit13):
L(Exit14):
mov (%rsi), %rcx
mov 6(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 13(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 6(%rdi)
# ifdef USE_AS_STPCPY
@@ -743,6 +861,9 @@ L(Exit14):
L(Exit15):
mov (%rsi), %rcx
mov 7(%rsi), %rdx
+# ifdef __CHKP__
+ bndcu 14(%rdi), %bnd0
+# endif
mov %rcx, (%rdi)
mov %rdx, 7(%rdi)
# ifdef USE_AS_STPCPY
@@ -758,6 +879,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
# ifdef USE_AS_STPCPY
lea 15(%rdi), %rax
@@ -772,6 +896,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%rsi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %dh, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -788,6 +915,9 @@ L(Exit17):
L(Exit18):
movdqu (%rsi), %xmm0
mov 16(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 17(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %cx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -804,6 +934,9 @@ L(Exit18):
L(Exit19):
movdqu (%rsi), %xmm0
mov 15(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -820,6 +953,9 @@ L(Exit19):
L(Exit20):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -836,6 +972,9 @@ L(Exit20):
L(Exit21):
movdqu (%rsi), %xmm0
mov 16(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %ecx, 16(%rdi)
mov %dh, 20(%rdi)
@@ -853,6 +992,9 @@ L(Exit21):
L(Exit22):
movdqu (%rsi), %xmm0
mov 14(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 21(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -869,6 +1011,9 @@ L(Exit22):
L(Exit23):
movdqu (%rsi), %xmm0
mov 15(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 22(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -885,6 +1030,9 @@ L(Exit23):
L(Exit24):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 23(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
# ifdef USE_AS_STPCPY
@@ -901,6 +1049,9 @@ L(Exit24):
L(Exit25):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rcx
+# ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rcx, 16(%rdi)
mov %dh, 24(%rdi)
@@ -919,6 +1070,9 @@ L(Exit26):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %cx
+# ifdef __CHKP__
+ bndcu 25(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %cx, 24(%rdi)
@@ -937,6 +1091,9 @@ L(Exit27):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 23(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 23(%rdi)
@@ -955,6 +1112,9 @@ L(Exit28):
movdqu (%rsi), %xmm0
mov 16(%rsi), %rdx
mov 24(%rsi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
mov %rdx, 16(%rdi)
mov %ecx, 24(%rdi)
@@ -972,6 +1132,9 @@ L(Exit28):
L(Exit29):
movdqu (%rsi), %xmm0
movdqu 13(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY
@@ -988,6 +1151,9 @@ L(Exit29):
L(Exit30):
movdqu (%rsi), %xmm0
movdqu 14(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY
@@ -1004,6 +1170,9 @@ L(Exit30):
L(Exit31):
movdqu (%rsi), %xmm0
movdqu 15(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY
@@ -1020,6 +1189,9 @@ L(Exit31):
L(Exit32):
movdqu (%rsi), %xmm0
movdqu 16(%rsi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%rdi), %bnd0
+# endif
movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY
diff --git a/sysdeps/x86_64/multiarch/strcspn-c.c b/sysdeps/x86_64/multiarch/strcspn-c.c
index 9c0dcf0..dfdde27 100644
--- a/sysdeps/x86_64/multiarch/strcspn-c.c
+++ b/sysdeps/x86_64/multiarch/strcspn-c.c
@@ -20,6 +20,8 @@
#include <nmmintrin.h>
#include <string.h>
#include "varshift.h"
+#ifdef __CHKP__
+#endif
/* We use 0x2:
_SIDD_SBYTE_OPS
@@ -84,6 +86,12 @@ STRCSPN_SSE42 (const char *s, const char *a)
if (*a == 0)
RETURN (NULL, strlen (s));
+#ifdef __CHKP__
+/* TODO: Implement MPX support for this vectorized version manually using mpx intrinsics */
+ a = __bnd_init_ptr_bounds(a);
+ s = __bnd_init_ptr_bounds(s);
+#endif
+
const char *aligned;
__m128i mask;
int offset = (int) ((size_t) a & 15);
diff --git a/sysdeps/x86_64/multiarch/strrchr.S b/sysdeps/x86_64/multiarch/strrchr.S
index 3f92a41..1fed105 100644
--- a/sysdeps/x86_64/multiarch/strrchr.S
+++ b/sysdeps/x86_64/multiarch/strrchr.S
@@ -97,6 +97,10 @@ __strrchr_sse42:
CALL_MCOUNT
testb %sil, %sil
je __strend_sse4
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
xor %eax,%eax /* RAX has the last occurrence of s. */
movd %esi, %xmm1
punpcklbw %xmm1, %xmm1
@@ -135,6 +139,9 @@ L(unaligned_no_byte):
contain the NULL terminator. */
jg L(exit)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
/* Loop start on aligned string. */
.p2align 4
@@ -142,6 +149,9 @@ L(loop):
pcmpistri $0x4a, (%r8), %xmm1
jbe L(match_or_eos)
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(match_or_eos):
@@ -149,11 +159,17 @@ L(match_or_eos):
L(match_no_eos):
leaq (%r8,%rcx), %rax
addq $16, %r8
+# ifdef __CHKP__
+ bndcu (%r8), %bnd0
+# endif
jmp L(loop)
.p2align 4
L(had_eos):
jnc L(exit)
leaq (%r8,%rcx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
.p2align 4
L(exit):
ret
diff --git a/sysdeps/x86_64/multiarch/strspn-c.c b/sysdeps/x86_64/multiarch/strspn-c.c
index 8128cb9..ecc3a3a 100644
--- a/sysdeps/x86_64/multiarch/strspn-c.c
+++ b/sysdeps/x86_64/multiarch/strspn-c.c
@@ -18,6 +18,8 @@
<http://www.gnu.org/licenses/>. */
#include <nmmintrin.h>
+#ifdef __CHKP__
+#endif
#include <string.h>
#include "varshift.h"
@@ -62,6 +64,12 @@ __strspn_sse42 (const char *s, const char *a)
if (*a == 0)
return 0;
+#ifdef __CHKP__
+/* TODO: Implement Intel MPX manual checks for this vectorized version using new intrinsics */
+ s = __bnd_init_ptr_bounds(s);
+ a = __bnd_init_ptr_bounds(a);
+#endif
+
const char *aligned;
__m128i mask;
int offset = (int) ((size_t) a & 15);
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index cd63b68..577744b 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -165,8 +165,14 @@ char *
__attribute__ ((section (".text.sse4.2")))
STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
{
-#define p1 s1
+#ifdef __CHKP__
+/* TODO: Implement Intel MPX manual checks for this vectorized version using new intrinsics */
+ unsigned char *p1 = __bnd_init_ptr_bounds(s1);
+ unsigned char *p2 = __bnd_init_ptr_bounds(s2);
+#else
+# define p1 s1
const unsigned char *p2 = s2;
+#endif
#ifndef STRCASESTR_NONASCII
if (__builtin_expect (p2[0] == '\0', 0))
diff --git a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
index b7de092..77889dd 100644
--- a/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/x86_64/multiarch/wcscpy-ssse3.S
@@ -25,13 +25,27 @@ ENTRY (__wcscpy_ssse3)
mov %rsi, %rcx
mov %rdi, %rdx
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
cmpl $0, (%rcx)
jz L(Exit4)
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
cmpl $0, 4(%rcx)
jz L(Exit8)
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
cmpl $0, 8(%rcx)
jz L(Exit12)
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
cmpl $0, 12(%rcx)
jz L(Exit16)
@@ -40,10 +54,19 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
mov (%rcx), %r9
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %r9, (%rdx)
+# ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+# endif
pcmpeqd (%rsi), %xmm0
mov 8(%rcx), %r9
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %r9, 8(%rdx)
pmovmskb %xmm0, %rax
@@ -72,6 +95,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 16(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm1
movaps 16(%rcx), %xmm2
movaps %xmm1, (%rdx)
@@ -82,6 +109,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -91,6 +122,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm4
movaps %xmm3, (%rdx, %rsi)
pcmpeqd %xmm4, %xmm0
@@ -100,6 +135,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm1
movaps %xmm4, (%rdx, %rsi)
pcmpeqd %xmm1, %xmm0
@@ -109,6 +148,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm2
movaps %xmm1, (%rdx, %rsi)
pcmpeqd %xmm2, %xmm0
@@ -118,6 +161,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps 16(%rcx, %rsi), %xmm3
movaps %xmm2, (%rdx, %rsi)
pcmpeqd %xmm3, %xmm0
@@ -127,6 +174,10 @@ L(Align16Both):
test %rax, %rax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%rcx, %rsi), %bnd1
+ bndcu 15(%rdx, %rsi), %bnd0
+# endif
movaps %xmm3, (%rdx, %rsi)
mov %rcx, %rax
lea 16(%rcx, %rsi), %rcx
@@ -138,6 +189,10 @@ L(Align16Both):
.p2align 4
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 63(%rdx), %bnd0
+# endif
movaps (%rcx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%rcx), %xmm5
@@ -168,6 +223,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -49(%rdx), %bnd0
+# endif
movaps %xmm4, -64(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
@@ -176,11 +234,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %rax
+# ifdef __CHKP__
+ bndcu -33(%rdx), %bnd0
+# endif
movaps %xmm5, -48(%rdx)
test %rax, %rax
lea 16(%rsi), %rsi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%rdx), %bnd0
+# endif
movaps %xmm6, -32(%rdx)
pcmpeqd %xmm7, %xmm0
@@ -190,11 +254,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %rsi
+# ifdef __CHKP__
+ bndcu -1(%rdx), %bnd0
+# endif
movaps %xmm7, -16(%rdx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps -4(%rcx), %xmm1
movaps 12(%rcx), %xmm2
L(Shl4Start):
@@ -206,6 +276,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -219,6 +293,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -232,6 +310,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 28(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 28(%rcx), %xmm2
@@ -244,6 +326,9 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 28(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -258,6 +343,9 @@ L(Shl4Start):
.p2align 4
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%rcx), %bnd1
+# endif
movaps 12(%rcx), %xmm2
movaps 28(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -279,6 +367,9 @@ L(Shl4LoopStart):
lea 64(%rcx), %rcx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -287,6 +378,10 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
+# ifdef __CHKP__
+ bndcu -4(%rcx), %bnd1
+ bndcu 11(%rdx), %bnd0
+# endif
movdqu -4(%rcx), %xmm1
mov $12, %rsi
movdqu %xmm1, -4(%rdx)
@@ -294,6 +389,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps -8(%rcx), %xmm1
movaps 8(%rcx), %xmm2
L(Shl8Start):
@@ -305,6 +403,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -318,6 +420,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -331,6 +437,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 24(%rcx), %xmm2
@@ -343,6 +453,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 24(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 24(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -357,6 +471,9 @@ L(Shl8Start):
.p2align 4
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%rcx), %bnd1
+# endif
movaps 8(%rcx), %xmm2
movaps 24(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -378,6 +495,9 @@ L(Shl8LoopStart):
lea 64(%rcx), %rcx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -386,6 +506,10 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 7(%rdx), %bnd0
+# endif
mov (%rcx), %r9
mov $8, %rsi
mov %r9, (%rdx)
@@ -393,6 +517,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps -12(%rcx), %xmm1
movaps 4(%rcx), %xmm2
L(Shl12Start):
@@ -404,6 +531,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -417,6 +548,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -430,6 +565,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
movaps 20(%rcx), %xmm2
@@ -442,6 +581,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 20(%rcx), %bnd1
+ bndcu 15(%rdx), %bnd0
+# endif
movaps %xmm2, (%rdx)
lea 20(%rcx), %rcx
lea 16(%rdx), %rdx
@@ -456,6 +599,9 @@ L(Shl12Start):
.p2align 4
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%rcx), %bnd1
+# endif
movaps 4(%rcx), %xmm2
movaps 20(%rcx), %xmm3
movaps %xmm3, %xmm6
@@ -476,6 +622,9 @@ L(Shl12LoopStart):
lea 64(%rcx), %rcx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%rdx), %bnd0
+# endif
movaps %xmm5, 48(%rdx)
movaps %xmm4, 32(%rdx)
movaps %xmm3, 16(%rdx)
@@ -484,6 +633,10 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
+# ifdef __CHKP__
+ bndcu (%rcx), %bnd1
+ bndcu 3(%rdx), %bnd0
+# endif
mov (%rcx), %r9d
mov $4, %rsi
mov %r9d, (%rdx)
@@ -500,6 +653,9 @@ L(CopyFrom1To16Bytes):
jnz L(Exit4)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -510,6 +666,9 @@ L(ExitHigh):
jnz L(Exit12)
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
@@ -519,6 +678,9 @@ L(ExitHigh):
.p2align 4
L(Exit4):
movl (%rcx), %eax
+# ifdef __CHKP__
+ bndcu 3(%rdx), %bnd0
+# endif
movl %eax, (%rdx)
mov %rdi, %rax
ret
@@ -526,6 +688,9 @@ L(Exit4):
.p2align 4
L(Exit8):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 7(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov %rdi, %rax
ret
@@ -533,6 +698,9 @@ L(Exit8):
.p2align 4
L(Exit12):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 11(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
@@ -542,6 +710,9 @@ L(Exit12):
.p2align 4
L(Exit16):
mov (%rcx), %rax
+# ifdef __CHKP__
+ bndcu 15(%rdx), %bnd0
+# endif
mov %rax, (%rdx)
mov 8(%rcx), %rax
mov %rax, 8(%rdx)
diff --git a/sysdeps/x86_64/rawmemchr.S b/sysdeps/x86_64/rawmemchr.S
index f4d5591..2f4cb25 100644
--- a/sysdeps/x86_64/rawmemchr.S
+++ b/sysdeps/x86_64/rawmemchr.S
@@ -20,11 +20,23 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
.text
ENTRY (rawmemchr)
movd %rsi, %xmm1
mov %rdi, %rcx
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+#endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -63,7 +75,7 @@ L(crosscache):
add %rdi, %rax
add %rcx, %rax
- ret
+ RETURN
.p2align 4
L(unaligned_no_match):
@@ -71,24 +83,36 @@ L(unaligned_no_match):
.p2align 4
L(loop_prolog):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm4
pcmpeqb %xmm1, %xmm4
add $64, %rdi
@@ -99,24 +123,36 @@ L(loop_prolog):
test $0x3f, %rdi
jz L(align64_loop)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %eax
test %eax, %eax
jnz L(matches)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
movdqa 16(%rdi), %xmm2
pcmpeqb %xmm1, %xmm2
pmovmskb %xmm2, %eax
test %eax, %eax
jnz L(matches16)
+#ifdef __CHKP__
+ bndcu 32(%rdi), %bnd0
+#endif
movdqa 32(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
test %eax, %eax
jnz L(matches32)
+#ifdef __CHKP__
+ bndcu 48(%rdi), %bnd0
+#endif
movdqa 48(%rdi), %xmm3
pcmpeqb %xmm1, %xmm3
pmovmskb %xmm3, %eax
@@ -129,6 +165,9 @@ L(loop_prolog):
.p2align 4
L(align64_loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
movdqa 16(%rdi), %xmm2
movdqa 32(%rdi), %xmm3
@@ -170,36 +209,36 @@ L(align64_loop):
pmovmskb %xmm1, %eax
bsf %eax, %eax
lea 48(%rdi, %rax), %rax
- ret
+ RETURN
.p2align 4
L(matches0):
bsf %eax, %eax
lea -16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches):
bsf %eax, %eax
add %rdi, %rax
- ret
+ RETURN
.p2align 4
L(matches16):
bsf %eax, %eax
lea 16(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(matches32):
bsf %eax, %eax
lea 32(%rax, %rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
xor %rax, %rax
- ret
+ RETURN
END (rawmemchr)
diff --git a/sysdeps/x86_64/stpcpy_chk-c.c b/sysdeps/x86_64/stpcpy_chk-c.c
new file mode 100644
index 0000000..5de29f9
--- /dev/null
+++ b/sysdeps/x86_64/stpcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/stpcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/stpcpy_chk.S b/sysdeps/x86_64/stpcpy_chk.S
index 905e8d7..d4a2764 100644
--- a/sysdeps/x86_64/stpcpy_chk.S
+++ b/sysdeps/x86_64/stpcpy_chk.S
@@ -1,3 +1,5 @@
-#define USE_AS_STPCPY_CHK
-#define STRCPY_CHK __stpcpy_chk
-#include <sysdeps/x86_64/strcpy_chk.S>
+#ifndef __CHKP__
+# define USE_AS_STPCPY_CHK
+# define STRCPY_CHK __stpcpy_chk
+# include <sysdeps/x86_64/strcpy_chk.S>
+#endif
diff --git a/sysdeps/x86_64/strcat.S b/sysdeps/x86_64/strcat.S
index 8bea6fb..7832379 100644
--- a/sysdeps/x86_64/strcat.S
+++ b/sysdeps/x86_64/strcat.S
@@ -25,6 +25,11 @@
.text
ENTRY (strcat)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+
movq %rdi, %rcx /* Dest. register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rax /* Duplicate destination pointer. */
@@ -36,7 +41,11 @@ ENTRY (strcat)
neg %ecx /* We need to align to 8 bytes. */
addl $8,%ecx
/* Search the first bytes directly. */
-0: cmpb $0x0,(%rax) /* is byte NUL? */
+0:
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
+ cmpb $0x0,(%rax) /* is byte NUL? */
je 2f /* yes => start copy */
incq %rax /* increment pointer */
decl %ecx
@@ -48,6 +57,9 @@ ENTRY (strcat)
.p2align 4
4:
/* First unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -62,6 +74,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Second unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -76,6 +91,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Third unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -90,6 +108,9 @@ ENTRY (strcat)
jnz 3f /* found NUL => return pointer */
/* Fourth unroll. */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movq (%rax), %rcx /* get double word (= 8 bytes) in question */
addq $8,%rax /* adjust pointer for next word */
movq %r8, %rdx /* magic value */
@@ -163,6 +184,9 @@ ENTRY (strcat)
.p2align 4
22:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -177,10 +201,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -195,10 +225,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -213,10 +249,16 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -231,6 +273,9 @@ ENTRY (strcat)
jnz 23f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 22b /* Next iteration. */
@@ -239,10 +284,16 @@ ENTRY (strcat)
The loop is unrolled twice. */
.p2align 4
23:
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 24f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 24f /* yes, finish. */
diff --git a/sysdeps/x86_64/strchr.S b/sysdeps/x86_64/strchr.S
index d89f1eb..8519a81 100644
--- a/sysdeps/x86_64/strchr.S
+++ b/sysdeps/x86_64/strchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -29,6 +33,9 @@ ENTRY (strchr)
pxor %xmm2, %xmm2
punpcklbw %xmm1, %xmm1
orl $0xffffffff, %esi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pshufd $0, %xmm1, %xmm1
subq %rdi, %rcx
@@ -44,7 +51,11 @@ ENTRY (strchr)
orl %edx, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
diff --git a/sysdeps/x86_64/strchrnul.S b/sysdeps/x86_64/strchrnul.S
index d8c345b..3e4abfa 100644
--- a/sysdeps/x86_64/strchrnul.S
+++ b/sysdeps/x86_64/strchrnul.S
@@ -23,6 +23,10 @@
.text
ENTRY (__strchrnul)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -44,7 +48,11 @@ ENTRY (__strchrnul)
andl %esi, %ecx
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -56,6 +64,9 @@ ENTRY (__strchrnul)
1: bsfl %ecx, %edx
leaq -16(%rdi,%rdx), %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
END (__strchrnul)
diff --git a/sysdeps/x86_64/strcmp.S b/sysdeps/x86_64/strcmp.S
index 7680937..ece49c9 100644
--- a/sysdeps/x86_64/strcmp.S
+++ b/sysdeps/x86_64/strcmp.S
@@ -128,7 +128,16 @@ libc_hidden_def (__strncasecmp)
ENTRY (STRCMP)
#ifdef NOT_IN_libc
/* Simple version since we can't use SSE registers in ld.so. */
-L(oop): movb (%rdi), %al
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
+L(oop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
+ movb (%rdi), %al
cmpb (%rsi), %al
jne L(neq)
incq %rdi
@@ -177,6 +186,12 @@ END (STRCMP)
je LABEL(Byte0)
mov %rdx, %r11
# endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
mov %esi, %ecx
mov %edi, %eax
/* Use 64bit AND here to avoid long NOP padding. */
@@ -243,6 +258,10 @@ END (STRCMP)
# endif
add $16, %rsi /* prepare to search next 16 bytes */
add $16, %rdi /* prepare to search next 16 bytes */
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+ bndcu (%rsi), %bnd1
+#endif
/*
* Determine source and destination string offsets from 16-byte alignment.
@@ -263,6 +282,11 @@ LABEL(crosscache):
mov %edx, %r8d /* r8d is offset flag for exit tail */
xchg %ecx, %eax
xchg %rsi, %rdi
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
LABEL(bigger):
lea 15(%rax), %r9
sub %rcx, %r9
@@ -310,6 +334,10 @@ LABEL(ashr_0):
*/
.p2align 4
LABEL(loop_ashr_0):
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -326,6 +354,10 @@ LABEL(loop_ashr_0):
jbe LABEL(strcmp_exitz)
# endif
add $16, %rcx
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rcx), %bnd0
+ bndcu -1(%rsi, %rcx), %bnd1
+#endif
movdqa (%rsi, %rcx), %xmm1
movdqa (%rdi, %rcx), %xmm2
TOLOWER (%xmm1, %xmm2)
@@ -377,6 +409,15 @@ LABEL(ashr_1):
lea 1(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_1)
+LABEL(ashr_1_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_1)
+# endif
.p2align 4
LABEL(loop_ashr_1):
@@ -460,7 +501,11 @@ LABEL(nibble_ashr_1):
pxor %xmm0, %xmm0
sub $0x1000, %r10 /* substract 4K from %r10 */
+# ifdef __CHKP__
+ ja LABEL(ashr_1_check)
+# else
jmp LABEL(gobble_ashr_1)
+# endif
/*
* Once find null char, determine if there is a string mismatch
@@ -507,6 +552,15 @@ LABEL(ashr_2):
lea 2(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_2)
+LABEL(ashr_2_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(loop_ashr_2):
@@ -588,7 +642,11 @@ LABEL(nibble_ashr_2):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_2_check)
+# else
jmp LABEL(gobble_ashr_2)
+# endif
.p2align 4
LABEL(ashr_2_exittail):
@@ -632,6 +690,15 @@ LABEL(ashr_3):
lea 3(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_3)
+LABEL(ashr_3_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(loop_ashr_3):
@@ -713,7 +780,11 @@ LABEL(nibble_ashr_3):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_3_check)
+# else
jmp LABEL(gobble_ashr_3)
+# endif
.p2align 4
LABEL(ashr_3_exittail):
@@ -757,6 +828,15 @@ LABEL(ashr_4):
lea 4(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_4)
+LABEL(ashr_4_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(loop_ashr_4):
@@ -838,7 +918,11 @@ LABEL(nibble_ashr_4):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_4_check)
+# else
jmp LABEL(gobble_ashr_4)
+# endif
.p2align 4
LABEL(ashr_4_exittail):
@@ -882,6 +966,15 @@ LABEL(ashr_5):
lea 5(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_5)
+LABEL(ashr_5_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(loop_ashr_5):
@@ -963,7 +1056,11 @@ LABEL(nibble_ashr_5):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_5_check)
+# else
jmp LABEL(gobble_ashr_5)
+# endif
.p2align 4
LABEL(ashr_5_exittail):
@@ -1007,6 +1104,15 @@ LABEL(ashr_6):
lea 6(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_6)
+LABEL(ashr_6_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(loop_ashr_6):
@@ -1088,7 +1194,11 @@ LABEL(nibble_ashr_6):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_6_check)
+# else
jmp LABEL(gobble_ashr_6)
+# endif
.p2align 4
LABEL(ashr_6_exittail):
@@ -1132,6 +1242,15 @@ LABEL(ashr_7):
lea 7(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_7)
+LABEL(ashr_7_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(loop_ashr_7):
@@ -1213,7 +1332,11 @@ LABEL(nibble_ashr_7):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_7_check)
+# else
jmp LABEL(gobble_ashr_7)
+# endif
.p2align 4
LABEL(ashr_7_exittail):
@@ -1257,6 +1380,15 @@ LABEL(ashr_8):
lea 8(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_8)
+LABEL(ashr_8_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(loop_ashr_8):
@@ -1338,7 +1470,11 @@ LABEL(nibble_ashr_8):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_8_check)
+# else
jmp LABEL(gobble_ashr_8)
+# endif
.p2align 4
LABEL(ashr_8_exittail):
@@ -1382,6 +1518,15 @@ LABEL(ashr_9):
lea 9(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_9)
+LABEL(ashr_9_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(loop_ashr_9):
@@ -1463,7 +1608,11 @@ LABEL(nibble_ashr_9):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_9_check)
+# else
jmp LABEL(gobble_ashr_9)
+# endif
.p2align 4
LABEL(ashr_9_exittail):
@@ -1507,6 +1656,15 @@ LABEL(ashr_10):
lea 10(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_10)
+LABEL(ashr_10_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(loop_ashr_10):
@@ -1588,7 +1746,11 @@ LABEL(nibble_ashr_10):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_10_check)
+# else
jmp LABEL(gobble_ashr_10)
+# endif
.p2align 4
LABEL(ashr_10_exittail):
@@ -1632,6 +1794,15 @@ LABEL(ashr_11):
lea 11(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_11)
+LABEL(ashr_11_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(loop_ashr_11):
@@ -1713,7 +1884,11 @@ LABEL(nibble_ashr_11):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_11_check)
+# else
jmp LABEL(gobble_ashr_11)
+# endif
.p2align 4
LABEL(ashr_11_exittail):
@@ -1757,6 +1932,15 @@ LABEL(ashr_12):
lea 12(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_12)
+LABEL(ashr_12_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(loop_ashr_12):
@@ -1838,7 +2022,11 @@ LABEL(nibble_ashr_12):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_12_check)
+# else
jmp LABEL(gobble_ashr_12)
+# endif
.p2align 4
LABEL(ashr_12_exittail):
@@ -1882,6 +2070,15 @@ LABEL(ashr_13):
lea 13(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_13)
+LABEL(ashr_13_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(loop_ashr_13):
@@ -1963,7 +2160,11 @@ LABEL(nibble_ashr_13):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_13_check)
+# else
jmp LABEL(gobble_ashr_13)
+# endif
.p2align 4
LABEL(ashr_13_exittail):
@@ -2007,6 +2208,15 @@ LABEL(ashr_14):
lea 14(%rdi), %r10
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_14)
+LABEL(ashr_14_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(loop_ashr_14):
@@ -2088,7 +2298,11 @@ LABEL(nibble_ashr_14):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_14_check)
+# else
jmp LABEL(gobble_ashr_14)
+# endif
.p2align 4
LABEL(ashr_14_exittail):
@@ -2134,6 +2348,15 @@ LABEL(ashr_15):
and $0xfff, %r10 /* offset into 4K page */
sub $0x1000, %r10 /* subtract 4K pagesize */
+# ifdef __CHKP__
+ bndcu -16(%rdi, %rcx), %bnd0
+ bndcu -16(%rsi, %rcx), %bnd1
+ jmp LABEL(loop_ashr_15)
+LABEL(ashr_15_check):
+ bndcu (%rdi, %rcx), %bnd0
+ bndcu (%rsi, %rcx), %bnd1
+ jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(loop_ashr_15):
@@ -2215,7 +2438,11 @@ LABEL(nibble_ashr_15):
pxor %xmm0, %xmm0
sub $0x1000, %r10
+# ifdef __CHKP__
+ ja LABEL(ashr_15_check)
+# else
jmp LABEL(gobble_ashr_15)
+# endif
.p2align 4
LABEL(ashr_15_exittail):
@@ -2240,6 +2467,11 @@ LABEL(less32bytes):
test %r8d, %r8d
jz LABEL(ret)
xchg %rsi, %rdi /* recover original order according to flag(%r8d) */
+#ifdef __CHKP__
+ bndmov %bnd0, %bnd2
+ bndmov %bnd1, %bnd0
+ bndmov %bnd2, %bnd1
+#endif
.p2align 4
LABEL(ret):
@@ -2250,6 +2482,10 @@ LABEL(less16bytes):
sub %rdx, %r11
jbe LABEL(strcmp_exitz)
# endif
+/*#ifdef __CHKP__
+ bndcu (%rdi, %rdx), %bnd0
+ bndcu (%rsi, %rdx), %bnd1
+#endif*/
movzbl (%rsi, %rdx), %ecx
movzbl (%rdi, %rdx), %eax
diff --git a/sysdeps/x86_64/strcpy.S b/sysdeps/x86_64/strcpy.S
index 6128247..2b78e95 100644
--- a/sysdeps/x86_64/strcpy.S
+++ b/sysdeps/x86_64/strcpy.S
@@ -26,6 +26,10 @@
.text
ENTRY (STRCPY)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+#endif
movq %rsi, %rcx /* Source register. */
andl $7, %ecx /* mask alignment bits */
movq %rdi, %rdx /* Duplicate destination pointer. */
@@ -36,8 +40,14 @@ ENTRY (STRCPY)
addl $8,%ecx
/* Search the first bytes directly. */
0:
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movb (%rsi), %al /* Fetch a byte */
testb %al, %al /* Is it NUL? */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* Store it */
jz 4f /* If it was NUL, done! */
incq %rsi
@@ -54,6 +64,9 @@ ENTRY (STRCPY)
.p2align 4
1:
/* 1st unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -68,10 +81,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 2nd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -86,10 +105,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 3rd unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -104,10 +129,16 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
/* 4th unroll. */
+#ifdef __CHKP__
+ bndcu (%rsi), %bnd1
+#endif
movq (%rsi), %rax /* Read double word (8 bytes). */
addq $8, %rsi /* Adjust pointer for next word. */
movq %rax, %r9 /* Save a copy for NUL finding. */
@@ -122,6 +153,9 @@ ENTRY (STRCPY)
jnz 3f /* found NUL => return pointer */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movq %rax, (%rdx) /* Write value to destination. */
addq $8, %rdx /* Adjust pointer. */
jmp 1b /* Next iteration. */
@@ -132,10 +166,16 @@ ENTRY (STRCPY)
3:
/* Note that stpcpy needs to return with the value of the NUL
byte. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %al, (%rdx) /* 1st byte. */
testb %al, %al /* Is it NUL. */
jz 4f /* yes, finish. */
incq %rdx /* Increment destination. */
+#ifdef __CHKP__
+ bndcu (%rdx), %bnd0
+#endif
movb %ah, (%rdx) /* 2nd byte. */
testb %ah, %ah /* Is it NUL?. */
jz 4f /* yes, finish. */
diff --git a/sysdeps/x86_64/strcpy_chk-c.c b/sysdeps/x86_64/strcpy_chk-c.c
new file mode 100644
index 0000000..4deabcc
--- /dev/null
+++ b/sysdeps/x86_64/strcpy_chk-c.c
@@ -0,0 +1,3 @@
+#ifdef __CHKP__
+# include <debug/strcpy_chk.c>
+#endif
diff --git a/sysdeps/x86_64/strcpy_chk.S b/sysdeps/x86_64/strcpy_chk.S
index 7e171de..4b79124 100644
--- a/sysdeps/x86_64/strcpy_chk.S
+++ b/sysdeps/x86_64/strcpy_chk.S
@@ -18,6 +18,7 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifndef __CHKP__
#include <sysdep.h>
#include "asm-syntax.h"
@@ -206,3 +207,4 @@ ENTRY (STRCPY_CHK)
jmp HIDDEN_JUMPTARGET (__chk_fail)
END (STRCPY_CHK)
+#endif
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
index 65f8a9e..0acca21 100644
--- a/sysdeps/x86_64/strcspn.S
+++ b/sysdeps/x86_64/strcspn.S
@@ -29,6 +29,12 @@
.text
ENTRY (strcspn)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+# endif
movq %rdi, %rdx /* Save SRC. */
@@ -54,21 +60,34 @@ ENTRY (strcspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from skipset */
+L(2):
+# ifdef __CHKP__
+ bndcu (%rax), %bnd1
+# endif
+ movb (%rax), %cl /* get byte from skipset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+# endif
movb 1(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+# endif
movb 2(%rax), %cl /* get byte from skipset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+# endif
movb 3(%rax), %cl /* get byte from skipset */
addq $4, %rax /* increment skipset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in skipset table */
@@ -89,18 +108,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
movb (%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(4) /* yes => return */
+# ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+# endif
movb 1(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
je L(5) /* yes => return */
+# ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+# endif
movb 2(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* yes => return */
+# ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+# endif
movb 3(%rax), %cl /* get byte from string */
cmpb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jne L(3) /* no => start loop again */
diff --git a/sysdeps/x86_64/strlen.S b/sysdeps/x86_64/strlen.S
index eeb1092..065f0e6 100644
--- a/sysdeps/x86_64/strlen.S
+++ b/sysdeps/x86_64/strlen.S
@@ -63,6 +63,10 @@ L(n_nonzero):
mov %rsi, %r11
#endif
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
pxor %xmm8, %xmm8
pxor %xmm9, %xmm9
pxor %xmm10, %xmm10
@@ -157,6 +161,9 @@ L(loop_init):
L(loop):
addq $64, %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
cmpq %rax, %r10
je L(exit_end)
@@ -182,6 +189,9 @@ L(first):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
.p2align 4
@@ -192,6 +202,9 @@ L(exit):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#else
@@ -199,6 +212,9 @@ L(exit):
/* Main loop. Unrolled twice to improve L2 cache performance on core2. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu 64(%rax), %bnd0
+# endif
movdqa 64(%rax), %xmm8
pminub 80(%rax), %xmm8
@@ -231,6 +247,9 @@ L(exit0):
bsfq %rdx, %rdx
addq %rdx, %rax
subq %rdi, %rax
+# ifdef __CHKP__
+ bndcu -1(%rdi, %rax), %bnd0
+# endif
ret
#endif
diff --git a/sysdeps/x86_64/strrchr.S b/sysdeps/x86_64/strrchr.S
index e413b07..0bd3405 100644
--- a/sysdeps/x86_64/strrchr.S
+++ b/sysdeps/x86_64/strrchr.S
@@ -22,6 +22,10 @@
.text
ENTRY (strrchr)
+# ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+# endif
movd %esi, %xmm1
movq %rdi, %rcx
punpcklbw %xmm1, %xmm1
@@ -46,7 +50,11 @@ ENTRY (strrchr)
orl %ecx, %esi
jnz 1f
-2: movdqa (%rdi), %xmm0
+2:
+# ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+# endif
+ movdqa (%rdi), %xmm0
leaq 16(%rdi), %rdi
movdqa %xmm0, %xmm3
pcmpeqb %xmm1, %xmm0
@@ -73,6 +81,9 @@ ENTRY (strrchr)
bsrl %edx, %edx
jz 4f
leaq -16(%rdi,%rdx), %rax
+# ifdef __CHKP__
+ bndcu (%rax), %bnd0
+# endif
4: ret
END (strrchr)
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
index 2911da2..bd3be8a 100644
--- a/sysdeps/x86_64/strspn.S
+++ b/sysdeps/x86_64/strspn.S
@@ -25,6 +25,12 @@
.text
ENTRY (strspn)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
movq %rdi, %rdx /* Save SRC. */
@@ -50,21 +56,34 @@ ENTRY (strspn)
have a correct zero-extended 64-bit value in %rcx. */
.p2align 4
-L(2): movb (%rax), %cl /* get byte from stopset */
+L(2):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd1
+#endif
+ movb (%rax), %cl /* get byte from stopset */
testb %cl, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd1
+#endif
movb 1(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd1
+#endif
movb 2(%rax), %cl /* get byte from stopset */
testb $0xff, %cl /* is NUL char? */
jz L(1) /* yes => start compare loop */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd1
+#endif
movb 3(%rax), %cl /* get byte from stopset */
addq $4, %rax /* increment stopset pointer */
movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
@@ -85,18 +104,30 @@ L(1): leaq -4(%rdx), %rax /* prepare loop */
.p2align 4
L(3): addq $4, %rax /* adjust pointer for full loop round */
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movb (%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(4) /* no => return */
+#ifdef __CHKP__
+ bndcu 1(%rax), %bnd0
+#endif
movb 1(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(5) /* no => return */
+#ifdef __CHKP__
+ bndcu 2(%rax), %bnd0
+#endif
movb 2(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jz L(6) /* no => return */
+#ifdef __CHKP__
+ bndcu 3(%rax), %bnd0
+#endif
movb 3(%rax), %cl /* get byte from string */
testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
jnz L(3) /* yes => start loop again */
diff --git a/sysdeps/x86_64/strtok.S b/sysdeps/x86_64/strtok.S
index 5636d9a..17e2521 100644
--- a/sysdeps/x86_64/strtok.S
+++ b/sysdeps/x86_64/strtok.S
@@ -90,6 +90,9 @@ ENTRY (FUNCTION)
the last run. */
cmpq $0, %rdx
cmove %rax, %rdx
+#ifdef __CHKP__
+ bndldx (,%rax,1),%bnd0
+#endif
testq %rdx, %rdx
jz L(returnNULL)
movq %rsi, %rax /* Get start of delimiter set. */
diff --git a/sysdeps/x86_64/wcschr.S b/sysdeps/x86_64/wcschr.S
index 3f098dc..3ab1e47 100644
--- a/sysdeps/x86_64/wcschr.S
+++ b/sysdeps/x86_64/wcschr.S
@@ -22,6 +22,11 @@
.text
ENTRY (wcschr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
+
movd %rsi, %xmm1
pxor %xmm2, %xmm2
mov %rdi, %rcx
@@ -43,6 +48,9 @@ ENTRY (wcschr)
and $-16, %rdi
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -78,6 +86,9 @@ L(cross_cache):
L(unaligned_match):
add %rdi, %rax
add %rcx, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
@@ -91,6 +102,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -100,6 +114,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -109,6 +126,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -118,6 +138,9 @@ L(loop):
or %rax, %rdx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -142,6 +165,9 @@ L(matches):
L(match):
sub $16, %rdi
add %rdi, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcscmp.S b/sysdeps/x86_64/wcscmp.S
index d6b516b..38e2849 100644
--- a/sysdeps/x86_64/wcscmp.S
+++ b/sysdeps/x86_64/wcscmp.S
@@ -28,6 +28,14 @@ ENTRY (wcscmp)
*/
mov %esi, %eax
mov %edi, %edx
+
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+ bndcl (%rsi), %bnd1
+ bndcu (%rsi), %bnd1
+#endif
+
pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
mov %al, %ch
mov %dl, %cl
diff --git a/sysdeps/x86_64/wcslen.S b/sysdeps/x86_64/wcslen.S
index 5927352..a7d944f 100644
--- a/sysdeps/x86_64/wcslen.S
+++ b/sysdeps/x86_64/wcslen.S
@@ -21,20 +21,45 @@
.text
ENTRY (__wcslen)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
cmpl $0, (%rdi)
jz L(exit_tail0)
+#ifdef __CHKP__
+ bndcu 4(%rdi), %bnd0
+#endif
cmpl $0, 4(%rdi)
jz L(exit_tail1)
+#ifdef __CHKP__
+ bndcu 8(%rdi), %bnd0
+#endif
cmpl $0, 8(%rdi)
jz L(exit_tail2)
+#ifdef __CHKP__
+ bndcu 12(%rdi), %bnd0
+#endif
cmpl $0, 12(%rdi)
jz L(exit_tail3)
+#ifdef __CHKP__
+ bndcu 16(%rdi), %bnd0
+#endif
cmpl $0, 16(%rdi)
jz L(exit_tail4)
+#ifdef __CHKP__
+ bndcu 20(%rdi), %bnd0
+#endif
cmpl $0, 20(%rdi)
jz L(exit_tail5)
+#ifdef __CHKP__
+ bndcu 24(%rdi), %bnd0
+#endif
cmpl $0, 24(%rdi)
jz L(exit_tail6)
+#ifdef __CHKP__
+ bndcu 28(%rdi), %bnd0
+#endif
cmpl $0, 28(%rdi)
jz L(exit_tail7)
@@ -44,6 +69,9 @@ ENTRY (__wcslen)
lea 16(%rdi), %rcx
and $-16, %rax
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -51,6 +79,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -58,6 +89,9 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -65,54 +99,81 @@ ENTRY (__wcslen)
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%rax), %rax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
pcmpeqd (%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -123,6 +184,9 @@ ENTRY (__wcslen)
.p2align 4
L(aligned_64_loop):
+#ifdef __CHKP__
+ bndcu (%rax), %bnd0
+#endif
movaps (%rax), %xmm0
movaps 16(%rax), %xmm1
movaps 32(%rax), %xmm2
@@ -173,6 +237,9 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
@@ -181,11 +248,17 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
L(exit_1):
add $1, %rax
+#ifdef __CHKP__
+ bndcu -1(%rdi, %rax, 4), %bnd0
+#endif
ret
.p2align 4
diff --git a/sysdeps/x86_64/wcsrchr.S b/sysdeps/x86_64/wcsrchr.S
index ea1e2e5..8edfc46 100644
--- a/sysdeps/x86_64/wcsrchr.S
+++ b/sysdeps/x86_64/wcsrchr.S
@@ -19,9 +19,22 @@
#include <sysdep.h>
+#ifdef __CHKP__
+# define RETURN \
+ bndcu (%rax), %bnd0; \
+ ret
+#else
+# define RETURN ret
+#endif
+
+
.text
ENTRY (wcsrchr)
+#ifdef __CHKP__
+ bndcl (%rdi), %bnd0
+ bndcu (%rdi), %bnd0
+#endif
movd %rsi, %xmm1
mov %rdi, %rcx
punpckldq %xmm1, %xmm1
@@ -92,6 +105,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %rdi
@@ -101,6 +117,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %rdi
@@ -110,6 +129,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %rdi
@@ -119,6 +141,9 @@ L(loop):
or %rax, %rcx
jnz L(matches)
+#ifdef __CHKP__
+ bndcu (%rdi), %bnd0
+#endif
movdqa (%rdi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %rdi
@@ -145,7 +170,7 @@ L(return_value):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match):
@@ -175,14 +200,14 @@ L(find_zero):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_first_wchar):
test $1, %rax
jz L(return_value)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_second_wchar):
@@ -192,7 +217,7 @@ L(find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(find_zero_in_third_wchar):
@@ -204,12 +229,12 @@ L(find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero):
add %rcx, %rdi
- mov %rdx, %rcx
+ mov %rdx, %rcx
L(prolog_find_zero_1):
test $15, %cl
jnz L(prolog_find_zero_in_first_wchar)
@@ -228,14 +253,14 @@ L(prolog_find_zero_1):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_first_wchar):
test $1, %rax
jz L(return_null)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_second_wchar):
@@ -245,7 +270,7 @@ L(prolog_find_zero_in_second_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(prolog_find_zero_in_third_wchar):
@@ -257,22 +282,22 @@ L(prolog_find_zero_in_third_wchar):
test $15 << 4, %al
jnz L(match_second_wchar)
lea -16(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_second_wchar):
lea -12(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_third_wchar):
lea -8(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(match_fourth_wchar):
lea -4(%rdi), %rax
- ret
+ RETURN
.p2align 4
L(return_null):
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=e745179c54860d69ef8cb7f516d0f256b502f89d
commit e745179c54860d69ef8cb7f516d0f256b502f89d
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri May 24 13:18:17 2013 +0400
Implemented bounds check support for string/memory routines for x86_32.
Warning: Not completed and haven't tested.
diff --git a/sysdeps/i386/i486/strcat.S b/sysdeps/i386/i486/strcat.S
index 7d45862..af2602e 100644
--- a/sysdeps/i386/i486/strcat.S
+++ b/sysdeps/i386/i486/strcat.S
@@ -35,9 +35,19 @@ ENTRY (strcat)
movl DEST(%esp), %edx
movl SRC(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndldx SRC(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+#endif
testb $0xff, (%ecx) /* Is source string empty? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+#endif
/* Test the first bytes separately until destination is aligned. */
testl $3, %edx /* destination pointer aligned? */
@@ -66,7 +76,11 @@ ENTRY (strcat)
L(4): addl $16,%edx /* increment destination pointer for round */
-L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+#endif
+ movl (%edx), %eax /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
/* If you compare this with the algorithm in memchr.S you will
@@ -98,6 +112,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
/* If at least one byte of the word is C we don't get 0 in %ecx. */
jnz L(3)
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+#endif
movl 4(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -110,6 +127,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(5) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+#endif
movl 8(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -122,6 +142,9 @@ L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+#endif
movl 12(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -155,6 +178,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
/* Now we have to align the source pointer. */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -163,6 +190,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -171,6 +202,10 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
@@ -182,10 +217,18 @@ L(2): subl %ecx, %edx /* reduce number of loop variants */
ALIGN(4)
-L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
+L(28):
+#ifdef __CHKP__
+ bndcu 12(%ecx, %edx), %bnd0
+#endif
+ movl %eax, 12(%ecx,%edx)/* store word at destination */
addl $16, %ecx /* adjust pointer for full round */
-L(29): movl (%ecx), %eax /* get word from source */
+L(29):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+#endif
+ movl (%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -196,8 +239,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(9) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
movl %eax, (%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+#endif
movl 4(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -209,8 +258,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(91) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 4(%ecx, %edx), %bnd0
+#endif
movl %eax, 4(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+#endif
movl 8(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -222,8 +277,14 @@ L(29): movl (%ecx), %eax /* get word from source */
incl %edi /* add 1: if one carry bit was *not* set
the addition will not result in 0. */
jnz L(92) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%ecx, %edx), %bnd0
+#endif
movl %eax, 8(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+#endif
movl 12(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word. We get
@@ -240,15 +301,25 @@ L(93): addl $4, %ecx /* adjust pointer */
L(92): addl $4, %ecx
L(91): addl $4, %ecx
-L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
+L(9):
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
+ movb %al, (%ecx,%edx) /* store first byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcu 1(%ecx, %edx), %bnd0
+#endif
movb %ah, 1(%ecx,%edx) /* store second byte of last word */
orb %ah, %ah /* is it NUL? */
jz L(8) /* yes => return */
shrl $16, %eax /* make upper bytes accessible */
+#ifdef __CHKP__
+ bndcu 2(%ecx, %edx), %bnd0
+#endif
movb %al, 2(%ecx,%edx) /* store third byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
diff --git a/sysdeps/i386/i586/strchr.S b/sysdeps/i386/i586/strchr.S
index 648d528..4efa935 100644
--- a/sysdeps/i386/i586/strchr.S
+++ b/sysdeps/i386/i586/strchr.S
@@ -54,6 +54,10 @@ ENTRY (strchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
movl %eax, %edi /* duplicate string pointer for later */
cfi_rel_offset (edi, 12)
@@ -83,6 +87,9 @@ ENTRY (strchr)
xorb %dl, %cl /* load single byte and test for NUL */
je L(3) /* yes => return NULL */
+#ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+#endif
movb 1(%eax), %cl /* load single byte */
incl %eax
@@ -97,7 +104,11 @@ ENTRY (strchr)
jne L(11)
-L(0): movb (%eax), %cl /* load single byte */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movb (%eax), %cl /* load single byte */
cmpb %cl, %dl /* is byte == C? */
je L(out) /* aligned => return pointer */
@@ -115,7 +126,11 @@ L(0): movb (%eax), %cl /* load single byte */
four instruction up to `L1' will not be executed in the loop
because the same code is found at the end of the loop, but
there it is executed in parallel with other instructions. */
-L(11): movl (%eax), %ecx
+L(11):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx
movl $magic, %ebp
movl $magic, %edi
@@ -159,6 +174,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi /* load magic value */
xorl %edx, %ebx /* clear words which are C */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi /* (word+magic) */
@@ -189,6 +207,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -219,6 +240,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
@@ -249,6 +273,9 @@ L(1): xorl %ecx, %ebp /* (word^magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
diff --git a/sysdeps/i386/i586/strcpy.S b/sysdeps/i386/i586/strcpy.S
index c940369..6392a8e 100644
--- a/sysdeps/i386/i586/strcpy.S
+++ b/sysdeps/i386/i586/strcpy.S
@@ -45,6 +45,10 @@ ENTRY (STRCPY)
cfi_rel_offset (edi, 8)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edi,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+#endif
xorl %eax, %eax
leal -1(%esi), %ecx
@@ -61,6 +65,9 @@ ENTRY (STRCPY)
/* 0xb is the distance between 2: and 1: but we avoid writing
1f-2b because the assembler generates worse code. */
leal 0xb(%edx,%ecx,8), %ecx
+# ifdef __CHKP__
+ jmp L(1)
+# endif
#else
leal 1f(,%ecx,8), %ecx
#endif
diff --git a/sysdeps/i386/i586/strlen.S b/sysdeps/i386/i586/strlen.S
index b50fffa..9034625 100644
--- a/sysdeps/i386/i586/strlen.S
+++ b/sysdeps/i386/i586/strlen.S
@@ -41,6 +41,10 @@
ENTRY (strlen)
movl STR(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcu (%eax),%bnd0
+#endif
movl $3, %edx /* load mask (= 3) */
andl %eax, %edx /* separate last two bits of address */
@@ -48,10 +52,16 @@ ENTRY (strlen)
jz L(1) /* aligned => start loop */
jp L(0) /* exactly two bits set */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
@@ -61,7 +71,11 @@ ENTRY (strlen)
jz L(1)
-L(0): cmpb %dh, (%eax) /* is byte NUL? */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
@@ -174,7 +188,11 @@ L(3): subl $4, %eax /* correct too early pointer increment */
incl %eax /* increment pointer */
-L(2): subl STR(%esp), %eax /* now compute the length as difference
+L(2):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ subl STR(%esp), %eax /* now compute the length as difference
between start and terminating NUL
character */
ret
diff --git a/sysdeps/i386/i686/memcmp.S b/sysdeps/i386/i686/memcmp.S
index b8091a6..6cb03e7 100644
--- a/sysdeps/i386/i686/memcmp.S
+++ b/sysdeps/i386/i686/memcmp.S
@@ -48,9 +48,19 @@ ENTRY (memcmp)
movl BLK1(%esp), %eax
movl BLK2(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+#endif
cmpl $1, %ecx
jne L(not_1)
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
movzbl (%eax), %ecx /* LEN == 1 */
cmpb (%edx), %cl
jne L(neq)
@@ -69,6 +79,12 @@ L(neq):
cfi_rel_offset (ebx, 0)
L(not_1):
jl L(bye) /* LEN == 0 */
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
pushl %esi
cfi_adjust_cfa_offset (4)
@@ -84,36 +100,64 @@ L(not_1):
ALIGN (4)
L(28bytes):
+#ifdef __CHKP__
+ bndcu -28(%esi), %bnd0
+ bndcu -28(%edx), %bnd1
+#endif
movl -28(%esi), %eax
movl -28(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(24bytes):
+#ifdef __CHKP__
+ bndcu -24(%esi), %bnd0
+ bndcu -24(%edx), %bnd1
+#endif
movl -24(%esi), %eax
movl -24(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(20bytes):
+#ifdef __CHKP__
+ bndcu -20(%esi), %bnd0
+ bndcu -20(%edx), %bnd1
+#endif
movl -20(%esi), %eax
movl -20(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(16bytes):
+#ifdef __CHKP__
+ bndcu -16(%esi), %bnd0
+ bndcu -16(%edx), %bnd1
+#endif
movl -16(%esi), %eax
movl -16(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(12bytes):
+#ifdef __CHKP__
+ bndcu -12(%esi), %bnd0
+ bndcu -12(%edx), %bnd1
+#endif
movl -12(%esi), %eax
movl -12(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(8bytes):
+#ifdef __CHKP__
+ bndcu -8(%esi), %bnd0
+ bndcu -8(%edx), %bnd1
+#endif
movl -8(%esi), %eax
movl -8(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(4bytes):
+#ifdef __CHKP__
+ bndcu -4(%esi), %bnd0
+ bndcu -4(%edx), %bnd1
+#endif
movl -4(%esi), %eax
movl -4(%edx), %ecx
cmpl %ecx, %eax
@@ -129,41 +173,73 @@ L(0bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(29bytes):
+#ifdef __CHKP__
+ bndcu -29(%esi), %bnd0
+ bndcu -29(%edx), %bnd1
+#endif
movl -29(%esi), %eax
movl -29(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(25bytes):
+#ifdef __CHKP__
+ bndcu -25(%esi), %bnd0
+ bndcu -25(%edx), %bnd1
+#endif
movl -25(%esi), %eax
movl -25(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(21bytes):
+#ifdef __CHKP__
+ bndcu -21(%esi), %bnd0
+ bndcu -21(%edx), %bnd1
+#endif
movl -21(%esi), %eax
movl -21(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(17bytes):
+#ifdef __CHKP__
+ bndcu -17(%esi), %bnd0
+ bndcu -17(%edx), %bnd1
+#endif
movl -17(%esi), %eax
movl -17(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(13bytes):
+#ifdef __CHKP__
+ bndcu -13(%esi), %bnd0
+ bndcu -13(%edx), %bnd1
+#endif
movl -13(%esi), %eax
movl -13(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(9bytes):
+#ifdef __CHKP__
+ bndcu -9(%esi), %bnd0
+ bndcu -9(%edx), %bnd1
+#endif
movl -9(%esi), %eax
movl -9(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(5bytes):
+#ifdef __CHKP__
+ bndcu -5(%esi), %bnd0
+ bndcu -5(%edx), %bnd1
+#endif
movl -5(%esi), %eax
movl -5(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(1bytes):
+#ifdef __CHKP__
+ bndcu -1(%esi), %bnd0
+ bndcu -1(%edx), %bnd1
+#endif
movzbl -1(%esi), %eax
cmpb -1(%edx), %al
jne L(set)
@@ -177,41 +253,73 @@ L(1bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(30bytes):
+#ifdef __CHKP__
+ bndcu -30(%esi), %bnd0
+ bndcu -30(%edx), %bnd1
+#endif
movl -30(%esi), %eax
movl -30(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(26bytes):
+#ifdef __CHKP__
+ bndcu -26(%esi), %bnd0
+ bndcu -26(%edx), %bnd1
+#endif
movl -26(%esi), %eax
movl -26(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(22bytes):
+#ifdef __CHKP__
+ bndcu -22(%esi), %bnd0
+ bndcu -22(%edx), %bnd1
+#endif
movl -22(%esi), %eax
movl -22(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(18bytes):
+#ifdef __CHKP__
+ bndcu -18(%esi), %bnd0
+ bndcu -18(%edx), %bnd1
+#endif
movl -18(%esi), %eax
movl -18(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(14bytes):
+#ifdef __CHKP__
+ bndcu -14(%esi), %bnd0
+ bndcu -14(%edx), %bnd1
+#endif
movl -14(%esi), %eax
movl -14(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(10bytes):
+#ifdef __CHKP__
+ bndcu -10(%esi), %bnd0
+ bndcu -10(%edx), %bnd1
+#endif
movl -10(%esi), %eax
movl -10(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(6bytes):
+#ifdef __CHKP__
+ bndcu -6(%esi), %bnd0
+ bndcu -6(%edx), %bnd1
+#endif
movl -6(%esi), %eax
movl -6(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(2bytes):
+#ifdef __CHKP__
+ bndcu -2(%esi), %bnd0
+ bndcu -2(%edx), %bnd1
+#endif
movzwl -2(%esi), %eax
movzwl -2(%edx), %ecx
cmpb %cl, %al
@@ -228,41 +336,73 @@ L(2bytes):
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(31bytes):
+#ifdef __CHKP__
+ bndcu -31(%esi), %bnd0
+ bndcu -31(%edx), %bnd1
+#endif
movl -31(%esi), %eax
movl -31(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(27bytes):
+#ifdef __CHKP__
+ bndcu -27(%esi), %bnd0
+ bndcu -27(%edx), %bnd1
+#endif
movl -27(%esi), %eax
movl -27(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(23bytes):
+#ifdef __CHKP__
+ bndcu -23(%esi), %bnd0
+ bndcu -23(%edx), %bnd1
+#endif
movl -23(%esi), %eax
movl -23(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(19bytes):
+#ifdef __CHKP__
+ bndcu -19(%esi), %bnd0
+ bndcu -19(%edx), %bnd1
+#endif
movl -19(%esi), %eax
movl -19(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(15bytes):
+#ifdef __CHKP__
+ bndcu -15(%esi), %bnd0
+ bndcu -15(%edx), %bnd1
+#endif
movl -15(%esi), %eax
movl -15(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(11bytes):
+#ifdef __CHKP__
+ bndcu -11(%esi), %bnd0
+ bndcu -11(%edx), %bnd1
+#endif
movl -11(%esi), %eax
movl -11(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(7bytes):
+#ifdef __CHKP__
+ bndcu -7(%esi), %bnd0
+ bndcu -7(%edx), %bnd1
+#endif
movl -7(%esi), %eax
movl -7(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(3bytes):
+#ifdef __CHKP__
+ bndcu -3(%esi), %bnd0
+ bndcu -3(%edx), %bnd1
+#endif
movzwl -3(%esi), %eax
movzwl -3(%edx), %ecx
cmpb %cl, %al
@@ -286,34 +426,66 @@ L(3bytes):
L(32bytesormore):
subl $32, %ecx
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edx), %bnd1
+#endif
movl (%esi), %eax
cmpl (%edx), %eax
jne L(load_ecx)
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+ bndcu 4(%edx), %bnd1
+#endif
movl 4(%esi), %eax
cmpl 4(%edx), %eax
jne L(load_ecx_4)
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+ bndcu 8(%edx), %bnd1
+#endif
movl 8(%esi), %eax
cmpl 8(%edx), %eax
jne L(load_ecx_8)
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+ bndcu 12(%edx), %bnd1
+#endif
movl 12(%esi), %eax
cmpl 12(%edx), %eax
jne L(load_ecx_12)
+#ifdef __CHKP__
+ bndcu 16(%esi), %bnd0
+ bndcu 16(%edx), %bnd1
+#endif
movl 16(%esi), %eax
cmpl 16(%edx), %eax
jne L(load_ecx_16)
+#ifdef __CHKP__
+ bndcu 20(%esi), %bnd0
+ bndcu 20(%edx), %bnd1
+#endif
movl 20(%esi), %eax
cmpl 20(%edx), %eax
jne L(load_ecx_20)
+#ifdef __CHKP__
+ bndcu 24(%esi), %bnd0
+ bndcu 24(%edx), %bnd1
+#endif
movl 24(%esi), %eax
cmpl 24(%edx), %eax
jne L(load_ecx_24)
+#ifdef __CHKP__
+ bndcu 28(%esi), %bnd0
+ bndcu 28(%edx), %bnd1
+#endif
movl 28(%esi), %eax
cmpl 28(%edx), %eax
jne L(load_ecx_28)
diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S
index aed79a8..3fd4370 100644
--- a/sysdeps/i386/i686/memset.S
+++ b/sysdeps/i386/i686/memset.S
@@ -50,6 +50,11 @@ ENTRY (memset)
cfi_adjust_cfa_offset (4)
movl DEST(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
#if BZERO_P
xorl %eax, %eax /* fill with 0 */
#else
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 8946bfa..7a4999a 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -6,9 +6,7 @@ endif
ifeq ($(subdir),string)
gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+sysdep_routines += bzero-sse2 memset-sse2 \
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
@@ -23,7 +21,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
strnlen-sse2 strnlen-c \
strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
strncase_l-c strncase-c strncase_l-ssse3 \
- strcasecmp_l-sse4 strncase_l-sse4
+ strcasecmp_l-sse4 strncase_l-sse4 mpx_memcpy_nobnd \
+ mpx_mempcpy_nobnd mpx_memmove_nobnd
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/Versions b/sysdeps/i386/i686/multiarch/Versions
index 59b185a..7f0cbbc 100644
--- a/sysdeps/i386/i686/multiarch/Versions
+++ b/sysdeps/i386/i686/multiarch/Versions
@@ -2,4 +2,11 @@ libc {
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.14 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ }
+%endif
}
diff --git a/sysdeps/i386/i686/multiarch/bcopy.S b/sysdeps/i386/i686/multiarch/__bcopy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/bcopy.S
rename to sysdeps/i386/i686/multiarch/__bcopy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy.S b/sysdeps/i386/i686/multiarch/__memcpy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memcpy.S
rename to sysdeps/i386/i686/multiarch/__memcpy.S
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.S b/sysdeps/i386/i686/multiarch/__memcpy_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memcpy_chk.S
rename to sysdeps/i386/i686/multiarch/__memcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/memmove.S b/sysdeps/i386/i686/multiarch/__memmove.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memmove.S
rename to sysdeps/i386/i686/multiarch/__memmove.S
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.S b/sysdeps/i386/i686/multiarch/__memmove_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/memmove_chk.S
rename to sysdeps/i386/i686/multiarch/__memmove_chk.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.S b/sysdeps/i386/i686/multiarch/__mempcpy.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/mempcpy.S
rename to sysdeps/i386/i686/multiarch/__mempcpy.S
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.S b/sysdeps/i386/i686/multiarch/__mempcpy_chk.S
similarity index 100%
rename from sysdeps/i386/i686/multiarch/mempcpy_chk.S
rename to sysdeps/i386/i686/multiarch/__mempcpy_chk.S
diff --git a/sysdeps/i386/i686/multiarch/bcopy.c b/sysdeps/i386/i686/multiarch/bcopy.c
new file mode 100644
index 0000000..6f5efba
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/bcopy.c
@@ -0,0 +1,7 @@
+#include <stddef.h>
+
+void
+bcopy (const void *src, void *dst, size_t n)
+{
+ memmove (dst, src, n);
+}
diff --git a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
index 2c282bd..63f0704 100644
--- a/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
+++ b/sysdeps/i386/i686/multiarch/ifunc-impl-list.c
@@ -37,11 +37,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
size_t i = 0;
/* Support sysdeps/i386/i686/multiarch/bcopy.S. */
- IFUNC_IMPL (i, name, bcopy,
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
- __bcopy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
- IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
+// IFUNC_IMPL (i, name, bcopy,
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
+// __bcopy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+// IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
/* Support sysdeps/i386/i686/multiarch/bzero.S. */
IFUNC_IMPL (i, name, bzero,
@@ -64,21 +64,21 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
- IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_ia32))
+// IFUNC_IMPL (i, name, __memmove_chk,
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+// __memmove_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove.S. */
- IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
+// IFUNC_IMPL (i, name, memmove,
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3)
+// IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
/* Support sysdeps/i386/i686/multiarch/memrchr.S. */
IFUNC_IMPL (i, name, memrchr,
@@ -274,37 +274,37 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
#ifdef SHARED
/* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
- IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __memcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+// __memcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memcpy.S. */
- IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
- __memcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
+// IFUNC_IMPL (i, name, memcpy,
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+// __memcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
- IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __mempcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+// __mempcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
- IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
+// IFUNC_IMPL (i, name, mempcpy,
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strlen.S. */
IFUNC_IMPL (i, name, strlen,
diff --git a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
index d364177..80be0d9 100644
--- a/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S
@@ -58,6 +58,12 @@ ENTRY (MEMCHR)
# endif
mov %ecx, %eax
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
@@ -79,9 +85,18 @@ ENTRY (MEMCHR)
# ifndef USE_AS_RAWMEMCHR
sub %ecx, %edx
jbe L(return_null_1)
-# endif
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ ret
+# else
+ add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
+# endif
.p2align 4
L(unaligned_no_match_1):
@@ -163,8 +178,15 @@ L(loop_prolog):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
# endif
pcmpeqb %xmm1, %xmm0
@@ -173,8 +195,15 @@ L(loop_prolog):
jnz L(matches)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqa 16(%edi), %xmm2
# else
+
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
movdqa 16(%edx), %xmm2
# endif
pcmpeqb %xmm1, %xmm2
@@ -183,8 +212,15 @@ L(loop_prolog):
jnz L(matches16)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 32(%edi), %bnd0
+# endif
movdqa 32(%edi), %xmm3
# else
+
+# ifdef __CHKP__
+ bndcu 32(%edx), %bnd0
+# endif
movdqa 32(%edx), %xmm3
# endif
pcmpeqb %xmm1, %xmm3
@@ -193,8 +229,15 @@ L(loop_prolog):
jnz L(matches32)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 48(%edi), %bnd0
+# endif
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu 48(%edx), %bnd0
+# endif
movdqa 48(%edx), %xmm4
# endif
pcmpeqb %xmm1, %xmm4
@@ -277,11 +320,18 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
movdqa 16(%edx), %xmm2
movdqa 32(%edx), %xmm3
@@ -342,9 +392,15 @@ L(align64_loop):
# ifndef USE_AS_RAWMEMCHR
lea 48(%edi, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 48(%edx, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -404,9 +460,15 @@ L(matches0):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea -16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea -16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -415,9 +477,15 @@ L(matches):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
add %edx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -426,9 +494,15 @@ L(matches16):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
@@ -437,9 +511,15 @@ L(matches32):
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 32(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 32(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
diff --git a/sysdeps/i386/i686/multiarch/memcmp-sse4.S b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
index 2984a37..3ccfe66 100644
--- a/sysdeps/i386/i686/multiarch/memcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/memcmp-sse4.S
@@ -91,6 +91,15 @@ ENTRY (MEMCMP)
jbe L(less1bytes)
# endif
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
+
pxor %xmm0, %xmm0
cmp $64, %ecx
ja L(64bytesormore)
@@ -115,6 +124,10 @@ L(less8bytes):
cmpb (%edx), %bl
jne L(nonzero)
+# ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+ bndcu 1(%edx), %bnd1
+# endif
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
@@ -122,6 +135,10 @@ L(less8bytes):
cmp $2, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 2(%eax), %bnd0
+ bndcu 2(%edx), %bnd1
+# endif
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
@@ -129,6 +146,10 @@ L(less8bytes):
cmp $3, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 3(%eax), %bnd0
+ bndcu 3(%edx), %bnd1
+# endif
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
@@ -136,6 +157,10 @@ L(less8bytes):
cmp $4, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+ bndcu 4(%edx), %bnd1
+# endif
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
@@ -143,6 +168,10 @@ L(less8bytes):
cmp $5, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 5(%eax), %bnd0
+ bndcu 5(%edx), %bnd1
+# endif
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
@@ -150,6 +179,10 @@ L(less8bytes):
cmp $6, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 6(%eax), %bnd0
+ bndcu 6(%edx), %bnd1
+# endif
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
@@ -198,6 +231,14 @@ L(return0):
.p2align 4
L(less1bytes):
jb L(0bytesend)
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
movzbl (%eax), %eax
movzbl (%edx), %edx
sub %edx, %eax
@@ -221,18 +262,30 @@ L(64bytesormore_loop):
ptest %xmm2, %xmm0
jnc L(find_16diff)
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+ bndcu 16(%edx), %bnd1
+# endif
movdqu 16(%eax), %xmm1
movdqu 16(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_32diff)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+ bndcu 32(%edx), %bnd1
+# endif
movdqu 32(%eax), %xmm1
movdqu 32(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_48diff)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+ bndcu 48(%edx), %bnd1
+# endif
movdqu 48(%eax), %xmm1
movdqu 48(%edx), %xmm2
pxor %xmm1, %xmm2
diff --git a/sysdeps/i386/i686/multiarch/memcpy.c b/sysdeps/i386/i686/multiarch/memcpy.c
new file mode 100644
index 0000000..824cdcb
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy.c
@@ -0,0 +1,40 @@
+/* Fallback C implementation of memcpy for the Intel MPX multiarch
+   build: copies N bytes from SRC to DST and returns DST.  Assumes the
+   regions do not overlap, per the memcpy contract.  Copies byte by
+   byte until the pointers are word aligned, then word by word, then
+   finishes the tail byte by byte.  */
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+  const char *s = src;
+  char *d = dst;
+  void *ret = dst;
+  /* Distance of each pointer past the previous word boundary.  */
+  size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+  size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  if (offset_src != offset_dst)
+    {
+      /* Mutually misaligned: the pointers can never reach a common
+         word boundary, so copy byte by byte the whole way.  */
+      while (n--)
+        *d++ = *s++;
+    }
+  else
+    {
+      /* Copy bytes until both pointers sit on a word boundary.  */
+      if (offset_src) offset_src = sizeof(size_t) - offset_src;
+      while (n-- && offset_src--)
+        *d++ = *s++;
+      n++;  /* Undo the post-decrement of the test that ended the loop.  */
+      if (!n) return ret;
+      /* Bulk copy one word at a time.  */
+      void **d1 = (void **)d;
+      void **s1 = (void **)s;
+      while (n >= sizeof(void *))
+        {
+          n -= sizeof(void *);
+          *d1++ = *s1++;
+        }
+      /* Copy the remaining tail bytes.  */
+      s = (char *)s1;
+      d = (char *)d1;
+      while (n--)
+        *d++ = *s++;
+    }
+  return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+weak_alias (__memcpy, memcpy)
diff --git a/sysdeps/i386/i686/multiarch/memcpy_chk.c b/sysdeps/i386/i686/multiarch/memcpy_chk.c
new file mode 100644
index 0000000..1eee86c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/memcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memmove.c b/sysdeps/i386/i686/multiarch/memmove.c
new file mode 100644
index 0000000..9e5ad6d
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove.c
@@ -0,0 +1,76 @@
+/* Fallback C implementation of memmove for the Intel MPX multiarch
+   build: copies N bytes from SRC to DST, handles overlapping regions,
+   and returns DST.  When SRC and DST are mutually misaligned the copy
+   is done byte by byte in the overlap-safe direction; otherwise bytes
+   are copied until the working pointers are word aligned and the bulk
+   is moved word by word.  */
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+  const char *s = src;
+  char *d = dst;
+  void *ret = dst;
+  /* Distance of each pointer past the previous word boundary.  */
+  size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+  size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  if (offset_src != offset_dst)
+    {
+      /* Mutually misaligned: word copies can never line up, so copy
+         byte by byte in the direction that is safe for overlap.  */
+      if (s < d)
+        {
+          // backward copying
+          d += n;
+          s += n;
+          while (n--)
+            *--d = *--s;
+        }
+      else
+        // forward copying
+        while (n--)
+          *d++ = *s++;
+    }
+  else
+    {
+      if (s < d)
+        {
+          /* Number of bytes to copy before the END pointers (s + n,
+             d + n) reach a word boundary, since the backward word
+             loop below pre-decrements from the end.  The previous
+             code computed (offset_src + (size_t) src) & mask, which
+             ignores N and left the word copy misaligned.  */
+          offset_src = ((size_t) s + n) & (sizeof(size_t) - 1);
+          // backward copying
+          d += n;
+          s += n;
+          while (n-- && offset_src--)
+            *--d = *--s;
+          n++;  /* Undo the post-decrement of the failed loop test.  */
+          if (!n) return ret;
+          /* Bulk copy backward one word at a time.  */
+          void **d1 = (void **)d;
+          void **s1 = (void **)s;
+          while (n >= sizeof(void *))
+            {
+              n -= sizeof(void *);
+              *--d1 = *--s1;
+            }
+          /* Copy the remaining head bytes.  */
+          s = (char *)s1;
+          d = (char *)d1;
+          while (n--)
+            *--d = *--s;
+        }
+      else
+        {
+          /* Bytes needed to bring both start pointers up to a word
+             boundary.  */
+          if (offset_src) offset_src = sizeof(size_t) - offset_src;
+          // forward copying
+          while (n-- && offset_src--)
+            *d++ = *s++;
+          n++;  /* Undo the post-decrement of the failed loop test.  */
+          if (!n) return ret;
+          /* Bulk copy forward one word at a time.  */
+          void **d1 = (void **)d;
+          void **s1 = (void **)s;
+          while (n >= sizeof(void *))
+            {
+              n -= sizeof(void *);
+              *d1++ = *s1++;
+            }
+          /* Copy the remaining tail bytes.  */
+          s = (char *)s1;
+          d = (char *)d1;
+          while (n--)
+            *d++ = *s++;
+        }
+    }
+  return ret;
+}
+
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
diff --git a/sysdeps/i386/i686/multiarch/memmove_chk.c b/sysdeps/i386/i686/multiarch/memmove_chk.c
new file mode 100644
index 0000000..bbf53d0
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/memmove_chk.c
@@ -0,0 +1 @@
+#include <debug/memmove_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/mempcpy.c b/sysdeps/i386/i686/multiarch/mempcpy.c
new file mode 100644
index 0000000..6cbdad1
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy.c
@@ -0,0 +1,40 @@
+/* Fallback C implementation of mempcpy for the Intel MPX multiarch
+   build: copies N bytes from SRC to DST and, unlike memcpy, returns
+   DST + N (one past the last byte written).  Assumes the regions do
+   not overlap.  */
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+  const char *s = src;
+  char *d = dst;
+  /* Return one past the end of the destination.  Cast to char *
+     first: arithmetic on void * is a GNU extension, not ISO C.  */
+  void *ret = (char *) dst + n;
+  /* Distance of each pointer past the previous word boundary.  */
+  size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+  size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  if (offset_src != offset_dst)
+    {
+      /* Mutually misaligned: word copies can never line up, so copy
+         byte by byte the whole way.  */
+      while (n--)
+        *d++ = *s++;
+    }
+  else
+    {
+      /* Copy bytes until both pointers sit on a word boundary.  */
+      if (offset_src) offset_src = sizeof(size_t) - offset_src;
+      while (n-- && offset_src--)
+        *d++ = *s++;
+      n++;  /* Undo the post-decrement of the failed loop test.  */
+      if (!n) return ret;
+      /* Bulk copy one word at a time.  */
+      void **d1 = (void **)d;
+      void **s1 = (void **)s;
+      while (n >= sizeof(void *))
+        {
+          n -= sizeof(void *);
+          *d1++ = *s1++;
+        }
+      /* Copy the remaining tail bytes.  */
+      s = (char *)s1;
+      d = (char *)d1;
+      while (n--)
+        *d++ = *s++;
+    }
+  return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __mempcpy)
diff --git a/sysdeps/i386/i686/multiarch/mempcpy_chk.c b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
new file mode 100644
index 0000000..ba17078
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mempcpy_chk.c
@@ -0,0 +1 @@
+#include <debug/mempcpy_chk.c>
diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
index c5c3e97..75c947c 100644
--- a/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S
@@ -45,6 +45,12 @@ ENTRY (MEMCHR)
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu -1(%ecx, %edx), %bnd0
+# endif
+
sub $16, %edx
jbe L(length_less16)
diff --git a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
index bcea296..ce112b1 100644
--- a/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
+++ b/sysdeps/i386/i686/multiarch/memset-sse2-rep.S
@@ -90,6 +90,7 @@ ENTRY (__memset_sse2_rep)
ENTRANCE
movl LEN(%esp), %ecx
+
#ifdef USE_AS_BZERO
xor %eax, %eax
#else
@@ -101,6 +102,11 @@ ENTRY (__memset_sse2_rep)
or %edx, %eax
#endif
movl DEST(%esp), %edx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
cmp $32, %ecx
jae L(32bytesormore)
diff --git a/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
new file mode 100644
index 0000000..b7f4e0e
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
@@ -0,0 +1,1803 @@
+/* memcpy with SSSE3 and REP string.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY mpx_memcpy_nobnd
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it. TABLE is a
+ jump table with relative offsets. INDEX is a register contains the
+ index into the jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
+ addl $(TABLE - .), %ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+#else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table. TABLE is a jump table with
+ absolute offsets. INDEX is a register contains the index into the
+ jump table. SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+#ifdef __CHKP__
+ bndldx SRC(%esp,%eax,1), %bnd1
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu -1(%eax, %ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+#endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
+L(fwd_write_less32bytes):
+#ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+#endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+#ifndef USE_AS_MEMMOVE
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+#endif
+
+ ALIGN (4)
+/* ECX > 32 and EDX is 4 byte aligned. */
+L(48bytesormore):
+ movdqu (%eax), %xmm0
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ PUSH (%esi)
+ cfi_remember_state
+ add $16, %edx
+ movl %edi, %esi
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+#ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+#endif
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ ALIGN (4)
+L(shl_0):
+ movdqu %xmm0, (%esi)
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+L(shl_0_gobble):
+
+#ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
+# else
+ mov __x86_data_cache_size_half, %edi
+# endif
+#endif
+ mov %edi, %esi
+ shr $3, %esi
+ sub %esi, %edi
+ cmp %edi, %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
+ ALIGN (4)
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_0_gobble_mem_start):
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+ sub $128, %ecx
+L(shl_0_gobble_mem_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ prefetchnta 0x1c0(%edx)
+ prefetchnta 0x280(%edx)
+
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_1):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $1, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_1_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_1_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_1_loop)
+
+L(shl_1_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_2):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $2, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_2_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_2_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_2_loop)
+
+L(shl_2_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_3):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $3, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_3_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_3_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_3_loop)
+
+L(shl_3_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_4):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $4, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_4_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_4_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_4_loop)
+
+L(shl_4_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_5):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $5, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_5_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_5_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_5_loop)
+
+L(shl_5_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_6):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $6, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_6_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_6_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_6_loop)
+
+L(shl_6_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_7):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $7, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_7_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_7_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_7_loop)
+
+L(shl_7_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_8):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $8, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_8_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_8_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_8_loop)
+
+L(shl_8_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_9):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $9, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_9_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_9_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_9_loop)
+
+L(shl_9_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_10):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $10, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_10_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_10_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_10_loop)
+
+L(shl_10_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_11):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $11, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_11_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_11_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_11_loop)
+
+L(shl_11_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_12):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $12, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_12_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_12_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_12_loop)
+
+L(shl_12_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_13):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $13, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_13_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_13_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_13_loop)
+
+L(shl_13_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_14):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $14, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_14_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_14_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_14_loop)
+
+L(shl_14_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_15):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $15, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_15_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_15_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_15_loop)
+
+L(shl_15_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+
+ ALIGN (4)
+L(fwd_write_44bytes):
+ movl -44(%eax), %ecx
+ movl %ecx, -44(%edx)
+L(fwd_write_40bytes):
+ movl -40(%eax), %ecx
+ movl %ecx, -40(%edx)
+L(fwd_write_36bytes):
+ movl -36(%eax), %ecx
+ movl %ecx, -36(%edx)
+L(fwd_write_32bytes):
+ movl -32(%eax), %ecx
+ movl %ecx, -32(%edx)
+L(fwd_write_28bytes):
+ movl -28(%eax), %ecx
+ movl %ecx, -28(%edx)
+L(fwd_write_24bytes):
+ movl -24(%eax), %ecx
+ movl %ecx, -24(%edx)
+L(fwd_write_20bytes):
+ movl -20(%eax), %ecx
+ movl %ecx, -20(%edx)
+L(fwd_write_16bytes):
+ movl -16(%eax), %ecx
+ movl %ecx, -16(%edx)
+L(fwd_write_12bytes):
+ movl -12(%eax), %ecx
+ movl %ecx, -12(%edx)
+L(fwd_write_8bytes):
+ movl -8(%eax), %ecx
+ movl %ecx, -8(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+L(fwd_write_0bytes):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_45bytes):
+ movl -45(%eax), %ecx
+ movl %ecx, -45(%edx)
+L(fwd_write_41bytes):
+ movl -41(%eax), %ecx
+ movl %ecx, -41(%edx)
+L(fwd_write_37bytes):
+ movl -37(%eax), %ecx
+ movl %ecx, -37(%edx)
+L(fwd_write_33bytes):
+ movl -33(%eax), %ecx
+ movl %ecx, -33(%edx)
+L(fwd_write_29bytes):
+ movl -29(%eax), %ecx
+ movl %ecx, -29(%edx)
+L(fwd_write_25bytes):
+ movl -25(%eax), %ecx
+ movl %ecx, -25(%edx)
+L(fwd_write_21bytes):
+ movl -21(%eax), %ecx
+ movl %ecx, -21(%edx)
+L(fwd_write_17bytes):
+ movl -17(%eax), %ecx
+ movl %ecx, -17(%edx)
+L(fwd_write_13bytes):
+ movl -13(%eax), %ecx
+ movl %ecx, -13(%edx)
+L(fwd_write_9bytes):
+ movl -9(%eax), %ecx
+ movl %ecx, -9(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_46bytes):
+ movl -46(%eax), %ecx
+ movl %ecx, -46(%edx)
+L(fwd_write_42bytes):
+ movl -42(%eax), %ecx
+ movl %ecx, -42(%edx)
+L(fwd_write_38bytes):
+ movl -38(%eax), %ecx
+ movl %ecx, -38(%edx)
+L(fwd_write_34bytes):
+ movl -34(%eax), %ecx
+ movl %ecx, -34(%edx)
+L(fwd_write_30bytes):
+ movl -30(%eax), %ecx
+ movl %ecx, -30(%edx)
+L(fwd_write_26bytes):
+ movl -26(%eax), %ecx
+ movl %ecx, -26(%edx)
+L(fwd_write_22bytes):
+ movl -22(%eax), %ecx
+ movl %ecx, -22(%edx)
+L(fwd_write_18bytes):
+ movl -18(%eax), %ecx
+ movl %ecx, -18(%edx)
+L(fwd_write_14bytes):
+ movl -14(%eax), %ecx
+ movl %ecx, -14(%edx)
+L(fwd_write_10bytes):
+ movl -10(%eax), %ecx
+ movl %ecx, -10(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_47bytes):
+ movl -47(%eax), %ecx
+ movl %ecx, -47(%edx)
+L(fwd_write_43bytes):
+ movl -43(%eax), %ecx
+ movl %ecx, -43(%edx)
+L(fwd_write_39bytes):
+ movl -39(%eax), %ecx
+ movl %ecx, -39(%edx)
+L(fwd_write_35bytes):
+ movl -35(%eax), %ecx
+ movl %ecx, -35(%edx)
+L(fwd_write_31bytes):
+ movl -31(%eax), %ecx
+ movl %ecx, -31(%edx)
+L(fwd_write_27bytes):
+ movl -27(%eax), %ecx
+ movl %ecx, -27(%edx)
+L(fwd_write_23bytes):
+ movl -23(%eax), %ecx
+ movl %ecx, -23(%edx)
+L(fwd_write_19bytes):
+ movl -19(%eax), %ecx
+ movl %ecx, -19(%edx)
+L(fwd_write_15bytes):
+ movl -15(%eax), %ecx
+ movl %ecx, -15(%edx)
+L(fwd_write_11bytes):
+ movl -11(%eax), %ecx
+ movl %ecx, -11(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN_END
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(large_page):
+ movdqu (%eax), %xmm1
+ movdqu %xmm0, (%esi)
+ movntdq %xmm1, (%edx)
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+L(large_page_loop_init):
+ POP (%esi)
+ sub $0x80, %ecx
+ POP (%edi)
+L(large_page_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ lfence
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(copy_page_by_rep):
+ mov %eax, %esi
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep movsl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movzwl (%esi), %eax
+ movw %ax, (%edi)
+ add $2, %esi
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movzbl (%esi), %eax
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%esi)
+ POP (%edi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_44bytes):
+ movl 40(%eax), %ecx
+ movl %ecx, 40(%edx)
+L(bk_write_40bytes):
+ movl 36(%eax), %ecx
+ movl %ecx, 36(%edx)
+L(bk_write_36bytes):
+ movl 32(%eax), %ecx
+ movl %ecx, 32(%edx)
+L(bk_write_32bytes):
+ movl 28(%eax), %ecx
+ movl %ecx, 28(%edx)
+L(bk_write_28bytes):
+ movl 24(%eax), %ecx
+ movl %ecx, 24(%edx)
+L(bk_write_24bytes):
+ movl 20(%eax), %ecx
+ movl %ecx, 20(%edx)
+L(bk_write_20bytes):
+ movl 16(%eax), %ecx
+ movl %ecx, 16(%edx)
+L(bk_write_16bytes):
+ movl 12(%eax), %ecx
+ movl %ecx, 12(%edx)
+L(bk_write_12bytes):
+ movl 8(%eax), %ecx
+ movl %ecx, 8(%edx)
+L(bk_write_8bytes):
+ movl 4(%eax), %ecx
+ movl %ecx, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_45bytes):
+ movl 41(%eax), %ecx
+ movl %ecx, 41(%edx)
+L(bk_write_41bytes):
+ movl 37(%eax), %ecx
+ movl %ecx, 37(%edx)
+L(bk_write_37bytes):
+ movl 33(%eax), %ecx
+ movl %ecx, 33(%edx)
+L(bk_write_33bytes):
+ movl 29(%eax), %ecx
+ movl %ecx, 29(%edx)
+L(bk_write_29bytes):
+ movl 25(%eax), %ecx
+ movl %ecx, 25(%edx)
+L(bk_write_25bytes):
+ movl 21(%eax), %ecx
+ movl %ecx, 21(%edx)
+L(bk_write_21bytes):
+ movl 17(%eax), %ecx
+ movl %ecx, 17(%edx)
+L(bk_write_17bytes):
+ movl 13(%eax), %ecx
+ movl %ecx, 13(%edx)
+L(bk_write_13bytes):
+ movl 9(%eax), %ecx
+ movl %ecx, 9(%edx)
+L(bk_write_9bytes):
+ movl 5(%eax), %ecx
+ movl %ecx, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_46bytes):
+ movl 42(%eax), %ecx
+ movl %ecx, 42(%edx)
+L(bk_write_42bytes):
+ movl 38(%eax), %ecx
+ movl %ecx, 38(%edx)
+L(bk_write_38bytes):
+ movl 34(%eax), %ecx
+ movl %ecx, 34(%edx)
+L(bk_write_34bytes):
+ movl 30(%eax), %ecx
+ movl %ecx, 30(%edx)
+L(bk_write_30bytes):
+ movl 26(%eax), %ecx
+ movl %ecx, 26(%edx)
+L(bk_write_26bytes):
+ movl 22(%eax), %ecx
+ movl %ecx, 22(%edx)
+L(bk_write_22bytes):
+ movl 18(%eax), %ecx
+ movl %ecx, 18(%edx)
+L(bk_write_18bytes):
+ movl 14(%eax), %ecx
+ movl %ecx, 14(%edx)
+L(bk_write_14bytes):
+ movl 10(%eax), %ecx
+ movl %ecx, 10(%edx)
+L(bk_write_10bytes):
+ movl 6(%eax), %ecx
+ movl %ecx, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_47bytes):
+ movl 43(%eax), %ecx
+ movl %ecx, 43(%edx)
+L(bk_write_43bytes):
+ movl 39(%eax), %ecx
+ movl %ecx, 39(%edx)
+L(bk_write_39bytes):
+ movl 35(%eax), %ecx
+ movl %ecx, 35(%edx)
+L(bk_write_35bytes):
+ movl 31(%eax), %ecx
+ movl %ecx, 31(%edx)
+L(bk_write_31bytes):
+ movl 27(%eax), %ecx
+ movl %ecx, 27(%edx)
+L(bk_write_27bytes):
+ movl 23(%eax), %ecx
+ movl %ecx, 23(%edx)
+L(bk_write_23bytes):
+ movl 19(%eax), %ecx
+ movl %ecx, 19(%edx)
+L(bk_write_19bytes):
+ movl 15(%eax), %ecx
+ movl %ecx, 15(%edx)
+L(bk_write_15bytes):
+ movl 11(%eax), %ecx
+ movl %ecx, 11(%edx)
+L(bk_write_11bytes):
+ movl 7(%eax), %ecx
+ movl %ecx, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN_END
+
+
+ .pushsection .rodata.ssse3,"a",@progbits
+ ALIGN (2)
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ ALIGN (2)
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ ALIGN (2)
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
+#ifdef USE_AS_MEMMOVE
+ ALIGN (4)
+L(copy_backward):
+ PUSH (%esi)
+ movl %eax, %esi
+ add %ecx, %edx
+ add %ecx, %esi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movl -4(%esi), %eax
+ movl %eax, -4(%edx)
+ movl -8(%esi), %eax
+ movl %eax, -8(%edx)
+ movl -12(%esi), %eax
+ movl %eax, -12(%edx)
+ movl -16(%esi), %eax
+ movl %eax, -16(%edx)
+ movl -20(%esi), %eax
+ movl %eax, -20(%edx)
+ movl -24(%esi), %eax
+ movl %eax, -24(%edx)
+ movl -28(%esi), %eax
+ movl %eax, -28(%edx)
+ movl -32(%esi), %eax
+ movl %eax, -32(%edx)
+ sub $32, %edx
+ sub $32, %esi
+
+L(bk_write_less32bytes):
+ movl %esi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%esi)
+L(bk_write_less48bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%esi)
+ ALIGN (4)
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+ /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
+ then (EDX & 2) must be != 0. */
+ jz L(bk_got2)
+ sub $1, %esi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%esi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %esi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%esi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ ALIGN (4)
+L(bk_write_more64bytes):
+ /* Check alignment of last byte. */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is aligned 4 bytes, but not 16 bytes. */
+L(bk_ssse3_align):
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
+L(bk_ssse3_cpy):
+ sub $64, %esi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%esi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%esi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%esi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%esi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+#endif
+
+END (MEMCPY)
diff --git a/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
new file mode 100644
index 0000000..caaa89a
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMMOVE
+#define MEMCPY mpx_memmove_nobnd
+#include "mpx_memcpy_nobnd.S"
diff --git a/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
new file mode 100644
index 0000000..4b0af49
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
@@ -0,0 +1,3 @@
+#define USE_AS_MEMPCPY
+#define MEMCPY mpx_mempcpy_nobnd
+#include "mpx_memcpy_nobnd.S"
diff --git a/sysdeps/i386/i686/multiarch/strcat-sse2.S b/sysdeps/i386/i686/multiarch/strcat-sse2.S
index 62d60cd..b1d39ae 100644
--- a/sysdeps/i386/i686/multiarch/strcat-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcat-sse2.S
@@ -95,10 +95,20 @@ ENTRY (STRCAT)
test %ebx, %ebx
jz L(ExitZero)
# endif
+# ifdef __CHKP__
+ bndldx STR1(%esp,%eax,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+# endif
cmpb $0, (%esi)
mov %esi, %ecx
mov %eax, %edx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
and $63, %ecx
and $63, %edx
@@ -113,6 +123,9 @@ ENTRY (STRCAT)
movdqu (%eax), %xmm1
movdqu (%esi), %xmm5
pcmpeqb %xmm1, %xmm0
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %ecx
pcmpeqb %xmm5, %xmm4
@@ -132,6 +145,9 @@ L(alignment_prolog):
and $-16, %eax
pcmpeqb (%eax), %xmm0
movdqu (%esi), %xmm5
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %edx
pcmpeqb %xmm5, %xmm4
@@ -148,21 +164,33 @@ L(loop_prolog):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -212,6 +240,9 @@ L(StartStrcpyPart):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm5, (%eax)
pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
@@ -250,21 +281,33 @@ L(StrlenCore7_1):
.p2align 4
L(align16_loop_1):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16_1)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32_1)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48_1)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -323,6 +366,9 @@ L(StartStrcpyPart_1):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
@@ -341,6 +387,9 @@ L(Unalign16Both):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -352,6 +401,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -364,6 +417,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%eax, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -376,6 +433,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%eax, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -388,6 +449,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -400,6 +465,10 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -412,6 +481,9 @@ L(Unalign16BothBigN):
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm3, (%eax, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -421,6 +493,9 @@ L(Unalign16BothBigN):
# ifdef USE_AS_STRNCAT
lea 128(%ebx, %edx), %ebx
# endif
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -443,6 +518,10 @@ L(Unalign16BothBigN):
L(Unaligned64Loop_start):
add $64, %eax
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu -1(%eax), %bnd0
+# endif
movdqu %xmm4, -64(%eax)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -485,11 +564,18 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
movdqu %xmm6, 32(%eax)
add $48, %esi
add $48, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
@@ -501,12 +587,18 @@ L(BigN):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm1, (%eax)
sub %ecx, %eax
sub $48, %ebx
@@ -515,6 +607,9 @@ L(BigN):
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
@@ -532,12 +627,20 @@ L(CopyFrom1To16Bytes):
add %ecx, %eax
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesTail):
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -546,6 +649,10 @@ L(CopyFrom1To32Bytes1):
add $16, %eax
L(CopyFrom1To16BytesTail1):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
@@ -554,34 +661,60 @@ L(CopyFrom1To32Bytes):
add %ecx, %esi
add $16, %edx
sub %ecx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %esi
add $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
add $32, %esi
add $32, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
.p2align 4
L(CopyFrom1To16BytesExit):
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Case2 */
@@ -594,6 +727,10 @@ L(CopyFrom1To16BytesCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -605,6 +742,10 @@ L(CopyFrom1To32BytesCase2):
sub %ecx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTailCase2):
@@ -613,12 +754,20 @@ L(CopyFrom1To16BytesTailCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTail1Case2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Case2 or Case3, Case3 */
@@ -631,6 +780,10 @@ L(CopyFrom1To16BytesCase3):
add $16, %ebx
add %ecx, %eax
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -639,6 +792,10 @@ L(CopyFrom1To32BytesCase2OrCase3):
jnz L(CopyFrom1To32BytesCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -647,6 +804,10 @@ L(CopyFrom1To16BytesTailCase2OrCase3):
jnz L(CopyFrom1To16BytesTailCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
@@ -657,6 +818,10 @@ L(CopyFrom1To32Bytes1Case2OrCase3):
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1Case2)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
@@ -1110,15 +1275,27 @@ L(Unaligned64LeaveCase3):
and $-16, %ecx
add $48, %ebx
jl L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 63(%eax), %bnd0
+# endif
movdqu %xmm7, 48(%eax)
xor %bh, %bh
movb %bh, 64(%eax)
@@ -1137,6 +1314,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %ecx
sub $16, %ebx
@@ -1146,6 +1326,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
add $16, %ecx
sub $16, %ebx
@@ -1155,6 +1338,9 @@ L(Unaligned64LeaveCase2):
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
lea 16(%eax, %ecx), %eax
lea 16(%esi, %ecx), %esi
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
index 938d74d..1e59581 100644
--- a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
@@ -46,6 +46,12 @@ ENTRY (__strchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
@@ -81,6 +87,9 @@ ENTRY (__strchr_sse2_bsf)
L(unaligned_match):
add %edi, %eax
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
.p2align 4
@@ -94,6 +103,9 @@ L(unaligned_no_match):
.p2align 4
/* Loop start on aligned string. */
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -103,6 +115,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -112,6 +127,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -121,6 +139,9 @@ L(loop):
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -146,6 +167,9 @@ L(matches):
L(match):
sub $16, %edi
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
/* Return NULL. */
diff --git a/sysdeps/i386/i686/multiarch/strcmp-sse4.S b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
index 355ed4e..1958b36 100644
--- a/sysdeps/i386/i686/multiarch/strcmp-sse4.S
+++ b/sysdeps/i386/i686/multiarch/strcmp-sse4.S
@@ -222,6 +222,12 @@ L(ascii):
test REM, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+#endif
mov %dx, %cx
and $0xfff, %cx
cmp $0xff0, %cx
@@ -280,6 +286,10 @@ L(ascii):
add $16, %edx
add $16, %eax
L(first4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -303,6 +313,10 @@ L(first4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -325,6 +339,10 @@ L(first4bytes):
cmp $2, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -347,6 +365,10 @@ L(first4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -369,6 +391,10 @@ L(first4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -391,6 +417,10 @@ L(first4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -413,6 +443,10 @@ L(first4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -435,6 +469,10 @@ L(first4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
@@ -483,6 +521,10 @@ L(check_offset):
testl %edx, %edx
jg L(crosspage)
L(loop):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movdqu (%esi,%edx), %xmm2
movdqu (%edi,%edx), %xmm1
TOLOWER (%xmm2, %xmm1)
@@ -497,6 +539,10 @@ L(loop):
add $16, %edx
jle L(loop)
L(crosspage):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movzbl (%edi,%edx), %eax
movzbl (%esi,%edx), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
@@ -625,6 +671,10 @@ L(less16bytes):
add $8, %eax
L(less4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
@@ -647,6 +697,10 @@ L(less4bytes):
cmp $1, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
@@ -670,6 +724,10 @@ L(less4bytes):
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
@@ -692,6 +750,10 @@ L(less4bytes):
cmp $3, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
+#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 3(%edx), %edi
@@ -715,6 +777,10 @@ L(more4bytes):
cmp $4, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
+#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 4(%edx), %edi
@@ -738,6 +804,10 @@ L(more4bytes):
cmp $5, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
+#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 5(%edx), %edi
@@ -760,6 +830,10 @@ L(more4bytes):
cmp $6, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
+#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 6(%edx), %edi
@@ -782,6 +856,10 @@ L(more4bytes):
cmp $7, REM
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
+#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 7(%edx), %edi
diff --git a/sysdeps/i386/i686/multiarch/strcpy-sse2.S b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
index d942ac2..4fdf7e0 100644
--- a/sysdeps/i386/i686/multiarch/strcpy-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strcpy-sse2.S
@@ -85,6 +85,14 @@ ENTRY (STRCPY)
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edi,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+ bndcl (%edi), %bnd0
+ bndcu -1(%edi, %ebx), %bnd0
+# endif
mov %esi, %ecx
# ifndef USE_AS_STPCPY
@@ -111,6 +119,9 @@ ENTRY (STRCPY)
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
@@ -124,6 +135,9 @@ ENTRY (STRCPY)
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm1, (%edi)
sub %ecx, %edi
@@ -132,6 +146,10 @@ ENTRY (STRCPY)
.p2align 4
L(Unalign16Both):
mov $16, %ecx
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
@@ -143,6 +161,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -153,6 +175,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%edi, %ecx)
pcmpeqb %xmm4, %xmm0
@@ -163,6 +189,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm4)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%edi, %ecx)
pcmpeqb %xmm1, %xmm0
@@ -173,6 +203,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm1)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
pcmpeqb %xmm2, %xmm0
@@ -183,6 +217,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
@@ -193,6 +231,10 @@ L(Unalign16Both):
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqu %xmm3, (%edi, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
@@ -202,6 +244,9 @@ L(Unalign16Both):
lea 128(%ebx, %edx), %ebx
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
@@ -220,6 +265,10 @@ L(Unaligned64Loop):
L(Unaligned64Loop_start):
add $64, %edi
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu (%edi), %bnd0
+# endif
movdqu %xmm4, -64(%edi)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
@@ -259,15 +308,27 @@ L(Unaligned64Leave):
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
movdqu %xmm5, 16(%edi)
movdqu %xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%edi, %edx), %bnd0
+# endif
lea 48(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 63(%edi), %bnd0
+# endif
movdqu %xmm7, 48(%edi)
add $15, %ebx
sub %edx, %ebx
+# ifdef __CHKP__
+ bndcu 49(%edi, %edx), %bnd0
+# endif
lea 49(%edi, %edx), %edi
jmp L(StrncpyFillTailWithZero)
@@ -288,6 +349,10 @@ L(SourceStringAlignmentZero):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
movdqu %xmm1, (%edi)
pmovmskb %xmm0, %edx
@@ -313,7 +378,7 @@ L(CopyFrom1To16BytesTail):
bsf %edx, %edx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
- .p2align 4
+ .p2align 9
L(CopyFrom1To32Bytes1):
add $16, %esi
add $16, %edi
@@ -337,6 +402,9 @@ L(CopyFrom1To16BytesUnaligned_0):
# ifdef USE_AS_STPCPY
lea (%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
add $63, %ebx
sub %edx, %ebx
@@ -350,6 +418,9 @@ L(CopyFrom1To16BytesUnaligned_16):
# ifdef USE_AS_STPCPY
lea 16(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm5, 16(%edi)
add $47, %ebx
sub %edx, %ebx
@@ -364,6 +435,9 @@ L(CopyFrom1To16BytesUnaligned_32):
# ifdef USE_AS_STPCPY
lea 32(%edi, %edx), %eax
# endif
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm6, 32(%edi)
add $31, %ebx
sub %edx, %ebx
@@ -495,6 +569,9 @@ L(Exit1):
.p2align 4
L(Exit2):
movw (%esi), %dx
+# ifdef __CHKP__
+ bndcu 1(%edi), %bnd0
+# endif
movw %dx, (%edi)
# ifdef USE_AS_STPCPY
lea 1(%edi), %eax
@@ -507,6 +584,9 @@ L(Exit2):
.p2align 4
L(Exit3):
movw (%esi), %cx
+# ifdef __CHKP__
+ bndcu 2(%edi), %bnd0
+# endif
movw %cx, (%edi)
movb %dh, 2(%edi)
# ifdef USE_AS_STPCPY
@@ -520,6 +600,9 @@ L(Exit3):
.p2align 4
L(Exit4):
movl (%esi), %edx
+# ifdef __CHKP__
+ bndcu 3(%edi), %bnd0
+# endif
movl %edx, (%edi)
# ifdef USE_AS_STPCPY
lea 3(%edi), %eax
@@ -532,6 +615,9 @@ L(Exit4):
.p2align 4
L(Exit5):
movl (%esi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%edi), %bnd0
+# endif
movb %dh, 4(%edi)
movl %ecx, (%edi)
# ifdef USE_AS_STPCPY
@@ -546,6 +632,9 @@ L(Exit5):
L(Exit6):
movl (%esi), %ecx
movw 4(%esi), %dx
+# ifdef __CHKP__
+ bndcu 5(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movw %dx, 4(%edi)
# ifdef USE_AS_STPCPY
@@ -560,6 +649,9 @@ L(Exit6):
L(Exit7):
movl (%esi), %ecx
movl 3(%esi), %edx
+# ifdef __CHKP__
+ bndcu 6(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movl %edx, 3(%edi)
# ifdef USE_AS_STPCPY
@@ -573,6 +665,9 @@ L(Exit7):
.p2align 4
L(Exit8):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 7(%edi), %eax
@@ -585,6 +680,9 @@ L(Exit8):
.p2align 4
L(Exit9):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 8(%edi), %bnd0
+# endif
movb %dh, 8(%edi)
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
@@ -599,6 +697,9 @@ L(Exit9):
L(Exit10):
movlpd (%esi), %xmm0
movw 8(%esi), %dx
+# ifdef __CHKP__
+ bndcu 9(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movw %dx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -613,6 +714,9 @@ L(Exit10):
L(Exit11):
movlpd (%esi), %xmm0
movl 7(%esi), %edx
+# ifdef __CHKP__
+ bndcu 10(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -627,6 +731,9 @@ L(Exit11):
L(Exit12):
movlpd (%esi), %xmm0
movl 8(%esi), %edx
+# ifdef __CHKP__
+ bndcu 11(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 8(%edi)
# ifdef USE_AS_STPCPY
@@ -641,6 +748,9 @@ L(Exit12):
L(Exit13):
movlpd (%esi), %xmm0
movlpd 5(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 12(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
@@ -655,6 +765,9 @@ L(Exit13):
L(Exit14):
movlpd (%esi), %xmm0
movlpd 6(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 13(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
@@ -669,6 +782,9 @@ L(Exit14):
L(Exit15):
movlpd (%esi), %xmm0
movlpd 7(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 14(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
@@ -682,6 +798,9 @@ L(Exit15):
.p2align 4
L(Exit16):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 15(%edi), %eax
@@ -694,6 +813,9 @@ L(Exit16):
.p2align 4
L(Exit17):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movb %dh, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -708,6 +830,9 @@ L(Exit17):
L(Exit18):
movdqu (%esi), %xmm0
movw 16(%esi), %cx
+# ifdef __CHKP__
+ bndcu 17(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movw %cx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -722,6 +847,9 @@ L(Exit18):
L(Exit19):
movdqu (%esi), %xmm0
movl 15(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -736,6 +864,9 @@ L(Exit19):
L(Exit20):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -750,6 +881,9 @@ L(Exit20):
L(Exit21):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
movb %dh, 20(%edi)
@@ -765,6 +899,9 @@ L(Exit21):
L(Exit22):
movdqu (%esi), %xmm0
movlpd 14(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 21(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -779,6 +916,9 @@ L(Exit22):
L(Exit23):
movdqu (%esi), %xmm0
movlpd 15(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 22(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -793,6 +933,9 @@ L(Exit23):
L(Exit24):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 23(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -807,6 +950,9 @@ L(Exit24):
L(Exit25):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 24(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movb %dh, 24(%edi)
@@ -823,6 +969,9 @@ L(Exit26):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movw 24(%esi), %cx
+# ifdef __CHKP__
+ bndcu 25(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movw %cx, 24(%edi)
@@ -839,6 +988,9 @@ L(Exit27):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 23(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 23(%edi)
@@ -855,6 +1007,9 @@ L(Exit28):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 24(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 24(%edi)
@@ -870,6 +1025,9 @@ L(Exit28):
L(Exit29):
movdqu (%esi), %xmm0
movdqu 13(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
@@ -884,6 +1042,9 @@ L(Exit29):
L(Exit30):
movdqu (%esi), %xmm0
movdqu 14(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
@@ -899,6 +1060,9 @@ L(Exit30):
L(Exit31):
movdqu (%esi), %xmm0
movdqu 15(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
@@ -913,6 +1077,9 @@ L(Exit31):
L(Exit32):
movdqu (%esi), %xmm0
movdqu 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
@@ -1612,37 +1779,90 @@ ENTRY (STRCPY)
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+# endif
cmpb $0, (%ecx)
jz L(ExitTail1)
+# ifdef __CHKP__
+ bndcu 1(%ecx), %bnd1
+# endif
cmpb $0, 1(%ecx)
jz L(ExitTail2)
+# ifdef __CHKP__
+ bndcu 2(%ecx), %bnd1
+# endif
cmpb $0, 2(%ecx)
jz L(ExitTail3)
+# ifdef __CHKP__
+ bndcu 3(%ecx), %bnd1
+# endif
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmpb $0, 4(%ecx)
jz L(ExitTail5)
+# ifdef __CHKP__
+ bndcu 5(%ecx), %bnd1
+# endif
cmpb $0, 5(%ecx)
jz L(ExitTail6)
+# ifdef __CHKP__
+ bndcu 6(%ecx), %bnd1
+# endif
cmpb $0, 6(%ecx)
jz L(ExitTail7)
+# ifdef __CHKP__
+ bndcu 7(%ecx), %bnd1
+# endif
cmpb $0, 7(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
+# ifdef __CHKP__
+ bndcu 9(%ecx), %bnd1
+# endif
cmpb $0, 9(%ecx)
jz L(ExitTail10)
+# ifdef __CHKP__
+ bndcu 10(%ecx), %bnd1
+# endif
cmpb $0, 10(%ecx)
jz L(ExitTail11)
+# ifdef __CHKP__
+ bndcu 11(%ecx), %bnd1
+# endif
cmpb $0, 11(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmpb $0, 12(%ecx)
jz L(ExitTail13)
+# ifdef __CHKP__
+ bndcu 13(%ecx), %bnd1
+# endif
cmpb $0, 13(%ecx)
jz L(ExitTail14)
+# ifdef __CHKP__
+ bndcu 14(%ecx), %bnd1
+# endif
cmpb $0, 14(%ecx)
jz L(ExitTail15)
+# ifdef __CHKP__
+ bndcu 15(%ecx), %bnd1
+# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
@@ -1654,6 +1874,9 @@ ENTRY (STRCPY)
and $-16, %ebx
pxor %xmm0, %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pcmpeqb (%ebx), %xmm0
pmovmskb %xmm0, %eax
@@ -1669,6 +1892,10 @@ ENTRY (STRCPY)
xor %ebx, %ebx
.p2align 4
+# ifdef __CHKP__
+ bndcu 16(%ecx), %bnd1
+ bndcu 15(%edx), %bnd0
+# endif
movdqa (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movdqu %xmm1, (%edx)
@@ -1678,6 +1905,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1686,6 +1917,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm4
movdqu %xmm3, (%edx, %ebx)
pcmpeqb %xmm4, %xmm0
@@ -1694,6 +1929,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm1
movdqu %xmm4, (%edx, %ebx)
pcmpeqb %xmm1, %xmm0
@@ -1702,6 +1941,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm2
movdqu %xmm1, (%edx, %ebx)
pcmpeqb %xmm2, %xmm0
@@ -1710,6 +1953,10 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
@@ -1718,6 +1965,9 @@ ENTRY (STRCPY)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movdqu %xmm3, (%edx, %ebx)
mov %ecx, %eax
lea 16(%ecx, %ebx), %ecx
@@ -1726,6 +1976,9 @@ ENTRY (STRCPY)
sub %eax, %edx
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
@@ -1742,6 +1995,10 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
L(Aligned64Loop_start):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu -1(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
movaps (%ecx), %xmm2
movdqa %xmm2, %xmm4
@@ -1771,6 +2028,9 @@ L(Aligned64Leave):
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
@@ -1778,11 +2038,17 @@ L(Aligned64Leave):
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movdqu %xmm5, -48(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movdqu %xmm6, -32(%edx)
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -1813,6 +2079,10 @@ L(CopyFrom1To16Bytes):
test $0x40, %al
jnz L(Exit7)
/* Exit 8 */
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1841,6 +2111,10 @@ L(ExitHigh):
test $0x40, %ah
jnz L(Exit15)
/* Exit 16 */
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
@@ -1854,6 +2128,10 @@ L(ExitHigh):
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%ecx), %bnd1
+# endif
movb (%ecx), %al
movb %al, (%edx)
# ifdef USE_AS_STPCPY
@@ -1865,6 +2143,10 @@ L(Exit1):
.p2align 4
L(Exit2):
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1876,6 +2158,10 @@ L(Exit2):
.p2align 4
L(Exit3):
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
@@ -1889,6 +2175,10 @@ L(Exit3):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
@@ -1900,6 +2190,10 @@ L(Exit4):
.p2align 4
L(Exit5):
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
@@ -1913,6 +2207,10 @@ L(Exit5):
.p2align 4
L(Exit6):
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
@@ -1926,6 +2224,10 @@ L(Exit6):
.p2align 4
L(Exit7):
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
@@ -1939,6 +2241,10 @@ L(Exit7):
.p2align 4
L(Exit9):
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+ bndcu 8(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1954,6 +2260,10 @@ L(Exit9):
.p2align 4
L(Exit10):
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+ bndcu 9(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1969,6 +2279,10 @@ L(Exit10):
.p2align 4
L(Exit11):
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+ bndcu 10(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1984,6 +2298,10 @@ L(Exit11):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
@@ -1999,6 +2317,10 @@ L(Exit12):
.p2align 4
L(Exit13):
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+ bndcu 12(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
@@ -2012,6 +2334,10 @@ L(Exit13):
.p2align 4
L(Exit14):
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+ bndcu 13(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
@@ -2025,6 +2351,10 @@ L(Exit14):
.p2align 4
L(Exit15):
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+ bndcu 14(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
@@ -2040,6 +2370,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movb (%ecx), %al
movb %al, (%edx)
movl %edx, %eax
@@ -2048,6 +2381,9 @@ L(ExitTail1):
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+# endif
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
@@ -2059,6 +2395,9 @@ L(ExitTail2):
.p2align 4
L(ExitTail3):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+# endif
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
@@ -2072,6 +2411,9 @@ L(ExitTail3):
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
@@ -2083,6 +2425,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail5):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
@@ -2096,6 +2441,9 @@ L(ExitTail5):
.p2align 4
L(ExitTail6):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+# endif
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
@@ -2109,6 +2457,9 @@ L(ExitTail6):
.p2align 4
L(ExitTail7):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
@@ -2122,6 +2473,9 @@ L(ExitTail7):
.p2align 4
L(ExitTail8):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2135,6 +2489,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail9):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2150,6 +2507,9 @@ L(ExitTail9):
.p2align 4
L(ExitTail10):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2165,6 +2525,9 @@ L(ExitTail10):
.p2align 4
L(ExitTail11):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2180,6 +2543,9 @@ L(ExitTail11):
.p2align 4
L(ExitTail12):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
@@ -2195,6 +2561,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
@@ -2208,6 +2577,9 @@ L(ExitTail13):
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
@@ -2221,6 +2593,9 @@ L(ExitTail14):
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
@@ -2234,6 +2609,9 @@ L(ExitTail15):
.p2align 4
L(ExitTail16):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
index 32db65c..ab537c1 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
@@ -41,6 +41,11 @@
ENTRY ( __strlen_sse2_bsf)
ENTRANCE
mov STR(%esp), %edi
+#ifdef __CHKP__
+ bndldx STR(%esp,%edi,1), %bnd0
+ bndcl (%edi),%bnd0
+ bndcu (%edi),%bnd0
+#endif
xor %eax, %eax
mov %edi, %ecx
and $0x3f, %ecx
@@ -73,21 +78,33 @@ L(align16_start):
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+#ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+#endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+#ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+#endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+#ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+#endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+#ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+#endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
@@ -98,24 +115,36 @@ L(exit):
L(exit_less16):
bsf %edx, %edx
add %edx, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit16):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $16, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit32):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $32, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit48):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $48, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
POP (%edi)
POP (%esi)
ret
diff --git a/sysdeps/i386/i686/multiarch/strlen-sse2.S b/sysdeps/i386/i686/multiarch/strlen-sse2.S
index a4f2806..3d0743e 100644
--- a/sysdeps/i386/i686/multiarch/strlen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strlen-sse2.S
@@ -41,7 +41,10 @@
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
-# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# define RETURN \
+ mov STR+4(%esp),%edx; \
+ bndcu -1(%edx,%eax), %bnd0; \
+ POP (%edi); CFI_PUSH(%edi); ret
# endif
# ifndef STRLEN
@@ -51,12 +54,19 @@
atom_text_section
ENTRY (STRLEN)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+# endif
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
# endif
xor %eax, %eax
cmpb $0, (%edx)
@@ -122,6 +132,9 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -129,6 +142,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -136,6 +152,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -143,6 +162,9 @@ ENTRY (STRLEN)
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -154,24 +176,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -183,24 +217,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -212,24 +258,36 @@ ENTRY (STRLEN)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -250,6 +308,9 @@ L(aligned_64_loop):
sub $64, %edi
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -535,6 +596,10 @@ L(len_less4_prolog):
add $4, %edi
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
index e026c40..1c907a4 100644
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -42,6 +42,12 @@ ENTRY (__strrchr_sse2_bsf)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
PUSH (%edi)
pxor %xmm2, %xmm2
mov %ecx, %edi
@@ -90,6 +96,9 @@ L(unaligned_return_value1):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -156,6 +165,9 @@ L(unaligned_return_value):
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
@@ -175,6 +187,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -184,6 +199,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -193,6 +211,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -202,6 +223,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
@@ -224,6 +248,9 @@ L(return_value):
POP (%esi)
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
@@ -255,6 +282,9 @@ L(return_value_1):
bsr %eax, %eax
add %edi, %eax
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
diff --git a/sysdeps/i386/i686/multiarch/wcschr-sse2.S b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
index 63101d9..e06274a 100644
--- a/sysdeps/i386/i686/multiarch/wcschr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcschr-sse2.S
@@ -40,7 +40,11 @@ ENTRY (__wcschr_sse2)
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
-
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
@@ -90,6 +94,9 @@ L(cross_cache):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
CFI_PUSH (%edi)
@@ -108,6 +115,9 @@ L(unaligned_no_match):
.p2align 4
L(loop):
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -117,6 +127,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -126,6 +139,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -135,6 +151,9 @@ L(loop):
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
@@ -160,11 +179,17 @@ L(match_case2):
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -176,11 +201,17 @@ L(match_higth_case2):
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -191,6 +222,9 @@ L(match_case1):
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
@@ -198,16 +232,25 @@ L(match_higth_case1):
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit0):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
diff --git a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
index 9b248c1..108e7fb 100644
--- a/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcscmp-sse2.S
@@ -47,6 +47,14 @@ ENTRY (__wcscmp_sse2)
*/
mov STR1(%esp), %edx
mov STR2(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
mov (%eax), %ecx
cmp %ecx, (%edx)
diff --git a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
index 47fb516..708ef41 100644
--- a/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/wcscpy-ssse3.S
@@ -41,13 +41,29 @@
ENTRY (__wcscpy_ssse3)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+# endif
cmp $0, (%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmp $0, 4(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmp $0, 8(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmp $0, 12(%ecx)
jz L(ExitTail16)
@@ -61,6 +77,9 @@ ENTRY (__wcscpy_ssse3)
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
@@ -87,6 +106,10 @@ ENTRY (__wcscpy_ssse3)
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 16(%ecx), %bnd1
+# endif
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
@@ -97,6 +120,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -106,6 +133,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
@@ -115,6 +146,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
@@ -124,6 +159,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
@@ -133,6 +172,10 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
@@ -142,6 +185,9 @@ L(Align16Both):
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+# endif
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
@@ -152,6 +198,9 @@ L(Align16Both):
mov $-0x40, %esi
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
@@ -168,6 +217,9 @@ L(Aligned64Loop):
test %eax, %eax
jnz L(Aligned64Leave)
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
@@ -182,6 +234,9 @@ L(Aligned64Leave):
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
test %eax, %eax
lea 16(%esi), %esi
@@ -189,11 +244,17 @@ L(Aligned64Leave):
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movaps %xmm5, -48(%edx)
test %eax, %eax
lea 16(%esi), %esi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
@@ -202,11 +263,17 @@ L(Aligned64Leave):
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
@@ -218,6 +285,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -231,6 +302,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -244,6 +319,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -256,6 +335,10 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -269,6 +352,9 @@ L(Shl4Start):
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -290,6 +376,9 @@ L(Shl4LoopStart):
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -300,6 +389,10 @@ L(Shl4LoopStart):
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
+ movaps %xmm2, (%edx)
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
@@ -310,6 +403,9 @@ L(Shl4LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -318,6 +414,9 @@ L(Shl4LoopExit):
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
@@ -329,6 +428,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -342,6 +445,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -355,6 +462,10 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -367,6 +478,9 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -380,6 +494,9 @@ L(Shl8Start):
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -401,6 +518,9 @@ L(Shl8LoopStart):
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -410,6 +530,9 @@ L(Shl8LoopStart):
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
@@ -419,6 +542,9 @@ L(Shl8LoopExit):
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
@@ -427,6 +553,9 @@ L(Shl8LoopExit):
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
@@ -438,6 +567,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -451,6 +584,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -464,6 +601,10 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -476,6 +617,9 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -489,6 +633,9 @@ L(Shl12Start):
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
@@ -510,6 +657,9 @@ L(Shl12LoopStart):
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
@@ -519,6 +669,9 @@ L(Shl12LoopStart):
L(Shl12LoopExit):
movl (%ecx), %esi
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %esi, (%edx)
mov $4, %esi
@@ -533,6 +686,10 @@ L(CopyFrom1To16Bytes):
test $0x01, %al
jnz L(Exit4)
L(Exit8):
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
@@ -543,6 +700,10 @@ L(ExitHigh):
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
@@ -550,6 +711,10 @@ L(Exit16):
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
@@ -557,6 +722,10 @@ L(Exit4):
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
@@ -569,6 +738,9 @@ CFI_POP (%edi)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl %edx, %eax
ret
@@ -576,6 +748,9 @@ L(ExitTail4):
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
@@ -583,6 +758,9 @@ L(ExitTail8):
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
@@ -592,6 +770,9 @@ L(ExitTail12):
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
diff --git a/sysdeps/i386/i686/multiarch/wcslen-sse2.S b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
index a92b92f..9c53149 100644
--- a/sysdeps/i386/i686/multiarch/wcslen-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcslen-sse2.S
@@ -24,21 +24,47 @@
.text
ENTRY (__wcslen_sse2)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmp $0, (%edx)
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
cmp $0, 4(%edx)
jz L(exit_tail1)
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
cmp $0, 8(%edx)
jz L(exit_tail2)
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
cmp $0, 12(%edx)
jz L(exit_tail3)
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
cmp $0, 16(%edx)
jz L(exit_tail4)
+# ifdef __CHKP__
+ bndcu 20(%edx), %bnd0
+# endif
cmp $0, 20(%edx)
jz L(exit_tail5)
+# ifdef __CHKP__
+ bndcu 24(%edx), %bnd0
+# endif
cmp $0, 24(%edx)
jz L(exit_tail6)
+# ifdef __CHKP__
+ bndcu 28(%edx), %bnd0
+# endif
cmp $0, 28(%edx)
jz L(exit_tail7)
@@ -48,6 +74,9 @@ ENTRY (__wcslen_sse2)
lea 16(%edx), %ecx
and $-16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
@@ -55,6 +84,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
@@ -62,6 +94,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
@@ -69,6 +104,9 @@ ENTRY (__wcslen_sse2)
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
@@ -79,6 +117,9 @@ ENTRY (__wcslen_sse2)
.p2align 4
L(aligned_64_loop):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
@@ -129,6 +170,10 @@ L(exit):
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
@@ -137,16 +182,28 @@ L(exit_high):
and $15, %ch
jz L(exit_3)
add $2, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_1):
add $1, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_3):
add $3, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
diff --git a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
index d31e48e..f7c70e6 100644
--- a/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/wcsrchr-sse2.S
@@ -36,12 +36,23 @@
# define STR1 PARMS
# define STR2 STR1+4
+# ifdef __CHKP__
+# undef RETURN
+# define RETURN bndcu (%eax),%bnd0; \
+ POP (%edi); ret; CFI_PUSH (%edi);
+# endif
+
atom_text_section
ENTRY (__wcsrchr_sse2)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %edi
punpckldq %xmm1, %xmm1
@@ -137,6 +148,9 @@ L(unaligned_match):
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
@@ -146,6 +160,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
@@ -155,6 +172,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
@@ -164,6 +184,9 @@ L(loop):
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
diff --git a/sysdeps/i386/i686/strcmp.S b/sysdeps/i386/i686/strcmp.S
index 6ca6220..67134af 100644
--- a/sysdeps/i386/i686/strcmp.S
+++ b/sysdeps/i386/i686/strcmp.S
@@ -29,8 +29,19 @@ ENTRY (strcmp)
movl STR1(%esp), %ecx
movl STR2(%esp), %edx
-
-L(oop): movb (%ecx), %al
+#ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndldx STR2(%esp,%edx,1), %bnd1
+ bndcl (%ecx), %bnd0
+ bndcl (%edx), %bnd1
+#endif
+
+L(oop):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd0
+ bndcu (%edx), %bnd1
+#endif
+ movb (%ecx), %al
cmpb (%edx), %al
jne L(neq)
incl %ecx
diff --git a/sysdeps/i386/i686/strtok.S b/sysdeps/i386/i686/strtok.S
index 8848faf..78a2ea9 100644
--- a/sysdeps/i386/i686/strtok.S
+++ b/sysdeps/i386/i686/strtok.S
@@ -121,6 +121,14 @@ ENTRY (FUNCTION)
testl %edx, %edx
jz L(returnNULL)
movl DELIM(%esp), %eax /* Get start of delimiter set. */
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1),%bnd0
+ bndldx DELIM(%esp,%eax,1),%bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%eax), %bnd1
+#endif
/* For understanding the following code remember that %ecx == 0 now.
Although all the following instruction only modify %cl we always
diff --git a/sysdeps/i386/memchr.S b/sysdeps/i386/memchr.S
index 6799500..39fe616 100644
--- a/sysdeps/i386/memchr.S
+++ b/sysdeps/i386/memchr.S
@@ -51,6 +51,11 @@ ENTRY (__memchr)
movl LEN(%esp), %esi /* len: length of memory block. */
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* If my must not test more than three characters test
them one by one. This is especially true for 0. */
cmpl $4, %esi
@@ -72,6 +77,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -80,6 +88,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -88,6 +99,9 @@ ENTRY (__memchr)
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -127,7 +141,11 @@ ENTRY (__memchr)
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -162,6 +180,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -176,6 +197,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -190,6 +214,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -213,6 +240,9 @@ L(2): subl $16, %esi
cmpl $4-16, %esi /* rest < 4 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -231,6 +261,9 @@ L(2): subl $16, %esi
cmpl $8-16, %esi /* rest < 8 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -249,6 +282,9 @@ L(2): subl $16, %esi
cmpl $12-16, %esi /* rest < 12 bytes? */
jb L(3) /* yes, than test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -268,18 +304,27 @@ L(2): subl $16, %esi
L(3): andl $3, %esi /* mask out uninteresting bytes */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, than return pointer */
diff --git a/sysdeps/i386/memcmp.S b/sysdeps/i386/memcmp.S
index 21e0bfc..7beab65 100644
--- a/sysdeps/i386/memcmp.S
+++ b/sysdeps/i386/memcmp.S
@@ -37,6 +37,12 @@ ENTRY (memcmp)
cfi_rel_offset (esi, 0)
movl BLK2(%esp), %edi
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%esi,1), %bnd0
+ bndldx BLK2(%esp,%edi,1), %bnd1
+ bndcl (%esi), %bnd0
+ bndcl (%edi), %bnd1
+#endif
cld /* Set direction of comparison. */
@@ -59,7 +65,13 @@ ENTRY (memcmp)
Note that the following operation does not change 0xffffffff. */
orb $1, %al /* Change 0 to 1. */
-L(1): popl %esi /* Restore registers. */
+L(1):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edi), %bnd1
+#endif
+ popl %esi /* Restore registers. */
+
cfi_adjust_cfa_offset (-4)
cfi_restore (esi)
movl %edx, %edi
diff --git a/sysdeps/i386/rawmemchr.S b/sysdeps/i386/rawmemchr.S
index 2bd20e0..27441dd 100644
--- a/sysdeps/i386/rawmemchr.S
+++ b/sysdeps/i386/rawmemchr.S
@@ -46,6 +46,11 @@ ENTRY (__rawmemchr)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* At the moment %edx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -62,18 +67,27 @@ ENTRY (__rawmemchr)
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
@@ -108,7 +122,11 @@ ENTRY (__rawmemchr)
/* Each round the main loop processes 16 bytes. */
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -143,6 +161,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -157,6 +178,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -171,6 +195,9 @@ L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
@@ -211,6 +238,9 @@ L(8): testb %cl, %cl /* test first byte in dword */
/* No further test needed we we know it is one of the four bytes. */
L(9):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
popl %edi /* pop saved register */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/stpncpy.S b/sysdeps/i386/stpncpy.S
index b23e820..22d727a 100644
--- a/sysdeps/i386/stpncpy.S
+++ b/sysdeps/i386/stpncpy.S
@@ -42,6 +42,14 @@ ENTRY (__stpncpy)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 0)
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%eax,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcu -1(%eax, %ecx), %bnd0
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+#endif
subl %eax, %esi /* magic: reduce number of loop variants
to one using addressing mode */
diff --git a/sysdeps/i386/strchrnul.S b/sysdeps/i386/strchrnul.S
index 7ceb88e..86bf770 100644
--- a/sysdeps/i386/strchrnul.S
+++ b/sysdeps/i386/strchrnul.S
@@ -38,6 +38,11 @@ ENTRY (__strchrnul)
movl STR(%esp), %eax
movl CHR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
/* At the moment %edx contains CHR. What we need for the
algorithm is CHR in all bytes of the dword. Avoid
operations on 16 bit words because these require an
@@ -60,6 +65,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -69,6 +77,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -78,6 +89,9 @@ ENTRY (__strchrnul)
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
@@ -120,7 +134,11 @@ ENTRY (__strchrnul)
L(1): addl $16, %eax /* adjust pointer for whole round */
-L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(11):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
movl $0xfefefeff, %edi /* magic value */
@@ -164,6 +182,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(7) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+# endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -189,6 +210,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(71) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+# endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -214,6 +238,9 @@ L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(72) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+# endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
@@ -268,7 +295,11 @@ L(7): testb %cl, %cl /* is first byte CHR? */
/* It must be in the fourth byte and it cannot be NUL. */
incl %eax
-L(6): popl %edi /* restore saved register content */
+L(6):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ popl %edi /* restore saved register content */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
diff --git a/sysdeps/i386/strcspn.S b/sysdeps/i386/strcspn.S
index 0c262d6..1352b03 100644
--- a/sysdeps/i386/strcspn.S
+++ b/sysdeps/i386/strcspn.S
@@ -32,6 +32,14 @@ ENTRY (strcspn)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strpbrk.S b/sysdeps/i386/strpbrk.S
index 246ae27..7190a06 100644
--- a/sysdeps/i386/strpbrk.S
+++ b/sysdeps/i386/strpbrk.S
@@ -33,6 +33,14 @@ ENTRY (strpbrk)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
diff --git a/sysdeps/i386/strrchr.S b/sysdeps/i386/strrchr.S
index 31b8a45..858bba4 100644
--- a/sysdeps/i386/strrchr.S
+++ b/sysdeps/i386/strrchr.S
@@ -40,6 +40,10 @@ ENTRY (strrchr)
movl STR(%esp), %esi
cfi_rel_offset (esi, 0)
movl CHR(%esp), %ecx
+#ifdef __CHKP__
+ bndldx STR(%esp,%esi,1), %bnd0
+ bndcl (%esi), %bnd0
+#endif
/* At the moment %ecx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
@@ -63,6 +67,9 @@ ENTRY (strrchr)
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(11) /* target found => return */
@@ -73,6 +80,9 @@ L(11): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(12) /* target found => return */
@@ -83,6 +93,9 @@ L(12): orb %dl, %dl /* is NUL? */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(13) /* target found => return */
@@ -170,7 +183,11 @@ L(51):
L(1): addl $16, %esi /* increment pointer for full round */
-L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
+L(19):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
+ movl (%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
@@ -214,6 +231,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(3) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+#endif
movl 4(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -238,6 +258,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(31) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+#endif
movl 8(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
@@ -262,6 +285,9 @@ L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
the addition will not result in 0. */
jnz L(32) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+#endif
movl 12(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
diff --git a/sysdeps/i386/strtok.S b/sysdeps/i386/strtok.S
index 79d540b..cfee507 100644
--- a/sysdeps/i386/strtok.S
+++ b/sysdeps/i386/strtok.S
@@ -67,6 +67,11 @@ ENTRY (FUNCTION)
movl STR(%esp), %edx
movl DELIM(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx DELIM(%esp,%eax,1), %bnd1
+#endif
+
#if !defined USE_AS_STRTOK_R && defined PIC
pushl %ebx /* Save PIC register. */
cfi_adjust_cfa_offset (4)
@@ -336,6 +341,9 @@ L(11):
/* Store the pointer to the next character. */
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
@@ -351,6 +359,9 @@ L(returnNULL):
xorl %eax, %eax
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
jmp L(epilogue)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=c1057ab824007e18036efff12d9f080d16a37f2c
commit c1057ab824007e18036efff12d9f080d16a37f2c
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Mon Aug 26 16:51:26 2013 +0400
Warning! Temporarily use -O0 for the vfprintf.c file if MPX is enabled, because of a compiler bug related to MPX.
diff --git a/stdio-common/Makefile b/stdio-common/Makefile
index 658804b..12befcc 100644
--- a/stdio-common/Makefile
+++ b/stdio-common/Makefile
@@ -77,6 +77,9 @@ $(objpfx)tst-printf.out: tst-printf.sh $(objpfx)tst-printf
endif
CFLAGS-vfprintf.c = -Wno-uninitialized
+ifeq ($(enable-mpx), yes)
+CFLAGS-vfprintf.c += -O0 -D__OPTIMIZE__
+endif
CFLAGS-vfwprintf.c = -Wno-uninitialized
CFLAGS-tst-printf.c = -Wno-format
CFLAGS-tstdiomisc.c = -Wno-format
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5ea96e116e1a6bc7039d4db564fad35ff4a692a9
commit 5ea96e116e1a6bc7039d4db564fad35ff4a692a9
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Mon Sep 2 13:21:47 2013 +0400
Add --enable-mpx option to configure for Intel MPX support.
Conflicts:
elf/Makefile
diff --git a/config.make.in b/config.make.in
index 7b04568..8c1228d 100644
--- a/config.make.in
+++ b/config.make.in
@@ -96,12 +96,14 @@ build-nscd = @build_nscd@
use-nscd = @use_nscd@
build-hardcoded-path-in-tests= @hardcoded_path_in_tests@
build-pt-chown = @build_pt_chown@
+enable-mpx = @enable_mpx@
# Build tools.
CC = @CC@
CXX = @CXX@
BUILD_CC = @BUILD_CC@
CFLAGS = @CFLAGS@
+ASFLAGS = @ASFLAGS@
CPPFLAGS-config = @CPPFLAGS@
CPPUNDEFS = @CPPUNDEFS@
ASFLAGS-config = @ASFLAGS_config@
diff --git a/configure b/configure
index afe7821..7bb8bf2 100755
--- a/configure
+++ b/configure
@@ -653,6 +653,8 @@ link_obsolete_rpc
libc_cv_nss_crypt
all_warnings
force_install
+ASFLAGS
+enable_mpx
bindnow
hardcoded_path_in_tests
oldest_abi
@@ -747,6 +749,7 @@ enable_lock_elision
enable_add_ons
enable_hidden_plt
enable_bind_now
+enable_mpx
enable_static_nss
enable_force_install
enable_kernel
@@ -1409,6 +1412,7 @@ Optional Features:
for add-ons if no parameter given
--disable-hidden-plt do not hide internal function calls to avoid PLT
--enable-bind-now disable lazy relocations in DSOs
+ --enable-mpx turn on Intel MPX extension
--enable-static-nss build static NSS modules [default=no]
--disable-force-install don't force installation of files from this package,
even if they are older than the installed files
@@ -3519,6 +3523,24 @@ fi
+# Check whether --enable-mpx was given.
+if test "${enable_mpx+set}" = set; then :
+ enableval=$enable_mpx; enable_mpx=$enableval
+else
+ enable_mpx=no
+fi
+
+
+
+
+if test "$ac_test_CFLAGS" != set && test "$enable_mpx" = yes ; then
+ CFLAGS="$CFLAGS -g -fcheck-pointers -mmpx -fno-chkp-check-incomplete-type";
+fi
+
+if test "$enable_mpx" = yes ; then
+ ASFLAGS="$ASFLAGS -g -fcheck-pointers -mmpx -Wa,-madd-bnd-prefix"
+fi
+
# Check whether --enable-static-nss was given.
if test "${enable_static_nss+set}" = set; then :
enableval=$enable_static_nss; static_nss=$enableval
diff --git a/configure.in b/configure.in
index 9172ad1..d7eb9a6 100644
--- a/configure.in
+++ b/configure.in
@@ -216,6 +216,22 @@ AC_ARG_ENABLE([bind-now],
[bindnow=no])
AC_SUBST(bindnow)
+AC_ARG_ENABLE([mpx],
+ AC_HELP_STRING([--enable-mpx],
+ [turn on Intel MPX extension]),
+ [enable_mpx=$enableval],
+ [enable_mpx=no])
+
+AC_SUBST(enable_mpx)
+AC_SUBST(ASFLAGS)
+if test "$ac_test_CFLAGS" != set && test "$enable_mpx" = yes ; then
+ CFLAGS="$CFLAGS -g -fcheck-pointers -mmpx -fno-chkp-check-incomplete-type";
+fi
+
+if test "$enable_mpx" = yes ; then
+ ASFLAGS="$ASFLAGS -g -fcheck-pointers -mmpx -Wa,-madd-bnd-prefix"
+fi
+
dnl On some platforms we cannot use dynamic loading. We must provide
dnl static NSS modules.
AC_ARG_ENABLE([static-nss],
diff --git a/elf/Makefile b/elf/Makefile
index 4ef80c9..2bdf045 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -49,7 +49,10 @@ all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-iterate-phdr.c = $(uses-callbacks)
+
+ifeq ($(enable-mpx), yes)
CFLAGS-dl-init.c = -fno-check-pointers
+endif
ifeq ($(unwind-find-fde),yes)
routines += unwind-dw2-fde-glibc
diff --git a/manual/install.texi b/manual/install.texi
index 4575d22..68dab0d 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -177,6 +177,9 @@ setuid and owned by @code{root}. The use of @file{pt_chown} introduces
additional security risks to the system and you should enable it only if
you understand and accept those risks.
+@item --enable-mpx
+By default, Intel MPX extension is disabled. This option turns it on.
+
@item --build=@var{build-system}
@itemx --host=@var{host-system}
These options are for cross-compiling. If you specify both options and
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2f16fa64a350b1977034d27f8546c2b4c822d035
commit 2f16fa64a350b1977034d27f8546c2b4c822d035
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sun May 19 18:30:05 2013 +0400
Support new siginfo in Glibc for Intel MPX.
diff --git a/sysdeps/unix/sysv/linux/x86/bits/siginfo.h b/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
index bfc6aa3..23d946c 100644
--- a/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
+++ b/sysdeps/unix/sysv/linux/x86/bits/siginfo.h
@@ -108,6 +108,10 @@ typedef struct
{
void *si_addr; /* Faulting insn/memory ref. */
short int si_addr_lsb; /* Valid LSB of the reported address. */
+# ifdef __CHKP__
+ void *si_lower;
+ void *si_upper;
+# endif
} _sigfault;
/* SIGPOLL. */
@@ -141,6 +145,10 @@ typedef struct
# define si_ptr _sifields._rt.si_sigval.sival_ptr
# define si_addr _sifields._sigfault.si_addr
# define si_addr_lsb _sifields._sigfault.si_addr_lsb
+# ifdef __CHKP__
+# define si_lower _sifields._sigfault.si_lower
+# define si_upper _sifields._sigfault.si_upper
+# endif
# define si_band _sifields._sigpoll.si_band
# define si_fd _sifields._sigpoll.si_fd
# define si_call_addr _sifields._sigsys._call_addr
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=774ecb15b08e64ff458e924007ded47de6ec87fd
commit 774ecb15b08e64ff458e924007ded47de6ec87fd
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Jan 25 18:40:50 2013 +0400
Intel MPX support for x86_64 and x86_32 pthread routines.
Always use zero bounds for __tls_get_addr.
Always use zero bounds for _Unwind_Resume.
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index 576d9a1..ee84fa6 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -767,6 +767,9 @@ update_get_addr (GET_ADDR_ARGS)
void *
__tls_get_addr (GET_ADDR_ARGS)
{
+#ifdef __CHKP__
+ GET_ADDR_PARAM = __bnd_init_ptr_bounds(GET_ADDR_PARAM);
+#endif
dtv_t *dtv = THREAD_DTV ();
if (__builtin_expect (dtv[0].counter != GL(dl_tls_generation), 0))
diff --git a/nptl/cleanup.c b/nptl/cleanup.c
index 07e064b..a7503aa 100644
--- a/nptl/cleanup.c
+++ b/nptl/cleanup.c
@@ -19,7 +19,6 @@
#include <stdlib.h>
#include "pthreadP.h"
-
void
__cleanup_fct_attribute
__pthread_register_cancel (__pthread_unwind_buf_t *buf)
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index a6d6bc4..973ff0e 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -94,6 +94,13 @@ __pthread_cond_timedwait:
je .Lreltmo
#endif
+#ifdef __CHKP__
+ bndldx (%esp,%ebx,1), %bnd0
+ bndldx 28(%esp,%ebp,1), %bnd2
+ bndmov %bnd0, 48(%esp)
+ bndmov %bnd2, 80(%esp)
+#endif
+
/* Get internal lock. */
movl $1, %edx
xorl %eax, %eax
@@ -109,12 +116,24 @@ __pthread_cond_timedwait:
different value in there this is a bad user bug. */
2: cmpl $-1, dep_mutex(%ebx)
movl 24(%esp), %eax
+#ifdef __CHKP__
+ bndldx 4(%esp,%eax,1), %bnd1
+ bndmov %bnd1, 64(%esp)
+#endif
je 17f
movl %eax, dep_mutex(%ebx)
/* Unlock the mutex. */
17: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
testl %eax, %eax
jne 16f
@@ -296,9 +315,25 @@ __pthread_cond_timedwait:
should always succeed or else the kernel did not lock the mutex
correctly. */
movl dep_mutex(%ebx), %eax
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
jmp 8b
28: addl $1, wakeup_seq(%ebx)
@@ -356,8 +391,15 @@ __pthread_cond_timedwait:
movl 16(%esp), %ecx
testl %ecx, %ecx
jnz 27f
-
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
26: addl $FRAME_SIZE, %esp
cfi_adjust_cfa_offset(-FRAME_SIZE)
@@ -388,7 +430,16 @@ __pthread_cond_timedwait:
cfi_restore_state
-27: call __pthread_mutex_cond_lock_adjust
+27:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
xorl %eax, %eax
jmp 26b
@@ -529,7 +580,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
117: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
testl %eax, %eax
jne 16b
@@ -899,10 +958,27 @@ __condvar_tw_cleanup:
cmpl %ebx, %gs:TID
jne 8f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
jmp 9f
-8: call __pthread_mutex_cond_lock
+8:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 48(%esp), %bnd0
+ bndmov 64(%esp), %bnd1
+ bndmov 80(%esp), %bnd2
+#endif
9: movl %esi, (%esp)
.LcallUR:
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index 9695dcb..af53cbf 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -60,6 +60,10 @@ __pthread_cond_wait:
xorl %esi, %esi
movl 20(%esp), %ebx
+#ifdef __CHKP__
+ bndldx (%esp,%ebx,1), %bnd0
+ bndmov %bnd0, 32(%esp)
+#endif
LIBC_PROBE (cond_wait, 2, 24(%esp), %ebx)
@@ -78,12 +82,23 @@ __pthread_cond_wait:
different value in there this is a bad user bug. */
2: cmpl $-1, dep_mutex(%ebx)
movl 24(%esp), %eax
+#ifdef __CHKP__
+ bndldx 4(%esp,%eax,1), %bnd1
+ bndmov %bnd1, 48(%esp)
+#endif
je 15f
movl %eax, dep_mutex(%ebx)
/* Unlock the mutex. */
15: xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
testl %eax, %eax
jne 12f
@@ -270,7 +285,14 @@ __pthread_cond_wait:
testl %ecx, %ecx
jnz 21f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
20: addl $FRAME_SIZE, %esp
cfi_adjust_cfa_offset(-FRAME_SIZE);
@@ -292,7 +314,15 @@ __pthread_cond_wait:
cfi_restore_state
-21: call __pthread_mutex_cond_lock_adjust
+21:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
xorl %eax, %eax
jmp 20b
@@ -308,9 +338,23 @@ __pthread_cond_wait:
should always succeed or else the kernel did not lock the mutex
correctly. */
movl dep_mutex(%ebx), %eax
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
xorl %edx, %edx
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
jmp 8b
/* Initial locking failed. */
@@ -581,10 +625,25 @@ __condvar_w_cleanup:
cmpl %ebx, %gs:TID
jne 8f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
call __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
jmp 9f
-8: call __pthread_mutex_cond_lock
+8:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ call __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%esp), %bnd0
+ bndmov 48(%esp), %bnd1
+#endif
9: movl %esi, (%esp)
.LcallUR:
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
index b405b9e..7104fba 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S
@@ -114,6 +114,9 @@ __pthread_once:
jne 7f
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET(__pthread_register_cancel)
/* Call the user-provided initialization function. */
@@ -121,6 +124,9 @@ __pthread_once:
/* Pop the cleanup handler. */
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET(__pthread_unregister_cancel)
addl $UNWINDBUFSIZE+8, %esp
cfi_adjust_cfa_offset (-UNWINDBUFSIZE-8)
@@ -168,6 +174,9 @@ __pthread_once:
ENTER_KERNEL
leal 8(%esp), %eax
+#ifdef __CHKP__
+ bndldx 8(%esp,%eax,1), %bnd0
+#endif
call HIDDEN_JUMPTARGET (__pthread_unwind_next)
/* NOTREACHED */
hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index 6c1a75f..dc15345 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -99,6 +99,12 @@ __pthread_cond_timedwait:
movq %rsi, 16(%rsp)
movq %rdx, %r13
+#ifdef __CHKP__
+ bndmov %bnd0, 72(%rsp)
+ bndmov %bnd1, 88(%rsp)
+ bndmov %bnd2, 104(%rsp)
+#endif
+
je 22f
mov %RSI_LP, dep_mutex(%rdi)
@@ -128,7 +134,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
32: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
testl %eax, %eax
jne 46f
@@ -338,7 +352,15 @@ __pthread_cond_timedwait:
testb %r15b, %r15b
jnz 64f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
63: testq %rax, %rax
cmoveq %r14, %rax
@@ -362,7 +384,16 @@ __pthread_cond_timedwait:
cfi_restore_state
-64: callq __pthread_mutex_cond_lock_adjust
+64:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
movq %r14, %rax
jmp 48b
@@ -457,7 +488,15 @@ __pthread_cond_timedwait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
testl %eax, %eax
jne 46b
@@ -786,7 +825,15 @@ __condvar_cleanup2:
cmpl %eax, %fs:TID
jne 7f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 72(%rsp), %bnd0
+ bndmov 88(%rsp), %bnd1
+ bndmov 104(%rsp), %bnd2
+#endif
jmp 8f
7: callq __pthread_mutex_cond_lock
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index f0f6683..32b8d69 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -74,6 +74,11 @@ __pthread_cond_wait:
movq %rdi, 8(%rsp)
movq %rsi, 16(%rsp)
+#ifdef __CHKP__
+ bndmov %bnd0, 32(%rsp)
+ bndmov %bnd1, 48(%rsp)
+#endif
+
je 15f
mov %RSI_LP, dep_mutex(%rdi)
@@ -91,7 +96,14 @@ __pthread_cond_wait:
/* Unlock the mutex. */
2: movq 16(%rsp), %rdi
xorl %esi, %esi
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_unlock_usercnt
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
testl %eax, %eax
jne 12f
@@ -256,7 +268,14 @@ __pthread_cond_wait:
testb %r8b, %r8b
jnz 18f
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
14: leaq FRAME_SIZE(%rsp), %rsp
cfi_adjust_cfa_offset(-FRAME_SIZE)
@@ -266,7 +285,15 @@ __pthread_cond_wait:
cfi_adjust_cfa_offset(FRAME_SIZE)
-18: callq __pthread_mutex_cond_lock_adjust
+18:
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
+ callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
xorl %eax, %eax
jmp 14b
@@ -510,10 +537,16 @@ __condvar_cleanup1:
cmpl %eax, %fs:TID
jne 7f
/* We managed to get the lock. Fix it up before returning. */
+#ifdef __CHKP__
+ bndmov %bnd1, %bnd0
+#endif
callq __pthread_mutex_cond_lock_adjust
+#ifdef __CHKP__
+ bndmov 32(%rsp), %bnd0
+ bndmov 48(%rsp), %bnd1
+#endif
jmp 8f
-
7: callq __pthread_mutex_cond_lock
8: movq 24(%rsp), %rdi
diff --git a/sysdeps/gnu/unwind-resume.c b/sysdeps/gnu/unwind-resume.c
index df845cd..19e06b2 100644
--- a/sysdeps/gnu/unwind-resume.c
+++ b/sysdeps/gnu/unwind-resume.c
@@ -46,6 +46,9 @@ init (void)
void
_Unwind_Resume (struct _Unwind_Exception *exc)
{
+#ifdef __CHKP__
+ exc = (struct _Unwind_Exception *) __bnd_set_ptr_bounds (exc, sizeof (struct _Unwind_Exception));
+#endif
if (__builtin_expect (libgcc_s_resume == NULL, 0))
init ();
libgcc_s_resume (exc);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=87372602ca6c23df5784f3f200131e862a236eec
commit 87372602ca6c23df5784f3f200131e862a236eec
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 16:33:47 2013 +0400
Buffer overrun detected by Intel MPX in wcschr test. Fixed.
diff --git a/string/test-strchr.c b/string/test-strchr.c
index cbcf53e..572671f 100644
--- a/string/test-strchr.c
+++ b/string/test-strchr.c
@@ -219,9 +219,14 @@ do_random_tests (void)
static void
check1 (void)
{
- char s[] __attribute__((aligned(16))) = "\xff";
- char c = '\xfe';
- char *exp_result = stupid_STRCHR (s, c);
+ CHAR s[] __attribute__((aligned(16))) =
+#ifdef WIDE
+ L"\xff";
+#else
+ "\xff";
+#endif
+ CHAR c = '\xfe';
+ CHAR *exp_result = stupid_STRCHR (s, c);
FOR_EACH_IMPL (impl, 0)
check_result (impl, s, c, exp_result);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=53b31dab5b9733bde88d1d2bc7c2a29182af72e2
commit 53b31dab5b9733bde88d1d2bc7c2a29182af72e2
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sat Dec 22 20:51:45 2012 +0400
[BZ 15698] Buffer overrun detected by Intel MPX at sysdeps/unix/sysv/linux/ifaddrs.c
diff --git a/sysdeps/unix/sysv/linux/ifaddrs.c b/sysdeps/unix/sysv/linux/ifaddrs.c
index 89fda15..4f5f7b5 100644
--- a/sysdeps/unix/sysv/linux/ifaddrs.c
+++ b/sysdeps/unix/sysv/linux/ifaddrs.c
@@ -782,9 +782,11 @@ getifaddrs_internal (struct ifaddrs **ifap)
for (i = 0; i < (preflen / 8); i++)
*cp++ = 0xff;
- c = 0xff;
- c <<= (8 - (preflen % 8));
- *cp = c;
+ if (preflen < max_prefixlen) {
+ c = 0xff;
+ c <<= (8 - (preflen % 8));
+ *cp = c;
+ }
}
}
}
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=cccbfcfeed3c5da5c2be2110fc43ee3accd41c55
commit cccbfcfeed3c5da5c2be2110fc43ee3accd41c55
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 18:56:40 2012 +0400
Buffer overrun detected by Intel MPX in stdio-common/scanf13.c. Fixed.
diff --git a/stdio-common/scanf13.c b/stdio-common/scanf13.c
index 720224a..aa58dd5 100644
--- a/stdio-common/scanf13.c
+++ b/stdio-common/scanf13.c
@@ -59,6 +59,7 @@ main (void)
}
memset (buf, '/', sizeof (buf));
+ buf[sizeof(buf) - 1] = 0;
buf[0] = '\t';
buf[1] = ' ';
buf[2] = 0xc3;
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=ba4e1c2b51e9ea57f00f5082353793f23af85747
commit ba4e1c2b51e9ea57f00f5082353793f23af85747
Author: ienkovic <ilya.enkovich@intel.com>
Date: Tue Dec 25 15:16:28 2012 +0400
Do not block the SIGSEGV signal because the Intel MPX runtime uses it.
diff --git a/nptl/sysdeps/pthread/gai_misc.h b/nptl/sysdeps/pthread/gai_misc.h
index 6026085..46305ca 100644
--- a/nptl/sysdeps/pthread/gai_misc.h
+++ b/nptl/sysdeps/pthread/gai_misc.h
@@ -82,6 +82,9 @@ __gai_start_notify_thread (void)
sigset_t ss;
sigemptyset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, NULL, _NSIG / 8);
}
@@ -106,6 +109,9 @@ __gai_create_helper_thread (pthread_t *threadp, void *(*tf) (void *),
sigset_t oss;
sigfillset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
int ret = pthread_create (threadp, &attr, tf, arg);
diff --git a/nptl/sysdeps/unix/sysv/linux/aio_misc.h b/nptl/sysdeps/unix/sysv/linux/aio_misc.h
index 2649dc1..3994f98 100644
--- a/nptl/sysdeps/unix/sysv/linux/aio_misc.h
+++ b/nptl/sysdeps/unix/sysv/linux/aio_misc.h
@@ -32,6 +32,9 @@ __aio_start_notify_thread (void)
sigset_t ss;
sigemptyset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, NULL, _NSIG / 8);
}
@@ -54,6 +57,9 @@ __aio_create_helper_thread (pthread_t *threadp, void *(*tf) (void *),
sigset_t oss;
sigfillset (&ss);
INTERNAL_SYSCALL_DECL (err);
+#ifdef __CHKP__
+ __sigdelset(&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
int ret = pthread_create (threadp, &attr, tf, arg);
diff --git a/nptl/sysdeps/unix/sysv/linux/mq_notify.c b/nptl/sysdeps/unix/sysv/linux/mq_notify.c
index 6bc34ba..b9250df 100644
--- a/nptl/sysdeps/unix/sysv/linux/mq_notify.c
+++ b/nptl/sysdeps/unix/sysv/linux/mq_notify.c
@@ -78,6 +78,9 @@ change_sigmask (int how, sigset_t *oss)
{
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset (&ss, SIGSEGV);
+#endif
return pthread_sigmask (how, &ss, oss);
}
diff --git a/nptl/sysdeps/unix/sysv/linux/timer_routines.c b/nptl/sysdeps/unix/sysv/linux/timer_routines.c
index 57f115f..1979adc 100644
--- a/nptl/sysdeps/unix/sysv/linux/timer_routines.c
+++ b/nptl/sysdeps/unix/sysv/linux/timer_routines.c
@@ -174,6 +174,9 @@ __start_helper_thread (void)
sigset_t oss;
sigfillset (&ss);
__sigaddset (&ss, SIGCANCEL);
+#ifdef __CHKP__
+ __sigdelset (&ss, SIGSEGV);
+#endif
INTERNAL_SYSCALL_DECL (err);
INTERNAL_SYSCALL (rt_sigprocmask, err, 4, SIG_SETMASK, &ss, &oss, _NSIG / 8);
diff --git a/nptl/tst-cancel7.c b/nptl/tst-cancel7.c
index ad40b9c..7e8a860 100644
--- a/nptl/tst-cancel7.c
+++ b/nptl/tst-cancel7.c
@@ -65,6 +65,9 @@ sl (void)
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset (&ss, SIGSEGV);
+#endif
sigsuspend (&ss);
exit (0);
}
diff --git a/nptl/tst-signal1.c b/nptl/tst-signal1.c
index 81dd161..0345701 100644
--- a/nptl/tst-signal1.c
+++ b/nptl/tst-signal1.c
@@ -68,6 +68,9 @@ receiver (void)
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
if (pthread_sigmask (SIG_SETMASK, &ss, NULL) != 0)
{
puts ("1st pthread_sigmask failed");
diff --git a/nptl/tst-signal2.c b/nptl/tst-signal2.c
index 87f3bb8..23cda43 100644
--- a/nptl/tst-signal2.c
+++ b/nptl/tst-signal2.c
@@ -71,6 +71,9 @@ receiver (void)
alarm (10);
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
if (pthread_sigmask (SIG_SETMASK, &ss, NULL) != 0)
{
diff --git a/nptl/tst-signal3.c b/nptl/tst-signal3.c
index fc34f66..ae5fea6 100644
--- a/nptl/tst-signal3.c
+++ b/nptl/tst-signal3.c
@@ -96,6 +96,9 @@ do_test (void)
/* Block all signals. */
sigset_t ss;
sigfillset (&ss);
+#ifdef __CHKP__
+ sigdelset(&ss, SIGSEGV);
+#endif
th_main = pthread_self ();
@@ -118,6 +121,9 @@ do_test (void)
};
sigfillset (&sa.sa_mask);
+#ifdef __CHKP__
+ sigdelset(&sa.sa_mask, SIGSEGV);
+#endif
if (sigaction (sig0 + i, &sa, NULL) != 0)
{
printf ("sigaction for signal %d failed\n", i);
diff --git a/sysdeps/posix/profil.c b/sysdeps/posix/profil.c
index 86d36a9..28613af 100644
--- a/sysdeps/posix/profil.c
+++ b/sysdeps/posix/profil.c
@@ -106,6 +106,9 @@ __profil (u_short *sample_buffer, size_t size, size_t offset, u_int scale)
act.sa_handler = (sighandler_t) &profil_counter;
act.sa_flags = SA_RESTART;
__sigfillset (&act.sa_mask);
+#ifdef __CHKP__
+ __sigdelset (&act.sa_mask, SIGSEGV);
+#endif
if (__sigaction (SIGPROF, &act, oact_ptr) < 0)
return -1;
diff --git a/sysdeps/posix/sigwait.c b/sysdeps/posix/sigwait.c
index b0ea14d..a980647 100644
--- a/sysdeps/posix/sigwait.c
+++ b/sysdeps/posix/sigwait.c
@@ -42,11 +42,17 @@ do_sigwait (const sigset_t *set, int *sig)
/* Prepare set. */
__sigfillset (&tmp_mask);
+#ifdef __CHKP__
+ __sigdelset (&tmp_mask, SIGSEGV);
+#endif
/* Unblock all signals in the SET and register our nice handler. */
action.sa_handler = ignore_signal;
action.sa_flags = 0;
__sigfillset (&action.sa_mask); /* Block all signals for handler. */
+#ifdef __CHKP__
+ __sigdelset (&action.sa_mask, SIGSEGV);
+#endif
/* Make sure we recognize error conditions by setting WAS_SIG to a
value which does not describe a legal signal number. */
diff --git a/sysdeps/posix/sprofil.c b/sysdeps/posix/sprofil.c
index 1447a4f..42c43cd 100644
--- a/sysdeps/posix/sprofil.c
+++ b/sysdeps/posix/sprofil.c
@@ -339,6 +339,9 @@ __sprofil (struct prof *profp, int profcnt, struct timeval *tvp,
act.sa_handler = (sighandler_t) &profil_counter_ushort;
act.sa_flags = SA_RESTART;
__sigfillset (&act.sa_mask);
+#ifdef __CHKP__
+ __sigdelset (&act.sa_mask, SIGSEGV);
+#endif
if (__sigaction (SIGPROF, &act, &prof_info.saved_action) < 0)
return -1;
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9bbe2c25421b4f3b06c6053d98ed8a4379aa6fb4
commit 9bbe2c25421b4f3b06c6053d98ed8a4379aa6fb4
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 17:08:14 2013 +0400
Inappropriate code style for Intel MPX in string/strcpy.c and wcsmbc/wcscpy.c
Fix the code if MPX is enabled.
diff --git a/string/strcpy.c b/string/strcpy.c
index b71f753..04278ec 100644
--- a/string/strcpy.c
+++ b/string/strcpy.c
@@ -26,6 +26,7 @@ char *
strcpy (dest, src)
char *dest;
const char *src;
+#ifndef __CHKP__
{
char c;
char *s = (char *) src;
@@ -40,4 +41,12 @@ strcpy (dest, src)
return dest;
}
+#else
+{
+ const char *ret = dest;
+ while ((*dest++ = *src++) != '\0');
+ return ret;
+}
+#endif
+
libc_hidden_builtin_def (strcpy)
diff --git a/wcsmbs/wcscpy.c b/wcsmbs/wcscpy.c
index 3b1e0c6..3113cf5 100644
--- a/wcsmbs/wcscpy.c
+++ b/wcsmbs/wcscpy.c
@@ -25,6 +25,7 @@ wchar_t *
wcscpy (dest, src)
wchar_t *dest;
const wchar_t *src;
+#ifndef __CHKP__
{
wint_t c;
wchar_t *wcp;
@@ -56,3 +57,11 @@ wcscpy (dest, src)
return dest;
}
+#else
+{
+ const wchar_t *ret = dest;
+ while ((*dest++ = *src++) != L'\0');
+ return ret;
+
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=bc2adc60820301b8780d021795e86030033d99d6
commit bc2adc60820301b8780d021795e86030033d99d6
Author: Liubov Dmitrieva <ldmitrie@sourceware.org>
Date: Thu Aug 29 19:25:35 2013 +0400
Inappropriate code style for Intel MPX in debug/wcscpy_chk.c. Fix the code if MPX is enabled.
diff --git a/debug/wcscpy_chk.c b/debug/wcscpy_chk.c
index 61092c3..3e6d185 100644
--- a/debug/wcscpy_chk.c
+++ b/debug/wcscpy_chk.c
@@ -23,6 +23,7 @@
/* Copy SRC to DEST. */
wchar_t *
__wcscpy_chk (wchar_t *dest, const wchar_t *src, size_t n)
+#ifndef __CHKP__
{
wint_t c;
wchar_t *wcp;
@@ -58,3 +59,22 @@ __wcscpy_chk (wchar_t *dest, const wchar_t *src, size_t n)
return dest;
}
+#else
+{
+ const wchar_t *result = dest;
+ dest--;
+ wint_t c;
+
+ do
+ {
+ if (__builtin_expect (n-- == 0, 0))
+ __chk_fail ();
+ c = src[0];
+ *++dest = c;
+ ++src;
+ }
+ while (c != L'\0');
+
+ return result;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=9abc3d74ab5958f675378c35f325e1511d2c1c15
commit 9abc3d74ab5958f675378c35f325e1511d2c1c15
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Mar 11 17:06:38 2013 +0400
Inappropriate code style for Intel MPX in debug/wcpcpy_chk. Fix the code if MPX is enabled.
diff --git a/debug/wcpcpy_chk.c b/debug/wcpcpy_chk.c
index 7c836e6..d90f293 100644
--- a/debug/wcpcpy_chk.c
+++ b/debug/wcpcpy_chk.c
@@ -26,6 +26,7 @@
DEST. Check for overflows. */
wchar_t *
__wcpcpy_chk (wchar_t *dest, const wchar_t *src, size_t destlen)
+#ifndef __CHKP__
{
wchar_t *wcp = (wchar_t *) dest - 1;
wint_t c;
@@ -42,3 +43,21 @@ __wcpcpy_chk (wchar_t *dest, const wchar_t *src, size_t destlen)
return wcp;
}
+#else
+{
+ dest--;
+ wint_t c;
+
+ do
+ {
+ if (__builtin_expect (destlen-- == 0, 0))
+ __chk_fail ();
+ c = src[0];
+ *++dest = c;
+ ++src;
+ }
+ while (c != L'\0');
+
+ return dest;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=8b1885f652bc90514b36117c07312415fd7b09a7
commit 8b1885f652bc90514b36117c07312415fd7b09a7
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon May 27 18:54:53 2013 +0400
Inappropriate code style for Intel MPX at wcsmbs/wcpcpy.c. Use other implementation if MPX is enabled.
diff --git a/wcsmbs/wcpcpy.c b/wcsmbs/wcpcpy.c
index 6f952b4..4c541b1 100644
--- a/wcsmbs/wcpcpy.c
+++ b/wcsmbs/wcpcpy.c
@@ -18,8 +18,9 @@
#include <wchar.h>
-#define __need_ptrdiff_t
-#include <stddef.h>
+#ifndef __CHKP__
+# define __need_ptrdiff_t
+# include <stddef.h>
/* Copy SRC to DEST, returning the address of the terminating L'\0' in
@@ -42,5 +43,14 @@ __wcpcpy (dest, src)
return wcp;
}
+#else
+wchar_t *
+__wcpcpy (wchar_t *dst, const wchar_t *src)
+{
+ while ((*dst++ = *src++) != L'\0');
+ return dst - 1;
+}
+
+#endif
weak_alias (__wcpcpy, wcpcpy)
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2c42c71e641f5b841d222dfd3e4a2ba1d8131ea7
commit 2c42c71e641f5b841d222dfd3e4a2ba1d8131ea7
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Dec 20 18:46:38 2012 +0400
Inappropriate code style for Intel MPX at posix/fnmatch_loop.c. Fixed.
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index 078b982..802eb18 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -313,7 +313,7 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
/* Invalid character class name. */
return FNM_NOMATCH;
-# if defined _LIBC && ! WIDE_CHAR_VERSION
+# if defined _LIBC && ! WIDE_CHAR_VERSION && !defined __CHKP__
/* The following code is glibc specific but does
there a good job in speeding up the code since
we can avoid the btowc() call. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5c77173eec2808c6940abb2bddd57551dec6ccfe
commit 5c77173eec2808c6940abb2bddd57551dec6ccfe
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Dec 20 18:23:10 2012 +0400
Inappropriate code style for Intel MPX at argp/argp-help.c. Fixed.
diff --git a/argp/argp-help.c b/argp/argp-help.c
index ace71b4..8054785 100644
--- a/argp/argp-help.c
+++ b/argp/argp-help.c
@@ -867,7 +867,10 @@ hol_append (struct hol *hol, struct hol *more)
/* Fix up the short options pointers from HOL. */
for (e = entries, left = hol->num_entries; left > 0; e++, left--)
- e->short_options += (short_options - hol->short_options);
+ {
+ unsigned long offset = e->short_options - hol->short_options;
+ e->short_options = (char *)(short_options + offset);
+ }
/* Now add the short options from MORE, fixing up its entries
too. */
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=abfb12dc5f16cca6fc52083814710a6111e9f6a0
commit abfb12dc5f16cca6fc52083814710a6111e9f6a0
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 17:03:44 2012 +0400
Inappropriate code style for Intel MPX. Expand bounds in crypt/crypt.c
diff --git a/crypt/crypt.c b/crypt/crypt.c
index e429950..96ec2eb 100644
--- a/crypt/crypt.c
+++ b/crypt/crypt.c
@@ -43,7 +43,13 @@ _ufc_doit_r(itr, __data, res)
int i;
long32 s, *k;
long32 *sb01 = (long32*)__data->sb0;
+#ifdef __CHKP__
+ sb01 = __bnd_set_ptr_bounds (sb01, sizeof(__data->sb0) + sizeof(__data->sb1));
+#endif
long32 *sb23 = (long32*)__data->sb2;
+#ifdef __CHKP__
+ sb23 = __bnd_set_ptr_bounds (sb23, sizeof(__data->sb2) + sizeof(__data->sb3));
+#endif
long32 l1, l2, r1, r2;
l1 = (long32)res[0]; l2 = (long32)res[1];
@@ -89,7 +95,13 @@ _ufc_doit_r(itr, __data, res)
int i;
long64 l, r, s, *k;
long64 *sb01 = (long64*)__data->sb0;
+#ifdef __CHKP__
+ sb01 = __bnd_set_ptr_bounds (sb01, sizeof(__data->sb0) + sizeof(__data->sb1));
+#endif
long64 *sb23 = (long64*)__data->sb2;
+#ifdef __CHKP__
+ sb23 = __bnd_set_ptr_bounds (sb23, sizeof(__data->sb2) + sizeof(__data->sb3));
+#endif
l = (((long64)res[0]) << 32) | ((long64)res[1]);
r = (((long64)res[2]) << 32) | ((long64)res[3]);
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4a9acc37ff2f154d0c3957af830242bac5bda851
commit 4a9acc37ff2f154d0c3957af830242bac5bda851
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Dec 19 14:55:21 2012 +0400
Inappropriate code style for Intel MPX in libio/fileops.c.
Use zero bounds as it is hard to rewrite the algorithm.
diff --git a/libio/fileops.c b/libio/fileops.c
index e92f85b..a17504b 100644
--- a/libio/fileops.c
+++ b/libio/fileops.c
@@ -758,6 +758,9 @@ decide_maybe_mmap (_IO_FILE *fp)
void *p;
p = __mmap64 (NULL, st.st_size, PROT_READ, MAP_SHARED, fp->_fileno, 0);
+#ifdef __CHKP__
+ p = __bnd_init_ptr_bounds(p);
+#endif
if (p != MAP_FAILED)
{
/* OK, we managed to map the file. Set the buffer up and use a
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=af1d2d1ffb534abeadb2a82365f0b6ef6fc96e3a
commit af1d2d1ffb534abeadb2a82365f0b6ef6fc96e3a
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Thu Nov 8 16:35:39 2012 +0400
Inappropriate code style for Intel MPX in elf/dl-close.c
A cast implies memory access with bounds violation.
Let allow that.
diff --git a/elf/dl-close.c b/elf/dl-close.c
index fe3014c..15775ec 100644
--- a/elf/dl-close.c
+++ b/elf/dl-close.c
@@ -347,6 +347,10 @@ _dl_close_worker (struct link_map *map)
struct link_map *tmap = (struct link_map *)
((char *) imap->l_scope[cnt]
- offsetof (struct link_map, l_searchlist));
+#ifdef __CHKP__
+ tmap = __bnd_set_ptr_bounds(tmap, sizeof(struct link_map));
+#endif
+
assert (tmap->l_ns == nsid);
if (tmap->l_idx == IDX_STILL_USED)
++remain;
@@ -393,6 +397,9 @@ _dl_close_worker (struct link_map *map)
struct link_map *tmap = (struct link_map *)
((char *) imap->l_scope[cnt]
- offsetof (struct link_map, l_searchlist));
+#ifdef __CHKP__
+ tmap = __bnd_set_ptr_bounds(tmap, sizeof(struct link_map));
+#endif
if (tmap->l_idx != IDX_STILL_USED)
{
/* Remove the scope. Or replace with own map's
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=fcad0ab9dc3deded2cfd8e7bc467e204d8b55a63
commit fcad0ab9dc3deded2cfd8e7bc467e204d8b55a63
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Tue Dec 18 19:42:52 2012 +0400
Inappropriate code style for Intel MPX in crypt/crypt_util.c. Fixed.
diff --git a/crypt/crypt_util.c b/crypt/crypt_util.c
index 2409079..8b58668 100644
--- a/crypt/crypt_util.c
+++ b/crypt/crypt_util.c
@@ -487,7 +487,7 @@ small_tables_done:
* DES round.
*
*/
-
+#ifndef __CHKP__
if (__data->sb0 + sizeof (__data->sb0) == __data->sb1
&& __data->sb1 + sizeof (__data->sb1) == __data->sb2
&& __data->sb2 + sizeof (__data->sb2) == __data->sb3)
@@ -497,11 +497,14 @@ small_tables_done:
+ (int)sizeof(__data->sb2)
+ (int)sizeof(__data->sb3));
else {
+#endif
_ufc_clearmem(__data->sb0, (int)sizeof(__data->sb0));
_ufc_clearmem(__data->sb1, (int)sizeof(__data->sb1));
_ufc_clearmem(__data->sb2, (int)sizeof(__data->sb2));
_ufc_clearmem(__data->sb3, (int)sizeof(__data->sb3));
+#ifndef __CHKP__
}
+#endif
for(sg = 0; sg < 4; sg++) {
int j1, j2;
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=0a6a6f8037cc93a6d165d925d6c029fe42998acc
commit 0a6a6f8037cc93a6d165d925d6c029fe42998acc
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Oct 15 15:01:09 2012 +0400
Inappropriate code style for Intel MPX. Fix missing of bounds in sysdeps/generic/unwind-dw2-fde.h
diff --git a/sysdeps/generic/unwind-dw2-fde.h b/sysdeps/generic/unwind-dw2-fde.h
index fad46bf..7fce24c 100644
--- a/sysdeps/generic/unwind-dw2-fde.h
+++ b/sysdeps/generic/unwind-dw2-fde.h
@@ -147,7 +147,7 @@ typedef struct dwarf_fde fde;
static inline struct dwarf_cie *
get_cie (struct dwarf_fde *f)
{
- return (void *)&f->CIE_delta - f->CIE_delta;
+ return (char *)f + offsetof (struct dwarf_fde, CIE_delta) - f->CIE_delta;
}
static inline fde *
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=01d45f7e37130cb47d0ef788283e99bc07564f34
commit 01d45f7e37130cb47d0ef788283e99bc07564f34
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Dec 14 18:41:37 2012 +0400
Inappropriate code style for Intel MPX in debug/strcpy_chk.c Use different version if MPX enabled.
diff --git a/debug/strcpy_chk.c b/debug/strcpy_chk.c
index 81bf46f..ba6da70 100644
--- a/debug/strcpy_chk.c
+++ b/debug/strcpy_chk.c
@@ -27,6 +27,7 @@ __strcpy_chk (dest, src, destlen)
char *dest;
const char *src;
size_t destlen;
+#ifndef __CHKP__
{
char c;
char *s = (char *) src;
@@ -65,3 +66,45 @@ __strcpy_chk (dest, src, destlen)
return dest;
}
+#else
+{
+ char c;
+ char *s = (char *) src;
+ char *d = (char *) dest;
+
+ while (__builtin_expect (destlen >= 4, 0))
+ {
+ c = s[0];
+ d[0] = c;
+ if (c == '\0')
+ return dest;
+ c = s[1];
+ d[1] = c;
+ if (c == '\0')
+ return dest;
+ c = s[2];
+ d[2] = c;
+ if (c == '\0')
+ return dest;
+ c = s[3];
+ d[3] = c;
+ if (c == '\0')
+ return dest;
+ destlen -= 4;
+ d += 4;
+ s += 4;
+ }
+
+ do
+ {
+ if (__builtin_expect (destlen-- == 0, 0))
+ __chk_fail ();
+ c = *s;
+ *(d++) = c;
+ s++;
+ }
+ while (c != '\0');
+
+ return dest;
+}
+#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=00ae469c06aeed1e7bd988875d241cc5a6339d01
commit 00ae469c06aeed1e7bd988875d241cc5a6339d01
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Fri Nov 23 18:50:27 2012 +0400
If Intel MPX is enabled: always compile the file elf/dl-init.c with -fno-check-pointers,
because this file contains code executing before runtime library
initialization happens.
diff --git a/elf/Makefile b/elf/Makefile
index 3b58649..4ef80c9 100644
--- a/elf/Makefile
+++ b/elf/Makefile
@@ -49,6 +49,7 @@ all-rtld-routines = $(rtld-routines) $(sysdep-rtld-routines)
CFLAGS-dl-runtime.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-lookup.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-dl-iterate-phdr.c = $(uses-callbacks)
+CFLAGS-dl-init.c = -fno-check-pointers
ifeq ($(unwind-find-fde),yes)
routines += unwind-dw2-fde-glibc
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=4cd77a6b091db5450ec634eeaeab8e36ea3bb1dd
commit 4cd77a6b091db5450ec634eeaeab8e36ea3bb1dd
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Dec 17 13:44:21 2012 +0400
Add attribute __bnd_variable_size to make the use of flexible-size arrays Intel MPX compliant.
diff --git a/bits/dirent.h b/bits/dirent.h
index 2117a7c..77cae84 100644
--- a/bits/dirent.h
+++ b/bits/dirent.h
@@ -32,7 +32,7 @@ struct dirent
unsigned char d_namlen; /* Length of the file name. */
/* Only this member is in the POSIX standard. */
- char d_name[1]; /* File name (actually longer). */
+ char d_name[1] __attribute__((bnd_variable_size)); /* File name (actually longer). */
};
#ifdef __USE_LARGEFILE64
@@ -43,7 +43,7 @@ struct dirent64
unsigned char d_type;
unsigned char d_namlen;
- char d_name[1];
+ char d_name[1] __attribute__((bnd_variable_size));
};
#endif
diff --git a/bits/sched.h b/bits/sched.h
index 0c200a9..0a9513a 100644
--- a/bits/sched.h
+++ b/bits/sched.h
@@ -65,7 +65,7 @@ typedef unsigned long int __cpu_mask;
/* Data structure to describe CPU mask. */
typedef struct
{
- __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS];
+ __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS] __attribute__((bnd_variable_size));
} cpu_set_t;
/* Access functions for CPU masks. */
diff --git a/debug/tst-chk1.c b/debug/tst-chk1.c
index 6ca8d9d..9783d3a 100644
--- a/debug/tst-chk1.c
+++ b/debug/tst-chk1.c
@@ -137,8 +137,8 @@ do_test (void)
}
setenv ("LIBC_FATAL_STDERR_", "1", 1);
- struct A { char buf1[9]; char buf2[1]; } a;
- struct wA { wchar_t buf1[9]; wchar_t buf2[1]; } wa;
+ struct A { char buf1[9] __attribute__((bnd_variable_size)); char buf2[1]; } a;
+ struct wA { wchar_t buf1[9] __attribute__((bnd_variable_size)); wchar_t buf2[1]; } wa;
printf ("Test checking routines at fortify level %d\n",
#ifdef __USE_FORTIFY_LEVEL
diff --git a/dlfcn/dlfcn.h b/dlfcn/dlfcn.h
index 1ed47b1..0fab755 100644
--- a/dlfcn/dlfcn.h
+++ b/dlfcn/dlfcn.h
@@ -180,7 +180,7 @@ typedef struct
{
size_t dls_size; /* Size in bytes of the whole buffer. */
unsigned int dls_cnt; /* Number of elements in `dls_serpath'. */
- Dl_serpath dls_serpath[1]; /* Actually longer, dls_cnt elements. */
+ Dl_serpath dls_serpath[1] __attribute__((bnd_variable_size)); /* Actually longer, dls_cnt elements. */
} Dl_serinfo;
#endif /* __USE_GNU */
diff --git a/include/link.h b/include/link.h
index 1682467..ca253eb 100644
--- a/include/link.h
+++ b/include/link.h
@@ -318,7 +318,7 @@ struct link_map
{
uintptr_t cookie;
unsigned int bindflags;
- } l_audit[0];
+ } l_audit[0] __attribute__((bnd_variable_size));
};
diff --git a/inet/netinet/in.h b/inet/netinet/in.h
index 89e3813..12294d0 100644
--- a/inet/netinet/in.h
+++ b/inet/netinet/in.h
@@ -319,7 +319,7 @@ struct ip_msfilter
/* Number of source addresses. */
uint32_t imsf_numsrc;
/* Source addresses. */
- struct in_addr imsf_slist[1];
+ struct in_addr imsf_slist[1] __attribute__((bnd_variable_size));
};
#define IP_MSFILTER_SIZE(numsrc) (sizeof (struct ip_msfilter) \
@@ -340,7 +340,7 @@ struct group_filter
/* Number of source addresses. */
uint32_t gf_numsrc;
/* Source addresses. */
- struct sockaddr_storage gf_slist[1];
+ struct sockaddr_storage gf_slist[1] __attribute__((bnd_variable_size));
};
#define GROUP_FILTER_SIZE(numsrc) (sizeof (struct group_filter) \
diff --git a/inet/protocols/routed.h b/inet/protocols/routed.h
index befd865..457d792 100644
--- a/inet/protocols/routed.h
+++ b/inet/protocols/routed.h
@@ -52,8 +52,8 @@ struct rip {
u_char rip_vers; /* protocol version # */
u_char rip_res1[2]; /* pad to 32-bit boundary */
union {
- struct netinfo ru_nets[1]; /* variable length... */
- char ru_tracefile[1]; /* ditto ... */
+ struct netinfo ru_nets[1] __attribute__((bnd_variable_size)); /* variable length... */
+ char ru_tracefile[1] __attribute__((bnd_variable_size)); /* ditto ... */
} ripun;
#define rip_nets ripun.ru_nets
#define rip_tracefile ripun.ru_tracefile
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index f4aa215..9885a13 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -204,7 +204,7 @@ struct known_translation_t
/* Pointer to the string in question. */
union
{
- char appended[ZERO]; /* used if domain != NULL */
+ char appended[ZERO] __attribute__((bnd_variable_size)); /* used if domain != NULL */
const char *ptr; /* used if domain == NULL */
}
msgid;
@@ -342,7 +342,7 @@ struct block_list
typedef struct transmem_list
{
struct transmem_list *next;
- char data[ZERO];
+ char data[ZERO] __attribute__((bnd_variable_size));
} transmem_block_t;
static struct transmem_list *transmem_list;
#else
diff --git a/intl/gettextP.h b/intl/gettextP.h
index d1ec644..79f0a4c 100644
--- a/intl/gettextP.h
+++ b/intl/gettextP.h
@@ -160,7 +160,7 @@ struct binding
struct binding *next;
char *dirname;
char *codeset;
- char domainname[ZERO];
+ char domainname[ZERO] __attribute__((bnd_variable_size));
};
/* A counter which is incremented each time some previous translations
diff --git a/intl/gmo.h b/intl/gmo.h
index 7b50597..b4c48cc 100644
--- a/intl/gmo.h
+++ b/intl/gmo.h
@@ -137,7 +137,7 @@ struct sysdep_string
nls_uint32 segsize;
/* Reference to system dependent string segment, or ~0 at the end. */
nls_uint32 sysdepref;
- } segments[1];
+ } segments[1] __attribute__((bnd_variable_size));
};
/* Marker for the end of the segments[] array. This has the value 0xFFFFFFFF,
diff --git a/intl/loadinfo.h b/intl/loadinfo.h
index 7563624..8004233 100644
--- a/intl/loadinfo.h
+++ b/intl/loadinfo.h
@@ -58,7 +58,7 @@ struct loaded_l10nfile
const void *data;
struct loaded_l10nfile *next;
- struct loaded_l10nfile *successor[1];
+ struct loaded_l10nfile *successor[1] __attribute__((bnd_variable_size));
};
diff --git a/io/fts.h b/io/fts.h
index 0a070ba..93f94f8 100644
--- a/io/fts.h
+++ b/io/fts.h
@@ -116,7 +116,7 @@ typedef struct _ftsent {
u_short fts_instr; /* fts_set() instructions */
struct stat *fts_statp; /* stat(2) information */
- char fts_name[1]; /* file name */
+ char fts_name[1] __attribute__((bnd_variable_size)); /* file name */
} FTSENT;
__BEGIN_DECLS
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 3142726..8dbb598 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -84,7 +84,7 @@ struct __locale_data
const char *string;
unsigned int word; /* Note endian issues vs 64-bit pointers. */
}
- values __flexarr; /* Items, usually pointers into `filedata'. */
+ values __flexarr __attribute__((bnd_variable_size)); /* Items, usually pointers into `filedata'. */
};
/* We know three kinds of collation sorting rules. */
@@ -185,7 +185,7 @@ extern const union catnamestr_t
#include "categories.def"
#undef DEFINE_CATEGORY
};
- char str[0];
+ char str[0] __attribute__((bnd_variable_size));
} _nl_category_names attribute_hidden;
extern const uint8_t _nl_category_name_idxs[__LC_LAST] attribute_hidden;
extern const uint8_t _nl_category_name_sizes[__LC_LAST] attribute_hidden;
diff --git a/misc/search.h b/misc/search.h
index e3b3dfd..63a7768 100644
--- a/misc/search.h
+++ b/misc/search.h
@@ -35,7 +35,7 @@ struct qelem
{
struct qelem *q_forw;
struct qelem *q_back;
- char q_data[1];
+ char q_data[1] __attribute__((bnd_variable_size));
};
# endif
diff --git a/nptl/descr.h b/nptl/descr.h
index 58176ea..a175bb0 100644
--- a/nptl/descr.h
+++ b/nptl/descr.h
@@ -162,7 +162,7 @@ struct pthread
};
/* This descriptor's link on the `stack_used' or `__stack_user' list. */
- list_t list;
+ list_t list __attribute__((bnd_variable_size));
/* Thread ID - which is also a 'is this thread descriptor (and
therefore stack) used' flag. */
@@ -174,7 +174,10 @@ struct pthread
/* List of robust mutexes the thread is holding. */
#ifdef __PTHREAD_MUTEX_HAVE_PREV
void *robust_prev;
- struct robust_list_head robust_head;
+ struct robust_list_head robust_head __attribute__((bnd_variable_size));
+ /* sometimes we want to cast pair {robust_prev (void *) and the
+ * first field of struct robust_list_head (void *)}
+ * to __pthread_list_t (struct consists of two pointers: __prev, __next) */
/* The list above is strange. It is basically a double linked list
but the pointer to the next/previous element of the list points
@@ -186,7 +189,7 @@ struct pthread
# define ENQUEUE_MUTEX_BOTH(mutex, val) \
do { \
__pthread_list_t *next = (__pthread_list_t *) \
- ((((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_head.list)) & ~1ul) \
+ ((char *)(((uintptr_t) THREAD_GETMEM (THREAD_SELF, robust_head.list)) & ~1ul) \
- QUEUE_PTR_ADJUST); \
next->__prev = (void *) &mutex->__data.__list.__next; \
mutex->__data.__list.__next = THREAD_GETMEM (THREAD_SELF, \
diff --git a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
index 28b49bd..0adb200 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
+++ b/nptl/sysdeps/unix/sysv/linux/x86/bits/pthreadtypes.h
@@ -75,7 +75,7 @@ typedef union pthread_attr_t pthread_attr_t;
typedef struct __pthread_internal_list
{
struct __pthread_internal_list *__prev;
- struct __pthread_internal_list *__next;
+ struct __pthread_internal_list *__next __attribute__((bnd_variable_size));
} __pthread_list_t;
#else
typedef struct __pthread_internal_slist
diff --git a/stdio-common/psiginfo-define.h b/stdio-common/psiginfo-define.h
index e1d1a35..d76cb6b 100644
--- a/stdio-common/psiginfo-define.h
+++ b/stdio-common/psiginfo-define.h
@@ -3,7 +3,7 @@ static const union C(codestrs_t_, NOW) {
#define P(n, s) char MF(__LINE__)[sizeof (s)];
#include "psiginfo-data.h"
};
- char str[0];
+ char str[0] __attribute__((bnd_variable_size));
} C(codestrs_, NOW) = { {
#define P(n, s) s,
#include "psiginfo-data.h"
diff --git a/sunrpc/clnt_udp.c b/sunrpc/clnt_udp.c
index 1b6a20b..eca7122 100644
--- a/sunrpc/clnt_udp.c
+++ b/sunrpc/clnt_udp.c
@@ -96,7 +96,7 @@ struct cu_data
u_int cu_sendsz;
char *cu_outbuf;
u_int cu_recvsz;
- char cu_inbuf[1];
+ char cu_inbuf[1] __attribute__((bnd_variable_size)) ;
};
/*
diff --git a/sysdeps/gnu/netinet/ip_icmp.h b/sysdeps/gnu/netinet/ip_icmp.h
index 136fb47..5c2cb0c 100644
--- a/sysdeps/gnu/netinet/ip_icmp.h
+++ b/sysdeps/gnu/netinet/ip_icmp.h
@@ -189,7 +189,7 @@ struct icmp
} id_ip;
struct icmp_ra_addr id_radv;
u_int32_t id_mask;
- u_int8_t id_data[1];
+ u_int8_t id_data[1] __attribute__((bnd_variable_size));
} icmp_dun;
#define icmp_otime icmp_dun.id_ts.its_otime
#define icmp_rtime icmp_dun.id_ts.its_rtime
diff --git a/sysdeps/unix/sysv/linux/bits/sched.h b/sysdeps/unix/sysv/linux/bits/sched.h
index e42dee8..5086380 100644
--- a/sysdeps/unix/sysv/linux/bits/sched.h
+++ b/sysdeps/unix/sysv/linux/bits/sched.h
@@ -124,7 +124,7 @@ typedef unsigned long int __cpu_mask;
/* Data structure to describe CPU mask. */
typedef struct
{
- __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS];
+ __cpu_mask __bits[__CPU_SETSIZE / __NCPUBITS] __attribute__((bnd_variable_size));
} cpu_set_t;
/* Access functions for CPU masks. */
diff --git a/sysvipc/sys/msg.h b/sysvipc/sys/msg.h
index a0b38f0..c06424f 100644
--- a/sysvipc/sys/msg.h
+++ b/sysvipc/sys/msg.h
@@ -51,7 +51,7 @@ typedef __ssize_t ssize_t;
struct msgbuf
{
__syscall_slong_t mtype; /* type of received/sent message */
- char mtext[1]; /* text of the message */
+ char mtext[1] __attribute__((bnd_variable_size)); /* text of the message */
};
#endif
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=5d92ac866acfc532a4d49af0b1c2b69c260c0ce3
commit 5d92ac866acfc532a4d49af0b1c2b69c260c0ce3
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Jan 21 15:35:12 2013 +0400
Use C code instead of inline assembler in macros of tls.h for i386 (for Intel MPX only).
diff --git a/nptl/sysdeps/i386/tls.h b/nptl/sysdeps/i386/tls.h
index 3d18b1d..bf30088 100644
--- a/nptl/sysdeps/i386/tls.h
+++ b/nptl/sysdeps/i386/tls.h
@@ -259,11 +259,24 @@ union user_desc_init
assignments like
pthread_descr self = thread_self();
do not get optimized away. */
-# define THREAD_SELF \
+
+# ifndef __CHKP__
+# define THREAD_SELF \
({ struct pthread *__self; \
asm ("movl %%gs:%c1,%0" : "=r" (__self) \
: "i" (offsetof (struct pthread, header.self))); \
__self;})
+# else
+# define THREAD_SELF \
+ ({ struct pthread *__self; \
+ asm ("movl %%gs:%c1,%0" : "=r" (__self) \
+ : "i" (offsetof (struct pthread, header.self))); \
+ /* Set first minimum bounds to make possible reading stackblock and stackblock_size. */ \
+ __self = __bnd_set_ptr_bounds(__self, TLS_INIT_TCB_SIZE); \
+ /* Set actual correct bounds. */ \
+ (struct pthread*) __bnd_copy_ptr_bounds(__self, __bnd_set_ptr_bounds(__self->stackblock, \
+ __self->stackblock_size)); })
+# endif
/* Magic for libthread_db to know how to do THREAD_SELF. */
# define DB_THREAD_SELF \
@@ -272,7 +285,8 @@ union user_desc_init
/* Read member of the thread descriptor directly. */
-# define THREAD_GETMEM(descr, member) \
+# ifndef __CHKP__
+# define THREAD_GETMEM(descr, member) \
({ __typeof (descr->member) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%gs:%P2,%b0" \
@@ -296,10 +310,15 @@ union user_desc_init
"i" (offsetof (struct pthread, member) + 4)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM(descr, member) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member; })
+# endif
/* Same as THREAD_GETMEM, but the member offset can be non-constant. */
-# define THREAD_GETMEM_NC(descr, member, idx) \
+# ifndef __CHKP__
+# define THREAD_GETMEM_NC(descr, member, idx) \
({ __typeof (descr->member[0]) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%gs:%P2(%3),%b0" \
@@ -325,10 +344,15 @@ union user_desc_init
"r" (idx)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx]; })
+# endif
/* Same as THREAD_SETMEM, but the member offset can be non-constant. */
-# define THREAD_SETMEM(descr, member, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM(descr, member, value) \
({ if (sizeof (descr->member) == 1) \
asm volatile ("movb %b0,%%gs:%P1" : \
: "iq" (value), \
@@ -350,10 +374,15 @@ union user_desc_init
"i" (offsetof (struct pthread, member)), \
"i" (offsetof (struct pthread, member) + 4)); \
}})
-
+# else
+# define THREAD_SETMEM(descr, member, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member = value; })
+#endif
/* Set member of the thread descriptor directly. */
-# define THREAD_SETMEM_NC(descr, member, idx, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
({ if (sizeof (descr->member[0]) == 1) \
asm volatile ("movb %b0,%%gs:%P1(%2)" : \
: "iq" (value), \
@@ -377,7 +406,11 @@ union user_desc_init
"i" (offsetof (struct pthread, member)), \
"r" (idx)); \
}})
-
+# else
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx] = value; })
+# endif
/* Atomic compare and exchange on TLS, returning old value. */
#define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
@@ -417,7 +450,8 @@ union user_desc_init
/* Call the user-provided thread function. */
-#define CALL_THREAD_FCT(descr) \
+#ifndef __CHKP__
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
int __ignore1, __ignore2; \
asm volatile ("pushl %%eax\n\t" \
@@ -430,7 +464,11 @@ union user_desc_init
: "i" (offsetof (struct pthread, start_routine)), \
"i" (offsetof (struct pthread, arg))); \
__res; })
-
+# else
+# define CALL_THREAD_FCT(descr) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->start_routine(__self->arg); })
+# endif
/* Set the stack guard field in TCB head. */
#define THREAD_SET_STACK_GUARD(value) \
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=022ff6731b1e0938048df6904d40e9bf876625b1
commit 022ff6731b1e0938048df6904d40e9bf876625b1
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Sat Nov 10 12:22:56 2012 +0400
Use C code instead of inline assembler in macros of tls.h for x86_64 (for Intel MPX only).
diff --git a/nptl/sysdeps/x86_64/tls.h b/nptl/sysdeps/x86_64/tls.h
index 61df1af..d3bf15a 100644
--- a/nptl/sysdeps/x86_64/tls.h
+++ b/nptl/sysdeps/x86_64/tls.h
@@ -89,6 +89,7 @@ typedef struct
#ifndef __ASSEMBLER__
+
/* Get system call information. */
# include <sysdep.h>
@@ -166,10 +167,15 @@ typedef struct
/* Return the address of the dtv for the current thread. */
-# define THREAD_DTV() \
+# ifndef __CHKP__
+# define THREAD_DTV() \
({ struct pthread *__pd; \
THREAD_GETMEM (__pd, header.dtv); })
-
+# else
+# define THREAD_DTV() \
+ ({ struct pthread *__self = THREAD_SELF; \
+ GET_DTV(__self); })
+# endif
/* Return the thread descriptor for the current thread.
@@ -177,18 +183,31 @@ typedef struct
assignments like
pthread_descr self = thread_self();
do not get optimized away. */
-# define THREAD_SELF \
+# ifndef __CHKP__
+# define THREAD_SELF \
({ struct pthread *__self; \
asm ("mov %%fs:%c1,%0" : "=r" (__self) \
: "i" (offsetof (struct pthread, header.self))); \
- __self;})
+ __self; })
+# else
+# define THREAD_SELF \
+ ({ struct pthread *__self; \
+ asm ("mov %%fs:%c1,%0" : "=r" (__self) \
+ : "i" (offsetof (struct pthread, header.self))); \
+ /* Set first minimum bounds to make possible reading stackblock and stackblock_size. */ \
+ __self = __bnd_set_ptr_bounds(__self, TLS_INIT_TCB_SIZE); \
+ /* Set actual correct bounds. */ \
+ (struct pthread*) __bnd_copy_ptr_bounds(__self, __bnd_set_ptr_bounds(__self->stackblock, \
+ __self->stackblock_size)); })
+# endif
/* Magic for libthread_db to know how to do THREAD_SELF. */
# define DB_THREAD_SELF_INCLUDE <sys/reg.h> /* For the FS constant. */
# define DB_THREAD_SELF CONST_THREAD_AREA (64, FS)
/* Read member of the thread descriptor directly. */
-# define THREAD_GETMEM(descr, member) \
+# ifndef __CHKP__
+# define THREAD_GETMEM(descr, member) \
({ __typeof (descr->member) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%fs:%P2,%b0" \
@@ -202,7 +221,7 @@ typedef struct
{ \
if (sizeof (__value) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %%fs:%P1,%q0" \
@@ -210,10 +229,15 @@ typedef struct
: "i" (offsetof (struct pthread, member))); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM(descr, member) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member; })
+# endif
/* Same as THREAD_GETMEM, but the member offset can be non-constant. */
-# define THREAD_GETMEM_NC(descr, member, idx) \
+# ifndef __CHKP__
+# define THREAD_GETMEM_NC(descr, member, idx) \
({ __typeof (descr->member[0]) __value; \
if (sizeof (__value) == 1) \
asm volatile ("movb %%fs:%P2(%q3),%b0" \
@@ -228,7 +252,7 @@ typedef struct
{ \
if (sizeof (__value) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %%fs:%P1(,%q2,8),%q0" \
@@ -237,7 +261,11 @@ typedef struct
"r" (idx)); \
} \
__value; })
-
+# else
+# define THREAD_GETMEM_NC(descr, member, idx) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx]; })
+# endif
/* Loading addresses of objects on x86-64 needs to be treated special
when generating PIC code. */
@@ -249,7 +277,8 @@ typedef struct
/* Same as THREAD_SETMEM, but the member offset can be non-constant. */
-# define THREAD_SETMEM(descr, member, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM(descr, member, value) \
({ if (sizeof (descr->member) == 1) \
asm volatile ("movb %b0,%%fs:%P1" : \
: "iq" (value), \
@@ -262,17 +291,22 @@ typedef struct
{ \
if (sizeof (descr->member) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1" : \
: IMM_MODE ((uint64_t) cast_to_integer (value)), \
"i" (offsetof (struct pthread, member))); \
}})
-
+# else
+# define THREAD_SETMEM(descr, member, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member = value; })
+# endif
/* Set member of the thread descriptor directly. */
-# define THREAD_SETMEM_NC(descr, member, idx, value) \
+# ifndef __CHKP__
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
({ if (sizeof (descr->member[0]) == 1) \
asm volatile ("movb %b0,%%fs:%P1(%q2)" : \
: "iq" (value), \
@@ -287,7 +321,7 @@ typedef struct
{ \
if (sizeof (descr->member[0]) != 8) \
/* There should not be any value with a size other than 1, \
- 4 or 8. */ \
+ 4 or 8. */ \
abort (); \
\
asm volatile ("movq %q0,%%fs:%P1(,%q2,8)" : \
@@ -295,7 +329,11 @@ typedef struct
"i" (offsetof (struct pthread, member[0])), \
"r" (idx)); \
}})
-
+# else
+# define THREAD_SETMEM_NC(descr, member, idx, value) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->member[idx] = value; })
+# endif
/* Atomic compare and exchange on TLS, returning old value. */
# define THREAD_ATOMIC_CMPXCHG_VAL(descr, member, newval, oldval) \
@@ -333,8 +371,8 @@ typedef struct
/* Not necessary for other sizes in the moment. */ \
abort (); })
-
-# define CALL_THREAD_FCT(descr) \
+# ifndef __CHKP__
+# define CALL_THREAD_FCT(descr) \
({ void *__res; \
asm volatile ("movq %%fs:%P2, %%rdi\n\t" \
"callq *%%fs:%P1" \
@@ -344,7 +382,11 @@ typedef struct
: "di", "si", "cx", "dx", "r8", "r9", "r10", "r11", \
"memory", "cc"); \
__res; })
-
+# else
+# define CALL_THREAD_FCT(descr) \
+ ({ struct pthread *__self = THREAD_SELF; \
+ __self->start_routine(__self->arg); })
+# endif
/* Set the stack guard field in TCB head. */
# define THREAD_SET_STACK_GUARD(value) \
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=558bf1c0479495b1b7759bbe58b5f0b455fe7b46
commit 558bf1c0479495b1b7759bbe58b5f0b455fe7b46
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Oct 24 16:00:49 2012 +0400
Intel MPX support for mmap and mremap wrappers of syscalls for x86_32 and x86_64.
Create bounds.
Use C wrapper of syscall instead of assembler wrapper for x86_64.
diff --git a/sysdeps/unix/sysv/linux/i386/Makefile b/sysdeps/unix/sysv/linux/i386/Makefile
index acc3021..f38f4b2 100644
--- a/sysdeps/unix/sysv/linux/i386/Makefile
+++ b/sysdeps/unix/sysv/linux/i386/Makefile
@@ -2,7 +2,7 @@
default-abi := 32
ifeq ($(subdir),misc)
-sysdep_routines += ioperm iopl vm86 call_pselect6 call_fallocate
+sysdep_routines += ioperm iopl vm86 call_pselect6 call_fallocate mremap
endif
ifeq ($(subdir),elf)
diff --git a/sysdeps/unix/sysv/linux/i386/mmap.S b/sysdeps/unix/sysv/linux/i386/mmap.S
index 0addf65..035a698 100644
--- a/sysdeps/unix/sysv/linux/i386/mmap.S
+++ b/sysdeps/unix/sysv/linux/i386/mmap.S
@@ -74,6 +74,11 @@ L(skip):
ja SYSCALL_ERROR_LABEL
/* Successful; return the syscall's value. */
+ mov 8(%esp), %ecx
+#ifdef __CHKP__
+ bndmk -1(%eax, %ecx), %bnd0
+#endif
+
ret
PSEUDO_END (__mmap)
diff --git a/sysdeps/unix/sysv/linux/i386/mmap64.S b/sysdeps/unix/sysv/linux/i386/mmap64.S
index 31a0f67..8b44c6e 100644
--- a/sysdeps/unix/sysv/linux/i386/mmap64.S
+++ b/sysdeps/unix/sysv/linux/i386/mmap64.S
@@ -89,6 +89,10 @@ L(do_syscall):
ja SYSCALL_ERROR_LABEL
/* Successful; return the syscall's value. */
+ mov 8(%esp), %ecx
+#ifdef __CHKP__
+ bndmk -1(%eax, %ecx), %bnd0
+#endif
ret
cfi_adjust_cfa_offset (16)
diff --git a/sysdeps/unix/sysv/linux/i386/mremap.c b/sysdeps/unix/sysv/linux/i386/mremap.c
new file mode 100644
index 0000000..ad55d9d
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/i386/mremap.c
@@ -0,0 +1,36 @@
+/* Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sysdeps/unix/sysv/linux/i386/sysdep.h>
+
+void *
+__mremap (void *old_address, size_t old_size, size_t new_size, int flags, ...)
+{
+ void *p = INLINE_SYSCALL (mremap, 4, old_address, old_size, new_size, flags);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, new_size);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mremap, mremap)
diff --git a/sysdeps/unix/sysv/linux/x86_64/mmap.c b/sysdeps/unix/sysv/linux/x86_64/mmap.c
new file mode 100644
index 0000000..1ee6f96
--- /dev/null
+++ b/sysdeps/unix/sysv/linux/x86_64/mmap.c
@@ -0,0 +1,52 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sysdeps/unix/sysv/linux/x86_64/sysdep.h>
+
+void *
+__mmap (void *addr, size_t len, int prot, int flags, int fd, off_t offset)
+{
+ void *p = INLINE_SYSCALL (mmap, 6, addr, len, prot, flags, fd, offset);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, len);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mmap, mmap64)
+weak_alias (__mmap, __mmap64)
+weak_alias (__mmap, mmap)
+
+void *
+__mremap (void *old_address, size_t old_size, size_t new_size, int flags, ...)
+{
+ void *p = INLINE_SYSCALL (mremap, 4, old_address, old_size, new_size, flags);
+ if ((long) p == -1) return MAP_FAILED;
+#ifdef __CHKP__
+ return __bnd_set_ptr_bounds (p, new_size);
+#else
+ return p;
+#endif
+}
+
+weak_alias (__mremap, mremap)
diff --git a/sysdeps/unix/sysv/linux/x86_64/mmap64.c b/sysdeps/unix/sysv/linux/x86_64/mmap64.c
new file mode 100644
index 0000000..e69de29
diff --git a/sysdeps/unix/sysv/linux/x86_64/mremap.c b/sysdeps/unix/sysv/linux/x86_64/mremap.c
new file mode 100644
index 0000000..e69de29
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=152d00ad923c702a8753499421baba027d0beffc
commit 152d00ad923c702a8753499421baba027d0beffc
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Wed Oct 10 19:28:57 2012 +0400
Save/restore bounds in x86_64 and x86_32 version of _dl_runtime_resolve.
diff --git a/sysdeps/i386/dl-trampoline.S b/sysdeps/i386/dl-trampoline.S
index 945708f..5f3acb6 100644
--- a/sysdeps/i386/dl-trampoline.S
+++ b/sysdeps/i386/dl-trampoline.S
@@ -31,9 +31,29 @@ _dl_runtime_resolve:
cfi_adjust_cfa_offset (4)
pushl %edx
cfi_adjust_cfa_offset (4)
+#ifndef __CHKP__
movl 16(%esp), %edx # Copy args pushed by PLT in register. Note
movl 12(%esp), %eax # that `fixup' takes its parameters in regs.
+#else
+ subl $32, %esp
+ cfi_adjust_cfa_offset (32)
+ bndmov %bnd0, (%esp)
+ bndmov %bnd1, 8(%esp)
+ bndmov %bnd2, 16(%esp)
+ bndmov %bnd3, 24(%esp)
+ movl 48(%esp), %edx # Copy args pushed by PLT in register. Note
+ movl 44(%esp), %eax # that `fixup' takes its parameters in regs.
+ bndldx 44(%esp, %eax), %bnd0
+#endif
call _dl_fixup # Call resolver.
+#ifdef __CHKP__
+ bndmov 24(%esp), %bnd3
+ bndmov 16(%esp), %bnd2
+ bndmov 8(%esp), %bnd1
+ bndmov (%esp), %bnd0
+ addl $32, %esp
+ cfi_adjust_cfa_offset (-32)
+#endif
popl %edx # Get register content back.
cfi_adjust_cfa_offset (-4)
movl (%esp), %ecx
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a25e390..e07c192 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -31,8 +31,13 @@
cfi_startproc
_dl_runtime_resolve:
cfi_adjust_cfa_offset(16) # Incorporate PLT
+#ifndef __CHKP__
subq $56,%rsp
cfi_adjust_cfa_offset(56)
+#else
+ subq $120,%rsp
+ cfi_adjust_cfa_offset(120)
+#endif
movq %rax,(%rsp) # Preserve registers otherwise clobbered.
movq %rcx, 8(%rsp)
movq %rdx, 16(%rsp)
@@ -40,10 +45,26 @@ _dl_runtime_resolve:
movq %rdi, 32(%rsp)
movq %r8, 40(%rsp)
movq %r9, 48(%rsp)
+#ifndef __CHKP__
movq 64(%rsp), %rsi # Copy args pushed by PLT in register.
movq 56(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+#else
+ bndmov %bnd0, 56(%rsp)
+ bndmov %bnd1, 72(%rsp)
+ bndmov %bnd2, 88(%rsp)
+ bndmov %bnd3, 104(%rsp)
+ movq 128(%rsp), %rsi # Copy args pushed by PLT in register.
+ movq 120(%rsp), %rdi # %rdi: link_map, %rsi: reloc_index
+ bndldx 120(%rsp, %rdi), %bnd0
+#endif
call _dl_fixup # Call resolver.
movq %rax, %r11 # Save return value
+#ifdef __CHKP__
+ bndmov 104(%rsp), %bnd3
+ bndmov 88(%rsp), %bnd2
+ bndmov 72(%rsp), %bnd1
+ bndmov 56(%rsp), %bnd0
+#endif
movq 48(%rsp), %r9 # Get register content back.
movq 40(%rsp), %r8
movq 32(%rsp), %rdi
@@ -51,8 +72,13 @@ _dl_runtime_resolve:
movq 16(%rsp), %rdx
movq 8(%rsp), %rcx
movq (%rsp), %rax
+#ifndef __CHKP__
addq $72, %rsp # Adjust stack(PLT did 2 pushes)
cfi_adjust_cfa_offset(-72)
+#else
+ addq $136, %rsp # Adjust stack(PLT did 2 pushes)
+ cfi_adjust_cfa_offset(-136)
+#endif
jmp *%r11 # Jump to function address.
cfi_endproc
.size _dl_runtime_resolve, .-_dl_runtime_resolve
http://sourceware.org/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=3e938a73037cfad920442148c39208d321248485
commit 3e938a73037cfad920442148c39208d321248485
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date: Mon Jul 23 19:39:27 2012 +0400
Add Intel MPX support to malloc allocator.
diff --git a/malloc/arena.c b/malloc/arena.c
index 12a48ad..2aaf1b8 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -131,9 +131,15 @@ int __malloc_initialized = -1;
#endif
/* find the heap and corresponding arena for a given ptr */
-
-#define heap_for_ptr(ptr) \
+#ifndef __CHKP__
+# define heap_for_ptr(ptr) \
((heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1)))
+#else
+static heap_info *heap_for_ptr (void *ptr) {
+ heap_info *t = (heap_info *)((unsigned long)(ptr) & ~(HEAP_MAX_SIZE-1));
+ return __bnd_set_ptr_bounds(t, sizeof(heap_info));
+}
+#endif
#define arena_for_chunk(ptr) \
(chunk_non_main_arena(ptr) ? heap_for_ptr(ptr)->ar_ptr : &main_arena)
diff --git a/malloc/hooks.c b/malloc/hooks.c
index 8c25846..c5c682f 100644
--- a/malloc/hooks.c
+++ b/malloc/hooks.c
@@ -171,6 +171,9 @@ mem2chunk_check(void* mem, unsigned char **magic_p)
next_chunk(prev_chunk(p))!=p) ))
return NULL;
magic = MAGICBYTE(p);
+#ifdef __CHKP__
+ p = (mchunkptr) __bnd_set_ptr_bounds(p, sz + SIZE_SZ);
+#endif
for(sz += SIZE_SZ-1; (c = ((unsigned char*)p)[sz]) != magic; sz -= c) {
if(c<=0 || sz<(c+2*SIZE_SZ)) return NULL;
}
diff --git a/malloc/malloc.c b/malloc/malloc.c
index dd295f5..e8fe610 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -218,7 +218,6 @@
#include <malloc-machine.h>
#include <malloc-sysdep.h>
-
#include <atomic.h>
#include <_itoa.h>
#include <bits/wordsize.h>
@@ -1222,11 +1221,12 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/*
---------- Size and alignment checks and conversions ----------
*/
-
+#ifndef __CHKP__
/* conversion from malloc headers to user pointers, and back */
-#define chunk2mem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
-#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+# define chunk2mem(p) ((void*)((char*)(p) + 2*SIZE_SZ))
+# define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
+#endif
/* The smallest possible chunk */
#define MIN_CHUNK_SIZE (offsetof(struct malloc_chunk, fd_nextsize))
@@ -1239,12 +1239,11 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* Check if m has acceptable alignment */
#define aligned_OK(m) (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
+#define aligned_chunk_OK(p) (((unsigned long)((char *)(p) + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK) == 0)
#define misaligned_chunk(p) \
- ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
+ ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : ((char *)(p) + 2 * SIZE_SZ)) \
& MALLOC_ALIGN_MASK)
-
-
/*
Check if a request is so large that it would wrap around zero when
padded and aligned. To simplify some other code, the bound is made
@@ -1312,49 +1311,116 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
/* Get size, ignoring use bits */
#define chunksize(p) ((p)->size & ~(SIZE_BITS))
+#ifdef __CHKP__
+static void* chunk2mem (void *p) {
+ void *t = p + 2 * SIZE_SZ;
+ if (chunk_is_mmapped((mchunkptr)p))
+ return (void *) __bnd_narrow_ptr_bounds (t, t, chunksize((mchunkptr)p) - 2 * SIZE_SZ);
+ /* prev_size field of the next chunk can be used */
+ return (void *) __bnd_set_ptr_bounds(t, chunksize((mchunkptr)p) - SIZE_SZ);
+}
+
+static mchunkptr mem2chunk(void *mem) {
+ mchunkptr temp = (mchunkptr)((char *)(mem) - 2 * SIZE_SZ);
+ temp = __bnd_set_ptr_bounds (temp, sizeof (struct malloc_chunk));
+ return __bnd_set_ptr_bounds (temp, chunksize(temp) > sizeof(struct malloc_chunk) ?
+ chunksize(temp) : sizeof(struct malloc_chunk));
+}
+
+static mchunkptr next_chunk (mchunkptr p) {
+ mchunkptr temp = (mchunkptr)((char*) p + chunksize(p));
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
-/* Ptr to next physical malloc_chunk. */
-#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
+static mchunkptr prev_chunk (mchunkptr p) {
+ mchunkptr temp = (mchunkptr)((char*) p - p->prev_size);
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
+
+static mchunkptr chunk_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ mchunkptr temp = (mchunkptr)((char*) p + s);
+ return (mchunkptr) __bnd_set_ptr_bounds ((void *) temp, sizeof(struct malloc_chunk));
+}
+
+static int inuse (mchunkptr p) {
+ return next_chunk(p)->size & PREV_INUSE;
+}
+
+static int inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ return chunk_at_offset(p, s)->size & PREV_INUSE;
+}
+
+static void set_inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->size |= PREV_INUSE;
+}
+
+static void clear_inuse_bit_at_offset (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->size &= ~(PREV_INUSE);
+}
+
+/* Set size at head, without disturbing its use bit */
+# define set_head_size(p, s) \
+{ \
+ (p) = (__typeof(p)) __bnd_set_ptr_bounds ((void *) (p), (s) > sizeof (__typeof(p)) ? \
+ (s) : sizeof(__typeof(p))); \
+ (p)->size = ((p)->size & SIZE_BITS) | (s); \
+}
+
+/* Set size/use field */
+# define set_head(p, s) \
+({ \
+ (p) = (__typeof(p)) __bnd_set_ptr_bounds ((void *) (p), \
+ ((s) & ~(SIZE_BITS)) > sizeof (__typeof(p)) ? \
+ ((s) & ~(SIZE_BITS)): sizeof (__typeof(p))); \
+ (p)->size = (s); \
+})
+
+/* Set size at footer (only when chunk is not in use) */
+static void set_foot (mchunkptr p, INTERNAL_SIZE_T s) {
+ chunk_at_offset(p, s)->prev_size = s;
+}
+#else
+/* Ptr to next physical malloc_chunk. */
+# define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->size & ~SIZE_BITS) ))
/* Ptr to previous physical malloc_chunk */
-#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
+# define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_size) ))
/* Treat space at ptr + offset as a chunk */
-#define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
+# define chunk_at_offset(p, s) ((mchunkptr)(((char*)(p)) + (s)))
/* extract p's inuse bit */
-#define inuse(p)\
+# define inuse(p)\
((((mchunkptr)(((char*)(p))+((p)->size & ~SIZE_BITS)))->size) & PREV_INUSE)
/* set/clear chunk as being inuse without otherwise disturbing */
-#define set_inuse(p)\
+# define set_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size |= PREV_INUSE
-#define clear_inuse(p)\
+# define clear_inuse(p)\
((mchunkptr)(((char*)(p)) + ((p)->size & ~SIZE_BITS)))->size &= ~(PREV_INUSE)
/* check/set/clear inuse bits in known places */
-#define inuse_bit_at_offset(p, s)\
+# define inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size & PREV_INUSE)
-#define set_inuse_bit_at_offset(p, s)\
+# define set_inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size |= PREV_INUSE)
-#define clear_inuse_bit_at_offset(p, s)\
+# define clear_inuse_bit_at_offset(p, s)\
(((mchunkptr)(((char*)(p)) + (s)))->size &= ~(PREV_INUSE))
/* Set size at head, without disturbing its use bit */
-#define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s)))
+# define set_head_size(p, s) ((p)->size = (((p)->size & SIZE_BITS) | (s)))
/* Set size/use field */
-#define set_head(p, s) ((p)->size = (s))
+# define set_head(p, s) ((p)->size = (s))
/* Set size at footer (only when chunk is not in use) */
-#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
-
-
+# define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_size = (s))
+#endif
/*
-------------------- Internal data structures --------------------
@@ -1945,7 +2011,7 @@ static void do_check_chunk(mstate av, mchunkptr p)
/* chunk is page-aligned */
assert(((p->prev_size + sz) & (GLRO(dl_pagesize)-1)) == 0);
/* mem is aligned */
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
}
}
@@ -1968,7 +2034,7 @@ static void do_check_free_chunk(mstate av, mchunkptr p)
if ((unsigned long)(sz) >= MINSIZE)
{
assert((sz & MALLOC_ALIGN_MASK) == 0);
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
/* ... matching footer field */
assert(next->prev_size == sz);
/* ... and is fully consolidated */
@@ -2042,7 +2108,7 @@ static void do_check_remalloced_chunk(mstate av, mchunkptr p, INTERNAL_SIZE_T s)
assert((sz & MALLOC_ALIGN_MASK) == 0);
assert((unsigned long)(sz) >= MINSIZE);
/* ... and alignment */
- assert(aligned_OK(chunk2mem(p)));
+ assert(aligned_chunk_OK(p));
/* chunk is less than MINSIZE more than request */
assert((long)(sz) - (long)(s) >= 0);
assert((long)(sz) - (long)(s + MINSIZE) < 0);
@@ -2313,16 +2379,16 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
MALLOC_ALIGN_MASK is 2*SIZE_SZ-1. Each mmap'ed area is page
aligned and therefore definitely MALLOC_ALIGN_MASK-aligned. */
- assert (((INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK) == 0);
+ assert (((INTERNAL_SIZE_T)((void *)mm + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK) == 0);
front_misalign = 0;
}
else
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(mm) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)mm + 2 * SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
correction = MALLOC_ALIGNMENT - front_misalign;
p = (mchunkptr)(mm + correction);
- p->prev_size = correction;
set_head(p, (size - correction) |IS_MMAPPED);
+ p->prev_size = correction;
}
else
{
@@ -2349,7 +2415,11 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* Record incoming configuration of top */
old_top = av->top;
- old_size = chunksize(old_top);
+ if (old_top == initial_top(av)) {
+ old_size = 0;
+ } else {
+ old_size = chunksize(old_top);
+ }
old_end = (char*)(chunk_at_offset(old_top, old_size));
brk = snd_brk = (char*)(MORECORE_FAILURE);
@@ -2399,9 +2469,9 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
become the top chunk again later. Note that a footer is set
up, too, although the chunk is marked in use. */
old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
- set_head(chunk_at_offset(old_top, old_size + 2*SIZE_SZ), 0|PREV_INUSE);
+ chunk_at_offset(old_top, old_size + 2*SIZE_SZ)->size = 0|PREV_INUSE;
if (old_size >= MINSIZE) {
- set_head(chunk_at_offset(old_top, old_size), (2*SIZE_SZ)|PREV_INUSE);
+ chunk_at_offset(old_top, old_size)->size = (2*SIZE_SZ)|PREV_INUSE;
set_foot(chunk_at_offset(old_top, old_size), (2*SIZE_SZ));
set_head(old_top, old_size|PREV_INUSE|NON_MAIN_ARENA);
_int_free(av, old_top, 1);
@@ -2545,7 +2615,7 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* Guarantee alignment of first new chunk made from this space */
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
/*
@@ -2599,9 +2669,9 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
else {
if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
/* MORECORE/mmap must correctly align */
- assert(((unsigned long)chunk2mem(brk) & MALLOC_ALIGN_MASK) == 0);
+ assert(((unsigned long)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK) == 0);
else {
- front_misalign = (INTERNAL_SIZE_T)chunk2mem(brk) & MALLOC_ALIGN_MASK;
+ front_misalign = (INTERNAL_SIZE_T)((void *)brk + 2*SIZE_SZ) & MALLOC_ALIGN_MASK;
if (front_misalign > 0) {
/*
@@ -2676,8 +2746,12 @@ static void* sysmalloc(INTERNAL_SIZE_T nb, mstate av)
/* finally, do the allocation */
p = av->top;
- size = chunksize(p);
-
+ if (p != initial_top(av)) {
+ size = chunksize(p);
+ }
+ else {
+ size = 0;
+ }
/* check that one of the above allocation paths succeeded */
if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
remainder_size = size - nb;
@@ -2820,11 +2894,10 @@ mremap_chunk(mchunkptr p, size_t new_size)
p = (mchunkptr)(cp + offset);
- assert(aligned_OK(chunk2mem(p)));
-
- assert((p->prev_size == offset));
+ assert(aligned_chunk_OK(p));
set_head(p, (new_size - offset)|IS_MMAPPED);
+ assert((p->prev_size == offset));
mp_.mmapped_mem -= size + offset;
mp_.mmapped_mem += new_size;
if ((unsigned long)mp_.mmapped_mem > (unsigned long)mp_.max_mmapped_mem)
@@ -2863,7 +2936,11 @@ __libc_malloc(size_t bytes)
(void)mutex_unlock(&ar_ptr->mutex);
assert(!victim || chunk_is_mmapped(mem2chunk(victim)) ||
ar_ptr == arena_for_chunk(mem2chunk(victim)));
+#ifndef __CHKP__
return victim;
+#else
+ return __bnd_narrow_ptr_bounds (victim, victim, bytes);
+#endif
}
libc_hidden_def(__libc_malloc)
@@ -2951,7 +3028,12 @@ __libc_realloc(void* oldmem, size_t bytes)
if(newp) return chunk2mem(newp);
#endif
/* Note the extra SIZE_SZ overhead. */
- if(oldsize - SIZE_SZ >= nb) return oldmem; /* do nothing */
+ if(oldsize - SIZE_SZ >= nb)
+#ifndef __CHKP__
+ return oldmem; /* do nothing */
+#else
+ return __bnd_narrow_ptr_bounds(oldmem, oldmem, bytes); /* do nothing */
+#endif
/* Must alloc, copy, free. */
newmem = __libc_malloc(bytes);
if (newmem == 0) return 0; /* propagate failure */
@@ -2993,8 +3075,11 @@ __libc_realloc(void* oldmem, size_t bytes)
_int_free(ar_ptr, oldp, 0);
}
}
-
+#ifndef __CHKP__
return newp;
+#else
+ return __bnd_narrow_ptr_bounds(newp, newp, bytes);
+#endif
}
libc_hidden_def (__libc_realloc)
@@ -3029,7 +3114,11 @@ __libc_memalign(size_t alignment, size_t bytes)
(void)mutex_unlock(&ar_ptr->mutex);
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
/* For ISO C11. */
weak_alias (__libc_memalign, aligned_alloc)
@@ -3065,8 +3154,11 @@ __libc_valloc(size_t bytes)
(void)mutex_unlock (&ar_ptr->mutex);
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
-
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
void*
@@ -3100,7 +3192,11 @@ __libc_pvalloc(size_t bytes)
assert(!p || chunk_is_mmapped(mem2chunk(p)) ||
ar_ptr == arena_for_chunk(mem2chunk(p)));
+#ifndef __CHKP__
return p;
+#else
+ return __bnd_narrow_ptr_bounds(p, p, bytes);
+#endif
}
void*
@@ -3132,6 +3228,9 @@ __libc_calloc(size_t n, size_t elem_size)
mem = (*hook)(sz, RETURN_ADDRESS (0));
if(mem == 0)
return 0;
+#ifdef __CHKP__
+ mem = __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
return memset(mem, 0, sz);
}
@@ -3145,7 +3244,12 @@ __libc_calloc(size_t n, size_t elem_size)
need to clear. */
#if MORECORE_CLEARS
oldtop = top(av);
- oldtopsize = chunksize(top(av));
+ if (oldtop == initial_top(av))
+ {
+ oldtopsize = 0;
+ } else {
+ oldtopsize = chunksize(top(av));
+ }
#if MORECORE_CLEARS < 2
/* Only newly allocated memory is guaranteed to be cleared. */
if (av == &main_arena &&
@@ -3179,6 +3283,9 @@ __libc_calloc(size_t n, size_t elem_size)
/* Two optional cases in which clearing not necessary */
if (chunk_is_mmapped (p))
{
+#ifdef __CHKP__
+ mem = __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
if (__builtin_expect (perturb_byte, 0))
MALLOC_ZERO (mem, sz);
return mem;
@@ -3221,8 +3328,11 @@ __libc_calloc(size_t n, size_t elem_size)
}
}
}
-
+#ifndef __CHKP__
return mem;
+#else
+ return __bnd_narrow_ptr_bounds(mem, mem, sz);
+#endif
}
/*
@@ -3676,7 +3786,11 @@ _int_malloc(mstate av, size_t bytes)
*/
victim = av->top;
- size = chunksize(victim);
+ if (victim == initial_top(av)) {
+ size = 0;
+ } else {
+ size = chunksize(victim);
+ }
if ((unsigned long)(size) >= (unsigned long)(nb + MINSIZE)) {
remainder_size = size - nb;
@@ -4051,6 +4165,9 @@ static void malloc_consolidate(mstate av)
p = atomic_exchange_acq (fb, 0);
if (p != 0) {
do {
+#ifdef __CHKP__
+ p = __bnd_set_ptr_bounds(p, sizeof (struct malloc_chunk));
+#endif
check_inuse_chunk(av, p);
nextp = p->fd;
@@ -4336,8 +4453,8 @@ _int_memalign(mstate av, size_t alignment, size_t bytes)
/* For mmapped chunks, just adjust offset */
if (chunk_is_mmapped(p)) {
- newp->prev_size = p->prev_size + leadsize;
set_head(newp, newsize|IS_MMAPPED);
+ newp->prev_size = p->prev_size + leadsize;
return chunk2mem(newp);
}
@@ -4350,7 +4467,7 @@ _int_memalign(mstate av, size_t alignment, size_t bytes)
p = newp;
assert (newsize >= nb &&
- (((unsigned long)(chunk2mem(p))) % alignment) == 0);
+ (((unsigned long)((char *)p + 2 * SIZE_SZ) % alignment) == 0));
}
/* Also give back spare room at the end */
@@ -4430,7 +4547,7 @@ static int mtrim(mstate av, size_t pad)
+ sizeof (struct malloc_chunk)
+ psm1) & ~psm1);
- assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
+ assert ((char *) (p) + 6 * SIZE_SZ <= paligned_mem);
assert ((char *) p + size > paligned_mem);
/* This is the size we could potentially free. */
@@ -4932,7 +5049,6 @@ __posix_memalign (void **memptr, size_t alignment, size_t size)
}
weak_alias (__posix_memalign, posix_memalign)
-
int
malloc_info (int options, FILE *fp)
{
@@ -5121,7 +5237,6 @@ malloc_info (int options, FILE *fp)
return 0;
}
-
strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc)
strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree)
strong_alias (__libc_free, __free) strong_alias (__libc_free, free)
diff --git a/malloc/obstack.h b/malloc/obstack.h
index d2e056b..5e4b5b9 100644
--- a/malloc/obstack.h
+++ b/malloc/obstack.h
@@ -143,7 +143,7 @@ struct _obstack_chunk /* Lives at front of each chunk. */
{
char *limit; /* 1 past end of this chunk */
struct _obstack_chunk *prev; /* address of prior chunk or NULL */
- char contents[4]; /* objects begin here */
+ char contents[4] __attribute__((bnd_variable_size)); /* objects begin here */
};
struct obstack /* control current object in current chunk */
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 463 ++-
Makefile | 19 +-
NEWS | 17 +
argp/argp-help.c | 21 +-
argp/argp-parse.c | 2 +-
bits/dirent.h | 4 +-
bits/sched.h | 2 +-
bits/socket.h | 138 +-
config.make.in | 2 +
configure | 24 +-
configure.in | 18 +-
crypt/crypt.c | 12 +
crypt/crypt_util.c | 5 +-
crypt/speeds.c | 2 +-
csu/check_fds.c | 2 +-
csu/init-first.c | 2 +-
debug/Makefile | 4 +-
debug/backtracesyms.c | 2 +-
debug/strcpy_chk.c | 43 +
debug/tst-backtrace5.c | 11 +-
debug/tst-backtrace6.c | 21 +
debug/tst-chk1.c | 4 +-
debug/wcpcpy_chk.c | 19 +
debug/wcscpy_chk.c | 20 +
dlfcn/Makefile | 10 +-
dlfcn/bug-dl-leaf-lib-cb.c | 35 +
dlfcn/bug-dl-leaf-lib.c | 71 +
dlfcn/bug-dl-leaf.c | 25 +
dlfcn/dlfcn.h | 8 +-
elf/Makefile | 4 +
elf/dl-close.c | 7 +
elf/dl-load.c | 2 +-
elf/dl-open.c | 4 +-
elf/dl-tls.c | 3 +
elf/elf.h | 2 +-
elf/reldep.c | 2 +-
elf/reldep3.c | 2 +-
elf/setup-vdso.h | 2 +-
elf/sprof.c | 2 +-
iconv/gconv_int.h | 2 +-
iconv/iconv_charmap.c | 2 +-
iconv/loop.c | 2 +-
iconv/skeleton.c | 4 +-
iconv/strtab.c | 2 +-
iconvdata/gbbig5.c | 2 +-
iconvdata/iso-2022-jp.c | 2 +-
include/features.h | 2 +-
include/link.h | 2 +-
include/netdb.h | 15 +-
include/resolv.h | 18 +-
inet/ifreq.c | 61 +-
inet/netinet/in.h | 4 +-
inet/protocols/routed.h | 4 +-
intl/dcigettext.c | 4 +-
intl/gettextP.h | 2 +-
intl/gmo.h | 2 +-
intl/loadinfo.h | 2 +-
io/fts.h | 2 +-
io/lockf64.c | 2 +-
io/lxstat64.c | 13 +-
libidn/ChangeLog | 4 +
libidn/stringprep.c | 2 +-
libio/fileops.c | 3 +
libio/libioP.h | 4 +-
locale/categories.def | 1 +
locale/localeinfo.h | 4 +-
locale/programs/ld-collate.c | 2 +-
locale/programs/linereader.h | 2 +-
localedata/ChangeLog | 6 +
localedata/tests-mbwc/dat_wcsncat.c | 2 +-
localedata/tests/test6.c | 2 +-
localedata/tst-digits.c | 2 +-
malloc/arena.c | 10 +-
malloc/hooks.c | 3 +
malloc/malloc.c | 217 +-
malloc/obstack.h | 2 +-
manual/charset.texi | 2 +-
manual/conf.texi | 9 +
manual/contrib.texi | 16 +-
manual/filesys.texi | 79 +-
manual/install.texi | 3 +
manual/stdio.texi | 2 +-
manual/string.texi | 4 +-
math/basic-test.c | 20 +-
math/libm-test.inc | 125 +-
math/s_cexp.c | 14 +-
math/s_cexpf.c | 14 +-
math/s_cexpl.c | 14 +-
math/s_cproj.c | 4 +-
math/s_cprojf.c | 4 +-
math/s_cprojl.c | 4 +-
math/s_fdim.c | 8 +-
math/s_fdimf.c | 8 +-
math/s_fdiml.c | 8 +-
misc/search.h | 2 +-
nis/nis_print.c | 2 +-
nis/nss_nis/nis-publickey.c | 2 +-
nis/rpcsvc/nis.h | 2 +-
nptl/ChangeLog | 11 +
nptl/cleanup.c | 1 -
nptl/descr.h | 9 +-
nptl/pthread_getschedparam.c | 2 +-
nptl/sysdeps/i386/tls.h | 60 +-
nptl/sysdeps/pthread/gai_misc.h | 6 +
nptl/sysdeps/pthread/pthread.h | 4 +-
nptl/sysdeps/unix/sysv/linux/aio_misc.h | 6 +
.../sysv/linux/i386/i486/pthread_cond_timedwait.S | 82 +-
.../unix/sysv/linux/i386/i486/pthread_cond_wait.S | 63 +-
nptl/sysdeps/unix/sysv/linux/i386/pthread_once.S | 9 +
nptl/sysdeps/unix/sysv/linux/internaltypes.h | 2 +-
nptl/sysdeps/unix/sysv/linux/mq_notify.c | 3 +
nptl/sysdeps/unix/sysv/linux/register-atfork.c | 2 +-
nptl/sysdeps/unix/sysv/linux/timer_routines.c | 3 +
.../unix/sysv/linux/x86/bits/pthreadtypes.h | 2 +-
.../sysv/linux/x86_64/pthread_cond_timedwait.S | 49 +-
.../unix/sysv/linux/x86_64/pthread_cond_wait.S | 37 +-
nptl/sysdeps/x86_64/tls.h | 80 +-
nptl/tst-cancel4.c | 2 +-
nptl/tst-cancel7.c | 3 +
nptl/tst-signal1.c | 3 +
nptl/tst-signal2.c | 3 +
nptl/tst-signal3.c | 6 +
nscd/aicache.c | 6 +-
nss/nss_files/files-key.c | 4 +-
po/bg.po | 1926 +++---
po/ca.po | 1902 +++---
po/cs.po | 1907 +++---
po/de.po | 1900 +++---
po/eo.po | 2072 +++---
po/fr.po | 1902 +++---
po/ko.po | 2481 ++++----
po/libc.pot | 1894 +++---
po/nl.po | 1922 +++---
po/pl.po | 1906 +++---
po/ru.po | 1912 +++---
po/sv.po | 1904 +++---
po/uk.po | 7151 ++++++++++++++++++++
po/vi.po | 2019 +++---
ports/ChangeLog.aarch64 | 7 +
ports/ChangeLog.alpha | 8 +
ports/ChangeLog.am33 | 8 +
ports/ChangeLog.arm | 30 +
ports/ChangeLog.hppa | 11 +
ports/ChangeLog.ia64 | 27 +
ports/ChangeLog.m68k | 4 +
ports/ChangeLog.mips | 26 +
ports/ChangeLog.powerpc | 13 +
ports/ChangeLog.tile | 4 +
ports/sysdeps/aarch64/Versions | 5 +
ports/sysdeps/aarch64/machine-gmon.h | 35 +
ports/sysdeps/aarch64/mcount.c | 33 +
ports/sysdeps/alpha/alphaev67/stpncpy.S | 2 +-
ports/sysdeps/am33/fpu/fraiseexcpt.c | 2 +-
ports/sysdeps/arm/armv6t2/strlen.S | 169 +
ports/sysdeps/arm/dl-machine.h | 2 +-
ports/sysdeps/hppa/dl-lookupcfg.h | 2 +-
ports/sysdeps/hppa/fpu/fraiseexcpt.c | 2 +-
ports/sysdeps/ia64/fpu/e_acoshl.S | 4 +-
ports/sysdeps/ia64/fpu/e_atanhl.S | 2 +-
ports/sysdeps/ia64/fpu/e_log.S | 2 +-
ports/sysdeps/ia64/fpu/e_log2l.S | 2 +-
ports/sysdeps/ia64/fpu/e_logf.S | 6 +-
ports/sysdeps/ia64/fpu/e_logl.S | 4 +-
ports/sysdeps/ia64/fpu/fraiseexcpt.c | 2 +-
ports/sysdeps/ia64/fpu/libm_lgammaf.S | 4 +-
ports/sysdeps/ia64/fpu/libm_lgammal.S | 74 +-
ports/sysdeps/ia64/fpu/libm_reduce.S | 6 +-
ports/sysdeps/ia64/fpu/s_erfc.S | 2 +-
ports/sysdeps/ia64/fpu/s_erfcf.S | 2 +-
ports/sysdeps/ia64/fpu/s_erfcl.S | 2 +-
ports/sysdeps/ia64/fpu/s_log1p.S | 2 +-
ports/sysdeps/ia64/fpu/s_log1pl.S | 4 +-
ports/sysdeps/ia64/fpu/w_tgammal.S | 10 +-
ports/sysdeps/m68k/m680x0/fpu/fraiseexcpt.c | 2 +-
ports/sysdeps/mips/fpu/fedisblxcpt.c | 1 -
ports/sysdeps/mips/fpu/feenablxcpt.c | 1 -
ports/sysdeps/mips/fpu_control.h | 5 +-
ports/sysdeps/mips/memcpy.S | 18 +-
ports/sysdeps/mips/sys/asm.h | 2 +-
ports/sysdeps/powerpc/nofpu/sim-full.c | 5 +
ports/sysdeps/powerpc/nofpu/soft-supp.h | 5 +
ports/sysdeps/powerpc/powerpc32/405/memcpy.S | 2 +-
ports/sysdeps/powerpc/soft-fp/sfp-machine.h | 5 +
ports/sysdeps/tile/jmpbuf-offsets.h | 2 +-
.../unix/sysv/linux/aarch64/nptl/libc.abilist | 1 +
ports/sysdeps/unix/sysv/linux/alpha/bits/netdb.h | 2 +-
ports/sysdeps/unix/sysv/linux/am33/bits/fcntl.h | 2 +-
ports/sysdeps/unix/sysv/linux/arm/clone.S | 4 +-
ports/sysdeps/unix/sysv/linux/arm/ldsodefs.h | 6 +-
ports/sysdeps/unix/sysv/linux/hppa/clone.S | 2 +-
ports/sysdeps/unix/sysv/linux/hppa/nptl/pthread.h | 4 +-
ports/sysdeps/unix/sysv/linux/hppa/sysdep.h | 2 +-
.../unix/sysv/linux/ia64/nptl/__sigstack_longjmp.c | 2 +-
ports/sysdeps/unix/sysv/linux/mips/ldsodefs.h | 4 +-
posix/fnmatch_loop.c | 2 +-
resolv/gai_notify.c | 2 +-
resolv/ns_name.c | 4 +-
resolv/ns_samedomain.c | 2 +-
resolv/res_comp.c | 2 +-
resolv/res_hconf.c | 2 +
resolv/res_libc.c | 4 +-
resolv/res_mkquery.c | 1 +
resolv/res_send.c | 7 +-
socket/Makefile | 2 +-
socket/sa_len.c | 108 +
stdio-common/Makefile | 3 +
stdio-common/psiginfo-define.h | 2 +-
stdio-common/scanf13.c | 1 +
stdio-common/vfscanf.c | 2 +-
stdlib/Makefile | 2 +-
stdlib/fmtmsg.c | 4 +-
stdlib/random.c | 2 +-
stdlib/stdlib.h | 2 +-
stdlib/strtod_l.c | 9 +-
stdlib/strtof_l.c | 7 +-
stdlib/tst-strtod6.c | 24 +-
streams/stropts.h | 2 +-
string/Makefile | 4 +
string/strcoll_l.c | 701 +--
string/strcpy.c | 9 +
string/test-strcasecmp.c | 26 +-
string/test-strchr.c | 11 +-
string/test-strncasecmp.c | 26 +-
sunrpc/clnt_tcp.c | 4 +-
sunrpc/clnt_udp.c | 6 +-
sunrpc/clnt_unix.c | 4 +-
sunrpc/rpc/xdr.h | 2 +-
sunrpc/rpc_thread.c | 2 +-
sysdeps/generic/unwind-dw2-fde.h | 2 +-
sysdeps/gnu/ldsodefs.h | 48 +
sysdeps/gnu/netinet/ip_icmp.h | 2 +-
sysdeps/gnu/unwind-resume.c | 3 +
sysdeps/i386/dl-trampoline.S | 20 +
sysdeps/i386/fpu/fraiseexcpt.c | 2 +-
sysdeps/i386/fpu/libm-test-ulps | 20 +-
sysdeps/i386/i486/strcat.S | 79 +-
sysdeps/i386/i586/strchr.S | 31 +-
sysdeps/i386/i586/strcpy.S | 7 +
sysdeps/i386/i586/strlen.S | 22 +-
sysdeps/i386/i686/memcmp.S | 172 +
sysdeps/i386/i686/memset.S | 5 +
sysdeps/i386/i686/multiarch/Makefile | 7 +-
sysdeps/i386/i686/multiarch/Versions | 7 +
sysdeps/i386/i686/multiarch/{bcopy.S => __bcopy.S} | 0
.../i386/i686/multiarch/{memcpy.S => __memcpy.S} | 0
.../multiarch/{memcpy_chk.S => __memcpy_chk.S} | 0
.../i386/i686/multiarch/{memmove.S => __memmove.S} | 0
.../multiarch/{memmove_chk.S => __memmove_chk.S} | 0
.../i386/i686/multiarch/{mempcpy.S => __mempcpy.S} | 0
.../multiarch/{mempcpy_chk.S => __mempcpy_chk.S} | 0
sysdeps/i386/i686/multiarch/bcopy.c | 7 +
sysdeps/i386/i686/multiarch/ifunc-impl-list.c | 86 +-
sysdeps/i386/i686/multiarch/memchr-sse2-bsf.S | 82 +-
sysdeps/i386/i686/multiarch/memcmp-sse4.S | 55 +-
sysdeps/i386/i686/multiarch/memcpy.c | 40 +
sysdeps/i386/i686/multiarch/memcpy_chk.c | 1 +
sysdeps/i386/i686/multiarch/memmove.c | 76 +
sysdeps/i386/i686/multiarch/memmove_chk.c | 1 +
sysdeps/i386/i686/multiarch/mempcpy.c | 40 +
sysdeps/i386/i686/multiarch/mempcpy_chk.c | 1 +
sysdeps/i386/i686/multiarch/memrchr-sse2-bsf.S | 6 +
sysdeps/i386/i686/multiarch/memset-sse2-rep.S | 8 +-
sysdeps/i386/i686/multiarch/memset-sse2.S | 2 +-
sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S | 1803 +++++
sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S | 3 +
sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S | 3 +
sysdeps/i386/i686/multiarch/strcasecmp.S | 4 +
sysdeps/i386/i686/multiarch/strcat-sse2.S | 188 +-
sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S | 24 +
sysdeps/i386/i686/multiarch/strcmp-sse4.S | 94 +-
sysdeps/i386/i686/multiarch/strcmp-ssse3.S | 16 +-
sysdeps/i386/i686/multiarch/strcmp.S | 4 +
sysdeps/i386/i686/multiarch/strcpy-sse2.S | 386 ++-
sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S | 29 +
sysdeps/i386/i686/multiarch/strlen-sse2.S | 67 +-
sysdeps/i386/i686/multiarch/strncase.S | 4 +
sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S | 30 +
sysdeps/i386/i686/multiarch/wcschr-sse2.S | 45 +-
sysdeps/i386/i686/multiarch/wcscmp-sse2.S | 8 +
sysdeps/i386/i686/multiarch/wcscpy-ssse3.S | 181 +
sysdeps/i386/i686/multiarch/wcslen-sse2.S | 57 +
sysdeps/i386/i686/multiarch/wcsrchr-sse2.S | 23 +
sysdeps/i386/i686/strcmp.S | 15 +-
sysdeps/i386/i686/strtok.S | 8 +
sysdeps/i386/memchr.S | 47 +-
sysdeps/i386/memcmp.S | 14 +-
sysdeps/i386/rawmemchr.S | 32 +-
sysdeps/i386/stpncpy.S | 10 +-
sysdeps/i386/strchrnul.S | 35 +-
sysdeps/i386/strcspn.S | 8 +
sysdeps/i386/strpbrk.S | 10 +-
sysdeps/i386/strrchr.S | 28 +-
sysdeps/i386/strtok.S | 11 +
sysdeps/ieee754/flt-32/e_jnf.c | 4 +-
sysdeps/ieee754/k_standard.c | 2 +-
sysdeps/ieee754/ldbl-128/e_expl.c | 2 +-
sysdeps/ieee754/ldbl-128/strtold_l.c | 12 +-
sysdeps/ieee754/ldbl-128ibm/e_expl.c | 2 +-
sysdeps/ieee754/ldbl-128ibm/ieee754.h | 19 +
sysdeps/ieee754/ldbl-128ibm/s_cprojl.c | 4 +-
sysdeps/ieee754/ldbl-128ibm/strtold_l.c | 9 +-
sysdeps/ieee754/ldbl-64-128/strtold_l.c | 12 +-
sysdeps/ieee754/ldbl-96/s_cbrtl.c | 2 +-
sysdeps/ieee754/ldbl-96/strtold_l.c | 9 +-
sysdeps/mach/hurd/bits/errno.h | 10 +-
sysdeps/mach/hurd/check_fds.c | 2 +-
sysdeps/mach/hurd/dl-sysdep.c | 2 +-
sysdeps/mach/hurd/errnos.awk | 8 +
sysdeps/mach/hurd/i386/init-first.c | 4 +-
sysdeps/mach/hurd/profil.c | 2 +-
sysdeps/posix/dirstream.h | 2 +
sysdeps/posix/getaddrinfo.c | 2 +-
sysdeps/posix/opendir.c | 1 +
sysdeps/posix/profil.c | 3 +
sysdeps/posix/readdir_r.c | 42 +-
sysdeps/posix/rewinddir.c | 1 +
sysdeps/posix/sigwait.c | 6 +
sysdeps/posix/sprofil.c | 3 +
sysdeps/powerpc/powerpc32/backtrace.c | 66 +-
sysdeps/powerpc/powerpc32/cell/memcpy.S | 2 +-
sysdeps/powerpc/powerpc32/fpu/setjmp.S | 2 +-
sysdeps/powerpc/powerpc32/power4/memcmp.S | 2 +-
sysdeps/powerpc/powerpc32/power6/memcpy.S | 18 +-
sysdeps/powerpc/powerpc32/power6/memset.S | 4 +-
sysdeps/powerpc/powerpc32/power7/memcmp.S | 2 +-
sysdeps/powerpc/powerpc32/setjmp.S | 2 +-
sysdeps/powerpc/powerpc64/backtrace.c | 36 +-
sysdeps/powerpc/powerpc64/cell/memcpy.S | 2 +-
sysdeps/powerpc/powerpc64/power6/memcpy.S | 10 +-
sysdeps/powerpc/powerpc64/power6/memset.S | 4 +-
sysdeps/pthread/aio_notify.c | 2 +-
sysdeps/s390/dl-procinfo.h | 2 +-
sysdeps/sparc/fpu/fraiseexcpt.c | 2 +-
sysdeps/unix/bsd/bsd4.4/bits/socket.h | 342 -
sysdeps/unix/bsd/ptsname.c | 2 +-
{inet => sysdeps/unix}/ifreq.c | 0
sysdeps/unix/sysv/linux/Makefile | 2 +-
sysdeps/unix/sysv/linux/bits/sched.h | 2 +-
sysdeps/unix/sysv/linux/i386/Makefile | 2 +-
sysdeps/unix/sysv/linux/i386/chown.c | 2 +-
sysdeps/unix/sysv/linux/i386/mmap.S | 5 +
sysdeps/unix/sysv/linux/i386/mmap64.S | 4 +
sysdeps/unix/sysv/linux/i386/mremap.c | 36 +
sysdeps/unix/sysv/linux/i386/readdir64_r.c | 1 -
sysdeps/unix/sysv/linux/ifaddrs.c | 10 +-
sysdeps/unix/sysv/linux/ldsodefs.h | 23 -
sysdeps/unix/sysv/linux/mmap64.c | 6 +-
sysdeps/unix/sysv/linux/powerpc/aix/aix-dirent.h | 19 -
sysdeps/unix/sysv/linux/powerpc/aix/aix-errno.h | 124 -
sysdeps/unix/sysv/linux/powerpc/aix/aix-stat.h | 75 -
sysdeps/unix/sysv/linux/powerpc/aix/aix-termios.h | 84 -
sysdeps/unix/sysv/linux/powerpc/aix/direntconv.c | 48 -
sysdeps/unix/sysv/linux/powerpc/aix/errnoconv.c | 142 -
sysdeps/unix/sysv/linux/powerpc/aix/statconv.c | 52 -
sysdeps/unix/sysv/linux/powerpc/aix/tcgetattr.c | 155 -
sysdeps/unix/sysv/linux/powerpc/aix/tcsetattr.c | 170 -
sysdeps/unix/sysv/linux/powerpc/bits/libc-vdso.h | 7 +
sysdeps/unix/sysv/linux/powerpc/init-first.c | 16 +
sysdeps/unix/sysv/linux/powerpc/libc-start.c | 2 +-
.../linux/powerpc/powerpc32/getcontext-common.S | 2 +-
.../linux/powerpc/powerpc32/setcontext-common.S | 2 +-
.../linux/powerpc/powerpc32/swapcontext-common.S | 2 +-
sysdeps/unix/sysv/linux/ptsname.c | 2 +-
sysdeps/unix/sysv/linux/s390/s390-32/chown.c | 2 +-
sysdeps/unix/sysv/linux/s390/sa_len.c | 5 +-
sysdeps/unix/sysv/linux/s390/sys/procfs.h | 3 +-
sysdeps/unix/sysv/linux/sa_len.c | 72 +-
sysdeps/unix/sysv/linux/socketcall.h | 2 +-
.../unix/sysv/linux/sparc/sparc32/makecontext.c | 2 +-
sysdeps/unix/sysv/linux/wordsize-64/readdir_r.c | 1 -
sysdeps/unix/sysv/linux/x86/bits/siginfo.h | 8 +
sysdeps/unix/sysv/linux/x86_64/mmap.c | 52 +
.../sysv/linux/x86_64/mmap64.c} | 0
.../sysv/linux/x86_64/mremap.c} | 0
sysdeps/x86/fpu/bits/fenv.h | 2 +-
sysdeps/x86_64/Makefile | 3 +
sysdeps/x86_64/Versions | 7 +
sysdeps/x86_64/dl-trampoline.S | 28 +-
sysdeps/x86_64/fpu/fraiseexcpt.c | 2 +-
sysdeps/x86_64/fpu/libm-test-ulps | 19 +-
sysdeps/x86_64/memchr.S | 78 +-
sysdeps/x86_64/memcmp.S | 85 +
sysdeps/x86_64/memrchr.S | 13 +
sysdeps/x86_64/memset.S | 30 +
sysdeps/x86_64/multiarch/Makefile | 9 +
sysdeps/x86_64/multiarch/Versions | 13 +
sysdeps/x86_64/multiarch/bcopy.S | 5 +
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 +-
sysdeps/x86_64/multiarch/memcmp-sse4.S | 339 +
sysdeps/x86_64/multiarch/memcpy-c.c | 70 +
sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S | 4 +
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S | 26 +-
sysdeps/x86_64/multiarch/memcpy.S | 35 +-
sysdeps/x86_64/multiarch/memcpy_chk-c.c | 3 +
sysdeps/x86_64/multiarch/memcpy_chk.S | 14 +-
sysdeps/x86_64/multiarch/memmove-c.c | 108 +
sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S | 5 +
sysdeps/x86_64/multiarch/memmove-ssse3-back.S | 8 +-
sysdeps/x86_64/multiarch/memmove.c | 34 +-
sysdeps/x86_64/multiarch/memmove_chk-c.c | 1 +
sysdeps/x86_64/multiarch/memmove_chk.c | 10 +-
sysdeps/x86_64/multiarch/mempcpy-c.c | 64 +
sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S | 6 +
sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S | 12 +-
sysdeps/x86_64/multiarch/mempcpy.S | 37 +-
sysdeps/x86_64/multiarch/mempcpy_chk-c.c | 3 +
sysdeps/x86_64/multiarch/mempcpy_chk.S | 14 +-
sysdeps/x86_64/multiarch/rawmemchr.S | 103 -
sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S | 76 +
sysdeps/x86_64/multiarch/strchr.S | 22 +
sysdeps/x86_64/multiarch/strcmp-sse42.S | 240 +-
sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S | 178 +-
sysdeps/x86_64/multiarch/strcspn-c.c | 8 +
sysdeps/x86_64/multiarch/strrchr.S | 16 +
sysdeps/x86_64/multiarch/strspn-c.c | 8 +
sysdeps/x86_64/multiarch/strstr.c | 8 +-
sysdeps/x86_64/multiarch/wcscpy-ssse3.S | 171 +
sysdeps/x86_64/rawmemchr.S | 53 +-
sysdeps/x86_64/stpcpy_chk-c.c | 3 +
sysdeps/x86_64/stpcpy_chk.S | 8 +-
sysdeps/x86_64/strcat.S | 53 +-
sysdeps/x86_64/strchr.S | 13 +-
sysdeps/x86_64/strchrnul.S | 13 +-
sysdeps/x86_64/strcmp.S | 238 +-
sysdeps/x86_64/strcpy.S | 40 +
sysdeps/x86_64/strcpy_chk-c.c | 3 +
sysdeps/x86_64/strcpy_chk.S | 2 +
sysdeps/x86_64/strcspn.S | 33 +-
sysdeps/x86_64/strlen.S | 19 +
sysdeps/x86_64/strrchr.S | 13 +-
sysdeps/x86_64/strspn.S | 33 +-
sysdeps/x86_64/strtok.S | 3 +
sysdeps/x86_64/wcschr.S | 26 +
sysdeps/x86_64/wcscmp.S | 8 +
sysdeps/x86_64/wcslen.S | 73 +
sysdeps/x86_64/wcsrchr.S | 51 +-
sysvipc/sys/msg.h | 2 +-
version.h | 2 +-
wcsmbs/wcpcpy.c | 14 +-
wcsmbs/wcscpy.c | 9 +
440 files changed, 29919 insertions(+), 15187 deletions(-)
create mode 100644 debug/tst-backtrace6.c
create mode 100644 dlfcn/bug-dl-leaf-lib-cb.c
create mode 100644 dlfcn/bug-dl-leaf-lib.c
create mode 100644 dlfcn/bug-dl-leaf.c
create mode 100644 po/uk.po
create mode 100644 ports/sysdeps/aarch64/Versions
create mode 100644 ports/sysdeps/aarch64/machine-gmon.h
create mode 100644 ports/sysdeps/aarch64/mcount.c
create mode 100644 ports/sysdeps/arm/armv6t2/strlen.S
create mode 100644 socket/sa_len.c
create mode 100644 sysdeps/gnu/ldsodefs.h
rename sysdeps/i386/i686/multiarch/{bcopy.S => __bcopy.S} (100%)
rename sysdeps/i386/i686/multiarch/{memcpy.S => __memcpy.S} (100%)
rename sysdeps/i386/i686/multiarch/{memcpy_chk.S => __memcpy_chk.S} (100%)
rename sysdeps/i386/i686/multiarch/{memmove.S => __memmove.S} (100%)
rename sysdeps/i386/i686/multiarch/{memmove_chk.S => __memmove_chk.S} (100%)
rename sysdeps/i386/i686/multiarch/{mempcpy.S => __mempcpy.S} (100%)
rename sysdeps/i386/i686/multiarch/{mempcpy_chk.S => __mempcpy_chk.S} (100%)
create mode 100644 sysdeps/i386/i686/multiarch/bcopy.c
create mode 100644 sysdeps/i386/i686/multiarch/memcpy.c
create mode 100644 sysdeps/i386/i686/multiarch/memcpy_chk.c
create mode 100644 sysdeps/i386/i686/multiarch/memmove.c
create mode 100644 sysdeps/i386/i686/multiarch/memmove_chk.c
create mode 100644 sysdeps/i386/i686/multiarch/mempcpy.c
create mode 100644 sysdeps/i386/i686/multiarch/mempcpy_chk.c
create mode 100644 sysdeps/i386/i686/multiarch/mpx_memcpy_nobnd.S
create mode 100644 sysdeps/i386/i686/multiarch/mpx_memmove_nobnd.S
create mode 100644 sysdeps/i386/i686/multiarch/mpx_mempcpy_nobnd.S
delete mode 100644 sysdeps/unix/bsd/bsd4.4/bits/socket.h
copy {inet => sysdeps/unix}/ifreq.c (100%)
create mode 100644 sysdeps/unix/sysv/linux/i386/mremap.c
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/aix-dirent.h
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/aix-errno.h
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/aix-stat.h
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/aix-termios.h
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/direntconv.c
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/errnoconv.c
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/statconv.c
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/tcgetattr.c
delete mode 100644 sysdeps/unix/sysv/linux/powerpc/aix/tcsetattr.c
create mode 100644 sysdeps/unix/sysv/linux/x86_64/mmap.c
copy sysdeps/{generic/libcidn.abilist => unix/sysv/linux/x86_64/mmap64.c} (100%)
copy sysdeps/{generic/libcidn.abilist => unix/sysv/linux/x86_64/mremap.c} (100%)
create mode 100644 sysdeps/x86_64/multiarch/memcpy-c.c
create mode 100644 sysdeps/x86_64/multiarch/memcpy-ssse3-back-1.S
create mode 100644 sysdeps/x86_64/multiarch/memcpy_chk-c.c
create mode 100644 sysdeps/x86_64/multiarch/memmove-c.c
create mode 100644 sysdeps/x86_64/multiarch/memmove-ssse3-back-1.S
create mode 100644 sysdeps/x86_64/multiarch/memmove_chk-c.c
create mode 100644 sysdeps/x86_64/multiarch/mempcpy-c.c
create mode 100644 sysdeps/x86_64/multiarch/mempcpy-ssse3-back-1.S
create mode 100644 sysdeps/x86_64/multiarch/mempcpy_chk-c.c
delete mode 100644 sysdeps/x86_64/multiarch/rawmemchr.S
create mode 100644 sysdeps/x86_64/stpcpy_chk-c.c
create mode 100644 sysdeps/x86_64/strcpy_chk-c.c
hooks/post-receive
--
GNU C Library master sources