[PATCH v3] Cygwin: sigfe: Fix a bug that signal handler destroys fpu states
Corinna Vinschen
corinna-cygwin@cygwin.com
Tue Oct 22 15:58:02 GMT 2024
Hi Takashi,
big change, so, honest question: Do you think this is safe for 3.5.5?
This certainly also requires an entry in the release text and there
are just a few minor typos in comments, see below.
Thanks,
Corinna
On Oct 14 15:39, Takashi Yano wrote:
> Previously, sigfe had a bug that the signal handler destroys fpu state.
> This is caused by fninit instruction in sigdelayed. With this patch,
> saving/restoring the FPU/SIMD state is done using fxsave/fxrstor or
> xsave/xrstor rather than fnstcw/fldcw, stmxcsr/ldmxcsr and push/pop
> xmm0-xmm15. Since xsave/xrstor is used, not only x87/MMX/SSE states
> but also AVX/AVX2/AVX-512 states can be maintained unlike before.
> Addresses: https://cygwin.com/pipermail/cygwin/2024-October/256503.html
>
> Fixes: ed89fbc3ff11 ("* gendef (sigdelayed (x86_64)): Save and restore FPU control word.")
> Reported-by: Christian Franke <Christian.Franke@t-online.de>
> Suggested-by: Brian Inglis <Brian.Inglis@SystematicSW.ab.ca>
> Reviewed-by: Brian Inglis <Brian.Inglis@SystematicSW.ab.ca>
> Signed-off-by: Takashi Yano <takashi.yano@nifty.ne.jp>
> ---
> winsup/cygwin/scripts/gendef | 91 ++++++++++++++++++++----------------
> 1 file changed, 50 insertions(+), 41 deletions(-)
>
> diff --git a/winsup/cygwin/scripts/gendef b/winsup/cygwin/scripts/gendef
> index 3b1f8b9da..04350f1cb 100755
> --- a/winsup/cygwin/scripts/gendef
> +++ b/winsup/cygwin/scripts/gendef
> @@ -185,7 +185,7 @@ sigdelayed:
> # make sure it is aligned from here on
> # We could be called from an interrupted thread which doesn't know
> # about his fate, so save and restore everything and the kitchen sink.
> - andq \$0xfffffffffffffff0,%rsp
> + andq \$0xffffffffffffffc0,%rsp
> .seh_setframe %rbp,0
> pushq %r15
> .seh_pushreg %r15
> @@ -213,26 +213,41 @@ sigdelayed:
> .seh_pushreg %rbx
> pushq %rax
> .seh_pushreg %rax
> - subq \$0x128,%rsp
> - .seh_stackalloc 0x128
> - stmxcsr 0x124(%rsp)
> - fnstcw 0x120(%rsp)
> - movdqa %xmm15,0x110(%rsp)
> - movdqa %xmm14,0x100(%rsp)
> - movdqa %xmm13,0xf0(%rsp)
> - movdqa %xmm12,0xe0(%rsp)
> - movdqa %xmm11,0xd0(%rsp)
> - movdqa %xmm10,0xc0(%rsp)
> - movdqa %xmm9,0xb0(%rsp)
> - movdqa %xmm8,0xa0(%rsp)
> - movdqa %xmm7,0x90(%rsp)
> - movdqa %xmm6,0x80(%rsp)
> - movdqa %xmm5,0x70(%rsp)
> - movdqa %xmm4,0x60(%rsp)
> - movdqa %xmm3,0x50(%rsp)
> - movdqa %xmm2,0x40(%rsp)
> - movdqa %xmm1,0x30(%rsp)
> - movdqa %xmm0,0x20(%rsp)
> +
> + # +0x20: indicates if xsave is available
> + # +0x24: decrement of the stack to allocate space
> + # +0x28: %eax returnd by cpuid (0x0d, 0x00)
> + # +0x2c: %edx returnd by cpuid (0x0d, 0x00)
> + # +0x30: state save area
> + movl \$1,%eax
> + cpuid
> + andl \$0x04000000,%ecx # xsave available?
> + jnz 1f
> + movl \$0x248,%ebx # 0x18 for alibnment, 0x30 for additinal space
alignment additional
> + subq %rbx,%rsp
> + movl %ecx,0x20(%rsp)
> + movl %ebx,0x24(%rsp)
> + fxsave64 0x30(%rsp) # x86 CPU with 64-bit mode has fxsave64/fxrstor64
> + jmp 2f
> +1:
> + movl \$0x0d,%eax
> + xorl %ecx,%ecx
> + cpuid # get necessary space for xsave
> + movq %rbx,%rcx
> + addq \$0x48,%rbx # 0x18 for alignment, 0x30 for additinal space
additional
> + subq %rbx,%rsp
> + movl %ebx,0x24(%rsp)
> + xorq %rax,%rax
> + shrq \$3,%rcx
> + leaq 0x30(%rsp),%rdi
> + rep stosq
> + xgetbv # get XCR0 (ecx is 0 after rep)
> + movl %eax,0x28(%rsp)
> + movl %edx,0x2c(%rsp)
> + notl %ecx # set ecx non-zero
> + movl %ecx,0x20(%rsp)
> + xsave64 0x30(%rsp)
> +2:
> .seh_endprologue
>
> movq %gs:8,%r12 # get tls
> @@ -259,26 +274,20 @@ sigdelayed:
> xorl %r11d,%r11d
> movl %r11d,_cygtls.incyg(%r12)
> movl %r11d,_cygtls.stacklock(%r12) # unlock
> - movdqa 0x20(%rsp),%xmm0
> - movdqa 0x30(%rsp),%xmm1
> - movdqa 0x40(%rsp),%xmm2
> - movdqa 0x50(%rsp),%xmm3
> - movdqa 0x60(%rsp),%xmm4
> - movdqa 0x70(%rsp),%xmm5
> - movdqa 0x80(%rsp),%xmm6
> - movdqa 0x90(%rsp),%xmm7
> - movdqa 0xa0(%rsp),%xmm8
> - movdqa 0xb0(%rsp),%xmm9
> - movdqa 0xc0(%rsp),%xmm10
> - movdqa 0xd0(%rsp),%xmm11
> - movdqa 0xe0(%rsp),%xmm12
> - movdqa 0xf0(%rsp),%xmm13
> - movdqa 0x100(%rsp),%xmm14
> - movdqa 0x110(%rsp),%xmm15
> - fninit
> - fldcw 0x120(%rsp)
> - ldmxcsr 0x124(%rsp)
> - addq \$0x128,%rsp
> +
> + movl 0x20(%rsp),%ecx
> + testl %ecx,%ecx # xsave available?
> + jnz 1f
> + fxrstor64 0x30(%rsp)
> + jmp 2f
> +1:
> + movl 0x28(%rsp),%eax
> + movl 0x2c(%rsp),%edx
> + xrstor64 0x30(%rsp)
> +2:
> + movl 0x24(%rsp),%ebx
> + addq %rbx,%rsp
> +
> popq %rax
> popq %rbx
> popq %rcx
> --
> 2.45.1
More information about the Cygwin-patches
mailing list