diff --git a/README b/README index 1a6c960..65cb866 100644 --- a/README +++ b/README @@ -15,8 +15,9 @@ still missing,including: - @PLT dynamic linking function calls are removed. TinyCC does not have the feature.Dynamic linking library(.so) is expected not to work. -- fenv library is removed momently. - I am working on adding the missing fp instructions to TinyCC. + +Some features were removed temporarily but are now usable again,including: +- fenv library (applying a patch to TinyCC may be needed) To compile,simply run export CC=tcc # Set the compiler @@ -34,3 +35,5 @@ version from yours,so if you fail to compile,try to apply the patches listed in /tcc-patch/,including: - empty_archive.patch Enables TinyCC to generate empty ar archive +- mxcsr.patch + Adds support for the ldmxcsr and stmxcsr instructions diff --git a/src/fenv/__flt_rounds.c b/src/fenv/__flt_rounds.c new file mode 100644 index 0000000..ec0b368 --- /dev/null +++ b/src/fenv/__flt_rounds.c @@ -0,0 +1,19 @@ +#include <float.h> +#include <fenv.h> + +int __flt_rounds() +{ + switch (fegetround()) { +#ifdef FE_TOWARDZERO + case FE_TOWARDZERO: return 0; +#endif + case FE_TONEAREST: return 1; +#ifdef FE_UPWARD + case FE_UPWARD: return 2; +#endif +#ifdef FE_DOWNWARD + case FE_DOWNWARD: return 3; +#endif + } + return -1; +} diff --git a/src/fenv/aarch64/fenv.s b/src/fenv/aarch64/fenv.s new file mode 100644 index 0000000..8f3ec96 --- /dev/null +++ b/src/fenv/aarch64/fenv.s @@ -0,0 +1,68 @@ +.global fegetround +.type fegetround,%function +fegetround: + mrs x0, fpcr + and w0, w0, #0xc00000 + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,%function +__fesetround: + mrs x1, fpcr + bic w1, w1, #0xc00000 + orr w1, w1, w0 + msr fpcr, x1 + mov w0, #0 + ret + +.global fetestexcept +.type fetestexcept,%function +fetestexcept: + and w0, w0, #0x1f + mrs x1, fpsr + and w0, w0, w1 + ret + +.global feclearexcept +.type feclearexcept,%function +feclearexcept: + and w0, w0, #0x1f + mrs x1, fpsr + bic w1, w1, w0 + msr fpsr, x1 + mov w0, 
#0 + ret + +.global feraiseexcept +.type feraiseexcept,%function +feraiseexcept: + and w0, w0, #0x1f + mrs x1, fpsr + orr w1, w1, w0 + msr fpsr, x1 + mov w0, #0 + ret + +.global fegetenv +.type fegetenv,%function +fegetenv: + mrs x1, fpcr + mrs x2, fpsr + stp w1, w2, [x0] + mov w0, #0 + ret + +// TODO preserve some bits +.global fesetenv +.type fesetenv,%function +fesetenv: + mov x1, #0 + mov x2, #0 + cmn x0, #1 + b.eq 1f + ldp w1, w2, [x0] +1: msr fpcr, x1 + msr fpsr, x2 + mov w0, #0 + ret diff --git a/src/fenv/arm/fenv-hf.S b/src/fenv/arm/fenv-hf.S new file mode 100644 index 0000000..2a1de0d --- /dev/null +++ b/src/fenv/arm/fenv-hf.S @@ -0,0 +1,70 @@ +#if __ARM_PCS_VFP + +.syntax unified +.fpu vfp + +.global fegetround +.type fegetround,%function +fegetround: + fmrx r0, fpscr + and r0, r0, #0xc00000 + bx lr + +.global __fesetround +.hidden __fesetround +.type __fesetround,%function +__fesetround: + fmrx r3, fpscr + bic r3, r3, #0xc00000 + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fetestexcept +.type fetestexcept,%function +fetestexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + and r0, r0, r3 + bx lr + +.global feclearexcept +.type feclearexcept,%function +feclearexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + bic r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global feraiseexcept +.type feraiseexcept,%function +feraiseexcept: + and r0, r0, #0x1f + fmrx r3, fpscr + orr r3, r3, r0 + fmxr fpscr, r3 + mov r0, #0 + bx lr + +.global fegetenv +.type fegetenv,%function +fegetenv: + fmrx r3, fpscr + str r3, [r0] + mov r0, #0 + bx lr + +.global fesetenv +.type fesetenv,%function +fesetenv: + cmn r0, #1 + moveq r3, #0 + ldrne r3, [r0] + fmxr fpscr, r3 + mov r0, #0 + bx lr + +#endif diff --git a/src/fenv/arm/fenv.c b/src/fenv/arm/fenv.c new file mode 100644 index 0000000..ad295f5 --- /dev/null +++ b/src/fenv/arm/fenv.c @@ -0,0 +1,3 @@ +#if !__ARM_PCS_VFP +#include "../fenv.c" +#endif diff --git a/src/fenv/fegetexceptflag.c 
b/src/fenv/fegetexceptflag.c new file mode 100644 index 0000000..bab0b44 --- /dev/null +++ b/src/fenv/fegetexceptflag.c @@ -0,0 +1,7 @@ +#include <fenv.h> + +int fegetexceptflag(fexcept_t *fp, int mask) +{ + *fp = fetestexcept(mask); + return 0; +} diff --git a/src/fenv/feholdexcept.c b/src/fenv/feholdexcept.c new file mode 100644 index 0000000..73ff1fa --- /dev/null +++ b/src/fenv/feholdexcept.c @@ -0,0 +1,8 @@ +#include <fenv.h> + +int feholdexcept(fenv_t *envp) +{ + fegetenv(envp); + feclearexcept(FE_ALL_EXCEPT); + return 0; +} diff --git a/src/fenv/fenv.c b/src/fenv/fenv.c new file mode 100644 index 0000000..5588dad --- /dev/null +++ b/src/fenv/fenv.c @@ -0,0 +1,38 @@ +#include <fenv.h> + +/* Dummy functions for archs lacking fenv implementation */ + +int feclearexcept(int mask) +{ + return 0; +} + +int feraiseexcept(int mask) +{ + return 0; +} + +int fetestexcept(int mask) +{ + return 0; +} + +int fegetround(void) +{ + return FE_TONEAREST; +} + +int __fesetround(int r) +{ + return 0; +} + +int fegetenv(fenv_t *envp) +{ + return 0; +} + +int fesetenv(const fenv_t *envp) +{ + return 0; +} diff --git a/src/fenv/fesetexceptflag.c b/src/fenv/fesetexceptflag.c new file mode 100644 index 0000000..af5f102 --- /dev/null +++ b/src/fenv/fesetexceptflag.c @@ -0,0 +1,8 @@ +#include <fenv.h> + +int fesetexceptflag(const fexcept_t *fp, int mask) +{ + feclearexcept(~*fp & mask); + feraiseexcept(*fp & mask); + return 0; +} diff --git a/src/fenv/fesetround.c b/src/fenv/fesetround.c new file mode 100644 index 0000000..4e2f164 --- /dev/null +++ b/src/fenv/fesetround.c @@ -0,0 +1,23 @@ +#include <fenv.h> +#include <features.h> + +/* __fesetround wrapper for arch independent argument check */ + +hidden int __fesetround(int); + +int fesetround(int r) +{ + if (r != FE_TONEAREST +#ifdef FE_DOWNWARD + && r != FE_DOWNWARD +#endif +#ifdef FE_UPWARD + && r != FE_UPWARD +#endif +#ifdef FE_TOWARDZERO + && r != FE_TOWARDZERO +#endif + ) + return -1; + return __fesetround(r); +} diff --git a/src/fenv/feupdateenv.c b/src/fenv/feupdateenv.c new file 
mode 100644 index 0000000..50cef8e --- /dev/null +++ b/src/fenv/feupdateenv.c @@ -0,0 +1,9 @@ +#include <fenv.h> + +int feupdateenv(const fenv_t *envp) +{ + int ex = fetestexcept(FE_ALL_EXCEPT); + fesetenv(envp); + feraiseexcept(ex); + return 0; +} diff --git a/src/fenv/i386/fenv.s b/src/fenv/i386/fenv.s new file mode 100644 index 0000000..e7f7932 --- /dev/null +++ b/src/fenv/i386/fenv.s @@ -0,0 +1,164 @@ +.hidden __hwcap + +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + mov 4(%esp),%ecx + and $0x3f,%ecx + fnstsw %ax + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 2f + # maintain exceptions in the sse mxcsr, clear x87 exceptions + test %eax,%ecx + jz 1f + fnclex +1: push %edx + stmxcsr (%esp) + pop %edx + and $0x3f,%eax + or %eax,%edx + test %edx,%ecx + jz 1f + not %ecx + and %ecx,%edx + push %edx + ldmxcsr (%esp) + pop %edx +1: xor %eax,%eax + ret + # only do the expensive x87 fenv load/store when needed +2: test %eax,%ecx + jz 1b + not %ecx + and %ecx,%eax + test $0x3f,%eax + jz 1f + fnclex + jmp 1b +1: sub $32,%esp + fnstenv (%esp) + mov %al,4(%esp) + fldenv (%esp) + add $32,%esp + xor %eax,%eax + ret + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + mov 4(%esp),%eax + and $0x3f,%eax + sub $32,%esp + fnstenv (%esp) + or %al,4(%esp) + fldenv (%esp) + add $32,%esp + xor %eax,%eax + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + mov 4(%esp),%ecx + push %eax + xor %eax,%eax + fnstcw (%esp) + andb $0xf3,1(%esp) + or %ch,1(%esp) + fldcw (%esp) + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + stmxcsr (%esp) + shl $3,%ch + andb $0x9f,1(%esp) + or %ch,1(%esp) + ldmxcsr (%esp) +1: pop %ecx + ret + +.global fegetround +.type fegetround,@function +fegetround: + push %eax + fnstcw (%esp) + pop %eax + 
and $0xc00,%eax + ret + +.global fegetenv +.type fegetenv,@function +fegetenv: + mov 4(%esp),%ecx + xor %eax,%eax + fnstenv (%ecx) + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + push %eax + stmxcsr (%esp) + pop %edx + and $0x3f,%edx + or %edx,4(%ecx) +1: ret + +.global fesetenv +.type fesetenv,@function +fesetenv: + mov 4(%esp),%ecx + xor %eax,%eax + inc %ecx + jz 1f + fldenv -1(%ecx) + movl -1(%ecx),%ecx + jmp 2f +1: push %eax + push %eax + push %eax + push %eax + pushl $0xffff + push %eax + pushl $0x37f + fldenv (%esp) + add $28,%esp + # consider sse fenv as well if the cpu has XMM capability +2: call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + # mxcsr := same rounding mode, cleared exceptions, default mask + and $0xc00,%ecx + shl $3,%ecx + or $0x1f80,%ecx + mov %ecx,4(%esp) + ldmxcsr 4(%esp) +1: ret + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + mov 4(%esp),%ecx + and $0x3f,%ecx + fnstsw %ax + # consider sse fenv as well if the cpu has XMM capability + call 1f +1: addl $__hwcap-1b,(%esp) + pop %edx + testl $0x02000000,(%edx) + jz 1f + stmxcsr 4(%esp) + or 4(%esp),%eax +1: and %ecx,%eax + ret diff --git a/src/fenv/m68k/fenv.c b/src/fenv/m68k/fenv.c new file mode 100644 index 0000000..d0658e6 --- /dev/null +++ b/src/fenv/m68k/fenv.c @@ -0,0 +1,85 @@ +#include <fenv.h> +#include <features.h> + +#if __HAVE_68881__ || __mcffpu__ + +static unsigned getsr() +{ + unsigned v; + __asm__ __volatile__ ("fmove.l %%fpsr,%0" : "=dm"(v)); + return v; +} + +static void setsr(unsigned v) +{ + __asm__ __volatile__ ("fmove.l %0,%%fpsr" : : "dm"(v)); +} + +static unsigned getcr() +{ + unsigned v; + __asm__ __volatile__ ("fmove.l %%fpcr,%0" : "=dm"(v)); + return v; +} + +static void setcr(unsigned v) +{ + __asm__ __volatile__ ("fmove.l %0,%%fpcr" : : "dm"(v)); +} + +int feclearexcept(int mask) +{ + if (mask & ~FE_ALL_EXCEPT) return -1; + 
setsr(getsr() & ~mask); + return 0; +} + +int feraiseexcept(int mask) +{ + if (mask & ~FE_ALL_EXCEPT) return -1; + setsr(getsr() | mask); + return 0; +} + +int fetestexcept(int mask) +{ + return getsr() & mask; +} + +int fegetround(void) +{ + return getcr() & FE_UPWARD; +} + +hidden int __fesetround(int r) +{ + setcr((getcr() & ~FE_UPWARD) | r); + return 0; +} + +int fegetenv(fenv_t *envp) +{ + envp->__control_register = getcr(); + envp->__status_register = getsr(); + __asm__ __volatile__ ("fmove.l %%fpiar,%0" + : "=dm"(envp->__instruction_address)); + return 0; +} + +int fesetenv(const fenv_t *envp) +{ + static const fenv_t default_env = { 0 }; + if (envp == FE_DFL_ENV) + envp = &default_env; + setcr(envp->__control_register); + setsr(envp->__status_register); + __asm__ __volatile__ ("fmove.l %0,%%fpiar" + : : "dm"(envp->__instruction_address)); + return 0; +} + +#else + +#include "../fenv.c" + +#endif diff --git a/src/fenv/mips/fenv-sf.c b/src/fenv/mips/fenv-sf.c new file mode 100644 index 0000000..4aa3dbf --- /dev/null +++ b/src/fenv/mips/fenv-sf.c @@ -0,0 +1,3 @@ +#ifdef __mips_soft_float +#include "../fenv.c" +#endif diff --git a/src/fenv/mips/fenv.S b/src/fenv/mips/fenv.S new file mode 100644 index 0000000..ffa9297 --- /dev/null +++ b/src/fenv/mips/fenv.S @@ -0,0 +1,72 @@ +#ifndef __mips_soft_float + +.set noreorder + +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + xor $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $4, $4, 0x7c + cfc1 $2, $31 + jr $ra + and $2, $2, $4 + +.global fegetround +.type fegetround,@function +fegetround: + cfc1 $2, $31 + jr $ra + andi $2, $2, 3 + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: 
+ cfc1 $5, $31 + li $6, -4 + and $5, $5, $6 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fegetenv +.type fegetenv,@function +fegetenv: + cfc1 $5, $31 + sw $5, 0($4) + jr $ra + li $2, 0 + +.global fesetenv +.type fesetenv,@function +fesetenv: + addiu $5, $4, 1 + beq $5, $0, 1f + nop + lw $5, 0($4) +1: ctc1 $5, $31 + jr $ra + li $2, 0 + +#endif diff --git a/src/fenv/mips64/fenv-sf.c b/src/fenv/mips64/fenv-sf.c new file mode 100644 index 0000000..4aa3dbf --- /dev/null +++ b/src/fenv/mips64/fenv-sf.c @@ -0,0 +1,3 @@ +#ifdef __mips_soft_float +#include "../fenv.c" +#endif diff --git a/src/fenv/mips64/fenv.S b/src/fenv/mips64/fenv.S new file mode 100644 index 0000000..d5e0a62 --- /dev/null +++ b/src/fenv/mips64/fenv.S @@ -0,0 +1,72 @@ +#ifndef __mips_soft_float + +.set noreorder + +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + xor $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $4, $4, 0x7c + cfc1 $2, $31 + jr $ra + and $2, $2, $4 + +.global fegetround +.type fegetround,@function +fegetround: + cfc1 $2, $31 + jr $ra + andi $2, $2, 3 + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + cfc1 $5, $31 + li $6, -4 + and $5, $5, $6 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fegetenv +.type fegetenv,@function +fegetenv: + cfc1 $5, $31 + sw $5, 0($4) + jr $ra + li $2, 0 + +.global fesetenv +.type fesetenv,@function +fesetenv: + daddiu $5, $4, 1 + beq $5, $0, 1f + nop + lw $5, 0($4) +1: ctc1 $5, $31 + jr $ra + li $2, 0 + +#endif diff --git a/src/fenv/mipsn32/fenv-sf.c b/src/fenv/mipsn32/fenv-sf.c new file mode 100644 index 0000000..4aa3dbf --- /dev/null +++ b/src/fenv/mipsn32/fenv-sf.c @@ -0,0 +1,3 @@ 
+#ifdef __mips_soft_float +#include "../fenv.c" +#endif diff --git a/src/fenv/mipsn32/fenv.S b/src/fenv/mipsn32/fenv.S new file mode 100644 index 0000000..563d322 --- /dev/null +++ b/src/fenv/mipsn32/fenv.S @@ -0,0 +1,71 @@ +#ifndef __mips_soft_float + +.set noreorder +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + xor $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $4, $4, 0x7c + cfc1 $5, $31 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $4, $4, 0x7c + cfc1 $2, $31 + jr $ra + and $2, $2, $4 + +.global fegetround +.type fegetround,@function +fegetround: + cfc1 $2, $31 + jr $ra + andi $2, $2, 3 + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + cfc1 $5, $31 + li $6, -4 + and $5, $5, $6 + or $5, $5, $4 + ctc1 $5, $31 + jr $ra + li $2, 0 + +.global fegetenv +.type fegetenv,@function +fegetenv: + cfc1 $5, $31 + sw $5, 0($4) + jr $ra + li $2, 0 + +.global fesetenv +.type fesetenv,@function +fesetenv: + addiu $5, $4, 1 + beq $5, $0, 1f + nop + lw $5, 0($4) +1: ctc1 $5, $31 + jr $ra + li $2, 0 + +#endif diff --git a/src/fenv/powerpc/fenv-sf.c b/src/fenv/powerpc/fenv-sf.c new file mode 100644 index 0000000..85bef40 --- /dev/null +++ b/src/fenv/powerpc/fenv-sf.c @@ -0,0 +1,3 @@ +#ifdef _SOFT_FLOAT +#include "../fenv.c" +#endif diff --git a/src/fenv/powerpc/fenv.S b/src/fenv/powerpc/fenv.S new file mode 100644 index 0000000..22cea21 --- /dev/null +++ b/src/fenv/powerpc/fenv.S @@ -0,0 +1,130 @@ +#ifndef _SOFT_FLOAT +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + andis. 3,3,0x3e00 + /* if (r3 & FE_INVALID) r3 |= all_invalid_flags */ + andis. 
0,3,0x2000 + stwu 1,-16(1) + beq- 0,1f + oris 3,3,0x01f8 + ori 3,3,0x0700 +1: + /* + * note: fpscr contains various fpu status and control + * flags and we dont check if r3 may alter other flags + * than the exception related ones + * ufpscr &= ~r3 + */ + mffs 0 + stfd 0,8(1) + lwz 9,12(1) + andc 9,9,3 + stw 9,12(1) + lfd 0,8(1) + mtfsf 255,0 + + /* return 0 */ + li 3,0 + addi 1,1,16 + blr + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + andis. 3,3,0x3e00 + /* if (r3 & FE_INVALID) r3 |= software_invalid_flag */ + andis. 0,3,0x2000 + stwu 1,-16(1) + beq- 0,1f + ori 3,3,0x0400 +1: + /* fpscr |= r3 */ + mffs 0 + stfd 0,8(1) + lwz 9,12(1) + or 9,9,3 + stw 9,12(1) + lfd 0,8(1) + mtfsf 255,0 + + /* return 0 */ + li 3,0 + addi 1,1,16 + blr + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + andis. 3,3,0x3e00 + /* return r3 & fpscr */ + stwu 1,-16(1) + mffs 0 + stfd 0,8(1) + lwz 9,12(1) + addi 1,1,16 + and 3,3,9 + blr + +.global fegetround +.type fegetround,@function +fegetround: + /* return fpscr & 3 */ + stwu 1,-16(1) + mffs 0 + stfd 0,8(1) + lwz 3,12(1) + addi 1,1,16 + clrlwi 3,3,30 + blr + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + /* + * note: invalid input is not checked, r3 < 4 must hold + * fpscr = (fpscr & -4U) | r3 + */ + stwu 1,-16(1) + mffs 0 + stfd 0,8(1) + lwz 9,12(1) + clrrwi 9,9,2 + or 9,9,3 + stw 9,12(1) + lfd 0,8(1) + mtfsf 255,0 + + /* return 0 */ + li 3,0 + addi 1,1,16 + blr + +.global fegetenv +.type fegetenv,@function +fegetenv: + /* *r3 = fpscr */ + mffs 0 + stfd 0,0(3) + /* return 0 */ + li 3,0 + blr + +.global fesetenv +.type fesetenv,@function +fesetenv: + cmpwi 3, -1 + bne 1f + mflr 4 + bl 2f + .zero 8 +2: mflr 3 + mtlr 4 +1: /* fpscr = *r3 */ + lfd 0,0(3) + mtfsf 255,0 + /* return 0 */ + li 3,0 + blr +#endif diff --git a/src/fenv/powerpc64/fenv.c b/src/fenv/powerpc64/fenv.c new file mode 100644 index 0000000..90dabdc --- /dev/null +++ 
b/src/fenv/powerpc64/fenv.c @@ -0,0 +1,69 @@ +#define _GNU_SOURCE +#include <fenv.h> +#include <features.h> + +static inline double get_fpscr_f(void) +{ + double d; + __asm__ __volatile__("mffs %0" : "=d"(d)); + return d; +} + +static inline long get_fpscr(void) +{ + return (union {double f; long i;}) {get_fpscr_f()}.i; +} + +static inline void set_fpscr_f(double fpscr) +{ + __asm__ __volatile__("mtfsf 255, %0" : : "d"(fpscr)); +} + +static void set_fpscr(long fpscr) +{ + set_fpscr_f((union {long i; double f;}) {fpscr}.f); +} + +int feclearexcept(int mask) +{ + mask &= FE_ALL_EXCEPT; + if (mask & FE_INVALID) mask |= FE_ALL_INVALID; + set_fpscr(get_fpscr() & ~mask); + return 0; +} + +int feraiseexcept(int mask) +{ + mask &= FE_ALL_EXCEPT; + if (mask & FE_INVALID) mask |= FE_INVALID_SOFTWARE; + set_fpscr(get_fpscr() | mask); + return 0; +} + +int fetestexcept(int mask) +{ + return get_fpscr() & mask & FE_ALL_EXCEPT; +} + +int fegetround(void) +{ + return get_fpscr() & 3; +} + +hidden int __fesetround(int r) +{ + set_fpscr(get_fpscr() & ~3L | r); + return 0; +} + +int fegetenv(fenv_t *envp) +{ + *envp = get_fpscr_f(); + return 0; +} + +int fesetenv(const fenv_t *envp) +{ + set_fpscr_f(envp != FE_DFL_ENV ? 
*envp : 0); + return 0; +} diff --git a/src/fenv/riscv64/fenv-sf.c b/src/fenv/riscv64/fenv-sf.c new file mode 100644 index 0000000..ecd3cb5 --- /dev/null +++ b/src/fenv/riscv64/fenv-sf.c @@ -0,0 +1,3 @@ +#ifndef __riscv_flen +#include "../fenv.c" +#endif diff --git a/src/fenv/riscv64/fenv.S b/src/fenv/riscv64/fenv.S new file mode 100644 index 0000000..0ea78bf --- /dev/null +++ b/src/fenv/riscv64/fenv.S @@ -0,0 +1,56 @@ +#ifdef __riscv_flen + +.global feclearexcept +.type feclearexcept, %function +feclearexcept: + csrc fflags, a0 + li a0, 0 + ret + +.global feraiseexcept +.type feraiseexcept, %function +feraiseexcept: + csrs fflags, a0 + li a0, 0 + ret + +.global fetestexcept +.type fetestexcept, %function +fetestexcept: + frflags t0 + and a0, t0, a0 + ret + +.global fegetround +.type fegetround, %function +fegetround: + frrm a0 + ret + +.global __fesetround +.type __fesetround, %function +__fesetround: + fsrm t0, a0 + li a0, 0 + ret + +.global fegetenv +.type fegetenv, %function +fegetenv: + frcsr t0 + sw t0, 0(a0) + li a0, 0 + ret + +.global fesetenv +.type fesetenv, %function +fesetenv: + li t2, -1 + li t1, 0 + beq a0, t2, 1f + lw t1, 0(a0) +1: fscsr t1 + li a0, 0 + ret + +#endif diff --git a/src/fenv/s390x/fenv.c b/src/fenv/s390x/fenv.c new file mode 100644 index 0000000..fd4e60c --- /dev/null +++ b/src/fenv/s390x/fenv.c @@ -0,0 +1,56 @@ +#include <fenv.h> +#include <features.h> + +static inline unsigned get_fpc(void) +{ + unsigned fpc; + __asm__ __volatile__("efpc %0" : "=r"(fpc)); + return fpc; +} + +static inline void set_fpc(unsigned fpc) +{ + __asm__ __volatile__("sfpc %0" :: "r"(fpc)); +} + +int feclearexcept(int mask) +{ + mask &= FE_ALL_EXCEPT; + set_fpc(get_fpc() & ~mask); + return 0; +} + +int feraiseexcept(int mask) +{ + mask &= FE_ALL_EXCEPT; + set_fpc(get_fpc() | mask); + return 0; +} + +int fetestexcept(int mask) +{ + return get_fpc() & mask & FE_ALL_EXCEPT; +} + +int fegetround(void) +{ + return get_fpc() & 3; +} + +hidden int __fesetround(int r) +{ + set_fpc(get_fpc() 
& ~3L | r); + return 0; +} + +int fegetenv(fenv_t *envp) +{ + *envp = get_fpc(); + return 0; +} + +int fesetenv(const fenv_t *envp) +{ + set_fpc(envp != FE_DFL_ENV ? *envp : 0); + return 0; +} diff --git a/src/fenv/sh/fenv-nofpu.c b/src/fenv/sh/fenv-nofpu.c new file mode 100644 index 0000000..b2495a6 --- /dev/null +++ b/src/fenv/sh/fenv-nofpu.c @@ -0,0 +1,3 @@ +#if !__SH_FPU_ANY__ && !__SH4__ +#include "../fenv.c" +#endif diff --git a/src/fenv/sh/fenv.S b/src/fenv/sh/fenv.S new file mode 100644 index 0000000..b3b7d66 --- /dev/null +++ b/src/fenv/sh/fenv.S @@ -0,0 +1,81 @@ +#if __SH_FPU_ANY__ || __SH4__ + +.global fegetround +.type fegetround, @function +fegetround: + sts fpscr, r0 + rts + and #3, r0 + +.global __fesetround +.hidden __fesetround +.type __fesetround, @function +__fesetround: + sts fpscr, r0 + mov #-4, r1 + and r1, r0 + or r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global fetestexcept +.type fetestexcept, @function +fetestexcept: + sts fpscr, r0 + and r4, r0 + rts + and #0x7c, r0 + +.global feclearexcept +.type feclearexcept, @function +feclearexcept: + mov r4, r0 + and #0x7c, r0 + not r0, r4 + sts fpscr, r0 + and r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global feraiseexcept +.type feraiseexcept, @function +feraiseexcept: + mov r4, r0 + and #0x7c, r0 + sts fpscr, r4 + or r4, r0 + lds r0, fpscr + rts + mov #0, r0 + +.global fegetenv +.type fegetenv, @function +fegetenv: + sts fpscr, r0 + mov.l r0, @r4 + rts + mov #0, r0 + +.global fesetenv +.type fesetenv, @function +fesetenv: + mov r4, r0 + cmp/eq #-1, r0 + bf 1f + + ! the default environment is complicated by the fact that we need to + ! preserve the current precision bit, which we do not know a priori + sts fpscr, r0 + mov #8, r1 + swap.w r1, r1 + bra 2f + and r1, r0 + +1: mov.l @r4, r0 ! 
non-default environment +2: lds r0, fpscr + rts + mov #0, r0 + +#endif diff --git a/src/fenv/x32/fenv.s b/src/fenv/x32/fenv.s new file mode 100644 index 0000000..835f23b --- /dev/null +++ b/src/fenv/x32/fenv.s @@ -0,0 +1,98 @@ +.global feclearexcept +.type feclearexcept,@function +feclearexcept: + # maintain exceptions in the sse mxcsr, clear x87 exceptions + mov %edi,%ecx + and $0x3f,%ecx + fnstsw %ax + test %eax,%ecx + jz 1f + fnclex +1: stmxcsr -8(%esp) + and $0x3f,%eax + or %eax,-8(%esp) + test %ecx,-8(%esp) + jz 1f + not %ecx + and %ecx,-8(%esp) + ldmxcsr -8(%esp) +1: xor %eax,%eax + ret + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $0x3f,%edi + stmxcsr -8(%esp) + or %edi,-8(%esp) + ldmxcsr -8(%esp) + xor %eax,%eax + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + push %rax + xor %eax,%eax + mov %edi,%ecx + fnstcw (%esp) + andb $0xf3,1(%esp) + or %ch,1(%esp) + fldcw (%esp) + stmxcsr (%esp) + shl $3,%ch + andb $0x9f,1(%esp) + or %ch,1(%esp) + ldmxcsr (%esp) + pop %rcx + ret + +.global fegetround +.type fegetround,@function +fegetround: + push %rax + stmxcsr (%esp) + pop %rax + shr $3,%eax + and $0xc00,%eax + ret + +.global fegetenv +.type fegetenv,@function +fegetenv: + xor %eax,%eax + fnstenv (%edi) + stmxcsr 28(%edi) + ret + +.global fesetenv +.type fesetenv,@function +fesetenv: + xor %eax,%eax + inc %edi + jz 1f + fldenv -1(%edi) + ldmxcsr 27(%edi) + ret +1: push %rax + push %rax + pushq $0xffff + pushq $0x37f + fldenv (%esp) + pushq $0x1f80 + ldmxcsr (%esp) + add $40,%esp + ret + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $0x3f,%edi + push %rax + stmxcsr (%esp) + pop %rsi + fnstsw %ax + or %esi,%eax + and %edi,%eax + ret diff --git a/src/fenv/x86_64/fenv.s b/src/fenv/x86_64/fenv.s new file mode 100644 index 0000000..98d876d --- /dev/null +++ b/src/fenv/x86_64/fenv.s @@ -0,0 +1,98 @@ +.global feclearexcept +.type feclearexcept,@function 
+feclearexcept: + # maintain exceptions in the sse mxcsr, clear x87 exceptions + mov %edi,%ecx + and $0x3f,%ecx + fnstsw %ax + test %eax,%ecx + jz 1f + fnclex +1: stmxcsr -8(%rsp) + and $0x3f,%eax + or %eax,-8(%rsp) + test %ecx,-8(%rsp) + jz 1f + not %ecx + and %ecx,-8(%rsp) + ldmxcsr -8(%rsp) +1: xor %eax,%eax + ret + +.global feraiseexcept +.type feraiseexcept,@function +feraiseexcept: + and $0x3f,%edi + stmxcsr -8(%rsp) + or %edi,-8(%rsp) + ldmxcsr -8(%rsp) + xor %eax,%eax + ret + +.global __fesetround +.hidden __fesetround +.type __fesetround,@function +__fesetround: + push %rax + xor %eax,%eax + mov %edi,%ecx + fnstcw (%rsp) + andb $0xf3,1(%rsp) + or %ch,1(%rsp) + fldcw (%rsp) + stmxcsr (%rsp) + shl $3,%ch + andb $0x9f,1(%rsp) + or %ch,1(%rsp) + ldmxcsr (%rsp) + pop %rcx + ret + +.global fegetround +.type fegetround,@function +fegetround: + push %rax + stmxcsr (%rsp) + pop %rax + shr $3,%eax + and $0xc00,%eax + ret + +.global fegetenv +.type fegetenv,@function +fegetenv: + xor %eax,%eax + fnstenv (%rdi) + stmxcsr 28(%rdi) + ret + +.global fesetenv +.type fesetenv,@function +fesetenv: + xor %eax,%eax + inc %rdi + jz 1f + fldenv -1(%rdi) + ldmxcsr 27(%rdi) + ret +1: push %rax + push %rax + pushq $0xffff + pushq $0x37f + fldenv (%rsp) + pushq $0x1f80 + ldmxcsr (%rsp) + add $40,%rsp + ret + +.global fetestexcept +.type fetestexcept,@function +fetestexcept: + and $0x3f,%edi + push %rax + stmxcsr (%rsp) + pop %rsi + fnstsw %ax + or %esi,%eax + and %edi,%eax + ret diff --git a/tcc-patch/mxcsr.patch b/tcc-patch/mxcsr.patch new file mode 100644 index 0000000..5a9400a --- /dev/null +++ b/tcc-patch/mxcsr.patch @@ -0,0 +1,26 @@ +diff --git a/i386-asm.h b/i386-asm.h +index 65d5179..dfc5183 100644 +--- a/i386-asm.h ++++ b/i386-asm.h +@@ -447,6 +447,8 @@ ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMXSSE )) + DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMXSSE, OPT_MMXSSE ) + + /* sse */ ++ DEF_ASM_OP1(ldmxcsr, 0x0fae, 2, OPC_MODRM, OPT_EA) ++ 
DEF_ASM_OP1(stmxcsr, 0x0fae, 3, OPC_MODRM, OPT_EA) + DEF_ASM_OP2(movups, 0x0f10, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) + ALT(DEF_ASM_OP2(movups, 0x0f11, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movaps, 0x0f28, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) +diff --git a/x86_64-asm.h b/x86_64-asm.h +index cb9eb16..4e03773 100644 +--- a/x86_64-asm.h ++++ b/x86_64-asm.h +@@ -483,6 +483,8 @@ ALT(DEF_ASM_OP2(psrlq, 0x0f73, 2, OPC_MODRM, OPT_IM8, OPT_MMXSSE )) + DEF_ASM_OP2(pxor, 0x0fef, 0, OPC_MODRM, OPT_EA | OPT_MMXSSE, OPT_MMXSSE ) + + /* sse */ ++ DEF_ASM_OP1(ldmxcsr, 0x0fae, 2, OPC_MODRM, OPT_EA) ++ DEF_ASM_OP1(stmxcsr, 0x0fae, 3, OPC_MODRM, OPT_EA) + DEF_ASM_OP2(movups, 0x0f10, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE ) + ALT(DEF_ASM_OP2(movups, 0x0f11, 0, OPC_MODRM, OPT_SSE, OPT_EA | OPT_REG32 )) + DEF_ASM_OP2(movaps, 0x0f28, 0, OPC_MODRM, OPT_EA | OPT_REG32, OPT_SSE )