diff options
Diffstat (limited to 'private/fp32')
186 files changed, 47808 insertions, 0 deletions
diff --git a/private/fp32/buildcrt.cmd b/private/fp32/buildcrt.cmd new file mode 100644 index 000000000..31df6e622 --- /dev/null +++ b/private/fp32/buildcrt.cmd @@ -0,0 +1,32 @@ +@set CRTLIBDEBUG= +@set CRTLIBTYPE= +@set 386_USE_LIBCMT= +@if "%1" == "DLL" (set CRTLIBTYPE=DLL) && goto blddll +@if "%1" == "Dll" (set CRTLIBTYPE=DLL) && goto blddll +@if "%1" == "dll" (set CRTLIBTYPE=DLL) && goto blddll +@if "%1" == "st" (set CRTLIBTYPE=ST) && goto bldst +@if "%1" == "ST" (set CRTLIBTYPE=ST) && goto bldst +@if "%1" == "St" (set CRTLIBTYPE=ST) && goto bldst +@if "%1" == "nt" (set CRTLIBTYPE=NT) && goto bldnt +@if "%1" == "NT" (set CRTLIBTYPE=NT) && goto bldnt +@if "%1" == "Nt" (set CRTLIBTYPE=NT) && goto bldnt +@if "%1" == "mt" set CRTLIBTYPE=MT +@if "%1" == "MT" set CRTLIBTYPE=MT +@if "%1" == "Mt" set CRTLIBTYPE=MT +@if "%CRTLIBTYPE%" == "" goto bogus + +:bldst +build %2 %3 %4 %5 conv tran +@goto done + +:blddll +build %2 %3 %4 %5 conv tran +@goto done + +:bldnt +build %2 %3 %4 %5 tran + +@goto done +:bogus +@echo Usage: BUILDFP (NT, ST, MT, or DLL) [BuildOptions] +:done diff --git a/private/fp32/conv/assrt.c b/private/fp32/conv/assrt.c new file mode 100644 index 000000000..875ac177e --- /dev/null +++ b/private/fp32/conv/assrt.c @@ -0,0 +1,30 @@ +/*** +*assrt.c - assertions needed for string conversion routines +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Make sure that the data types used by the string conversion +* routines have the right size. If this file does not compile, +* the type definitions in cv.h should change appropriately. +* +*Revision History: +* 07-25-91 GDP written +* +*******************************************************************************/ + + +#include <cv.h> + +static void assertion_test(void) +{ + sizeof(u_char) == 1 ? 0 : 1/0, + sizeof(u_short) == 2 ? 0 : 1/0, + sizeof(u_long) == 4 ? 0 : 1/0, + sizeof(s_char) == 1 ? 0 : 1/0, + sizeof(s_short) == 2 ? 0 : 1/0, + sizeof(s_long) == 4 ? 0 : 1/0; +#ifdef _LDSUPPORT + sizeof(long double) == 10 ? 0 : 1/0; +#endif +} diff --git a/private/fp32/conv/cfin.c b/private/fp32/conv/cfin.c new file mode 100644 index 000000000..d739bf4d3 --- /dev/null +++ b/private/fp32/conv/cfin.c @@ -0,0 +1,71 @@ +/*** +*cfin.c - Encode interface for C +* +* Copyright (c) 19xx-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 07-20-91 GDP Ported to C from assembly +* 04-30-92 GDP use __strgtold12 and _ld12tod +* 06-22-92 GDP use new __strgtold12 interface +* +*******************************************************************************/ + +#include <string.h> +#include <cv.h> + + +#ifndef MTHREAD +static struct _flt ret; +static FLT flt = &ret; +#endif + +/* The only three conditions that this routine detects */ +#define CFIN_NODIGITS 512 +#define CFIN_OVERFLOW 128 +#define CFIN_UNDERFLOW 256 + +/* This version ignores the last two arguments (radix and scale) + * Input string should be null terminated + * len is also ignored + */ +#ifdef MTHREAD +FLT _CALLTYPE2 _fltin2(FLT flt, const char *str, int len_ignore, int scale_ignore, int radix_ignore) +#else +FLT _CALLTYPE2 _fltin(const char *str, int len_ignore, int scale_ignore, int radix_ignore) +#endif +{ + _LDBL12 ld12; + DOUBLE x; + const char *EndPtr; + unsigned flags; + int retflags = 0; + + flags = __strgtold12(&ld12, &EndPtr, str, 0, 0, 0, 0); + if (flags & SLD_NODIGITS) { + retflags |= CFIN_NODIGITS; + *(u_long *)&x = 0; + *((u_long *)&x+1) = 0; + } + else { + INTRNCVT_STATUS intrncvt; + + intrncvt = _ld12tod(&ld12, &x); + + if (flags & SLD_OVERFLOW || + intrncvt == INTRNCVT_OVERFLOW) { + retflags |= CFIN_OVERFLOW; + } + if (flags & SLD_UNDERFLOW || + intrncvt == INTRNCVT_UNDERFLOW) { + retflags |= CFIN_UNDERFLOW; + } + } + + flt->flags = retflags; + flt->nbytes = EndPtr - str; + flt->dval = *(double *)&x; + + return flt; +} diff --git a/private/fp32/conv/cfout.c b/private/fp32/conv/cfout.c new file mode 100644 index 000000000..559efc7d6 --- /dev/null +++ b/private/fp32/conv/cfout.c @@ -0,0 +1,129 @@ +/*** +*cfout.c - Encode interface for C +* +* Copyright (c) 19xx-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 07-20-91 GDP Ported to C from assembly +* 04-30-92 GDP Added _dtold routine (moved here from ldtod.c) +* 05-14-92 GDP NDIGITS is now 17 (instead of 16) +* 06-18-92 GDP Use double instead of DOUBLE to remove C8 warning +* +*******************************************************************************/ + + +#include <string.h> +#include <cv.h> + +#define NDIGITS 17 + +void __dtold(_LDOUBLE *pld, double *px); + + +#ifndef MTHREAD +static struct _strflt ret; +static FOS fos; +#endif + +#ifdef MTHREAD +STRFLT _CALLTYPE2 _fltout2(double x, STRFLT flt, char *resultstr) +{ + _LDOUBLE ld; + FOS autofos; + + __dtold(&ld, &x); + flt->flag = $I10_OUTPUT(ld,NDIGITS,0,&autofos); + flt->sign = autofos.sign; + flt->decpt = autofos.exp; + strcpy(resultstr,autofos.man); + flt->mantissa = resultstr; + + return flt; +} + +#else + +STRFLT _CALLTYPE2 _fltout(double x) +{ + _LDOUBLE ld; + + __dtold(&ld, &x); + ret.flag = $I10_OUTPUT(ld,NDIGITS,0,&fos); + ret.sign = fos.sign; + ret.decpt = fos.exp; + ret.mantissa = fos.man; + + return &ret; +} + +#endif + + + + +/*** +* __dtold - convert a double into a _LDOUBLE +* +*Purpose: Use together with i10_output() to get string conversion +* for double +* +*Entry: double *px +* +*Exit: the corresponding _LDOUBLE value is returned in *pld +* +*Exceptions: +* +*******************************************************************************/ + +void __dtold(_LDOUBLE *pld, double *px) +{ + u_short exp; + u_short sign; + u_long manhi, manlo; + u_long msb = MSB_ULONG; + u_short ldexp = 0; + + exp = (*U_SHORT4_D(px) & (u_short)0x7ff0) >> 4; + sign = *U_SHORT4_D(px) & (u_short)0x8000; + manhi = *UL_HI_D(px) & 0xfffff; + manlo = *UL_LO_D(px); + + switch (exp) { + case D_MAXEXP: + ldexp = LD_MAXEXP; + break; + case 0: + /* check for zero */ + if (manhi == 0 && manlo == 0) { + *UL_MANHI_LD(pld) = 0; + *UL_MANLO_LD(pld) = 0; + *U_EXP_LD(pld) = 0; + return; + } + /* we have a denormal -- we'll normalize later */ + ldexp = (u_short) ((s_short)exp - D_BIAS + LD_BIAS + 1); + msb = 0; + break; + default: + exp -= D_BIAS; + ldexp = (u_short) ((s_short)exp + LD_BIAS); + break; + } + + *UL_MANHI_LD(pld) = msb | manhi << 11 | manlo >> 21; + *UL_MANLO_LD(pld) = manlo << 11; + + /* normalize if necessary */ + while ((*UL_MANHI_LD(pld) & MSB_ULONG) == 0) { + /* shift left */ + *UL_MANHI_LD(pld) = *UL_MANHI_LD(pld) << 1 | + (MSB_ULONG & *UL_MANLO_LD(pld) ? 1: 0); + (*UL_MANLO_LD(pld)) <<= 1; + ldexp --; + } + + *U_EXP_LD(pld) = sign | ldexp; + +} diff --git a/private/fp32/conv/constpow.c b/private/fp32/conv/constpow.c new file mode 100644 index 000000000..eabca4865 --- /dev/null +++ b/private/fp32/conv/constpow.c @@ -0,0 +1,158 @@ +/*** +*constpow.c - constant powers of ten +* +* Copyright (c) 19xx-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Provide powers of ten in long double form: +* 10^(2^i), i=0,1,2,... +* +*Revision History: +* 7-17-91 GDP Initial version (ported from assembly) +* +*******************************************************************************/ + + +#include <cv.h> + +/* Format: A 10 byte long double + 2 bytes of extra precision + * If the extra precision is desired, the 10-byte long double + * should be "unrounded" first. + * This may change in later versions + */ + +#ifdef L_END + +_LDBL12 _pow10pos[] = { + /*P0001*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xA0,0x02,0x40}}, + /*P0002*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xC8,0x05,0x40}}, + /*P0003*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xFA,0x08,0x40}}, + /*P0004*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x40,0x9C,0x0C,0x40}}, + /*P0005*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x50,0xC3,0x0F,0x40}}, + /*P0006*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x24,0xF4,0x12,0x40}}, + /*P0007*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x80,0x96,0x98,0x16,0x40}}, + /*P0008*/ {{0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x20,0xBC,0xBE,0x19,0x40}}, + /*P0016*/ {{0x00,0x00, 0x00,0x00,0x00,0x04,0xBF,0xC9,0x1B,0x8E,0x34,0x40}}, + /*P0024*/ {{0x00,0x00, 0x00,0xA1,0xED,0xCC,0xCE,0x1B,0xC2,0xD3,0x4E,0x40}}, + /*P0032*/ {{0x20,0xF0, 0x9E,0xB5,0x70,0x2B,0xA8,0xAD,0xC5,0x9D,0x69,0x40}}, + /*P0040*/ {{0xD0,0x5D, 0xFD,0x25,0xE5,0x1A,0x8E,0x4F,0x19,0xEB,0x83,0x40}}, + /*P0048*/ {{0x71,0x96, 0xD7,0x95,0x43,0x0E,0x05,0x8D,0x29,0xAF,0x9E,0x40}}, + /*P0056*/ {{0xF9,0xBF, 0xA0,0x44,0xED,0x81,0x12,0x8F,0x81,0x82,0xB9,0x40}}, + /*P0064*/ {{0xBF,0x3C, 0xD5,0xA6,0xCF,0xFF,0x49,0x1F,0x78,0xC2,0xD3,0x40}}, + /*P0128*/ {{0x6F,0xC6, 0xE0,0x8C,0xE9,0x80,0xC9,0x47,0xBA,0x93,0xA8,0x41}}, + /*P0192*/ {{0xBC,0x85, 0x6B,0x55,0x27,0x39,0x8D,0xF7,0x70,0xE0,0x7C,0x42}}, + /*P0256*/ {{0xBC,0xDD, 0x8E,0xDE,0xF9,0x9D,0xFB,0xEB,0x7E,0xAA,0x51,0x43}}, + /*P0320*/ {{0xA1,0xE6, 0x76,0xE3,0xCC,0xF2,0x29,0x2F,0x84,0x81,0x26,0x44}}, + /*P0384*/ {{0x28,0x10, 0x17,0xAA,0xF8,0xAE,0x10,0xE3,0xC5,0xC4,0xFA,0x44}}, + /*P0448*/ {{0xEB,0xA7, 0xD4,0xF3,0xF7,0xEB,0xE1,0x4A,0x7A,0x95,0xCF,0x45}}, + /*P0512*/ {{0x65,0xCC, 0xC7,0x91,0x0E,0xA6,0xAE,0xA0,0x19,0xE3,0xA3,0x46}}, + /*P1024*/ {{0x0D,0x65, 0x17,0x0C,0x75,0x81,0x86,0x75,0x76,0xC9,0x48,0x4D}}, + /*P1536*/ {{0x58,0x42, 0xE4,0xA7,0x93,0x39,0x3B,0x35,0xB8,0xB2,0xED,0x53}}, + /*P2048*/ {{0x4D,0xA7, 0xE5,0x5D,0x3D,0xC5,0x5D,0x3B,0x8B,0x9E,0x92,0x5A}}, + /*P2560*/ {{0xFF,0x5D, 0xA6,0xF0,0xA1,0x20,0xC0,0x54,0xA5,0x8C,0x37,0x61}}, + /*P3072*/ {{0xD1,0xFD, 0x8B,0x5A,0x8B,0xD8,0x25,0x5D,0x89,0xF9,0xDB,0x67}}, + /*P3584*/ {{0xAA,0x95, 0xF8,0xF3,0x27,0xBF,0xA2,0xC8,0x5D,0xDD,0x80,0x6E}}, + /*P4096*/ {{0x4C,0xC9, 0x9B,0x97,0x20,0x8A,0x02,0x52,0x60,0xC4,0x25,0x75}} +}; + +_LDBL12 _pow10neg[] = { + /*N0001*/ {{0xCD,0xCC, 0xCD,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xFB,0x3F}}, + /*N0002*/ {{0x71,0x3D, 0x0A,0xD7,0xA3,0x70,0x3D,0x0A,0xD7,0xA3,0xF8,0x3F}}, + /*N0003*/ {{0x5A,0x64, 0x3B,0xDF,0x4F,0x8D,0x97,0x6E,0x12,0x83,0xF5,0x3F}}, + /*N0004*/ {{0xC3,0xD3, 0x2C,0x65,0x19,0xE2,0x58,0x17,0xB7,0xD1,0xF1,0x3F}}, + /*N0005*/ {{0xD0,0x0F, 0x23,0x84,0x47,0x1B,0x47,0xAC,0xC5,0xA7,0xEE,0x3F}}, + /*N0006*/ {{0x40,0xA6, 0xB6,0x69,0x6C,0xAF,0x05,0xBD,0x37,0x86,0xEB,0x3F}}, + /*N0007*/ {{0x33,0x3D, 0xBC,0x42,0x7A,0xE5,0xD5,0x94,0xBF,0xD6,0xE7,0x3F}}, + /*N0008*/ {{0xC2,0xFD, 0xFD,0xCE,0x61,0x84,0x11,0x77,0xCC,0xAB,0xE4,0x3F}}, + /*N0016*/ {{0x2F,0x4C, 0x5B,0xE1,0x4D,0xC4,0xBE,0x94,0x95,0xE6,0xC9,0x3F}}, + /*N0024*/ {{0x92,0xC4, 0x53,0x3B,0x75,0x44,0xCD,0x14,0xBE,0x9A,0xAF,0x3F}}, + /*N0032*/ {{0xDE,0x67, 0xBA,0x94,0x39,0x45,0xAD,0x1E,0xB1,0xCF,0x94,0x3F}}, + /*N0040*/ {{0x24,0x23, 0xC6,0xE2,0xBC,0xBA,0x3B,0x31,0x61,0x8B,0x7A,0x3F}}, + /*N0048*/ {{0x61,0x55, 0x59,0xC1,0x7E,0xB1,0x53,0x7C,0x12,0xBB,0x5F,0x3F}}, + /*N0056*/ {{0xD7,0xEE, 0x2F,0x8D,0x06,0xBE,0x92,0x85,0x15,0xFB,0x44,0x3F}}, + /*N0064*/ {{0x24,0x3F, 0xA5,0xE9,0x39,0xA5,0x27,0xEA,0x7F,0xA8,0x2A,0x3F}}, + /*N0128*/ {{0x7D,0xAC, 0xA1,0xE4,0xBC,0x64,0x7C,0x46,0xD0,0xDD,0x55,0x3E}}, + /*N0192*/ {{0x63,0x7B, 0x06,0xCC,0x23,0x54,0x77,0x83,0xFF,0x91,0x81,0x3D}}, + /*N0256*/ {{0x91,0xFA, 0x3A,0x19,0x7A,0x63,0x25,0x43,0x31,0xC0,0xAC,0x3C}}, + /*N0320*/ {{0x21,0x89, 0xD1,0x38,0x82,0x47,0x97,0xB8,0x00,0xFD,0xD7,0x3B}}, + /*N0384*/ {{0xDC,0x88, 0x58,0x08,0x1B,0xB1,0xE8,0xE3,0x86,0xA6,0x03,0x3B}}, + /*N0448*/ {{0xC6,0x84, 0x45,0x42,0x07,0xB6,0x99,0x75,0x37,0xDB,0x2E,0x3A}}, + /*N0512*/ {{0x33,0x71, 0x1C,0xD2,0x23,0xDB,0x32,0xEE,0x49,0x90,0x5A,0x39}}, + /*N1024*/ {{0xA6,0x87, 0xBE,0xC0,0x57,0xDA,0xA5,0x82,0xA6,0xA2,0xB5,0x32}}, + /*N1536*/ {{0xE2,0x68, 0xB2,0x11,0xA7,0x52,0x9F,0x44,0x59,0xB7,0x10,0x2C}}, + /*N2048*/ {{0x25,0x49, 0xE4,0x2D,0x36,0x34,0x4F,0x53,0xAE,0xCE,0x6B,0x25}}, + /*N2560*/ {{0x8F,0x59, 0x04,0xA4,0xC0,0xDE,0xC2,0x7D,0xFB,0xE8,0xC6,0x1E}}, + /*N3072*/ {{0x9E,0xE7, 0x88,0x5A,0x57,0x91,0x3C,0xBF,0x50,0x83,0x22,0x18}}, + /*N3584*/ {{0x4E,0x4B, 0x65,0x62,0xFD,0x83,0x8F,0xAF,0x06,0x94,0x7D,0x11}}, + /*N4096*/ {{0xE4,0x2D, 0xDE,0x9F,0xCE,0xD2,0xC8,0x04,0xDD,0xA6,0xD8,0x0A}} +}; + +#endif + +#ifdef B_END + +_LDBL12 _pow10pos[] = { + /*P0001*/ {{0x40,0x02,0xA0,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0002*/ {{0x40,0x05,0xC8,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0003*/ {{0x40,0x08,0xFA,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0004*/ {{0x40,0x0C,0x9C,0x40,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0005*/ {{0x40,0x0F,0xC3,0x50,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0006*/ {{0x40,0x12,0xF4,0x24,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0007*/ {{0x40,0x16,0x98,0x96,0x80,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0008*/ {{0x40,0x19,0xBE,0xBC,0x20,0x00,0x00,0x00,0x00,0x00, 0x00,0x00}}, + /*P0016*/ {{0x40,0x34,0x8E,0x1B,0xC9,0xBF,0x04,0x00,0x00,0x00, 0x00,0x00}}, + /*P0024*/ {{0x40,0x4E,0xD3,0xC2,0x1B,0xCE,0xCC,0xED,0xA1,0x00, 0x00,0x00}}, + /*P0032*/ {{0x40,0x69,0x9D,0xC5,0xAD,0xA8,0x2B,0x70,0xB5,0x9E, 0xF0,0x20}}, + /*P0040*/ {{0x40,0x83,0xEB,0x19,0x4F,0x8E,0x1A,0xE5,0x25,0xFD, 0x5D,0xD0}}, + /*P0048*/ {{0x40,0x9E,0xAF,0x29,0x8D,0x05,0x0E,0x43,0x95,0xD7, 0x96,0x71}}, + /*P0056*/ {{0x40,0xB9,0x82,0x81,0x8F,0x12,0x81,0xED,0x44,0xA0, 0xBF,0xF9}}, + /*P0064*/ {{0x40,0xD3,0xC2,0x78,0x1F,0x49,0xFF,0xCF,0xA6,0xD5, 0x3C,0xBF}}, + /*P0128*/ {{0x41,0xA8,0x93,0xBA,0x47,0xC9,0x80,0xE9,0x8C,0xE0, 0xC6,0x6F}}, + /*P0192*/ {{0x42,0x7C,0xE0,0x70,0xF7,0x8D,0x39,0x27,0x55,0x6B, 0x85,0xBC}}, + /*P0256*/ {{0x43,0x51,0xAA,0x7E,0xEB,0xFB,0x9D,0xF9,0xDE,0x8E, 0xDD,0xBC}}, + /*P0320*/ {{0x44,0x26,0x81,0x84,0x2F,0x29,0xF2,0xCC,0xE3,0x76, 0xE6,0xA1}}, + /*P0384*/ {{0x44,0xFA,0xC4,0xC5,0xE3,0x10,0xAE,0xF8,0xAA,0x17, 0x10,0x28}}, + /*P0448*/ {{0x45,0xCF,0x95,0x7A,0x4A,0xE1,0xEB,0xF7,0xF3,0xD4, 0xA7,0xEB}}, + /*P0512*/ {{0x46,0xA3,0xE3,0x19,0xA0,0xAE,0xA6,0x0E,0x91,0xC7, 0xCC,0x65}}, + /*P1024*/ {{0x4D,0x48,0xC9,0x76,0x75,0x86,0x81,0x75,0x0C,0x17, 0x65,0x0D}}, + /*P1536*/ {{0x53,0xED,0xB2,0xB8,0x35,0x3B,0x39,0x93,0xA7,0xE4, 0x42,0x58}}, + /*P2048*/ {{0x5A,0x92,0x9E,0x8B,0x3B,0x5D,0xC5,0x3D,0x5D,0xE5, 0xA7,0x4D}}, + /*P2560*/ {{0x61,0x37,0x8C,0xA5,0x54,0xC0,0x20,0xA1,0xF0,0xA6, 0x5D,0xFF}}, + /*P3072*/ {{0x67,0xDB,0xF9,0x89,0x5D,0x25,0xD8,0x8B,0x5A,0x8B, 0xFD,0xD1}}, + /*P3584*/ {{0x6E,0x80,0xDD,0x5D,0xC8,0xA2,0xBF,0x27,0xF3,0xF8, 0x95,0xAA}}, + /*P4096*/ {{0x75,0x25,0xC4,0x60,0x52,0x02,0x8A,0x20,0x97,0x9B, 0xC9,0x4C}} +}; + +_LDBL12 _pow10neg[] = { + /*N0001*/ {{0x3F,0xFB,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCC,0xCD, 0xCC,0xCD}}, + /*N0002*/ {{0x3F,0xF8,0xA3,0xD7,0x0A,0x3D,0x70,0xA3,0xD7,0x0A, 0x3D,0x71}}, + /*N0003*/ {{0x3F,0xF5,0x83,0x12,0x6E,0x97,0x8D,0x4F,0xDF,0x3B, 0x64,0x5A}}, + /*N0004*/ {{0x3F,0xF1,0xD1,0xB7,0x17,0x58,0xE2,0x19,0x65,0x2C, 0xD3,0xC3}}, + /*N0005*/ {{0x3F,0xEE,0xA7,0xC5,0xAC,0x47,0x1B,0x47,0x84,0x23, 0x0F,0xD0}}, + /*N0006*/ {{0x3F,0xEB,0x86,0x37,0xBD,0x05,0xAF,0x6C,0x69,0xB6, 0xA6,0x40}}, + /*N0007*/ {{0x3F,0xE7,0xD6,0xBF,0x94,0xD5,0xE5,0x7A,0x42,0xBC, 0x3D,0x33}}, + /*N0008*/ {{0x3F,0xE4,0xAB,0xCC,0x77,0x11,0x84,0x61,0xCE,0xFD, 0xFD,0xC2}}, + /*N0016*/ {{0x3F,0xC9,0xE6,0x95,0x94,0xBE,0xC4,0x4D,0xE1,0x5B, 0x4C,0x2F}}, + /*N0024*/ {{0x3F,0xAF,0x9A,0xBE,0x14,0xCD,0x44,0x75,0x3B,0x53, 0xC4,0x92}}, + /*N0032*/ {{0x3F,0x94,0xCF,0xB1,0x1E,0xAD,0x45,0x39,0x94,0xBA, 0x67,0xDE}}, + /*N0040*/ {{0x3F,0x7A,0x8B,0x61,0x31,0x3B,0xBA,0xBC,0xE2,0xC6, 0x23,0x24}}, + /*N0048*/ {{0x3F,0x5F,0xBB,0x12,0x7C,0x53,0xB1,0x7E,0xC1,0x59, 0x55,0x61}}, + /*N0056*/ {{0x3F,0x44,0xFB,0x15,0x85,0x92,0xBE,0x06,0x8D,0x2F, 0xEE,0xD7}}, + /*N0064*/ {{0x3F,0x2A,0xA8,0x7F,0xEA,0x27,0xA5,0x39,0xE9,0xA5, 0x3F,0x24}}, + /*N0128*/ {{0x3E,0x55,0xDD,0xD0,0x46,0x7C,0x64,0xBC,0xE4,0xA1, 0xAC,0x7D}}, + /*N0192*/ {{0x3D,0x81,0x91,0xFF,0x83,0x77,0x54,0x23,0xCC,0x06, 0x7B,0x63}}, + /*N0256*/ {{0x3C,0xAC,0xC0,0x31,0x43,0x25,0x63,0x7A,0x19,0x3A, 0xFA,0x91}}, + /*N0320*/ {{0x3B,0xD7,0xFD,0x00,0xB8,0x97,0x47,0x82,0x38,0xD1, 0x89,0x21}}, + /*N0384*/ {{0x3B,0x03,0xA6,0x86,0xE3,0xE8,0xB1,0x1B,0x08,0x58, 0x88,0xDC}}, + /*N0448*/ {{0x3A,0x2E,0xDB,0x37,0x75,0x99,0xB6,0x07,0x42,0x45, 0x84,0xC6}}, + /*N0512*/ {{0x39,0x5A,0x90,0x49,0xEE,0x32,0xDB,0x23,0xD2,0x1C, 0x71,0x33}}, + /*N1024*/ {{0x32,0xB5,0xA2,0xA6,0x82,0xA5,0xDA,0x57,0xC0,0xBE, 0x87,0xA6}}, + /*N1536*/ {{0x2C,0x10,0xB7,0x59,0x44,0x9F,0x52,0xA7,0x11,0xB2, 0x68,0xE2}}, + /*N2048*/ {{0x25,0x6B,0xCE,0xAE,0x53,0x4F,0x34,0x36,0x2D,0xE4, 0x49,0x25}}, + /*N2560*/ {{0x1E,0xC6,0xE8,0xFB,0x7D,0xC2,0xDE,0xC0,0xA4,0x04, 0x59,0x8F}}, + /*N3072*/ {{0x18,0x22,0x83,0x50,0xBF,0x3C,0x91,0x57,0x5A,0x88, 0xE7,0x9E}}, + /*N3584*/ {{0x11,0x7D,0x94,0x06,0xAF,0x8F,0x83,0xFD,0x62,0x65, 0x4B,0x4E}}, + /*N4096*/ {{0x0A,0xD8,0xA6,0xDD,0x04,0xC8,0xD2,0xCE,0x9F,0xDE, 0x2D,0xE4}} +}; + +#endif diff --git a/private/fp32/conv/cvt.c b/private/fp32/conv/cvt.c new file mode 100644 index 000000000..5ef9ace36 --- /dev/null +++ b/private/fp32/conv/cvt.c @@ -0,0 +1,739 @@ +/*** +*cvt.c - C floating-point output conversions +* +* Copyright (c) 1987-89, Microsoft Corporation +* +*Purpose: +* contains routines for performing %e, %f, and %g output conversions +* for printf, etc. +* +* routines include _cfltcvt(), _cftoe(), _cftof(), _cftog(), _fassign(), +* _positive(), _cropzeros(), _forcdecpt() +* +*Revision History: +* 04-18-84 RN author +* 01-15-87 BCM corrected processing of %g formats (to handle precision +* as the maximum number of signifcant digits displayed) +* 03-24-87 BCM Evaluation Issues: (fccvt.obj version for ?LIBFA) +* ------------------ +* SDS - no problem +* GD/TS : +* char g_fmt = 0; (local, initialized) +* int g_magnitude =0; (local, initialized) +* char g_round_expansion = 0; (local, initialized) +* STRFLT g_pflt; (local, uninitialized) +* other INIT : +* ALTMATH __fpmath() initialization (perhaps) +* TERM - nothing +* 10-22-87 BCM changes for OS/2 Support Library - +* including elimination of g_... static variables +* in favor of stack-based variables & function arguments +* under MTHREAD switch; changed interfaces to _cfto? routines +* 01-15-88 BCM remove IBMC20 switches; use only memmove, not memcpy; +* use just MTHREAD switch, not SS_NEQ_DGROUP +* 06-13-88 WAJ Fixed %.1g processing for small x +* 08-02-88 WAJ Made changes to _fassign() for new input(). +* 03-09-89 WAJ Added some long double support. +* 06-05-89 WAJ Made changes for C6. LDOUBLE => long double +* 06-12-89 WAJ Renamed this file from cvtn.c to cvt.c +* 11-02-89 WAJ Removed register.h +* 06-28-90 WAJ Removed fars. +* 11-15-90 WAJ Added _cdecl where needed. Also "pascal" => "_pascal". +* 09-12-91 GDP _cdecl=>_CALLTYPE2 _pascal=>_CALLTYPE5 near=>_NEAR +* 04-30-92 GDP Removed floating point code. Instead used S/W routines +* (_atodbl, _atoflt _atoldbl), so that to avoid generation +* of IEEE exceptions from the lib code. +* 03-11-93 JWM Added minimal support for _INTL decimal point - one byte only! +* 07-16-93 SRW ALPHA Merge +* +*******************************************************************************/ + +#include <ctype.h> +#include <string.h> +#include <math.h> +#include <cv.h> +#include <nlsint.h> + +#ifdef i386 +// Uncomment this for enabling 10-byte long double string conversions +// #define LONG_DOUBLE +#endif + + +/* this routine resides in the crt32 tree */ +extern void _fptostr(char *buf, int digits, STRFLT pflt); + + +static void _CALLTYPE5 _shift( char *s, int dist ); + +#ifdef MTHREAD + static char * _cftoe2( char * buf, int ndec, int caps, STRFLT pflt, char g_fmt ); + static char * _cftof2( char * buf, int ndec, STRFLT pflt, char g_fmt ); + +#else /* not MTHREAD */ + static char * _cftoe_g( double * pvalue, char * buf, int ndec, int caps ); + static char * _cftof_g( double * pvalue, char * buf, int ndec ); +#endif /* not MTHREAD */ + +/*** +*_forcdecpt(buffer) - force a decimal point in floating-point output +*Purpose: +* force a decimal point in floating point output. we are only called if '#' +* flag is given and precision is 0; so we know the number has no '.'. insert +* the '.' and move everybody else back one position, until '\0' seen +* +* side effects: futzes around with the buffer, trying to insert a '.' +* after the initial string of digits. the first char can usually be +* skipped since it will be a digit or a '-'. but in the 0-precision case, +* the number could start with 'e' or 'E', so we'd want the '.' before the +* exponent in that case. +* +*Entry: +* buffer = (char *) pointer to buffer to modify +* +*Exit: +* returns : (void) +* +*Exceptions: +*******************************************************************************/ + +void _CALLTYPE2 _forcdecpt( char * buffer ) +{ +char holdchar; +char nextchar; + + if (tolower(*buffer) != 'e'){ + do { + buffer++; + } + while (isdigit(*buffer)); + } + + holdchar = *buffer; + +#ifdef _INTL + *buffer++ = *_decimal_point; +#else + *buffer++ = '.'; +#endif + + do { + nextchar = *buffer; + *buffer = holdchar; + holdchar = nextchar; + } + + while(*buffer++); +} + + +/*** +*_cropzeros(buffer) - removes trailing zeros from floating-point output +*Purpose: +* removes trailing zeros (after the '.') from floating-point output; +* called only when we're doing %g format, there's no '#' flag, and +* precision is non-zero. plays around with the buffer, looking for +* trailing zeros. when we find them, then we move everbody else forward +* so they overlay the zeros. if we eliminate the entire fraction part, +* then we overlay the decimal point ('.'), too. +* +* side effects: changes the buffer from +* [-] digit [digit...] [ . [digits...] [0...] ] [(exponent part)] +* to +* [-] digit [digit...] [ . digit [digits...] ] [(exponent part)] +* or +* [-] digit [digit...] [(exponent part)] +* +*Entry: +* buffer = (char *) pointer to buffer to modify +* +*Exit: +* returns : (void) +* +*Exceptions: +*******************************************************************************/ + +void _CALLTYPE2 _cropzeros( char * buf ) +{ +char *stop; + +#ifdef _INTL + while (*buf && *buf != *_decimal_point) +#else + while (*buf && *buf != '.') +#endif + buf++; + + if (*buf++) { + while (*buf && *buf != 'e' && *buf != 'E') + buf++; + + stop = buf--; + + while (*buf == '0') + buf--; + +#ifdef _INTL + if (*buf == *_decimal_point) +#else + if (*buf == '.') +#endif + buf--; + + while( (*++buf = *stop++) != '\0' ); + } +} + + +int _CALLTYPE2 _positive( double * arg ) +{ + return( (*arg >= 0.0) ); +} + + +void _CALLTYPE2 _fassign( int flag, char * argument, char * number ) +{ + + FLOAT floattemp; + DOUBLE doubletemp; + +#ifdef LONG_DOUBLE + _LDOUBLE longtemp; + + switch( flag ){ + case 2: + _atoldbl(&longtemp, number ); + *(_LDOUBLE UNALIGNED *)argument = longtemp; + break; + + case 1: + _atodbl( &doubletemp, number ); + *(DOUBLE UNALIGNED *)argument = doubletemp; + break; + + default: + _atoflt( &floattemp, number ); + *(FLOAT UNALIGNED *)argument = floattemp; + } + +#else /* not LONG_DOUBLE */ + if (flag) { + + _atodbl( &doubletemp, number ); +#if defined(_ALPHA_) && !defined(_MSC_VER) + /* + * Alpha acc does not support unaligned double yet. + */ + ((long UNALIGNED *)argument)[0] = ((long *)&doubletemp)[0]; + ((long UNALIGNED *)argument)[1] = ((long *)&doubletemp)[1]; +#else + *(DOUBLE UNALIGNED *)argument = doubletemp; +#endif + } else { + _atoflt( &floattemp, number ); +#if defined(_ALPHA_) && !defined(_MSC_VER) + /* + * Alpha acc does not support unaligned float yet. + */ + *((long UNALIGNED *)argument) = *((long *)&floattemp); +#else + *(FLOAT UNALIGNED *)argument = floattemp; +#endif + } +#endif /* not LONG_DOUBLE */ +} + + +#ifndef MTHREAD + static char g_fmt = 0; + static int g_magnitude = 0; + static char g_round_expansion = 0; + static STRFLT g_pflt; +#endif + + +/* + * Function name: _cftoe + * + * Arguments: pvalue - double * pointer + * buf - char * pointer + * ndec - int + * caps - int + * + * Description: _cftoe converts the double pointed to by pvalue to a null + * terminated string of ASCII digits in the c language + * printf %e format, nad returns a pointer to the result. + * This format has the form [-]d.ddde(+/-)ddd, where there + * will be ndec digits following the decimal point. If + * ndec <= 0, no decimal point will appear. The low order + * digit is rounded. If caps is nonzero then the exponent + * will appear as E(+/-)ddd. + * + * Side Effects: the buffer 'buf' is assumed to have a minimum length + * of CVTBUFSIZE (defined in cvt.h) and the routines will + * not write over this size. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1983 + * + * History: + * + */ + +#ifdef MTHREAD + static char * _cftoe2( char * buf, int ndec, int caps, STRFLT pflt, char g_fmt ) +#else + char * _CALLTYPE2 _cftoe( double * pvalue, char * buf, int ndec, int caps ) +#endif +{ +#ifndef MTHREAD + STRFLT pflt; +#endif + +char *p; +int exp; + +#ifdef MTHREAD + int g_magnitude = pflt->decpt - 1; +#endif + + /* first convert the value */ + + /* place the output in the buffer and round. Leave space in the buffer + * for the '-' sign (if any) and the decimal point (if any) + */ + + if (g_fmt) { +#ifndef MTHREAD + pflt = g_pflt; +#endif + /* shift it right one place if nec. for decimal point */ + + p = buf + (pflt->sign == '-'); + _shift(p, (ndec > 0)); + } +#ifndef MTHREAD + else { + pflt = _fltout(*pvalue); + _fptostr(buf + (pflt->sign == '-') + (ndec > 0), ndec + 1, pflt); + } +#endif + + + /* now fix the number up to be in e format */ + + p = buf; + + /* put in negative sign if needed */ + + if (pflt->sign == '-') + *p++ = '-'; + + /* put in decimal point if needed. Copy the first digit to the place + * left for it and put the decimal point in its place + */ + + if (ndec > 0) { + *p = *(p+1); +#ifdef _INTL + *(++p) = *_decimal_point; +#else + *(++p) = '.'; +#endif + } + + /* find the end of the string and attach the exponent field */ + + p = strcpy(p+ndec+(!g_fmt), "e+000"); + + /* adjust exponent indicator according to caps flag and increment + * pointer to point to exponent sign + */ + + if (caps) + *p = 'E'; + + p++; + + /* if mantissa is zero, then the number is 0 and we are done; otherwise + * adjust the exponent sign (if necessary) and value. + */ + + if (*pflt->mantissa != '0') { + + /* check to see if exponent is negative; if so adjust exponent sign and + * exponent value. + */ + + if( (exp = pflt->decpt - 1) < 0 ) { + exp = -exp; + *p = '-'; + } + + p++; + + if (exp >= 100) { + *p += (char)(exp / 100); + exp %= 100; + } + p++; + + if (exp >= 10) { + *p += (char)(exp / 10); + exp %= 10; + } + + *++p += (char)exp; + } + + return(buf); +} + + +#ifdef MTHREAD + +char * _CALLTYPE2 _cftoe( double * pvalue, char * buf, int ndec, int caps ) +{ +struct _strflt retstrflt; +char resstr[21]; +STRFLT pflt = &retstrflt; + + _fltout2(*pvalue, (struct _strflt *)&retstrflt, + (char *)resstr); + _fptostr(buf + (pflt->sign == '-') + (ndec > 0), ndec + 1, pflt); + _cftoe2(buf, ndec, caps, pflt, /* g_fmt = */ 0); + + return( buf ); +} + +#else /* not MTHREAD */ + +static char * _cftoe_g( double * pvalue, char * buf, int ndec, int caps ) +{ + char *res; + g_fmt = 1; + res = _cftoe(pvalue, buf, ndec, caps); + g_fmt = 0; + return (res); +} + +#endif /* not MTHREAD */ + + +#ifdef MTHREAD +static char * _cftof2( char * buf, int ndec, STRFLT pflt, char g_fmt ) + +#else +char * _CALLTYPE2 _cftof( double * pvalue, char * buf, int ndec ) +#endif + +{ +#ifndef MTHREAD +STRFLT pflt; +#endif + +char *p; +char addzero = 0; + +#ifdef MTHREAD +int g_magnitude = pflt->decpt - 1; +#endif + + + /* first convert the value */ + + /* place the output in the users buffer and round. Save space for + * the minus sign now if it will be needed + */ + + if (g_fmt) { +#ifndef MTHREAD + pflt = g_pflt; +#endif + + p = buf + (pflt->sign == '-'); + if (g_magnitude == ndec) { + char *q = p + g_magnitude; + *q++ = '0'; + *q = '\0'; + /* allows for extra place-holding '0' in the exponent == precision + * case of the g format + */ + } + } +#ifndef MTHREAD + else { + pflt = _fltout(*pvalue); + _fptostr(buf+(pflt->sign == '-'), ndec + pflt->decpt, pflt); + } +#endif + + + /* now fix up the number to be in the correct f format */ + + p = buf; + + /* put in negative sign, if necessary */ + + if (pflt->sign == '-') + *p++ = '-'; + + /* insert leading 0 for purely fractional values and position ourselves + * at the correct spot for inserting the decimal point + */ + + if (pflt->decpt <= 0) { + _shift(p, 1); + *p++ = '0'; + } + else + p += pflt->decpt; + + /* put in decimal point if required and any zero padding needed */ + + if (ndec > 0) { + _shift(p, 1); +#ifdef _INTL + *p++ = *_decimal_point; +#else + *p++ = '.'; +#endif + + /* if the value is less than 1 then we may need to put 0's out in + * front of the first non-zero digit of the mantissa + */ + + if (pflt->decpt < 0) { + if( g_fmt ) + ndec = -pflt->decpt; + else + ndec = (ndec < -pflt->decpt ) ? ndec : -pflt->decpt; + _shift(p, ndec); + memset( p, '0', ndec); + } + } + + return( buf); +} + + +/* + * Function name: _cftof + * + * Arguments: value - double * pointer + * buf - char * pointer + * ndec - int + * + * Description: _cftof converts the double pointed to by pvalue to a null + * terminated string of ASCII digits in the c language + * printf %f format, and returns a pointer to the result. + * This format has the form [-]ddddd.ddddd, where there will + * be ndec digits following the decimal point. If ndec <= 0, + * no decimal point will appear. The low order digit is + * rounded. + * + * Side Effects: the buffer 'buf' is assumed to have a minimum length + * of CVTBUFSIZE (defined in cvt.h) and the routines will + * not write over this size. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1983 + * + * History: + * + */ + +#ifdef MTHREAD + +char * _CALLTYPE2 _cftof( double * pvalue, char * buf, int ndec ) +{ + struct _strflt retstrflt; + char resstr[21]; + STRFLT pflt = &retstrflt; + _fltout2(*pvalue, (struct _strflt *) &retstrflt, + (char *) resstr); + _fptostr(buf+(pflt->sign == '-'), ndec + pflt->decpt, pflt); + _cftof2(buf, ndec, pflt, /* g_fmt = */ 0); + + return( buf ); +} + +#else /* not MTHREAD */ + + +static char * _cftof_g( double * pvalue, char * buf, int ndec ) +{ + char *res; + g_fmt = 1; + res = _cftof(pvalue, buf, ndec); + g_fmt = 0; + return (res); +} + +#endif /* not MTHREAD */ + +/* + * Function name: _cftog + * + * Arguments: value - double * pointer + * buf - char * pointer + * ndec - int + * + * Description: _cftog converts the double pointed to by pvalue to a null + * terminated string of ASCII digits in the c language + * printf %g format, and returns a pointer to the result. + * The form used depends on the value converted. The printf + * %e form will be used if the magnitude of valude is less + * than -4 or is greater than ndec, otherwise printf %f will + * be used. ndec always specifies the number of digits + * following the decimal point. The low order digit is + * appropriately rounded. + * + * Side Effects: the buffer 'buf' is assumed to have a minimum length + * of CVTBUFSIZE (defined in cvt.h) and the routines will + * not write over this size. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1983 + * + * History: + * + */ + +char * _CALLTYPE2 _cftog( double * pvalue, char * buf, int ndec, int caps ) +{ +char *p; + +#ifdef MTHREAD +char g_round_expansion = 0; +STRFLT g_pflt; +int g_magnitude; +struct _strflt retstrflt; +char resstr[21]; + + /* first convert the number */ + + g_pflt = &retstrflt; + _fltout2(*pvalue, (struct _strflt *)&retstrflt, + (char *)resstr); + +#else /* not MTHREAD */ + + /* first convert the number */ + + g_pflt = _fltout(*pvalue); +#endif /* not MTHREAD */ + + g_magnitude = g_pflt->decpt - 1; + p = buf + (g_pflt->sign == '-'); + + _fptostr(p, ndec, g_pflt); + g_round_expansion = (char)(g_magnitude < (g_pflt->decpt-1)); + + + /* compute the magnitude of value */ + + g_magnitude = g_pflt->decpt - 1; + + /* convert value to the c language g format */ + + if (g_magnitude < -4 || g_magnitude >= ndec){ /* use e format */ + /* (g_round_expansion ==> + * extra digit will be overwritten by 'e+xxx') + */ + +#ifdef MTHREAD + return(_cftoe2(buf, ndec, caps, g_pflt, /* g_fmt = */ 1)); +#else + return(_cftoe_g(pvalue, buf, ndec, caps)); +#endif + + } + else { /* use f format */ + if (g_round_expansion) { + /* throw away extra final digit from expansion */ + while (*p++); + *(p-2) = '\0'; + } + +#ifdef MTHREAD + return(_cftof2(buf, ndec, g_pflt, /* g_fmt = */ 1)); +#else + return(_cftof_g(pvalue, buf, ndec)); +#endif + + } +} + +/*** +*_cfltcvt(arg, buf, format, precision, caps) - convert floating-point output +*Purpose: +* +*Entry: +* arg = (double *) pointer to double-precision floating-point number +* buf = (char *) pointer to buffer into which to put the converted +* ASCII form of the number +* format = (int) 'e', 'f', or 'g' +* precision = (int) giving number of decimal places for %e and %f formats, +* and giving maximum number of significant digits for +* %g format +* caps = (int) flag indicating whether 'E' in exponent should be capatilized +* (for %E and %G formats only) +* +*Exit: +* returns : (void) +* +*Exceptions: +*******************************************************************************/ +/* + * Function name: _cfltcvt + * + * Arguments: arg - double * pointer + * buf - char * pointer + * format - int + * ndec - int + * caps - int + * + * Description: _cfltcvt determines from the format, what routines to + * call to generate the correct floating point format + * + * Side Effects: none + * + * Author: Dave Weil, Jan 12, 1985 + * + * Copyright: Copyright (C) Microsoft Corp. 1985 + */ + +void _CALLTYPE2 _cfltcvt( double * arg, char * buffer, int format, int precision, int caps ) +{ + if (format == 'e' || format == 'E') + _cftoe(arg, buffer, precision, caps); + else if (format == 'f') + _cftof(arg, buffer, precision); + else + _cftog(arg, buffer, precision, caps); +} + +/*** +*_shift(s, dist) - shift a null-terminated string in memory (internal routine) +*Purpose: +* _shift is a helper routine that shifts a null-terminated string +* in memory, e.g., moves part of a buffer used for floating-point output +* +* modifies memory locations (s+dist) through (s+dist+strlen(s)) +* +*Entry: +* s = (char *) pointer to string to move +* dist = (int) distance to move the string to the right (if negative, to left) +* +*Exit: +* returns : (void) +* +*Exceptions: +*******************************************************************************/ + +static void _CALLTYPE5 _shift( char *s, int dist ) +{ + if( dist ) + memmove(s+dist, s, strlen(s)+1); +} diff --git a/private/fp32/conv/fltinf.c b/private/fp32/conv/fltinf.c new file mode 100644 index 000000000..90736fad5 --- /dev/null +++ b/private/fp32/conv/fltinf.c @@ -0,0 +1,62 @@ +/*** +*fltinf.c - Encode interface for FORTRAN +* +* Copyright (c) 19xx-1992, Microsoft Corporation. All rights reserved. +* +*Purpose: +* FORTRAN interface for decimal to binary (input) conversion +* +*Revision History: +* 06-22-92 GDP Modified version of cfin.c for FORTRAN support +* +*******************************************************************************/ + +#include <string.h> +#include <cv.h> + +static struct _flt ret; +static FLT flt = &ret; + +/* Error codes set by this routine */ +#define CFIN_NODIGITS 512 +#define CFIN_OVERFLOW 128 +#define CFIN_UNDERFLOW 256 +#define CFIN_INVALID 64 + +FLT _CALLTYPE2 _fltinf(const char *str, int len, int scale, int decpt) +{ + _LDBL12 ld12; + DOUBLE x; + const char *EndPtr; + unsigned flags; + int retflags = 0; + + flags = __strgtold12(&ld12, &EndPtr, str, 0, scale, decpt, 1); + if (flags & SLD_NODIGITS) { + retflags |= CFIN_NODIGITS; + *(u_long *)&x = 0; + *((u_long *)&x+1) = 0; + } + else { + INTRNCVT_STATUS intrncvt; + + intrncvt = _ld12tod(&ld12, &x); + + if (flags & SLD_OVERFLOW || + intrncvt == INTRNCVT_OVERFLOW) { + retflags |= CFIN_OVERFLOW; + } + if (flags & SLD_UNDERFLOW || + intrncvt == INTRNCVT_UNDERFLOW) { + retflags |= CFIN_UNDERFLOW; + } + } + + flt->nbytes = EndPtr - str; + if (len != flt->nbytes) + retflags |= CFIN_INVALID; + flt->dval = *(double *)&x; + flt->flags = retflags; + + return flt; +} diff --git a/private/fp32/conv/fpinit.c b/private/fp32/conv/fpinit.c new file mode 100644 index 000000000..75315daa1 --- /dev/null +++ b/private/fp32/conv/fpinit.c @@ -0,0 +1,77 @@ +/*** +*fpinit.c - Initialize floating point +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 09-29-91 GDP merged fpmath.c and fltused.asm to produce this file +* 09-30-91 GDP per thread initialization and termination hooks +* 03-04-92 GDP removed finit instruction +* 11-06-92 GDP added __fastflag for FORTRAN libs +* +*******************************************************************************/ +#include <cv.h> + +int _fltused = 0x9875; +int _ldused = 0x9873; + +int __fastflag = 0; + + +void _cfltcvt_init(void); +void _fpmath(void); +void _fpclear(void); + +void (* _FPinit)(void) = _fpmath; +void (* _FPmtinit)(void) = _fpclear; +void (* _FPmtterm)(void) = _fpclear; + + +void _fpmath() +{ + + // + // There is no need for 'finit' + // since this is done by the OS + // + + _cfltcvt_init(); + return; +} + +void _fpclear() +{ + // + // There is no need for 'finit' + // since this is done by the OS + // + + return; +} + +void _cfltcvt_init() +{ + _cfltcvt_tab[0] = (PFV) _cfltcvt; + _cfltcvt_tab[1] = (PFV) _cropzeros; + _cfltcvt_tab[2] = (PFV) _fassign; + _cfltcvt_tab[3] = (PFV) _forcdecpt; + _cfltcvt_tab[4] = (PFV) _positive; + /* map long double to double */ + _cfltcvt_tab[5] = (PFV) _cfltcvt; + +} + + +/* + * Routine to set the fast flag in order to speed up computation + * of transcendentals at the expense of limiting error checking + */ + +int __setfflag(int new) +{ + int old = __fastflag; + __fastflag = new; + return old; +} diff --git a/private/fp32/conv/intrncvt.c b/private/fp32/conv/intrncvt.c new file mode 100644 index 000000000..4a2acd7a1 --- /dev/null +++ b/private/fp32/conv/intrncvt.c @@ -0,0 +1,714 @@ +/*** +* intrncvt.c - internal floating point conversions +* +* Copyright (c) 1992-1992, Microsoft Corporation. All rights reserved. +* +*Purpose: +* All fp string conversion routines use the same core conversion code +* that converts strings into an internal long double representation +* with an 80-bit mantissa field. The mantissa is represented +* as an array (man) of 32-bit unsigned longs, with man[0] holding +* the high order 32 bits of the mantissa. The binary point is assumed +* to be between the MSB and MSB-1 of man[0]. +* +* Bits are counted as follows: +* +* +* +-- binary point +* | +* v MSB LSB +* ---------------- ------------------ -------------------- +* |0 1 .... 31| | 32 33 ... 63| | 64 65 ... 95| +* ---------------- ------------------ -------------------- +* +* man[0] man[1] man[2] +* +* This file provides the final conversion routines from this internal +* form to the single, double, or long double precision floating point +* format. +* +* All these functions do not handle NaNs (it is not necessary) +* +* +*Revision History: +* 04-29-92 GDP written +* 06-18-92 GDP now ld12told returns INTRNCVT_STATUS +* 06-22-92 GDP use new __strgtold12 interface (FORTRAN support) +* 10-25-92 GDP _atoldbl bug fix (cuda 1345): if the mantissa overflows +* set its MSB to 1) +* +*******************************************************************************/ + + +#include <cv.h> + + +#define INTRNMAN_LEN 3 /* internal mantissa length in int's */ + +// +// internal mantissaa representation +// for string conversion routines +// + +typedef u_long *intrnman; + + +typedef struct { + int max_exp; // maximum base 2 exponent (reserved for special values) + int min_exp; // minimum base 2 exponent (reserved for denormals) + int precision; // bits of precision carried in the mantissa + int exp_width; // number of bits for exponent + int format_width; // format width in bits + int bias; // exponent bias +} FpFormatDescriptor; + + + +static FpFormatDescriptor +DoubleFormat = { + 0x7ff - 0x3ff, // 1024, maximum base 2 exponent (reserved for special values) + 0x0 - 0x3ff, // -1023, minimum base 2 exponent (reserved for denormals) + 53, // bits of precision carried in the mantissa + 11, // number of bits for exponent + 64, // format width in bits + 0x3ff, // exponent bias +}; + +static FpFormatDescriptor +FloatFormat = { + 0xff - 0x7f, // 128, maximum base 2 exponent (reserved for special values) + 0x0 - 0x7f, // -127, minimum base 2 exponent (reserved for denormals) + 24, // bits of precision carried in the mantissa + 8, // number of bits for exponent + 32, // format width in bits + 0x7f, // exponent bias +}; + + + +// +// function prototypes +// + +int _RoundMan (intrnman man, int nbit); +int _ZeroTail (intrnman man, int nbit); +int _IncMan (intrnman man, int nbit); +void _CopyMan (intrnman dest, intrnman src); +void _CopyMan (intrnman dest, intrnman src); +void _FillZeroMan(intrnman man); +void _Shrman (intrnman man, int n); + +INTRNCVT_STATUS _ld12cvt(_LDBL12 *pld12, void *d, FpFormatDescriptor *format); + +/*** +* _ZeroTail - check if a mantissa ends in 0's +* +*Purpose: +* Return TRUE if all mantissa bits after nbit (including nbit) are 0, +* otherwise return FALSE +* +* +*Entry: +* man: mantissa +* nbit: order of bit where the tail begins +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +int _ZeroTail (intrnman man, int nbit) +{ + int nl = nbit / 32; + int nb = 31 - nbit % 32; + + + // + // |<---- tail to be checked ---> + // + // -- ------------------------ ---- + // |... | | ... | + // -- ------------------------ ---- + // ^ ^ ^ + // | | |<----nb-----> + // man nl nbit + // + + + + u_long bitmask = ~(MAX_ULONG << nb); + + if (man[nl] & bitmask) + return 0; + + nl++; + + for (;nl < INTRNMAN_LEN; nl++) + if (man[nl]) + return 0; + + return 1; +} + + + + +/*** +* _IncMan - increment mantissa +* +*Purpose: +* +* +*Entry: +* man: mantissa in internal long form +* nbit: order of bit that specifies the end of the part to be incremented +* +*Exit: +* returns 1 on overflow, 0 otherwise +* +*Exceptions: +* +*******************************************************************************/ + +int _IncMan (intrnman man, int nbit) +{ + int nl = nbit / 32; + int nb = 31 - nbit % 32; + + // + // |<--- part to be incremented -->| + // + // -- --------------------------- ---- + // |... | | ... | + // -- --------------------------- ---- + // ^ ^ ^ + // | | |<--nb--> + // man nl nbit + // + + u_long one = (u_long) 1 << nb; + int carry; + + carry = __addl(man[nl], one, &man[nl]); + + nl--; + + for (; nl >= 0 && carry; nl--) { + carry = (u_long) __addl(man[nl], (u_long) 1, &man[nl]); + } + + return carry; +} + + + + +/*** +* _RoundMan - round mantissa +* +*Purpose: +* round mantissa to nbit precision +* +* +*Entry: +* man: mantissa in internal form +* precision: number of bits to be kept after rounding +* +*Exit: +* returns 1 on overflow, 0 otherwise +* +*Exceptions: +* +*******************************************************************************/ + +int _RoundMan (intrnman man, int precision) +{ + int i,rndbit,nl,nb; + u_long rndmask; + int nbit; + int retval = 0; + + // + // The order of the n'th bit is n-1, since the first bit is bit 0 + // therefore decrement precision to get the order of the last bit + // to be kept + // + nbit = precision - 1; + + rndbit = nbit+1; + + nl = rndbit / 32; + nb = 31 - rndbit % 32; + + // + // Get value of round bit + // + + rndmask = (u_long)1 << nb; + + if ((man[nl] & rndmask) && + !_ZeroTail(man, rndbit+1)) { + + // + // round up + // + + retval = _IncMan(man, nbit); + } + + + // + // fill rest of mantissa with zeroes + // + + man[nl] &= MAX_ULONG << nb; + for(i=nl+1; i<INTRNMAN_LEN; i++) { + man[i] = (u_long)0; + } + + return retval; +} + + +/*** +* _CopyMan - copy mantissa +* +*Purpose: +* copy src to dest +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _CopyMan (intrnman dest, intrnman src) +{ + u_long *p, *q; + int i; + + p = src; + q = dest; + + for (i=0; i < INTRNMAN_LEN; i++) { + *q++ = *p++; + } +} + + + +/*** +* _FillZeroMan - fill mantissa with zeroes +* +*Purpose: +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _FillZeroMan(intrnman man) +{ + int i; + for (i=0; i < INTRNMAN_LEN; i++) + man[i] = (u_long)0; +} + + + +/*** +* _IsZeroMan - check if mantissa is zero +* +*Purpose: +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +int _IsZeroMan(intrnman man) +{ + int i; + for (i=0; i < INTRNMAN_LEN; i++) + if (man[i]) + return 0; + + return 1; +} + + + + + +/*** +* _ShrMan - shift mantissa to the right +* +*Purpose: +* shift man by n bits to the right +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _ShrMan (intrnman man, int n) +{ + int i, n1, n2, mask; + int carry_from_left; + + // + // declare this as volatile in order to work around a C8 + // optimization bug + // + + volatile int carry_to_right; + + n1 = n / 32; + n2 = n % 32; + + mask = ~(MAX_ULONG << n2); + + + // + // first deal with shifts by less than 32 bits + // + + carry_from_left = 0; + for (i=0; i<INTRNMAN_LEN; i++) { + + carry_to_right = man[i] & mask; + + man[i] >>= n2; + + man[i] |= carry_from_left; + + carry_from_left = carry_to_right << (32 - n2); + } + + + // + // now shift whole 32-bit ints + // + + for (i=INTRNMAN_LEN-1; i>=0; i--) { + if (i >= n1) { + man[i] = man[i-n1]; + } + else { + man[i] = 0; + } + } +} + + + + +/*** +* _ld12tocvt - _LDBL12 floating point conversion +* +*Purpose: +* convert a internal _LBL12 structure into an IEEE floating point +* representation +* +* +*Entry: +* pld12: pointer to the _LDBL12 +* format: pointer to the format descriptor structure +* +*Exit: +* *d contains the IEEE representation +* returns the INTRNCVT_STATUS +* +*Exceptions: +* +*******************************************************************************/ +INTRNCVT_STATUS _ld12cvt(_LDBL12 *pld12, void *d, FpFormatDescriptor *format) +{ + u_long man[INTRNMAN_LEN]; + u_long saved_man[INTRNMAN_LEN]; + u_long msw; + unsigned int bexp; // biased exponent + int exp_shift; + int exponent, sign; + INTRNCVT_STATUS retval; + + exponent = (*U_EXP_12(pld12) & 0x7fff) - 0x3fff; // unbias exponent + sign = *U_EXP_12(pld12) & 0x8000; + + + man[0] = *UL_MANHI_12(pld12); + man[1] = *UL_MANLO_12(pld12); + man[2] = *U_XT_12(pld12) << 16; + + + // + // bexp is the final biased value of the exponent to be used + // Each of the following blocks should provide appropriate + // values for man, bexp and retval. The mantissa is also + // shifted to the right, leaving space for the exponent + // and sign to be inserted + // + + if (exponent == 0 - 0x3fff) { + + // either a denormal or zero + bexp = 0; + + if (_IsZeroMan(man)) { + + retval = INTRNCVT_OK; + } + else { + + _FillZeroMan(man); + + // denormal has been flushed to zero + + retval = INTRNCVT_UNDERFLOW; + } + } + else { + + // save mantissa in case it needs to be rounded again + // at a different point (e.g., if the result is a denormal) + + _CopyMan(saved_man, man); + + if (_RoundMan(man, format->precision)) { + exponent ++; + } + + if (exponent < format->min_exp - format->precision ) { + + // + // underflow that produces a zero + // + + _FillZeroMan(man); + bexp = 0; + retval = INTRNCVT_UNDERFLOW; + } + + else if (exponent <= format->min_exp) { + + // + // underflow that produces a denormal + // + // + + // The (unbiased) exponent will be MIN_EXP + // Find out how much the mantissa should be shifted + // One shift is done implicitly by moving the + // binary point one bit to the left, i.e., + // we treat the mantissa as .ddddd instead of d.dddd + // (where d is a binary digit) + + int shift = format->min_exp - exponent; + + // The mantissa should be rounded again, so it + // has to be restored + + _CopyMan(man,saved_man); + + _ShrMan(man, shift); + _RoundMan(man, format->precision); // need not check for carry + + // make room for the exponent + sign + + _ShrMan(man, format->exp_width + 1); + + bexp = 0; + retval = INTRNCVT_UNDERFLOW; + + } + + else if (exponent >= format->max_exp) { + + // + // overflow, return infinity + // + + _FillZeroMan(man); + man[0] |= (1 << 31); // set MSB + + // make room for the exponent + sign + + _ShrMan(man, (format->exp_width + 1) - 1); + + bexp = format->max_exp + format->bias; + + retval = INTRNCVT_OVERFLOW; + } + + else { + + // + // valid, normalized result + // + + bexp = exponent + format->bias; + + + // clear implied bit + + man[0] &= (~( 1 << 31)); + + // + // shift right to make room for exponent + sign + // + + _ShrMan(man, (format->exp_width + 1) - 1); + + retval = INTRNCVT_OK; + + } + } + + + exp_shift = 32 - (format->exp_width + 1); + msw = man[0] | + (bexp << exp_shift) | + (sign ? 1<<31 : 0); + + if (format->format_width == 64) { + + *UL_HI_D(d) = msw; + *UL_LO_D(d) = man[1]; + } + + else if (format->format_width == 32) { + + *(u_long *)d = msw; + + } + + return retval; +} + + +/*** +* _ld12tod - convert _LDBL12 to double +* +*Purpose: +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +INTRNCVT_STATUS _ld12tod(_LDBL12 *pld12, DOUBLE *d) +{ + return _ld12cvt(pld12, d, &DoubleFormat); +} + + + +/*** +* _ld12tof - convert _LDBL12 to float +* +*Purpose: +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +INTRNCVT_STATUS _ld12tof(_LDBL12 *pld12, FLOAT *f) +{ + return _ld12cvt(pld12, f, &FloatFormat); +} + + +/*** +* _ld12told - convert _LDBL12 to 80 bit long double +* +*Purpose: +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +INTRNCVT_STATUS _ld12told(_LDBL12 *pld12, _LDOUBLE *pld) +{ + + // + // This implementation is based on the fact that the _LDBL12 format is + // identical to the long double and has 2 extra bytes of mantissa + // + + u_short exp, sign; + u_long man[INTRNMAN_LEN]; + INTRNCVT_STATUS retval = 0; + + exp = *U_EXP_12(pld12) & (u_short)0x7fff; + sign = *U_EXP_12(pld12) & (u_short)0x8000; + + man[0] = *UL_MANHI_12(pld12); + man[1] = *UL_MANLO_12(pld12); + man[2] = *U_XT_12(pld12) << 16; + + if (_RoundMan(man, 64)) { + // The MSB of the mantissa is explicit and should be 1 + // since we had a carry, the mantissa is now 0. + man[0] = MSB_ULONG; + exp ++; + } + + if (exp == 0x7fff) + retval = INTRNCVT_OVERFLOW; + + *UL_MANHI_LD(pld) = man[0]; + *UL_MANLO_LD(pld) = man[1]; + *U_EXP_LD(pld) = sign | exp; + + return retval; +} + + +void _atodbl(DOUBLE *d, char *str) +{ + const char *EndPtr; + _LDBL12 ld12; + + __strgtold12(&ld12, &EndPtr, str, 0, 0, 0, 0 ); + _ld12tod(&ld12, d); +} + + +void _atoldbl(_LDOUBLE *ld, char *str) +{ + const char *EndPtr; + _LDBL12 ld12; + + __strgtold12(&ld12, &EndPtr, str, 1, 0, 0, 0 ); + _ld12told(&ld12, ld); +} + + +void _atoflt(FLOAT *f, char *str) +{ + const char *EndPtr; + _LDBL12 ld12; + + __strgtold12(&ld12, &EndPtr, str, 0, 0, 0, 0 ); + _ld12tof(&ld12, f); +} diff --git a/private/fp32/conv/makefile b/private/fp32/conv/makefile new file mode 100644 index 000000000..6ee4f43fa --- /dev/null +++ b/private/fp32/conv/makefile @@ -0,0 +1,6 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the components of NT OS/2 +# +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/private/fp32/conv/mantold.c b/private/fp32/conv/mantold.c new file mode 100644 index 000000000..2fe0c26cb --- /dev/null +++ b/private/fp32/conv/mantold.c @@ -0,0 +1,182 @@ +/*** +*mantold.c - conversion of a decimal mantissa to _LDBL12 +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Conversion of a decimal mantissa into _LDBL12 format (i.e. long +* double with two additional bytes of significand) +* +*Revision History: +* 7-17-91 GDP Initial version (ported from assembly) +* +*******************************************************************************/ + +#include <cv.h> + + + + + +/*** +*int _CALLTYPE5 __addl(u_long x, u_long y, u_long *sum) - u_long addition +* +*Purpose: add two u_long numbers and return carry +* +*Entry: u_long x, u_long y : the numbers to be added +* u_long *sum : where to store the result +* +*Exit: *sum receives the value of x+y +* the value of the carry is returned +* +*Exceptions: +* +*******************************************************************************/ + +int _CALLTYPE5 __addl(u_long x, u_long y, u_long *sum) +{ + u_long r; + int carry=0; + r = x+y; + if (r < x || r < y) + carry++; + *sum = r; + return carry; +} + + + + + + +/*** +*void _CALLTYPE5 __add_12(_LDBL12 *x, _LDBL12 *y) - _LDBL12 addition +* +*Purpose: add two _LDBL12 numbers. The numbers are added +* as 12-byte integers. Overflow is ignored. +* +*Entry: x,y: pointers to the operands +* +*Exit: *x receives the sum +* +*Exceptions: +* +*******************************************************************************/ + +void _CALLTYPE5 __add_12(_LDBL12 *x, _LDBL12 *y) +{ + int c0,c1,c2; + c0 = __addl(*UL_LO_12(x),*UL_LO_12(y),UL_LO_12(x)); + if (c0) { + c1 = __addl(*UL_MED_12(x),(u_long)1,UL_MED_12(x)); + if (c1) { + (*UL_HI_12(x))++; + } + } + c2 = __addl(*UL_MED_12(x),*UL_MED_12(y),UL_MED_12(x)); + if (c2) { + (*UL_HI_12(x))++; + } + /* ignore next carry -- assume no overflow will occur */ + (void) __addl(*UL_HI_12(x),*UL_HI_12(y),UL_HI_12(x)); +} + + + + + +/*** +*void _CALLTYPE5 __shl_12(_LDBL12 *x) - _LDBL12 shift left +*void _CALLTYPE5 __shr_12(_LDBL12 *x) - _LDBL12 shift right +* +*Purpose: Shift a _LDBL12 number one bit to the left (right). The number +* is shifted as a 12-byte integer. The MSB is lost. +* +*Entry: x: a pointer to the operand +* +*Exit: *x is shifted one bit to the left (or right) +* +*Exceptions: +* +*******************************************************************************/ + +void _CALLTYPE5 __shl_12(_LDBL12 *p) +{ + u_long c0,c1; + + c0 = *UL_LO_12(p) & MSB_ULONG ? 1: 0; + c1 = *UL_MED_12(p) & MSB_ULONG ? 1: 0; + *UL_LO_12(p) <<= 1; + *UL_MED_12(p) = *UL_MED_12(p)<<1 | c0; + *UL_HI_12(p) = *UL_HI_12(p)<<1 | c1; +} + +void _CALLTYPE5 __shr_12(_LDBL12 *p) +{ + u_long c2,c1; + c2 = *UL_HI_12(p) & 0x1 ? MSB_ULONG: 0; + c1 = *UL_MED_12(p) & 0x1 ? MSB_ULONG: 0; + *UL_HI_12(p) >>= 1; + *UL_MED_12(p) = *UL_MED_12(p)>>1 | c2; + *UL_LO_12(p) = *UL_LO_12(p)>>1 | c1; +} + + + + + + +/*** +*void _CALLTYPE5 __mtold12(char *manptr,unsigned manlen,_LDBL12 *ld12) - +* convert a mantissa into a _LDBL12 +* +*Purpose: convert a mantissa into a _LDBL12. The mantissa is +* in the form of an array of manlen BCD digits and is +* considered to be an integer. +* +*Entry: manptr: the array containing the packed BCD digits of the mantissa +* manlen: the size of the array +* ld12: a pointer to the long double where the result will be stored +* +*Exit: +* ld12 gets the result of the conversion +* +*Exceptions: +* +*******************************************************************************/ + +void _CALLTYPE5 __mtold12(char *manptr, + unsigned manlen, + _LDBL12 *ld12) +{ + _LDBL12 tmp; + u_short expn = LD_BIASM1+80; + + *UL_LO_12(ld12) = 0; + *UL_MED_12(ld12) = 0; + *UL_HI_12(ld12) = 0; + for (;manlen>0;manlen--,manptr++){ + tmp = *ld12; + __shl_12(ld12); + __shl_12(ld12); + __add_12(ld12,&tmp); + __shl_12(ld12); /* multiply by 10 */ + *UL_LO_12(&tmp) = (u_long)*manptr; + *UL_MED_12(&tmp) = 0; + *UL_HI_12(&tmp) = 0; + __add_12(ld12,&tmp); + } + + /* normalize mantissa -- first shift word by word */ + while (*UL_HI_12(ld12) == 0) { + *UL_HI_12(ld12) = *UL_MED_12(ld12) >> 16; + *UL_MED_12(ld12) = *UL_MED_12(ld12) << 16 | *UL_LO_12(ld12) >> 16; + (*UL_LO_12(ld12)) <<= 16; + expn -= 16; + } + while ((*UL_HI_12(ld12) & 0x8000) == 0) { + __shl_12(ld12); + expn--; + } + *U_EXP_12(ld12) = expn; +} diff --git a/private/fp32/conv/nmake.mak b/private/fp32/conv/nmake.mak new file mode 100644 index 000000000..29116e7eb --- /dev/null +++ b/private/fp32/conv/nmake.mak @@ -0,0 +1,42 @@ +#### +#nmake.mak - makefile +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# Build 32bit NT i386 libs in OMF format +# +#Revision History: +# 8-21-90 GDP +# 3-04-92 GDP Drop support for multiple source/target OS's & CPU's +# +################################################################################ +!include ..\def.mak + + + + +OBJS = \ + $(OBJDIR)\strgtold.obj \ + $(OBJDIR)\mantold.obj \ + $(OBJDIR)\tenpow.obj \ + $(OBJDIR)\constpow.obj \ + $(OBJDIR)\ldtod.obj \ + $(OBJDIR)\x10fout.obj \ + $(OBJDIR)\cvt.obj \ + $(OBJDIR)\cfout.obj \ + $(OBJDIR)\cfin.obj \ + $(OBJDIR)\fpinit.obj \ + \ + $(OBJDIR)\atold.obj + + + +$(LIBDIR)\conv$(TARGETNAMESUFFIX).lib: $(OBJS) + if exist $@ erase $@ + $(LIBEXE) @<< +$@ +y +$(OBJS) +$(LIBDIR)\conv$(TARGETNAMESUFFIX).map; +<< diff --git a/private/fp32/conv/sources b/private/fp32/conv/sources new file mode 100644 index 000000000..1efb86cc8 --- /dev/null +++ b/private/fp32/conv/sources @@ -0,0 +1,39 @@ +#### +#sources - +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# Specify components for 'build' +# +#Revision History: +# 9-26-91 GDP written +# 9-28-91 GDP removed fpmath.c fltused.c; added fpinit.c +# 4-30-92 GDP replace ldtod.c with intrncvt.c +# +################################################################################ + +!INCLUDE ..\fp32.def + +MAJORCOMP=fp +MINORCOMP=conv + +TARGETNAME=conv$(TARGETNAMESUFFIX) +TARGETPATH=..\obj +386_STDCALL=0 +# TARGETTYPE is defined in ..\fp32.def + +INCLUDES=..\include;..\..\crt32\h + +SOURCES=strgtold.c \ + mantold.c \ + tenpow.c \ + constpow.c \ + intrncvt.c \ + x10fout.c \ + cvt.c \ + cfout.c \ + cfin.c \ + fpinit.c \ + fltinf.c + diff --git a/private/fp32/conv/strgtold.c b/private/fp32/conv/strgtold.c new file mode 100644 index 000000000..50a707e7f --- /dev/null +++ b/private/fp32/conv/strgtold.c @@ -0,0 +1,526 @@ +/*** +*strgtold.c - conversion of a string into a long double +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: convert a fp constant into a 10 byte long double (IEEE format) +* +*Revision History: +* 7-17-91 GDP Initial version (ported from assembly) +* 4-03-92 GDP Preserve sign of -0 +* 4-30-92 GDP Now returns _LDBL12 instead of _LDOUBLE +* 6-17-92 GDP Added __strgtold entry point again (68k code uses it) +* 6-22-92 GDP Use scale, decpt and implicit_E for FORTRAN support +* 11-06-92 GDP Made char-to-int conversions usnigned for 'isdigit' +* 3-11-93 JWM Added minimal support for _INTL decimal point - one byte only! +* 7-01-93 GJF Made buf[] a local array, rather than static array of +* local scope (static is evil in multi-thread!). +* +*******************************************************************************/ + +#include <ctype.h> /* for 'isdigit' macro */ +#include <cv.h> +#include <nlsint.h> + +/* local macros */ +#define ISNZDIGIT(x) ((x)>='1' && (x)<='9' ) +#define ISWHITE(x) ((x)==' ' || (x)=='\t' || (x)=='\n' || (x)=='\r' ) + + +/**** +*unsigned int __strgtold12( _LDBL12 *pld12, +* char * * pEndPtr, +* char * str, +* int Mult12, +* int scale, +* int decpt, +* int implicit_E) +* +*Purpose: +* converts a character string into a 12byte long double (_LDBL12) +* This has the same format as a 10byte long double plus two extra +* bytes for the mantissa +* +*Entry: +* pld12 - pointer to the _LDBL12 where the result should go. +* pEndStr - pointer to a far pointer that will be set to the end of string. +* str - pointer to the string to be converted. +* Mult12 - set to non zero if the _LDBL12 multiply should be used instead of +* the long double mulitiply. +* scale - FORTRAN scale factor (0 for C) +* decpt - FORTRAN decimal point factor (0 for C) +* implicit_E - if true, E, e, D, d can be implied (FORTRAN syntax) +* +*Exit: +* Returns the SLD_* flags or'ed together. +* +*Uses: +* +*Exceptions: +* +********************************************************************************/ + +unsigned int +__strgtold12(_LDBL12 *pld12, + const char * *p_end_ptr, + const char * str, + int mult12, + int scale, + int decpt, + int implicit_E) +{ + typedef enum { + S_INIT, /* initial state */ + S_EAT0L, /* eat 0's at the left of mantissa */ + S_SIGNM, /* just read sign of mantissa */ + S_GETL, /* get integer part of mantissa */ + S_GETR, /* get decimal part of mantissa */ + S_POINT, /* just found decimal point */ + S_E, /* just found 'E', or 'e', etc */ + S_SIGNE, /* just read sign of exponent */ + S_EAT0E, /* eat 0's at the left of exponent */ + S_GETE, /* get exponent */ + S_END, /* final state */ + S_E_IMPLICIT /* check for implicit exponent */ + } state_t; + + /* this will accomodate the digits of the mantissa in BCD form*/ + char buf[LD_MAX_MAN_LEN1]; + char *manp = buf; + + /* a temporary _LDBL12 */ + _LDBL12 tmpld12; + + u_short man_sign = 0; /* to be ORed with result */ + int exp_sign = 1; /* default sign of exponent (values: +1 or -1)*/ + /* number of decimal significant mantissa digits so far*/ + unsigned manlen = 0; + int found_digit = 0; + int found_decpoint = 0; + int found_exponent = 0; + int overflow = 0; + int underflow = 0; + int pow = 0; + int exp_adj = 0; /* exponent adjustment */ + u_long ul0,ul1; + u_short u,uexp; + + unsigned int result_flags = 0; + + state_t state = S_INIT; + + char c; /* the current input symbol */ + const char *p; /* a pointer to the next input symbol */ + const char *savedp; + + for(savedp=p=str;ISWHITE(*p);p++); /* eat up white space */ + + while (state != S_END) { + c = *p++; + switch (state) { + case S_INIT: + if (ISNZDIGIT(c)) { + state = S_GETL; + p--; + } +#ifdef _INTL + else if (c == *_decimal_point) + state = S_POINT; +#endif + else + switch (c) { + case '0': + state = S_EAT0L; + break; + case '+': + state = S_SIGNM; + man_sign = 0x0000; + break; + case '-': + state = S_SIGNM; + man_sign = 0x8000; + break; +#ifndef _INTL + case '.': + state = S_POINT; + break; +#endif + default: + state = S_END; + p--; + break; + } + break; + case S_EAT0L: + found_digit = 1; + if (ISNZDIGIT(c)) { + state = S_GETL; + p--; + } +#ifdef _INTL + else if (c == *_decimal_point) + state = S_GETR; +#endif + else + switch (c) { + case '0': + state = S_EAT0L; + break; + case 'E': + case 'e': + case 'D': + case 'd': + state = S_E; + break; + case '+': + case '-': + p--; + state = S_E_IMPLICIT; + break; +#ifndef _INTL + case '.': + state = S_GETR; + break; +#endif + default: + state = S_END; + p--; + } + break; + case S_SIGNM: + if (ISNZDIGIT(c)) { + state = S_GETL; + p--; + } +#ifdef _INTL + else if (c == *_decimal_point) + state = S_POINT; +#endif + else + switch (c) { + case '0': + state = S_EAT0L; + break; +#ifndef _INTL + case '.': + state = S_POINT; + break; +#endif + default: + state = S_END; + p = savedp; + } + break; + case S_GETL: + found_digit = 1; + for (;isdigit((int)(unsigned char)c);c=*p++) { + if (manlen < LD_MAX_MAN_LEN+1){ + manlen++; + *manp++ = c - (char)'0'; + } + else + exp_adj++; + } +#ifdef _INTL + if (c == *_decimal_point) + state = S_GETR; + else +#endif + switch (c) { +#ifndef _INTL + case '.': + state = S_GETR; + break; +#endif + case 'E': + case 'e': + case 'D': + case 'd': + state = S_E; + break; + case '+': + case '-': + p--; + state = S_E_IMPLICIT; + break; + default: + state = S_END; + p--; + } + break; + case S_GETR: + found_digit = 1; + found_decpoint = 1; + if (manlen == 0) + for (;c=='0';c=*p++) + exp_adj--; + for(;isdigit((int)(unsigned char)c);c=*p++){ + if (manlen < LD_MAX_MAN_LEN+1){ + manlen++; + *manp++ = c - (char)'0'; + exp_adj--; + } + } + switch (c){ + case 'E': + case 'e': + case 'D': + case 'd': + state = S_E; + break; + case '+': + case '-': + p--; + state = S_E_IMPLICIT; + break; + default: + state = S_END; + p--; + } + break; + case S_POINT: + found_decpoint = 1; + if (isdigit((int)(unsigned char)c)){ + state = S_GETR; + p--; + } + else{ + state = S_END; + p = savedp; + } + break; + case S_E: + savedp = p-2; /* savedp points to 'E' */ + if (ISNZDIGIT(c)){ + state = S_GETE; + p--; + } + else + switch (c){ + case '0': + state = S_EAT0E; + break; + case '-': + state = S_SIGNE; + exp_sign = -1; + break; + case '+': + state = S_SIGNE; + break; + default: + state = S_END; + p = savedp; + } + break; + case S_EAT0E: + found_exponent = 1; + for(;c=='0';c=*p++); + if (ISNZDIGIT(c)){ + state = S_GETE; + p--; + } + else { + state = S_END; + p--; + } + break; + case S_SIGNE: + if (ISNZDIGIT(c)){ + state = S_GETE; + p--; + } + else + switch (c){ + case '0': + state = S_EAT0E; + break; + default: + state = S_END; + p = savedp; + } + break; + case S_GETE: + found_exponent = 1; + { + long longpow=0; /* TMAX10*10 should fit in a long */ + for(;isdigit((int)(unsigned char)c);c=*p++){ + longpow = longpow*10 + (c - '0'); + if (longpow > TMAX10){ + longpow = TMAX10+1; /* will force overflow */ + break; + } + } + pow = (int)longpow; + } + for(;isdigit((int)(unsigned char)c);c=*p++); /* eat up remaining digits */ + state = S_END; + p--; + break; + case S_E_IMPLICIT: + if (implicit_E) { + savedp = p-1; /* savedp points to whatever precedes sign */ + switch (c){ + case '-': + state = S_SIGNE; + exp_sign = -1; + break; + case '+': + state = S_SIGNE; + break; + default: + state = S_END; + p = savedp; + } + } + else { + state = S_END; + p--; + } + break; + } /* switch */ + } /* while */ + + *p_end_ptr = p; /* set end pointer */ + + /* + * Compute result + */ + + if (found_digit && !overflow && !underflow) { + if (manlen>LD_MAX_MAN_LEN){ + if (buf[LD_MAX_MAN_LEN-1]>=5) { + /* + * Round mantissa to MAX_MAN_LEN digits + * It's ok to round 9 to 0ah + */ + buf[LD_MAX_MAN_LEN-1]++; + } + manlen = LD_MAX_MAN_LEN; + manp--; + exp_adj++; + } + if (manlen>0) { + /* + * Remove trailing zero's from mantissa + */ + for(manp--;*manp==0;manp--) { + /* there is at least one non-zero digit */ + manlen--; + exp_adj++; + } + __mtold12(buf,manlen,&tmpld12); + + if (exp_sign < 0) + pow = -pow; + pow += exp_adj; + + /* new code for FORTRAN support */ + if (!found_exponent) { + pow += scale; + } + if (!found_decpoint) { + pow -= decpt; + } + + + if (pow > TMAX10) + overflow = 1; + else if (pow < TMIN10) + underflow = 1; + else { + __multtenpow12(&tmpld12,pow,mult12); + + u = *U_XT_12(&tmpld12); + ul0 =*UL_MANLO_12(&tmpld12); + ul1 = *UL_MANHI_12(&tmpld12); + uexp = *U_EXP_12(&tmpld12); + + } + } + else { + /* manlen == 0, so return 0 */ + u = (u_short)0; + ul0 = ul1 = uexp = 0; + } + } + + if (!found_digit) { + /* return 0 */ + u = (u_short)0; + ul0 = ul1 = uexp = 0; + result_flags |= SLD_NODIGITS; + } + else if (overflow) { + /* return +inf or -inf */ + uexp = (u_short)0x7fff; + ul1 = 0x80000000; + ul0 = 0; + u = (u_short)0; + result_flags |= SLD_OVERFLOW; + } + else if (underflow) { + /* return 0 */ + u = (u_short)0; + ul0 = ul1 = uexp = 0; + result_flags |= SLD_UNDERFLOW; + } + + /* + * Assemble result + */ + + *U_XT_12(pld12) = u; + *UL_MANLO_12(pld12) = ul0; + *UL_MANHI_12(pld12) = ul1; + *U_EXP_12(pld12) = uexp | man_sign; + + return result_flags; +} + + + +/**** +*unsigned int _CALLTYPE5 __stringtold( LDOUBLE *pLd, +* char * * pEndPtr, +* char * str, +* int Mult12 ) +* +*Purpose: +* converts a character string into a long double +* +*Entry: +* pLD - pointer to the long double where the result should go. +* pEndStr - pointer to a pointer that will be set to the end of string. +* str - pointer to the string to be converted. +* Mult12 - set to non zero if the _LDBL12 multiply should be used instead of +* the long double mulitiply. +* +*Exit: +* Returns the SLD_* flags or'ed together. +* +*Uses: +* +*Exceptions: +* +********************************************************************************/ + +unsigned int _CALLTYPE5 +__STRINGTOLD(_LDOUBLE *pld, + const char * *p_end_ptr, + const char *str, + int mult12) +{ + unsigned int retflags; + INTRNCVT_STATUS intrncvt; + _LDBL12 ld12; + + retflags = __strgtold12(&ld12, p_end_ptr, str, mult12, 0, 0, 0); + + intrncvt = _ld12told(&ld12, pld); + + if (intrncvt == INTRNCVT_OVERFLOW) { + retflags |= SLD_OVERFLOW; + } + + return retflags; +} diff --git a/private/fp32/conv/tenpow.c b/private/fp32/conv/tenpow.c new file mode 100644 index 000000000..4384a1451 --- /dev/null +++ b/private/fp32/conv/tenpow.c @@ -0,0 +1,240 @@ +/*** +*tenpow.c - multiply a _LDBL12 by a power of 10 +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 07-17-91 GDP Initial version (ported from assembly) +* 07-16-93 SRW ALPHA Merge +* 10-02-94 BWT PPC changes +* +*******************************************************************************/ + + +#include <cv.h> + +extern _LDBL12 _pow10pos[]; +extern _LDBL12 _pow10neg[]; + + + + +/*** +*void _CALLTYPE5 __ld12mul(_LDBL12 *px, _LDBL12 *py) - +* _LDBL12 multiplication +* +*Purpose: multiply two _LDBL12 numbers +* +*Entry: px,py: pointers to the _LDBL12 operands +* +*Exit: *px contains the product +* +*Exceptions: +* +*******************************************************************************/ + +void _CALLTYPE5 __ld12mul(_LDBL12 *px, _LDBL12 *py) +{ + u_short sign = 0; + u_short sticky_bits = 0; + _LDBL12 tempman; /*this is actually a 12-byte mantissa, + not a 12-byte long double */ + int i; + u_short expx, expy, expsum; + int roffs,poffs,qoffs; + int sticky; + + *UL_LO_12(&tempman) = 0; + *UL_MED_12(&tempman) = 0; + *UL_HI_12(&tempman) = 0; + + expx = *U_EXP_12(px); + expy = *U_EXP_12(py); + + sign = (expx ^ expy) & (u_short)0x8000; + expx &= 0x7fff; + expy &= 0x7fff; + expsum = expx+expy; + if (expx >= LD_MAXEXP + || expy >= LD_MAXEXP + || expsum > LD_MAXEXP+ LD_BIASM1){ + /* overflow to infinity */ + PUT_INF_12(px,sign); + return; + } + if (expsum <= LD_BIASM1-63) { + /* underflow to zero */ + PUT_ZERO_12(px); + return; + } + if (expx == 0) { + /* + * If this is a denormal temp real then the mantissa + * was shifted right once to set bit 63 to zero. + */ + expsum++; /* Correct for this */ + if (ISZERO_12(px)) { + /* put positive sign */ + *U_EXP_12(px) = 0; + return; + } + } + if (expy == 0) { + expsum++; /* because arg2 is denormal */ + if (ISZERO_12(py)) { + PUT_ZERO_12(px); + return; + } + } + + roffs = 0; + for (i=0;i<5;i++) { + int j; + poffs = i<<1; + qoffs = 8; + for (j=5-i;j>0;j--) { + u_long prod; +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) + /* a variable to hold temprary sums */ + u_long sum; +#endif + int carry; + u_short *p, *q; + u_long *r; + p = USHORT_12(px,poffs); + q = USHORT_12(py,qoffs); + r = ULONG_12(&tempman,roffs); + prod = (u_long)*p * (u_long)*q; +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) + /* handle misalignment problems */ + if (i&0x1){ /* i is odd */ + carry = __addl(*ALIGN(r), prod, &sum); + *ALIGN(r) = sum; + } + else /* i is even */ + carry = __addl(*r, prod, r); +#else + carry = __addl(*r,prod,r); +#endif + if (carry) { + /* roffs should be less than 8 in this case */ + (*USHORT_12(&tempman,roffs+4))++; + } + poffs+=2; + qoffs-=2; + } + roffs+=2; + } + + expsum -= LD_BIASM1; + + /* normalize */ + while ((s_short)expsum > 0 && + ((*UL_HI_12(&tempman) & MSB_ULONG) == 0)) { + __shl_12(&tempman); + expsum--; + } + + if ((s_short)expsum <= 0) { + expsum--; + while ((s_short)expsum < 0) { + if (*U_XT_12(&tempman) & 0x1) + sticky++; + __shr_12(&tempman); + expsum++; + } + if (sticky) + *U_XT_12(&tempman) |= 0x1; + } + + if (*U_XT_12(&tempman) > 0x8000 || + (*UL_LO_12(&tempman) & 0x1ffff == 0x18000)) { + /* round up */ + if (*UL_MANLO_12(&tempman) == MAX_ULONG) { + *UL_MANLO_12(&tempman) = 0; + if (*UL_MANHI_12(&tempman) == MAX_ULONG) { + *UL_MANHI_12(&tempman) = 0; + if (*U_EXP_12(&tempman) == MAX_USHORT) { + /* 12-byte mantissa overflow */ + *U_EXP_12(&tempman) = MSB_USHORT; + expsum++; + } + else + (*U_EXP_12(&tempman))++; + } + else + (*UL_MANHI_12(&tempman))++; + } + else + (*UL_MANLO_12(&tempman))++; + } + + + /* check for exponent overflow */ + if (expsum >= 0x7fff){ + PUT_INF_12(px, sign); + return; + } + + /* put result in px */ + *U_XT_12(px) = *USHORT_12(&tempman,2); + *UL_MANLO_12(px) = *UL_MED_12(&tempman); + *UL_MANHI_12(px) = *UL_HI_12(&tempman); + *U_EXP_12(px) = expsum | sign; +} + + + +void _CALLTYPE5 +__multtenpow12(_LDBL12 *pld12, int pow, unsigned mult12) +{ + _LDBL12 *pow_10p = _pow10pos-8; + if (pow == 0) + return; + if (pow < 0) { + pow = -pow; + pow_10p = _pow10neg-8; + } + + if (!mult12) + *U_XT_12(pld12) = 0; + + + while (pow) { + int last3; /* the 3 LSBits of pow */ + _LDBL12 unround; + _LDBL12 *py; + + pow_10p += 7; + last3 = pow & 0x7; + pow >>= 3; + if (last3 == 0) + continue; + py = pow_10p + last3; + +#ifdef _LDSUPPORT + if (mult12) { +#endif + /* do an exact 12byte multiplication */ + if (*U_XT_12(py) >= 0x8000) { + /* copy number */ + unround = *py; + /* unround adjacent byte */ + (*UL_MANLO_12(&unround))--; + /* point to new operand */ + py = &unround; + } + __ld12mul(pld12,py); +#ifdef _LDSUPPORT + } + else { + /* do a 10byte multiplication */ + py = (_LDBL12 *)TEN_BYTE_PART(py); + *(long double *)TEN_BYTE_PART(pld12) *= + *(long double *)py; + } +#endif + } +} diff --git a/private/fp32/conv/x10fout.c b/private/fp32/conv/x10fout.c new file mode 100644 index 000000000..0bb171acc --- /dev/null +++ b/private/fp32/conv/x10fout.c @@ -0,0 +1,347 @@ +/*** +*x10fout.c - floating point output for 10-byte long double +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Support conversion of a long double into a string +* +*Revision History: +* 07/15/91 GDP Initial version in C (ported from assembly) +* 01/23/92 GDP Support MIPS encoding for NaN +* +*******************************************************************************/ + +#include <string.h> +#include <cv.h> + +#define STRCPY strcpy + +#define PUT_ZERO_FOS(fos) \ + fos->exp = 0, \ + fos->sign = ' ', \ + fos->ManLen = 1, \ + fos->man[0] = '0',\ + fos->man[1] = 0; + +#define SNAN_STR "1#SNAN" +#define SNAN_STR_LEN 6 +#define QNAN_STR "1#QNAN" +#define QNAN_STR_LEN 6 +#define INF_STR "1#INF" +#define INF_STR_LEN 5 +#define IND_STR "1#IND" +#define IND_STR_LEN 5 + + +/*** +*int _CALLTYPE5 +* _$i10_output(_LDOUBLE ld, +* int ndigits, +* unsigned output_flags, +* FOS *fos) - output conversion of a 10-byte _LDOUBLE +* +*Purpose: +* Fill in a FOS structure for a given _LDOUBLE +* +*Entry: +* _LDOUBLE ld: The long double to be converted into a string +* int ndigits: number of digits allowed in the output format. +* unsigned output_flags: The following flags can be used: +* SO_FFORMAT: Indicates 'f' format +* (default is 'e' format) +* FOS *fos: the structure that i10_output will fill in +* +*Exit: +* modifies *fos +* return 1 if original number was ok, 0 otherwise (infinity, NaN, etc) +* +*Exceptions: +* +*******************************************************************************/ + + +int _CALLTYPE5 $I10_OUTPUT(_LDOUBLE ld, int ndigits, + unsigned output_flags, FOS *fos) +{ + u_short expn; + u_long manhi,manlo; + u_short sign; + + /* useful constants (see algorithm explanation below) */ + u_short const log2hi = 0x4d10; + u_short const log2lo = 0x4d; + u_short const log4hi = 0x9a; + u_long const c = 0x134312f4; +#if defined(L_END) + _LDBL12 ld12_one_tenth = { + {0xcc,0xcc,0xcc,0xcc,0xcc,0xcc, + 0xcc,0xcc,0xcc,0xcc,0xfb,0x3f} + }; +#elif defined(B_END) + _LDBL12 ld12_one_tenth = { + {0x3f,0xfb,0xcc,0xcc,0xcc,0xcc, + 0xcc,0xcc,0xcc,0xcc,0xcc,0xcc} + }; +#endif + + _LDBL12 ld12; /* space for a 12-byte long double */ + _LDBL12 tmp12; + u_short hh,ll; /* the bytes of the exponent grouped in 2 words*/ + u_short mm; /* the two MSBytes of the mantissa */ + s_long r; /* the corresponding power of 10 */ + s_short ir; /* ir = floor(r) */ + int retval = 1; /* assume valid number */ + char round; /* an additional character at the end of the string */ + char *p; + int i; + int ub_exp; + int digcount; + + /* grab the components of the long double */ + expn = *U_EXP_LD(&ld); + manhi = *UL_MANHI_LD(&ld); + manlo = *UL_MANLO_LD(&ld); + sign = expn & MSB_USHORT; + expn &= 0x7fff; + + if (sign) + fos->sign = '-'; + else + fos->sign = ' '; + + if (expn==0 && manhi==0 && manlo==0) { + PUT_ZERO_FOS(fos); + return 1; + } + + if (expn == 0x7fff) { + fos->exp = 1; /* set a positive exponent for proper output */ + + /* check for special cases */ + if (_IS_MAN_SNAN(sign, manhi, manlo)) { + /* signaling NAN */ + STRCPY(fos->man,SNAN_STR); + fos->ManLen = SNAN_STR_LEN; + retval = 0; + } + else if (_IS_MAN_IND(sign, manhi, manlo)) { + /* indefinite */ + STRCPY(fos->man,IND_STR); + fos->ManLen = IND_STR_LEN; + retval = 0; + } + else if (_IS_MAN_INF(sign, manhi, manlo)) { + /* infinity */ + STRCPY(fos->man,INF_STR); + fos->ManLen = INF_STR_LEN; + retval = 0; + } + else { + /* quiet NAN */ + STRCPY(fos->man,QNAN_STR); + fos->ManLen = QNAN_STR_LEN; + retval = 0; + } + } + else { + /* + * Algorithm for the decoding of a valid real number x + * + * In the following INT(r) is the largest integer less than or + * equal to r (i.e. r rounded toward -infinity). We want a result + * r equal to 1 + log(x), because then x = mantissa + * * 10^(INT(r)) so that .1 <= mantissa < 1. Unfortunately, + * we cannot compute s exactly so we must alter the procedure + * slightly. We will instead compute an estimate r of 1 + + * log(x) which is always low. This will either result + * in the correctly normalized number on the top of the stack + * or perhaps a number which is a factor of 10 too large. We + * will then check to see that if x is larger than one + * and if so multiply x by 1/10. + * + * We will use a low precision (fixed point 24 bit) estimate + * of of 1 + log base 10 of x. We have approximately .mm + * * 2^hhll on the top of the stack where m, h, and l represent + * hex digits, mm represents the high 2 hex digits of the + * mantissa, hh represents the high 2 hex digits of the exponent, + * and ll represents the low 2 hex digits of the exponent. Since + * .mm is a truncated representation of the mantissa, using it + * in this monotonically increasing polynomial approximation + * of the logarithm will naturally give a low result. Let's + * derive a formula for a lower bound r on 1 + log(x): + * + * .4D104D42H < log(2)=.30102999...(base 10) < .4D104D43H + * .9A20H < log(4)=.60205999...(base 10) < .9A21H + * + * 1/2 <= .mm < 1 + * ==> log(.mm) >= .mm * log(4) - log(4) + * + * Substituting in truncated hex constants in the formula above + * gives r = 1 + .4D104DH * hhll. + .9AH * .mm - .9A21H. Now + * multiplication of hex digits 5 and 6 of log(2) by ll has an + * insignificant effect on the first 24 bits of the result so + * it will not be calculated. This gives the expression r = + * 1 + .4D10H * hhll. + .4DH * .hh + .9A * .mm - .9A21H. + * Finally we must add terms to our formula to subtract out the + * effect of the exponent bias. We obtain the following formula: + * + * (implied decimal point) + * < >.< > + * |3|3|2|2|2|2|2|2|2|2|2|2|1|1|1|1|1|1|1|1|1|1|0|0|0|0|0|0|0|0|0|0| + * |1|0|9|8|7|6|5|4|3|2|1|0|9|8|7|6|5|4|3|2|1|0|9|8|7|6|5|4|3|2|1|0| + * + < 1 > + * + < .4D10H * hhll. > + * + < .00004DH * hh00. > + * + < .9AH * .mm > + * - < .9A21H > + * - < .4D10H * 3FFEH > + * - < .00004DH * 3F00H > + * + * ==> r = .4D10H * hhll. + .4DH * .hh + .9AH * .mm - 1343.12F4H + * + * The difference between the lower bound r and the upper bound + * s is calculated as follows: + * + * .937EH < 1/ln(10)-log(1/ln(4))=.57614993...(base 10) < .937FH + * + * 1/2 <= .mm < 1 + * ==> log(.mm) <= .mm * log(4) - [1/ln(10) - log(1/ln(4))] + * + * so tenatively s = r + log(4) - [1/ln(10) - log(1/ln(4))], + * but we must also add in terms to ensure we will have an upper + * bound even after the truncation of various values. Because + * log(2) * hh00. is truncated to .4D104DH * hh00. we must + * add .0043H, because log(2) * ll. is truncated to .4D10H * + * ll. we must add .0005H, because <mantissa> * log(4) is + * truncated to .mm * .9AH we must add .009AH and .0021H. + * + * Thus s = r - .937EH + .9A21H + .0043H + .0005H + .009AH + .0021H + * = r + .07A6H + * ==> s = .4D10H * hhll. + .4DH * .hh + .9AH * .mm - 1343.0B4EH + * + * r is equal to 1 + log(x) more than (10000H - 7A6H) / + * 10000H = 97% of the time. + * + * In the above formula, a u_long is use to accomodate r, and + * there is an implied decimal point in the middle. + */ + + hh = expn >> 8; + ll = expn & (u_short)0xff; + mm = (u_short) (manhi >> 24); + r = (s_long)log2hi*(s_long)expn + log2lo*hh + log4hi*mm - c; + ir = (s_short)(r >> 16); + + /* + * + * We stated that we wanted to normalize x so that + * + * .1 <= x < 1 + * + * This was a slight oversimplification. Actually we want a + * number which when rounded to 16 significant digits is in the + * desired range. To do this we must normalize x so that + * + * .1 - 5*10^(-18) <= x < 1 - 5*10^(-17) + * + * and then round. + * + * If we had f = INT(1+log(x)) we could multiply by 10^(-f) + * to get x into the desired range. We do not quite have + * f but we do have INT(r) from the last step which is equal + * to f 97% of the time and 1 less than f the rest of the time. + * We can multiply by 10^-[INT(r)] and if the result is greater + * than 1 - 5*10^(-17) we can then multiply by 1/10. This final + * result will lie in the proper range. + */ + + /* convert _LDOUBLE to _LDBL12) */ + *U_EXP_12(&ld12) = expn; + *UL_MANHI_12(&ld12) = manhi; + *UL_MANLO_12(&ld12) = manlo; + *U_XT_12(&ld12) = 0; + + /* multiply by 10^(-ir) */ + __multtenpow12(&ld12,-ir,1); + + /* if ld12 >= 1.0 then divide by 10.0 */ + if (*U_EXP_12(&ld12) >= 0x3fff) { + ir++; + __ld12mul(&ld12,&ld12_one_tenth); + } + + fos->exp = ir; + if (output_flags & SO_FFORMAT){ + /* 'f' format, add exponent to ndigits */ + ndigits += ir; + if (ndigits <= 0) { + /* return 0 */ + PUT_ZERO_FOS(fos); + return 1; + } + } + if (ndigits > MAX_MAN_DIGITS) + ndigits = MAX_MAN_DIGITS; + + ub_exp = *U_EXP_12(&ld12) - 0x3ffe; /* unbias exponent */ + *U_EXP_12(&ld12) = 0; + + /* + * Now the mantissa has to be converted to fixed point. + * Then we will use the MSB of ld12 for generating + * the decimal digits. The next 11 bytes will hold + * the mantissa (after it has been converted to + * fixed point). + */ + + for (i=0;i<8;i++) + __shl_12(&ld12); /* make space for an extra byte, + in case we shift right later */ + if (ub_exp < 0) { + int shift_count = (-ub_exp) & 0xff; + for (;shift_count>0;shift_count--) + __shr_12(&ld12); + } + + p = fos->man; + for(digcount=ndigits+1;digcount>0;digcount--) { + tmp12 = ld12; + __shl_12(&ld12); + __shl_12(&ld12); + __add_12(&ld12,&tmp12); + __shl_12(&ld12); /* ld12 *= 10 */ + + /* Now we have the first decimal digit in the msbyte of exponent */ + *p++ = (char) (*UCHAR_12(&ld12,11) + '0'); + *UCHAR_12(&ld12,11) = 0; + } + + round = *(--p); + p--; /* p points now to the last character of the string + excluding the rounding digit */ + if (round >= '5') { + /* look for a non-9 digit starting from the end of string */ + for (;p>=fos->man && *p=='9';p--) { + *p = '0'; + } + if (p < fos->man){ + p++; + fos->exp ++; + } + (*p)++; + } + else { + /* remove zeros */ + for (;p>=fos->man && *p=='0';p--); + if (p < fos->man) { + /* return 0 */ + PUT_ZERO_FOS(fos); + return 1; + } + } + fos->ManLen = (char) (p - fos->man + 1); + fos->man[fos->ManLen] = '\0'; + } + return retval; +} diff --git a/private/fp32/def.mak b/private/fp32/def.mak new file mode 100644 index 000000000..821dd4ddd --- /dev/null +++ b/private/fp32/def.mak @@ -0,0 +1,45 @@ +#### +#def.mak - definitions for makefiles +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# Build 32bit NT i386 libs in OMF format +# +#Revision History: +# 8-21-90 GDP +# 3-04-92 GDP Drop support for multiple source/target OS's & CPU's +# +################################################################################ +!include fp32.def + + +HCPU = i386 +HOS = OS2 +TCPU = i386 + +MATHDIR = .. + +CCINC = -I$(MATHDIR)\include +ASMINC = -I$(MATHDIR)\inc\$(TCPU) -I$(MATHDIR)\inc +TOOLDIR = $(MATHDIR)\tools.mak\$(HCPU)$(HOS) +OBJDIR = $(MATHDIR)\obj$(CRTLIBTYPE).mak\$(TCPU) +LIBDIR = $(OBJDIR) +NMAKE = $(MAKE) -f nmake.mak + +CCFLAGS = -c -nologo -W3 -Ox -Zl $(CCINC) -Di386 $(C_DEFINES) +ASMFLAGS = -t -Mx -X -DQUIET -DI386 -DFLAT386 -DWIN32 $(ASM_DEFINES) $(ASMINC) + +ASM = masm386 +CC = cl386 +LIBEXE = $(TOOLDIR)\lib + + +{.}.c{$(OBJDIR)}.obj: + $(CC) $(CCFLAGS) -Fo$@ $< + +{.\i386}.c{$(OBJDIR)}.obj: + $(CC) $(CCFLAGS) -Fo$@ $< + +{.\i386}.asm{$(OBJDIR)}.obj: + $(ASM) $(ASMFLAGS) $< $@; diff --git a/private/fp32/dirs b/private/fp32/dirs new file mode 100644 index 000000000..a62c866cf --- /dev/null +++ b/private/fp32/dirs @@ -0,0 +1,2 @@ +OPTIONAL_DIRS=conv \ + tran diff --git a/private/fp32/fp32.def b/private/fp32/fp32.def new file mode 100644 index 000000000..6923b8c9a --- /dev/null +++ b/private/fp32/fp32.def @@ -0,0 +1,78 @@ +#### +#fp32.def - definitions for the fp32 libs build +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# This file is included in the 'sources' files in this tree +# +# Key to FP32 environment variables: +# +# CRTLIBDEBUG = debug flag (define for debug libs) +# CRTLIBTYPE = [dll/mt/st], dll = dynlink +# mt = multithread, +# st = singlethread +# +#Revision History: +# 9-26-91 GDP +# 2-07-92 GDP DLL support (according to crt32.def) +# 3-04-92 GDP Enabled 386 optimizations +# 05-22-93 SRW Compile runtines with no debug info except globals. +# 06-03-93 SRW Okay to allow FPO now, as crt32\startup\mlock.c has +# been fixed to explicitly disable FPO for itself. +# 10-18-93 SRW Disable intrinsics on Alpha +# +################################################################################ + +NTDEBUG= + +NTLEGO=1 + +!IF "$(CRTLIBTYPE)" == "DLL" +TARGETNAMESUFFIX=dll +TARGETTYPE=LIBRARY +MTOPTION=-DMTHREAD -D_MT -DCRTDLL +C_DEFINES1=-D_WIN32_=1 -D_INTL +ASM_DEFINES1= +!ELSE +!IF "$(CRTLIBTYPE)" == "MT" +TARGETNAMESUFFIX=mt +TARGETTYPE=LIBRARY +MTOPTION=-DMTHREAD -D_MT +C_DEFINES1=-D_WIN32_=1 -D_INTL +ASM_DEFINES1= +!ELSE +!IF "$(CRTLIBTYPE)" == "ST" +TARGETNAMESUFFIX= +TARGETTYPE=LIBRARY +MTOPTION= +C_DEFINES1=-D_WIN32_=1 -D_INTL +ASM_DEFINES1= +!ELSE +!IF "$(CRTLIBTYPE)" == "NT" +TARGETNAMESUFFIX=nt +TARGETTYPE=LIBRARY +MTOPTION= +C_DEFINES1=-D_WIN32_=1 -D_NTSUBSET_=1 -D_INTL +ASM_DEFINES1=-D_WIN32_=1 -D_NTSUBSET_=1 +!ELSE +!ERROR Unsupported Library CRTLIBTYPE: $(CRTLIBTYPE) +!ENDIF +!ENDIF +!ENDIF +!ENDIF + +# _WIN32_ is needed in case cruntime.h is used + +!IF "$(CRTLIBDEBUG)" == "" +C_DEFINES=-DNDEBUG $(C_DEFINES1) $(MTOPTION) +ASM_DEFINES=$(ASM_DEFINES1) -DFLAT386 -DI386 $(MTOPTION) +!ELSE +C_DEFINES=$(C_DEFINES1) -DDEBUG=1 $(MTOPTION) +ASM_DEFINES=$(ASM_DEFINES1) -DFLAT386 -DDEBUG=1 -DI386 $(MTOPTION) +!ENDIF + +# Disable intrinsics on Alpha as it will not allow redefinition of intrinsics +ALPHA_OPTIMIZATION=/Ox /Oi- +# PPC too +PPC_OPTIMIZATION=/Ox /Oi- diff --git a/private/fp32/inc/i386/cmacros.inc b/private/fp32/inc/i386/cmacros.inc new file mode 100644 index 000000000..30c637269 --- /dev/null +++ b/private/fp32/inc/i386/cmacros.inc @@ -0,0 +1,1561 @@ +comment $ +SCCSID = "@(#)cmacros.mas:1.9" +cmacros - assembly macros for interfacing to hhls +(C)Copyright Microsoft Corp. 1984-1987 +$ +.xcref +.xcref ??_out +??_out macro t +ifndef ?QUIET +%out t +endif +endm +outif macro name,defval,onmsg,offmsg +ifndef name +ifb <defval> +name=0 +else +name=defval +endif +endif +if name +name=1 +ifnb <onmsg> +??_out <! onmsg> +endif +else +ifnb <offmsg> +??_out <! offmsg> +endif +endif +endm +.xcref ??error +??error macro msg +%out e r r o r ----- msg +.err e r r o r ----- msg +endm +.xcref ??error2 +??error2 macro msg +if2 +%out e r r o r ----- msg +.err e r r o r ----- msg +endif +endm +.xcref ASMpass +.xcref memS,memM,memL,memC,memH,memMOD,sizec,sized,memS32,sizeI,wordI +if1 +ASMpass=1 +ifdef ?SMALL +memS=1 +endif +ifdef ?MEDIUM +memM=1 +endif +ifdef ?COMPACT +memC=1 +endif +ifdef ?LARGE +memL=1 +endif +ifdef ?HUGE +memH=1 +endif +ifdef ?SMALL32 +memS32=1 +endif +??_out <cMacros Version 3.06f - 05/12/89> +??_out <Copyright (C) Microsoft Corp. 1984-1989. All rights reserved.> +outif memS,0,<Small Model> +outif memM,0,<Medium model> +outif memL,0,<Large Model> +outif memC,0,<Compact Model> +outif memH,0,<Huge Model> +outif memS32,0,<32 Bit Small Model> +memMOD= memS + memM + memL + memC + memH + memS32 +if memMOD ne 1 +if memMOD eq 0 +memS = 1 +outif memS,0,<Small model> +else +??error <must have only 1 memory model selected> +endif +endif +sizec= memM + memL + memH +sized= memL + memC + (memH*2) +if memS32 +sizeI = 4 +wordI equ <dword> +asmdI equ <dd> +else +sizeI = 2 +wordI equ <word> +asmdI equ <dw> +endif +outif ?DF,0,<No segments or groups will be defined> +outif ?DFDATA,0,<No data segments will be defined> +outif ?TF,0,<Epilog sequences assume valid SP> +outif ?WIN,1,<Windows support> +outif ?COW,0,<Characters Windows support> +outif ?PLM,1,<PL/M calling convention> +outif ?NOATOMIC,0,<ATOMIC disabled> +outif ?NODATA,0,<NODATA module> +outif ?FORCEFRAME,0,<Forced BP stack frame.> +outif ?RETFLAGS,0,<Epilog preserves flags.> +ife ?NODATA +?nodata1=0 +else +?nodata1=1 +endif +ifndef ?CHKSTK +?chkstk1=0 +else +?chkstk1=1 +ifdef ?CHKSTKPROC +??_out <! Private stack checking enabled> +else +??_out <! Stack checking enabled> +endif +endif +ifndef DOS5 +?DOS5=0 +else +?DOS5=1 +??_out <! DOS5 module> +endif +ifdef ?PROFILE +??_out <! Native profiling enabled> +endif +ifndef ?NO_BP +?no_bp1=0 +else +?no_bp1=1 +??_out <! NO_BP is default> +endif +else +ASMpass=2 +endif +if memS32 +.386 +IAX equ <eax> +ICX equ <ecx> +IDX equ <edx> +IBX equ <ebx> +ISP equ <esp> +IBP equ <ebp> +ISI equ <esi> +IDI equ <edi> +IPUSHF equ pushfd +IPOPF equ popfd +IPUSHA equ pushad +IPOPA equ popad +IIRET equ iretd +else +IAX equ <ax> +ICX equ <cx> +IDX equ <dx> +IBX equ <bx> +ISP equ <sp> +IBP equ <bp> +ISI equ <si> +IDI equ <di> +IPUSHF equ pushf +IPOPF equ popf +IIRET equ iret +endif +.xcref ?n,?ax,?ah,?al,?bx,?bh +.xcref ?bl,?cx,?ch,?cl,?dx,?dh +.xcref ?dl,?si,?di,?es,?ds,?bp +.xcref ?sp,?ss,?cs +.xcref ?n,?AX,?AH,?AL,?BX,?BH +.xcref ?BL,?CX,?CH,?CL,?DX,?DH +.xcref ?DL,?SI,?DI,?ES,?DS,?BP +.xcref ?SP,?SS,?CS +.xcref ?EAX,?EBX,?ECX,?EDX,?ESI,?EDI,?ESP,?EBP +.xcref ?eax,?ebx,?ecx,?edx,?esi,?edi,?esp,?ebp +.xcref ?IAX,?IBX,?ICX,?IDX,?ISI,?IDI,?ISP,?IBP +.xcref ?rsl,?cpd,?argl,?argc,?ba +.xcref ?acb,???,?po +.xcref ?pas,?pc +.xcref uconcat,mpush,mpop +.xcref ?ri,?pp,?pp1,?al1 +.xcref ?ad,?ap,?atal,?dd,?dd1,?dd2 +.xcref ?pg,?pg1,?aloc,?cs1,?cs2 +.xcref ?DF,?TF,?ff,?PLM,?WIN,?ia,?pu,?adj +.xcref ?uf,?rp,?nx,?nd,?nodata1,?chkstk1,?DOS5 +.xcref ?wfp,arg,cCall,cProc,assumes,?cs3,?cs2,?cs1 +.xcref defgrp,addseg,createSeg +.xcref save,outif,errnz,errn$,errnz1 +.xcref ?PLMPrevParm,?gcc +.xcref ?cCall1,?pcc,?no_bp1,?no_bp2 +.xcref ?cbe,?pcbe +?rsl = 0 +?cpd = 0 +?argl = 0 +?argc = 0 +?ba = 0 +?acb = 0 +??? = 0 +?po = 0 +?pas = 0 +?pc = 0 +?ia = 0 +?pu = 0 +?adj = 0 +?rp = 0 +?uf = 0 +?nd = 0 +?nx = 0 +?wfp = 0 +?ff = ?FORCEFRAME +?dd2 = 0 +?cCall1 = 0 +?pcc = ?PLM +?PLMPrevParm = 0 +?no_bp2 = ?no_bp1 +?cbe = 0 +.xcref ?casen +if1 +?casen = 0 +endif +?n = 0000000000000000b +?ax = 0000000000000011b +?ah = 0000000000000001b +?al = 0000000000000010b +?bx = 0000000000001100b +?bh = 0000000000000100b +?bl = 0000000000001000b +?cx = 0000000000110000b +?ch = 0000000000010000b +?cl = 0000000000100000b +?dx = 0000000011000000b +?dh = 0000000001000000b +?dl = 0000000010000000b +?si = 0000000100000000b +?di = 0000001000000000b +?es = 0000010000000000b +?ds = 0000100000000000b +?bp = 0001000000000000b +?sp = 0010000000000000b +?ss = 0100000000000000b +?cs = 1000000000000000b +?AX = 0000000000000011b +?AH = 0000000000000001b +?AL = 0000000000000010b +?BX = 0000000000001100b +?BH = 0000000000000100b +?BL = 0000000000001000b +?CX = 0000000000110000b +?CH = 0000000000010000b +?CL = 0000000000100000b +?DX = 0000000011000000b +?DH = 0000000001000000b +?DL = 0000000010000000b +?SI = 0000000100000000b +?DI = 0000001000000000b +?ES = 0000010000000000b +?DS = 0000100000000000b +?BP = 0001000000000000b +?SP = 0010000000000000b +?SS = 0100000000000000b +?CS = 1000000000000000b +?EAX = 0000000000000011b +?EBX = 0000000000001100b +?ECX = 0000000000110000b +?EDX = 0000000011000000b +?ESI = 0000000100000000b +?EDI = 0000001000000000b +?EBP = 0001000000000000b +?ESP = 0010000000000000b +?eax = 0000000000000011b +?ebx = 0000000000001100b +?ecx = 0000000000110000b +?edx = 0000000011000000b +?esi = 0000000100000000b +?edi = 0000001000000000b +?ebp = 0001000000000000b +?esp = 0010000000000000b +?IAX = 0000000000000011b +?IBX = 0000000000001100b +?ICX = 0000000000110000b +?IDX = 0000000011000000b +?ISI = 0000000100000000b +?IDI = 0000001000000000b +?IBP = 0001000000000000b +?ISP = 0010000000000000b +.cref +uconcat macro a,b,c,d,e,f,g +a&b c&d e&f&g +endm +mpush macro r +irp x,<IAX,IBX,ICX,IDX,ISI,IDI,es,ds,IBP,ISP,ss,cs> +if (r and ?&&x) + push x +endif +endm +endm +mpop macro r +irp x,<cs,ss,ISP,IBP,ds,es,IDI,ISI,IDX,ICX,IBX,IAX> +if (r and ?&&x) + pop x +endif +endm +endm +save macro r +?rsl=0 +?ri ?rsl,<r> +endm +?ri macro n,r +irp x,<r> +ifdef ?&&x +n=n or ?&&x +else +??error2 <unknown register x> +.err +endif +endm +endm +.xcref +.xcref parmB,parmW,parmD,parmQ,parmT,parmCP,parmDP,parmH,parmI +.cref +parmB macro n +?pp <n>,<byte>,sizeI,1 +endm +parmW macro n +?pp <n>,<word>,sizeI,2 +endm +parmI macro n +?pp <n>,wordI,sizeI,sizeI +endm +parmD macro n +ife ?pcc +irp x,<n> +?pp <&&x>,<dword>,0,4 +?pp <off_&&x>,<word>,2,2 +?pp <seg_&&x>,<word>,2,2 +endm +else +irp x,<n> +?pp <seg_&&x>,<word>,2,2 +?pp <off_&&x>,<word>,2,2 +?pp <&&x>,<dword>,0,4 +endm +endif +endm +parmH macro n +?pp <n>,<word>,4,2 +endm +parmQ macro n +?pp <n>,<qword>,8,8 +endm +parmT macro n +?pp <n>,<tbyte>,10,10 +endm +if sizec +parmCP macro n +parmD <n> +endm +else +parmCP macro n +parmW <n> +endm +endif +if sized +parmDP macro n +parmD <n> +endm +else +parmDP macro n +parmI <n> +endm +endif +?pp macro n,t,l,s +if ?cpd +.xcref +irp x,<n> +.xcref ?t&&x +?t&&x=s +ife ?pcc +?pp1 x,<t>,,,%(?po+?adj) +?po=?po+l +else +?PLMPrevParm=?PLMPrevParm+1 +?po=?po+l +?pp1 x,<t>,%?po,%?adj,,%?PLMPrevParm,%(?PLMPrevParm-1) +endif +endm +.cref +else +??error2 <parm(s) "&n" declared outside proc def> +endif +endm +?pp1 macro n,t,o,a,b,cpc,ppc +ife ?pcc +if ?no_bp2 +n equ (t ptr [+b]) +else +n equ (t ptr [IBP][+b]) +endif +else +.xcref +.xcref ?PLMParm&cpc +.cref +if ?no_bp2 +?PLMParm&cpc ¯o po +uconcat <n>,,<equ>,,<(t ptr [+>,%(a+po-o),<])> +?PLMParm&ppc po +purge ?PLMParm&cpc +&endm +else +?PLMParm&cpc ¯o po +uconcat <n>,,<equ>,,<(t ptr [IBP][+>,%(a+po-o),<])> +?PLMParm&ppc po +purge ?PLMParm&cpc +&endm +endif +endif +endm +ifndef ?NOPARMR +.xcref +.xcref ?pr,parmR +.cref +parmR macro n,r,r2 +?pr n,r,r2,%?rp,%(?ia+2) +endm +?pr macro n,r,r2,i,o +.xcref +ifnb <r2> +parmR seg_&n,r +parmR off_&n,r2 +if ?no_bp2 +n equ (dword ptr [-o-2]) +else +n equ (dword ptr [bp][-o-2]) +endif +.xcref ?t&n +?t&n=4 +else +.xcref ?rp&i +?rp&i=0 +ifdef ?&r +?rp&i=?&r +endif +if ??? or (?cpd eq 0) or (?rp&i eq 0) +??error2 <invalid parmR encountered: &n,&r> +exitm +endif +if ?no_bp2 +n equ (word ptr [-o]) +else +n equ (word ptr [bp][-o]) +endif +.xcref ?t&n +?t&n=2 +irp x,<bh,ch,dh,bl,cl,dl,ah,al> +if ?&&x eq ?&r +if ?no_bp2 +n equ (byte ptr [-o]) +else +n equ (byte ptr [bp][-o]) +endif +?t&n=1 +exitm +endif +endm +?ia=?ia+2 +?rp=?rp+1 +endif +.cref +endm +endif +.xcref +.xcref localB,localW,localD,localQ,localT,localCP,localDP,localV,localI +.cref +localB macro n +?aloc <n>,<byte ptr>,1,1,0 +endm +localW macro n +?aloc <n>,<word ptr>,2,2,1 +endm +localI macro n +?aloc <n>,&wordI&< ptr>,sizeI,sizeI,1 +endm +localD macro n +irp x,<n> +?aloc <seg_&&x>,<word ptr>,2,2,1 +?aloc <off_&&x>,<word ptr>,2,2,1 +?aloc <&&x>,<dword ptr>,0,4,1 +endm +endm +localQ macro n +?aloc <n>,<qword ptr>,8,8,1 +endm +localT macro n +?aloc <n>,<tbyte ptr>,10,10,1 +endm +if sizec +localCP macro n +localD <n> +endm +else +localCP macro n +localW <n> +endm +endif +if sized +localDP macro n +localD <n> +endm +else +localDP macro n +localI <n> +endm +endif +localV macro n,a +?aloc <n>,,%(a),0,1 +endm +?aloc macro n,t,l,s,a +if ?cpd +.xcref +irp x,<n> +???=???+l +if a +if memS32 and l GT 2 +???=((??? + 3) and 0fffffffch) +else +???=((??? + 1) and 0fffeh) +endif +endif +?al1 x,<t>,%(???+?ia) +.xcref ?t&&x +?t&&x=s +endm +.cref +else +??error2 <locals "&n" declared outside procedure def> +endif +endm +?al1 macro n,t,o +if ?no_bp2 +n equ (t [-o]) +else +n equ (t [IBP][-o]) +endif +endm +?gcc macro s,i,cc +s = i +ifnb <cc> +ifidn <cc>,<C> +s=0 +endif +ifidn <cc>,<PLM> +s=1 +endif +ifidn <cc>,<PASCAL> +s=1 +endif +endif +endm +ifndef ?NOGLOBAL +.xcref +.xcref globalB,globalW,globalD,globalQ,globalT,globalCP,globalDP,globalI +.cref +globalB macro n,i,s,c +?ad <n>,1 +?dd n,1,<byte>,<db>,<i>,<s>,<c> +endm +globalW macro n,i,s,c +?ad <n>,2 +?dd n,1,<word>,<dw>,<i>,<s>,<c> +endm +globalI macro n,i,s,c +?ad <n>,2 +?dd n,1,wordI,%asmdI,<i>,<s>,<c> +endm +globalD macro n,i,s,c +?ad <n>,4 +?dd n,1,<dword>,<dd>,<i>,<s>,<c> +off_&n equ word ptr n[0] +seg_&n equ word ptr n[2] +endm +globalQ macro n,i,s,c +?ad <n>,8 +?dd n,1,<qword>,<dq>,<i>,<s>,<c> +endm +globalT macro n,i,s,c +?ad <n>,10 +?dd n,1,<tbyte>,<dt>,<i>,<s>,<c> +endm +if sizec +globalCP macro n,i,s,c +globalD n,<i>,<s>,<c> +endm +else +globalCP macro n,i,s,c +globalW n,<i>,<s>,<c> +endm +endif +if sized +globalDP macro n,i,s,c +globalD n,<i>,<s>,<c> +endm +else +globalDP macro n,i,s,c +globalI n,<i>,<s>,<c> +endm +endif +endif +ifndef ?NOSTATIC +.xcref +.xcref staticB,staticW,staticD,staticQ,staticT,staticCP,staticDP,staticI +.cref +staticB macro n,i,s +?ad <n>,1 +?dd n,0,<byte>,<db>,<i>,<s>,<PLM> +endm +staticW macro n,i,s +?ad <n>,2 +?dd n,0,<word>,<dw>,<i>,<s>,<PLM> +endm +staticD macro n,i,s +?ad <n>,4 +?dd n,0,<dword>,<dd>,<i>,<s>,<PLM> +endm +staticI macro n,i,s +?ad <n>,sizeI +?dd n,0,wordI,%asmdI,<i>,<s>,<PLM> +endm +staticQ macro n,i,s +?ad <n>,8 +?dd n,0,<qword>,<dq>,<i>,<s>,<PLM> +endm +staticT macro n,i,s +?ad <n>,10 +?dd n,0,<tbyte>,<dt>,<i>,<s>,<PLM> +endm +if sizec +staticCP macro n,i,s +staticD n,<i>,<s> +endm +else +staticCP macro n,i,s +staticI n,<i>,<s> +endm +endif +if sized +staticDP macro n,i,s +staticD n,<i>,<s> +endm +else +staticDP macro n,i,s +staticI n,<i>,<s> +endm +endif +endif +?dd macro n,p,t,d,i,s,c +?gcc ?dd2,%?PLM,<c> +ife ?dd2 +n label t +?dd1 _&n,p,<d>,<i>,<s> +else +?dd1 n,p,<d>,<i>,<s> +endif +endm +?dd1 macro n,p,d,i,s +if p +public n +endif +ifb <s> +n d i +else +ifb <i> +n d s dup (?) +else +n d s dup (i) +endif +endif +endm +ifndef ?NOEXTERN +.xcref +.xcref ?ex1,?ex2,externB,externW,externI,externD,externQ,externT +.xcref externNP,externFP,externP,externCP,externDP,externA +.cref +?ex2 = 0 +externA macro n,c +?ex1 <n>,40h,<abs>,<c>,<> +endm +externB macro n,c +?ex1 <n>,1,<byte>,<c>,<> +endm +externW macro n,c +?ex1 <n>,2,<word>,<c>,<> +endm +externI macro n,c +?ex1 <n>,sizeI,<wordI>,<c>,<> +endm +externD macro n,c +?ex1 <n>,4,<dword>,<c>,<> +endm +externQ macro n,c +?ex1 <n>,8,<qword>,<c>,<> +endm +externT macro n,c +?ex1 <n>,10,<tbyte>,<c>,<> +endm +externNP macro n,c +?ex1 <n>,2,<near>,<c>,<cc> +endm +externFP macro n,c +?ex1 <n>,4,<far>,<c>,<cc> +endm +if sizec +externP macro n,c +?ex1 <n>,4,<far>,<c>,<cc> +endm +else +externP macro n,c +?ex1 <n>,2,<near>,<c>,<cc> +endm +endif +if sizec +externCP macro n,c +?ex1 <n>,4,<dword>,<c>,<> +endm +else +externCP macro n,c +?ex1 <n>,2,<word>,<c>,<> +endm +endif +if sized +externDP macro n,c +?ex1 <n>,4,<dword>,<c>,<> +endm +else +externDP macro n,c +?ex1 <n>,2,<word>,<c>,<> +endm +endif +?ex1 macro n,s,d,c,scv +?gcc ?ex2,%?PLM,<c> +irp x,<n> +.xcref +.xcref ?t&&x +.cref +?t&&x=s +ife ?ex2 +extrn _&&x:&d +x equ _&&x +else +extrn x:&d +endif +ifidn <scv>,<cc> +.xcref +.xcref ?CC&&x +.cref +?CC&&x=?ex2 +endif +endm +endm +endif +ifndef ?NOLABEL +.xcref +.xcref ?lb1,?lblpu,?lb2 +.xcref labelB,labelW,labelD,labelQ,labelT +.xcref labelNP,labelFP,labelP,labelCP,labelDP +.cref +?lblpu = 0 +?lb2 = 0 +labelB macro n,c +?lb1 <n>,1,<byte>,<c> +endm +labelW macro n,c +?lb1 <n>,2,<word>,<c> +endm +labelD macro n,c +?lb1 <n>,4,<dword>,<c> +endm +labelQ macro n,c +?lb1 <n>,8,<qword>,<c> +endm +labelT macro n,c +?lb1 <n>,10,<tbyte>,<c> +endm +labelNP macro n,c +?lb1 <n>,2,<near>,<c> +endm +labelFP macro n,c +?lb1 <n>,4,<far>,<c> +endm +if sizec +labelP macro n,c +?lb1 <n>,4,<far>,<c> +endm +else +labelP macro n,c +?lb1 <n>,2,<near>,<c> +endm +endif +if sizec +labelCP macro n,c +?lb1 <n>,4,<dword>,<c> +endm +else +labelCP macro n,c +?lb1 <n>,2,<word>,<c> +endm +endif +if sized +labelDP macro n,c +?lb1 <n>,4,<dword>,<c> +endm +else +labelDP macro n,c +?lb1 <n>,2,<word>,<c> +endm +endif +?lb1 macro n,s,d,c +?gcc ?lb2,%?PLM,<c> +?lblpu=0 +irp x,<n> +ifidn <x>,<PUBLIC> +?lblpu=1 +else +.xcref +.xcref ?t&&x +.cref +?t&&x=s +ife ?lb2 +if ?lblpu +public _&&x +endif +_&&x label &d +x equ _&&x +else +if ?lblpu +public x +endif +x label &d +endif +endif +endm +endm +endif +ifndef ?NODEF +.xcref +.xcref defB,defW,defD,defQ,defT,defCP,defDP +.cref +defB macro n +?ad <n>,1 +endm +defW macro n +?ad <n>,2 +endm +defD macro n +?ad <n>,4 +endm +defQ macro n +?ad <n>,8 +endm +defT macro n +?ad <n>,10 +endm +if sizec +defCP macro n +defD <n> +endm +else +defCP macro n +defW <n> +endm +endif +if sized +defDP macro n +defD <n> +endm +else +defDP macro n +defW <n> +endm +endif +endif +?ad macro n,s +irp x,<n> +.xcref +.xcref ?t&&x +.cref +?t&&x=s +endm +endm +ifndef ?NOPTR +.xcref +.xcref regPtr,farPtr +.cref +regPtr macro n,s,o +farPtr n,s,o +endm +farPtr macro n,s,o +.xcref +.xcref ?t&n +.cref +n ¯o + push s + push o +&endm +?t&n=80h +endm +endif +arg macro a +irp x,<a> +?argc=?argc+1 +?atal <x>,%?argc +endm +endm +?atal macro n,i +.xcref +.xcref ?ali&i +.cref +?ali&i ¯o +?ap <n> +&endm +endm +?ap macro n +?argl=?argl+2 +ifdef ?t&n +ife ?t&n-1 + push word ptr (n) +exitm +endif +ife ?t&n-2 + push n +exitm +endif +ife ?t&n-4 + push word ptr (n)[2] + push word ptr (n) +?argl=?argl+2 +exitm +endif +ife ?t&n-8 + push word ptr (n)[6] + push word ptr (n)[4] + push word ptr (n)[2] + push word ptr (n) +?argl=?argl+6 +exitm +endif +if ?t&n and 80h +n +?argl=?argl+2 +exitm +endif +ife ?t&n + push word ptr (n) +exitm +endif +endif + push n +endm +cCall macro n,a,c +ifnb <a> +arg <a> +endif +mpush %?rsl +ifdef ?CC&n +?cCall1=?CC&n +else +?cCall1=?PLM +endif +ifnb <c> +?gcc ?cCall1,%?cCall1,<c> +endif +?argl=0 +ife ?cCall1 +?acb=?argc +else +?acb=1 +endif +rept ?argc +uconcat <?ali>,%?acb +uconcat <purge>,,<?ali>,%?acb +ife ?cCall1 +?acb=?acb-1 +else +?acb=?acb+1 +endif +endm + call n +if ((?cCall1 eq 0) and (?argl ne 0)) + add sp,?argl +endif +mpop %?rsl +?rsl=0 +?argc= 0 +?argl= 0 +endm +cProc macro n,cf,a +if ?cpd +?utpe +endif +?cpd=1 +???=0 +?argc=0 +?ba=0 +?po=0 +?pu=0 +?ia=0 +?adj=2*sizeI +?rp=0 +?uf=0 +?wfp=?WIN +?ff=?FORCEFRAME +?pas=0 +?pcc=?PLM +?no_bp2=?no_bp1 +ifnb <a> +?ri ?pas,<a> +endif +?pc=sizec +?nd=?nodata1 +?nx=0 +irp x,<cf> +ifdef ??_cproc_&&x +??_cproc_&&x +else +??error2 <e r r o r - unknown keyword x> +.err +endif +endm +if ?pcc +?PLMPrevParm=0 +.xcref +.xcref ?PLMParm0 +.cref +?PLMParm0 ¯o +purge ?PLMParm0 +&endm +endif +.xcref +.xcref ?CC&n +.cref +?CC&n=?pcc +if (?nx eq 1) and (?nd eq 0) +?nx = 0 +??error2 <ATOMIC specified without NODATA - ATOMIC ignored> +endif +if ?pc +if ?wfp +ife ?nx +ife ?COW +?ia=2 +?pas = ?pas and (not ?ds) +endif +endif +endif +?adj=?adj+sizeI +else +?wfp=0 +endif +?pas = ?pas and (not (?sp+?cs+?ss)) +if ?uf +?pas = ?pas and (not (?bp+?si+?di)) +endif +ife ?pcc +?pg <_&n>,%?pu,%?pc,%?pas,%?wfp,<n>,%?pcc +else +?pg <n>,%?pu,%?pc,%?pas,%?wfp,<n>,%?pcc +endif +endm +?pg macro n,p,c,a,w,nnu,cc +.xcref +if ?uf +if ?nd +??error2 <NODATA encountered in &n - user frame ignored> +?uf=0 +endif +endif +.xcref cBegin +cBegin ¯o g +.xcref +if cc +uconcat <?PLMParm>,%?PLMPrevParm,%?po +endif +if ?uf +if ?rp +??error2 <parmR encountered in &n - user frame ignored> +?uf=0 +endif +endif +?pg1 <n>,c,a,%?po,w,%?uf,%?nd,%?rp,cc,<nnu>,%??? +?cpd=0 +?argc=0 +?ba=1 +???=(???+1) and 0fffeh +if p +public n +endif +ife c +n proc near +else +n proc far +endif +ife cc +nnu equ n +endif +?cbe = 0 +ifnb <g> +?pcbe <g>,<nnu>,<cBegin> +endif +if ?cbe eq 1 +if ???+a+?rp +if2 +??_out <cBegin - nogen> +endif +endif +else +if ?uf +?mf c,%???,%?po +mpush a +else +ife ?cbe +if w +ife ?nd + mov IAX,ds + nop +endif +ife ?nx +ife ?DOS5 + inc IBP +endif + push IBP + mov IBP,ISP +ife ?COW + push ds +endif +else +if ?ff+???+?po+?rp + push IBP + mov IBP,ISP +endif +endif +ife ?nd + mov ds,IAX +endif +else +if ?ff+???+?po+?rp + push IBP + mov IBP,ISP +endif +endif +if ?rp +?uf=0 +rept ?rp +uconcat mpush,,?rp,%?uf +?uf=?uf+1 +endm +endif +if ??? +if ?chkstk1 +ifdef ?CHKSTKPROC +?CHKSTKPROC %??? +else + mov IAX,??? +ife cc + call _chkstk +else + call chkstk +endif +endif +else + sub ISP,??? +endif +endif +else +if ?rp +??error2 <cBegin - parmRs encountered with partial frame> +else +if ??? + lea ISP,[IBP][-???-?ia] +endif +endif +endif +mpush a +endif +ifdef ?PROFILE +if c + call StartNMeas +endif +endif +endif +.cref +purge cBegin +&endm +.xcref ?utpe +?utpe ¯o +??error2 <unterminated procedure definition: "&n"> +&endm +.cref +endm +?pg1 macro n,c,a,o,w,f,d,r,cc,nnu,lc +.xcref +.xcref cEnd +parm_bytes_&nnu = o +cEnd ¯o g +.xcref +?ba=0 +?cbe = 0 +ifnb <g> +?pcbe <g>,<nnu>,<cEnd> +endif +if ?cbe eq 1 +if a+r+lc +if2 +??_out <cEnd - nogen> +endif +endif +else +ifdef ?PROFILE +if c +call StopNMeas +endif +endif +mpop a +if f + db 0c3h +else +if w +ife ?nx +ife ?COW +if (?TF eq 0) or (???+r) + lea sp,-2[bp] +endif + pop ds +else +if (?TF eq 0) or (???+r) +mov sp,bp +endif +endif + pop IBP +ife ?DOS5 +if ?RETFLAGS +lea IBP,[IBP-1] +else + dec IBP +endif +endif +else +if memS32 +leave +else +if (?TF eq 0) or (???+r) + mov ISP,IBP +endif +if ???+?po+r + pop IBP +endif +endif +endif +else +if ?ff+???+?po+r +if (?TF eq 0) or (???+r) + mov ISP,IBP +endif + pop IBP +endif +endif +if ?cbe ne 4 +ife cc + ret +else + ret o +endif +endif +endif +endif +if ?cbe ne 4 +n endp +.cref +purge cEnd +else +.cref +endif +&endm +.cref +endm +.xcref +.xcref cleanframe +cleanframe macro +cEnd <??cleanframe??> +endm +.cref +.xcref +.xcref ??_cproc_FAR,??_cproc_NEAR,??_cproc_PUBLIC,??_cproc_SMALL +.xcref ??_cproc_DATA,??_cproc_NODATA,??_cproc_ATOMIC,??_cproc_C +.xcref ??_cproc_PLM,??_cproc_PASCAL,??_cproc_WIN,??_cproc_NONWIN +.xcref ??_cproc_NO_BP,??_cproc_BP +.xcref ??_cproc_far,??_cproc_near,??_cproc_public,??_cproc_small +.xcref ??_cproc_data,??_cproc_nodata,??_cproc_atomic,??_cproc_c +.xcref ??_cproc_plm,??_cproc_pascal,??_cproc_win,??_cproc_nonwin +.xcref ??_cproc_no_bp,??_cproc_bp +.cref +??_cproc_FAR macro +?pc=1 +endm +??_cproc_NEAR macro +?pc=0 +endm +??_cproc_PUBLIC macro +?pu=1 +endm +??_cproc_SMALL macro +?uf=1 +endm +??_cproc_DATA macro +?nd=0 +endm +??_cproc_NODATA macro +?nd=1 +endm +??_cproc_ATOMIC macro +?nx=1 +endm +??_cproc_C macro +?pcc=0 +endm +??_cproc_PLM macro +?pcc=1 +endm +??_cproc_PASCAL macro +?pcc=1 +endm +??_cproc_WIN macro +?wfp=1 +endm +??_cproc_NONWIN macro +?wfp=0 +endm +??_cproc_NO_BP macro +?no_bp2=1 +endm +??_cproc_BP macro +?no_bp2=0 +endm +??_cproc_far macro +?pc=1 +endm +??_cproc_near macro +?pc=0 +endm +??_cproc_public macro +?pu=1 +endm +??_cproc_small macro +?uf=1 +endm +??_cproc_data macro +?nd=0 +endm +??_cproc_nodata macro +?nd=1 +endm +??_cproc_atomic macro +?nx=1 +endm +??_cproc_c macro +?pcc=0 +endm +??_cproc_plm macro +?pcc=1 +endm +??_cproc_pascal macro +?pcc=1 +endm +??_cproc_win macro +?wfp=1 +endm +??_cproc_nonwin macro +?wfp=0 +endm +??_cproc_no_bp macro +?no_bp2=1 +endm +??_cproc_bp macro +?no_bp2=0 +endm +??_cproc_FORCEFRAME macro +?ff=1 +endm +?pcbe macro g,nnu,mt +ifidn <g>,<NOGEN> +?cbe = 1 +else +ifidn <g>,<nogen> +?cbe = 1 +else +ifidn <mt>,<cBegin> +ifidn <g>,<PARTIAL> +?cbe = 2 +else +ifidn <g>,<partial> +?cbe = 2 +else +ifdif <g>,<nnu> +??error2 <mt - unknown keyword g> +endif +endif +endif +else +ifidn <g>,<??cleanframe??> +?cbe = 4 +else +ifdif <g>,<nnu> +??error2 <mt - unknown keyword g> +endif +endif +endif +endif +endif +endm +assumes macro s,ln +ifndef ln&_assumes +assume s:ln +else +ln&_assumes s +endif +endm +createSeg macro n,ln,a,co,cl,grp +ifnb <cl> +n segment a co '&cl' +else +n segment a co +endif +n ends +ifnb <grp> +addseg grp,n +else +ln&OFFSET equ offset n: +ln&BASE equ n +?cs3 <ln>,<n> +endif +?cs1 <ln>,<n> +endm +addseg macro grp,seg +.xcref +.xcref grp&_def +.cref +ifndef grp&_def +grp&_def=0 +endif +if grp&_def ne ASMpass +.xcref +.xcref grp&_add +.cref +grp&_add ¯o s +grp&_in <seg>,s +&endm +.xcref +.xcref grp&_in +.cref +grp&_in ¯o sl,s +ifb <s> +grp group sl +else +grp&_add ¯o ns +grp&_in <sl,s>,ns +&endm +endif +&endm +grp&_def=ASMpass +else +grp&_add seg +endif +endm +defgrp macro grp,ln +addseg grp +ifnb <ln> +irp x,<ln> +?cs3 <&x>,<grp> +x&&OFFSET equ offset grp: +x&&BASE equ grp +endm +endif +endm +?cs1 macro ln,n +.xcref +.xcref ln&_sbegin +.cref +ln&_sbegin ¯o +.xcref +.xcref ?mf +.cref +?mf &¯o c,l,p +if c + extrn n&_FARFRAME:near + call n&_FARFRAME +else + extrn n&_NEARFRAME:near + call n&_NEARFRAME +endif + db l shr 1 + db p shr 1 +&&endm +?cs2 <ln>,<n> +n segment +&endm +endm +?cs2 macro ln,n +.xcref +.xcref sEnd +.cref +sEnd ¯o +n ends +purge ?mf +purge sEnd +&endm +endm +?cs3 macro ln,n +.xcref +.xcref ln&_assumes +.cref +ln&_assumes ¯o s +assume s:&n +&endm +endm +.xcref +.xcref sBegin +.cref +sBegin macro ln +ln&_sbegin +endm +ife ?DF +createSeg _TEXT,CODE,%wordI,public,CODE +ife ?nodata1 +createSeg _DATA,DATA,%wordI,public,DATA,DGROUP +defgrp DGROUP,DATA +else +ife ?DFDATA +createSeg _DATA,DATA,%wordI,public,DATA,DGROUP +defgrp DGROUP,DATA +endif +endif +if ?chkstk1 +ifndef ?CHKSTKPROC +externp <chkstk> +endif +endif +endif +errnz macro x +if2 +if x +errnz1 <x>,%(x) +endif +endif +endm +errnz1 macro x1,x2 += *errnz* x1 = x2 +.err +endm +errn$ macro l,x +errnz <offset $ - offset l x> +endm +ifdef ?PROFILE +externFP <StartNMeas,StopNMeas> +endif diff --git a/private/fp32/inc/i386/convert.inc b/private/fp32/inc/i386/convert.inc new file mode 100644 index 000000000..0cd0e5085 --- /dev/null +++ b/private/fp32/inc/i386/convert.inc @@ -0,0 +1,59 @@ + page ,132 +;*** +;convert.inc - macros and defines for floating point/ascii convertion routines +; +; Copyright (c) 1989-1989, Microsoft Corporation. All rights reserved. +; +;Purpose: +; Macros and defines for floating point/ascii convertion routines +; +;Revision History: +; +; 04/17/89 WAJ Initial version. Assembeler version of convert.h +; 04/20/89 WAJ Now defines constants for $i10_output() OutputFlags +; +;******************************************************************************* + + + +;******************************************************************************* +; +; Constants +; +;******************************************************************************* + + +MAX_MAN_DIGITS equ 21 ; Largest number of decimal digits returned + + +;* +;* Flags returned by __StrToLd() +;* + +SLD_UNDERFLOW equ 1 ; Underflow occurred. Zero returned. +SLD_OVERFLOW equ 2 ; Overflow occured. Infinity returned. +SLD_NODIGITS equ 4 ; No digits were found. Zero returned. + + +;* +;* Output flags for $i10_output() +;* + +SO_FFORMAT equ 1 ; 'f' format. Use precision as number of digits to right of decimal. + + + + +;******************************************************************************* +; +; Structures +; +;******************************************************************************* + + +FloatOutStruct struc + FloatExp dw ? + FloatSign db ? + FloatStrLen db ? + FloatStr db (MAX_MAN_DIGITS + 1) dup(?) +FloatOutStruct ends diff --git a/private/fp32/inc/i386/cruntime.inc b/private/fp32/inc/i386/cruntime.inc new file mode 100644 index 000000000..704e6b8c5 --- /dev/null +++ b/private/fp32/inc/i386/cruntime.inc @@ -0,0 +1,825 @@ +;*** +;cruntime.inc - multi-model assembly macros for interfacing to HLLs +; +; Copyright (c) 1988, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file defines the current memory model being used. +; +;Revision History: +; 08-04-88 SJM Initial version to handle all four memory models +; in 16-bit mode and small model in 32-bit mode +; 08-08-88 JCR Added CPDIST, ?WIN, PCS, ISHIFT/LSHIFT, OS2, +; DNPTR/DFPTR, DFLOAT/DDOUBLE/DLDOUBLE +; 08-16-88 PHG Added FPES, LFPES, CBI, ZXAL, ZXBL, ZXCL, ZXDL +; 08-17-88 JCR Added CAXDX, modified FPSIZE +; 08-20-88 PHG Added diagnostic messages, removed 386 16-bit support +; and 386 large code/data support, added mucho comments, +; PSS now defined as es: only if SS_NEQ_GROUP defined +; 08-24-88 JCR Added RBXSAVE and RBXONLY for use in 'proc uses' +; 08-25-88 JCR Added savereg macro, removed rbxsave/rbxonly... +; 08-26-88 GJF Added codeseg (text) macro +; 09-15-88 JCR Corrected savelist/reglist macro to go with new MASM +; 09-21-88 WAJ Added JS*, static*, global*, and label*, and lab macros +; 09-22-88 JCR Change 'plm' to 'pascal' in label macro +; 09-26-88 WAJ Added PUSH16 which will do a 16 bit push in a USE32 seg. +; 09-28-88 WAJ Added CPWORD and DPWORD +; 09-29-88 WAJ Added JMPFAR16 macro +; 10-12-88 JCR Made PCS evaluate to 'cs:' for 16/32 stub testbed +; 04-24-89 JCR Added 'assume seg:flat' for 386 to avoid masm/link bug +; 05-25-89 GJF Added APIEXT, a macro that expands to the proper extrn +; declaration for an API function +; 06-13-89 JCR Added 'flat:' to DCPTR and DDPTR +; 09-15-89 JCR Added DCPTR? and DDPTR?, always use "FLAT" not "flat" +; +;******************************************************************************* + +;============================================================================== +; +;Use the following defines to control processor/segment model +; +; -DI86 8086/8088 processor +; -DI286 80286 processor +; -DI386 80386 processor with 32-bit code/data segment +; +; -Dmem_S Small memory model (near code, near data) +; -Dmem_M Medium memory model (far code, near data) +; -Dmem_C Compact memory model (near code, fat data) +; -Dmem_L Large memory model (far code, far data) +; +; -DSS_NEQ_DGROUP SS and DS point to different segments +; +; default is -DI86 -Dmem_S +; +;============================================================================== +; +;The following variables are defined by this file: +; cpu 86, 286, or 386 +; sizeC code distance; 1 = far code, 0 = near code +; sizeD data distance; 1 = far data, 0 = near data +; mmodel english name of the memory model, i.e. "Medium" +; ISIZE, LSIZE, NSIZE size of ints, longs, shorts +; FLTSIZE, DBLSIZE, LDBLSIZE size of float, double, long double +; NPSIZE, FPSIZE size of near/far pointers +; CPSIZE, DPSIZE size of code/data pointers +; ISHIFT, LSHIFT bits to shift to convert byte to int/long +; +;The following macros allow easy writing of combined 16/32 bit code: +; +; 16/32 bit registers: +; rax, rbx, rcx, rdx, expand to native registers (rax = eax or ax) +; rsi, rdi, rsp, rbp +; 16/32 bit register instructions: +; JRCXZ jump when rcx is zero +; CBI convert byte to int (al to rax) +; CAXDX convert rax to rax:rdx +; ZXAL, ZXBL, ZXCL, ZXDL zero extend al,bl,cl,dl to rax,rbx,rcx,rdx +; Pointer instructions: +; LPDS, LPES load data pointer with ES or DS +; PDS, PES segment overrides when pointer loaded as above +; PCS, PSS segment override to get at code/stack segment +; LFPDS, LFPES load far pointer with ES or DS +; FPDS, FPES segment overrides when pointer loaded as above +; CPTR data type of code pointer +; CPDIST distance of code (near/far) +; DNPTR, DFPTR define near/far pointer +; DCPTR, DDPTR define code/data pointer +; DCPTR?, DDPTR? define uninitialized code/data pointer +; CPWORD, DPWORD data type of code or data pointer +; Numeric type instructions: +; IWORD, LWORD, SWORD data type of int, long, short +; DINT, DLONG, DSHORT define int, long, short +; DFLOAT, DDOUBLE, DLDOUBLE define float, double, long double +; Offsets: +; codeoffset, dataoffset offsets from code and data segments +; API calls: +; APIDIST distance of API calls (near/far) +; APIEXT ApiName extrn declaration for an API function +; +;The following utility macros are provided: +; codeseg define/declare code segment +; error <msg> stop assembly with message +; display <msg> display a message, unless QUIET defined +; savelist [<reg> ...] init list of regs to be save by 'proc uses' +; _if cond <instruction> assemble instruction only if cond is TRUE +; _ife cond <instruction> assemble instruction only if cond is FALSE +; _ifd symbol <instruction> assemble instruction only if symbol defined +; _ifnd symbol <instruction> assemble instruction only if symbol not defined +; +; lab LabelName assembles to "LabelName:" If DEBUG is defined +; LabelName is made public +; +; JS* (ex. JSE,JSZ,JSB ...) assemble to "je short","jz short","jb short" +; +; Cmacro look alikes +; static* Name, InitialValue, Repeat defines a static variable of type * +; global* Name, InitialValue, Repeat defines a global variable of type * +; label* Name, {PUBLIC,PASCAL,C} defines a label of type * +; +; PUSH16 SegmentReg pushes 16 bits in a use32 segment +; JMPFAR16 label will do a far 16:16 jmp from a use32 segment +; +;============================================================================== + +; error <msg> - Output message and generate error + +error MACRO msg +if2 ;; only on pass 2 can we generate errors + %out ********************************************************** + %out *** E r r o r -- msg + %out ********************************************************** + .err +endif + ENDM + +; display msg - Output message unless QUIET defined + +display MACRO msg +ifndef QUIET ;; only when quiet flag not set +if1 ;; and on pass 1, then display message + %out msg +endif +endif + ENDM + +; One line conditionals: +; here we create the capability of writing code lines like +; +; _if sizeD <push ds> as opposed to if sizeD +; push ds +; endif + +_if MACRO cond,text + if cond + text + endif + ENDM + +_ife MACRO cond,text + ife cond + text + endif + ENDM + +_ifd MACRO cond,text + ifdef cond + text + endif + ENDM + +_ifnd MACRO cond,text + ifndef cond + text + endif + ENDM + +; set windows flag to 0 + + ?WIN equ 0 ; disable windows-specific code + +; check for MTHREAD, requires 286 or greater processor + +ifdef MTHREAD +ifndef I386 +ifndef I286 +; MTHREAD implies 286 processor +display <Multi-thread specified - assuming 80286 processor> +I286 equ <> +endif +endif +endif + +; Process memory-model arguments + +ifdef mem_M + ; Medium model + sizeC equ 1 + sizeD equ 0 + mmodel equ <Medium> +elseifdef mem_C + ; Compact model + sizeC equ 0 + sizeD equ 1 + mmodel equ <Compact> +elseifdef mem_L + ; Large model + sizeC equ 1 + sizeD equ 1 + mmodel equ <Large> +else + ; Small model - default + sizeC equ 0 + sizeD equ 0 + mmodel equ <Small> +endif + +; Process processor arguments + +ifdef I286 + display <Processor: 80286> + cpu equ 286 + .286 +elseifdef I386 + display <Processor: 80386> + cpu equ 386 + OS2 equ <> ; Define "OS2" since 386 can only run on that OS + .386 +else + display <Processor: 8086/8088> + cpu equ 86 + .8086 +endif + +; 386 32-bit checking. Currently we are only expecting small model +; 32 bit segments, so we make a few checks to be sure nothing is +; incorrectly being defined. + +ifdef I386 + if sizeC or sizeD + error <Must use Small memory model for 386 version.> + endif + + ifdef _LOAD_DGROUP + error <No loading DGROUP in 386 version.> + endif + + ifdef SS_NEQ_DGROUP + error <SS always equals DGROUP in 386 version.> + endif +endif + +; Set memory model + +% display <Memory model: mmodel> +% .model mmodel, C + +; +; *** Temporary Workaround *** +; Currently, MASM will not recognize the 'FLAT' keyword unless it previously +; appears in an 'assume' statement. Presumably, when the '.model FLAT' feature +; is implemented, this will go away. [Use 'gs:' since we never use that +; segment register. +; + +ifdef I386 + ; ensure that MASM recognizes 'FLAT' + assume gs:FLAT +endif + + +; Define registers: +; Instead of using the "word" registers directly, we will use a set of +; text equates. This will allow you to use the native word size instead of +; hard coded to 16 bit words. We also have some instruction equates for +; instruction with the register type hard coded in. + +ifdef I386 + + rax equ <eax> + rbx equ <ebx> + rcx equ <ecx> + rdx equ <edx> + rdi equ <edi> + rsi equ <esi> + rbp equ <ebp> + rsp equ <esp> + + JRCXZ equ <jecxz> + CBI equ <movsx eax, al> ; convert byte to int (al to rax) + CAXDX equ <cdq> ; convert rax to rdx:rax + ZXAL equ <movzx eax, al> ; zero extend al + ZXBL equ <movzx ebx, bl> ; zero extend bl + ZXCL equ <movzx ecx, cl> ; zero extend cl + ZXDL equ <movzx edx, dl> ; zero extend dl + +else + + rax equ <ax> + rbx equ <bx> + rcx equ <cx> + rdx equ <dx> + rdi equ <di> + rsi equ <si> + rbp equ <bp> + rsp equ <sp> + + JRCXZ equ <jcxz> + CBI equ <cbw> ; convert byte to int (al to rax) + CAXDX equ <cwd> ; convert rax to rdx:rax + ZXAL equ <xor ah, ah> ; zero extend al + ZXBL equ <xor bh, bh> ; zero extend bl + ZXCL equ <xor ch, ch> ; zero extend cl + ZXDL equ <xor dh, dh> ; zero extend dl + +endif + +; The following equates deal with the differences in near versus +; far data pointers, and segment overrides. +; +; Use LPES and PES when loading a default size pointer -- it loads +; a 16-bit pointer register in 286 Small/Medium model, +; a 16-bit pointer register and 16-bit segment register in 8086/286 +; Compact/Large model, and a 32-bit pointer register in 386 mode. +; +; Use LFPES and FPES when loading an always far pointer -- it loads a +; 16-bit pointer register and 16-bit segment register in 8086/286, +; all models; a 32-bit pointer register in 386 mode. + +if sizeD + LPES equ <les> + LPDS equ <lds> + PDS equ <ds:> + PES equ <es:> +else + LPES equ <mov> + LPDS equ <mov> + PDS equ <> + PES equ <> +endif + +ifdef I386 + LFPES equ <mov> + LFPDS equ <mov> + FPES equ <> + FPDS equ <> +else + LFPES equ <les> + LFPDS equ <lds> + FPES equ <es:> + FPDS equ <ds:> +endif + +if sizeC or @WordSize eq 2 + PCS equ <cs:> ; large code model or non-386 +else + IF 1 ;*** TEMP 16/32 TESTBED *** + PCS equ <cs:> + ELSE + PCS equ <> ; 386 small code model + ENDIF ;*** END TEMP CODE +endif + +ifdef SS_NEQ_DGROUP + PSS equ <ss:> ; SS != DS +else + PSS equ <> ; SS == DS +endif + +; Define offset macros: +; The 32-bit segments will not have 'groups' + +ifdef I386 + codeoffset equ <offset FLAT:> + dataoffset equ <offset FLAT:> +else + codeoffset equ <offset @code:> + dataoffset equ <offset DGROUP:> +endif + +; The next set of equates deals with the size of SHORTS, INTS, LONGS, and +; pointers in the 16 and 32 bit versions. + +ifdef I386 ;--- 32 bit segment --- + + ; parameters and locals + IWORD equ <dword> + LWORD equ <dword> +if @Version LT 600 + SWORD equ <word> +endif + + ; static storage + DINT equ <dd> + DLONG equ <dd> + DSHORT equ <dw> + + ; sizes for fixing SP, stepping through tables, etc. + ISIZE equ 4 + LSIZE equ 4 + SSIZE equ 2 + NPSIZE equ 4 + FPSIZE equ 4 + + ; bit shift count to convert byte cnt/ptr to int/long cnt/ptr + ISHIFT equ 2 ; byte-to-int shift count + LSHIFT equ 2 ; byte-to-long shift count + + ; sizes dependent upon memory model. dq -vs- df is not yet clear + DNPTR equ <dd> ; near pointer + DFPTR equ <dd> ; far pointer + + DCPTR equ <dd offset FLAT:>; 32 bit offset only + DCPTR? equ <dd> ; No seg override for uninitialized values + CPSIZE equ 4 + CPDIST equ <near> ; code pointers are near + CPTR equ <near ptr> + + DDPTR equ <dd offset FLAT:> + DDPTR? equ <dd> + DPSIZE equ 4 + + CPWORD equ <dword> ; code pointers are dwords + DPWORD equ <dword> ; data pointers are dwords + + APIDIST equ <near> ; all API calls are NEAR in the 32 bit model + +; macro to declare API functions +EXTAPI macro apiname + extrn pascal apiname:near +endm + +else ;--- 16-bit segment --- + + ; parameters and locals + IWORD equ <word> + LWORD equ <dword> +if @Version LT 600 + SWORD equ <word> +endif + + ; static storage + DINT equ <dw> + DLONG equ <dd> + DSHORT equ <dw> + + ; sizes for fixing SP, stepping through tables, etc + ISIZE equ 2 + LSIZE equ 4 + SSIZE equ 2 + NPSIZE equ 2 + FPSIZE equ 4 + + ; bit shift count to convert byte cnt/ptr to int/long cnt/ptr + ISHIFT equ 1 ; byte-to-int shift count + LSHIFT equ 2 ; byte-to-long shift count + + ; sizes dependent upon memory model + DNPTR equ <dw> ; near pointer + DFPTR equ <dd> ; far pointer + + if sizeC + DCPTR equ <dd> ; 16 bit segment and 16 bit offset + DCPTR? equ <dd> + CPSIZE equ 4 + CPDIST equ <far> ; code pointers are far + CPTR equ <far ptr> + CPWORD equ <dword> ; code pointers are dwords + else + DCPTR equ <dw> ; 16 bit offset only + DCPTR? equ <dw> + CPSIZE equ 2 + CPDIST equ <near> ; code pointers are near + CPTR equ <near ptr> + CPWORD equ <word> ; code pointers are words + endif + + if sizeD + DDPTR equ <dd> + DDPTR? equ <dd> + DPSIZE equ 4 + DPWORD equ <dword> ; data pointers are dwords + else + DDPTR equ <dw> + DDPTR? equ <dw> + DPSIZE equ 2 + DPWORD equ <word> ; data pointers are words + endif + + APIDIST equ <far> ; API calls are FAR in 16 bit model + +; macro to declare API functions +EXTAPI macro apiname + extrn pascal apiname:far +endm + +endif ; --- 16/32 segment --- + +; Float/double definitions +; (currently the same for 16- and 32-bit segments) + +FLTSIZE equ 4 ; float +DBLSIZE equ 8 ; double +LDBLSIZE equ 10 ; long double + +DFLOAT equ <dd> +DDOUBLE equ <dq> +DLDOUBLE equ <dt> + +; +; savelist - Generate a list of regs to be saved by the proc 'uses' option. +; +; Input: +; reg1, reg2, reg3, reg4 = registers to be saved across function +; Output: +; reglist = text string of registers that can be passed to the 'uses' +; option on the 'proc' command. +; + +savelist MACRO reg1, reg2, reg3, reg4 + local ws, listsize + ws catstr < > ; whitespace char + + IFNDEF I386 + rbx equ <> ; 8086/286 don't save rbx + ENDIF + + IFNB <reg4> + reglist catstr reg1, ws, reg2, ws, reg3, ws, reg4 + ELSEIFNB <reg3> + reglist catstr reg1, ws, reg2, ws, reg3, ws + ELSEIFNB <reg2> + reglist catstr reg1, ws, reg2, ws, ws + ELSEIFNB <reg1> + reglist catstr reg1, ws, ws, ws + ELSE + reglist catstr <> + ENDIF + + listsize sizestr reglist ; size of register list + + IF listsize LE 3 ; if list is only the 3 ws chars... + reglist catstr <> + ENDIF + + IFNDEF I386 + rbx equ <bx> ; restore rbx + ENDIF + + ENDM ; savelist + +; +; codeseg - Define/declare the standard code segment. Maps to the proper +; form of the .code directive. +; +; Input: +; +; Output: +; .code _TEXT ; for large code models +; .code ; for small code models +; assume cs:FLAT ; for 386 +; assume ds:FLAT ; for 386 +; assume es:FLAT ; for 386 +; assume ss:FLAT ; for 386 +; + +codeseg MACRO + +if sizeC + .code _TEXT +else + .code +endif + +ifdef I386 +if @Version LT 600 + assume cs:FLAT +endif ;@Version LT 600 + assume ds:FLAT + assume es:FLAT + assume ss:FLAT +endif + + ENDM + +;*************************************************************** +;* +;* Debug lab macro +;* +;*************************************************************** + +lab macro name +ifdef DEBUG + public pascal name ;; define label public for Symdeb +endif +name: + endm + + +;*************************************************************** +;* +;* Conditional jump short macros +;* +;*************************************************************** + + + irp x,<Z,NZ,E,NE,S,NS,C,NC,P,NP,PE,PO,A,AE,B,BE,NB,G,GE,L,LE> +JS&x equ <j&x short> + endm + + +;*************************************************************** +;* +;* Global data definition macros +;* +;* Usage: +;* globalI Name, InitialValue, Repeat +;* +;*************************************************************** + + +MakeGlobal macro suffix, DataType ;; makes all of the global* macros + +global&suffix macro name, data, rep +public name +ifb <rep> + _repeat = 1 +else + _repeat = (rep) +endif + +name &DataType _repeat dup( data ) + endm + + endm + + + MakeGlobal T, dt ; globalT + MakeGlobal Q, dq ; globalQ + MakeGlobal D, dd ; globalD + MakeGlobal W, dw ; globalW + MakeGlobal B, db ; globalB + +% MakeGlobal I, <DINT> ; globalI + +% MakeGlobal DP, <DDPTR> ; globalDP +% MakeGlobal CP, <DCPTR> ; globalCP +% MakeGlobal FP, <DFPTR> ; globalFP +% MakeGlobal NP, <DNPTR> ; globalNP + + + +;*************************************************************** +;* +;* Static data definition macros +;* +;* Usage: +;* staticI Name, InitialValue, Repeat +;* +;*************************************************************** + + +MakeStatic macro suffix, DataType ;; makes all of the static* macros + +static&suffix macro name, data, rep + +ifdef DEBUG + public pascal name ;; make statics public if DEBUG +endif + +ifb <rep> + _repeat = 1 +else + _repeat = (rep) +endif + +name &DataType _repeat dup( data ) + endm + + endm + + + MakeStatic T, dt ; staticT + MakeStatic Q, dq ; staticQ + MakeStatic D, dd ; staticD + MakeStatic W, dw ; staticW + MakeStatic B, db ; staticB + +% MakeStatic I, <DINT> ; staticI + +% MakeStatic DP, <DDPTR> ; staticDP +% MakeStatic CP, <DCPTR> ; staticCP +% MakeStatic FP, <DFPTR> ; staticFP +% MakeStatic NP, <DNPTR> ; staticNP + +;*************************************************************** +;* +;* Label definition macros +;* +;*************************************************************** +;* +;* Label definition macros +;* +;* Usage: +;* labelI Name, {PUBLIC, PASCAL, C} +;* +;*************************************************************** + +__MakePublic macro name, option ;; decides if a label should be +ifidni <option>, <PUBLIC> ;; made public + public name +elseifidni <option>, <PASCAL> + public pascal name +elseifidni <option>, <C> + public C name +elseifb <option> + ifdef DEBUG + public pascal name ;; make public if DEBUG + endif +endif + endm + + +if @Version GE 600 + +MakeLabel macro suffix, LabelType ;; makes all of the label* macros + +%@CatStr(<label>,<suffix>) macro name, option + __MakePublic <name>,<option> +name label &LabelType + endm + + endm + +else ;!(@Version GE 600) + +MakeLabel macro suffix, LabelType ;; makes all of the label* macros + +label&suffix macro name, option + __MakePublic <name>,<option> +name label &LabelType + endm + + endm + +endif ;!(@Version GE 600) + + + MakeLabel T, tbyte ; make labelT + MakeLabel Q, qword ; make labelQ + MakeLabel D, dword ; make labelD + MakeLabel W, word ; make labelW + MakeLabel B, byte ; make labelB + + MakeLabel P, proc ; make labelP + MakeLabel FP, far ; make labelFP + MakeLabel NP, near ; make labelNP + +% MakeLabel I, IWORD ; make labelI + + +labelDP macro name, option ;; labelDP + __MakePublic <name>,<option> +ifdef I386 + if sizeD + name label fword + else + name label dword + endif +else ;not I386 + if sizeD + name label dword + else + name label word + endif +endif ;not I386 + endm + +labelCP macro name, option ;; labelCP + __MakePublic <name>,<option> +ifdef I386 + if sizeC + name label fword + else + name label dword + endif +else ;not I386 + if sizeC + name label dword + else + name label word + endif +endif ;not I386 + endm + + +;* +;* PUSH16 SegReg - pushes 16 bits in a use32 segment +;* + +PUSH16 macro SegReg + +ifdef I386 + nop + db 66h ; operand size over-ride +endif ; I386 + + push SegReg + endm + + +;* +;* JMPFAR16 label - jmps far from a use32 to a use16 segment +;* + +JMPFAR16 macro label + +ifndef I386 + error <JMPFAR16 can only be used in a use32 code segment> +endif ;I386 + + nop + db 66h ;; operand size over-ride + db 0eah ;; jmp far immediate op code + dw offset label + dw seg label + endm diff --git a/private/fp32/inc/i386/defsegs.inc b/private/fp32/inc/i386/defsegs.inc new file mode 100644 index 000000000..e0c402053 --- /dev/null +++ b/private/fp32/inc/i386/defsegs.inc @@ -0,0 +1,58 @@ +;*** +;defsegs.inc - defines segments. +; +; Copyright (c) 1989-1989, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file defines segments for the math libs. +; +;Revision History: +; 11-14-89 WAJ Initial version. +; +;******************************************************************************* + + +CrtDefSegs macro SegList + irp seg, <SegList> + + ifidni <seg>,<code> + ifdef I386 + createSeg _TEXT,code,dword,public,code,FLAT + defGrp FLAT,<code> + else + ifdef FAR_CODE + createSeg _RTEXT,code,word,public,code + else + createSeg _RTEXT,code,word,public,code + endif + endif + + elseifidni <seg>,<data> + ifdef I386 + createSeg DATA,data,dword,public,DATA,FLAT + defGrp FLAT,<data> + else + createSeg DATA,data,word,public,DATA,DGROUP + defGrp DGROUP,<data> + endif + + elseifidni <seg>,<init> + ifdef I386 + createSeg XIB, xibseg, dword, public, DATA, FLAT + createSeg XI, xiseg, dword, public, DATA, FLAT + createSeg XIE, xieseg, dword, public, DATA, FLAT + defGrp FLAT,<xibseg,xiseg,xieseg> + else + createSeg XIB, xibseg, word, public, DATA, DGROUP + createSeg XI, xiseg, word, public, DATA, DGROUP + createSeg XIE, xieseg, word, public, DATA, DGROUP + defGrp DGROUP,<xibseg,xiseg,xieseg> + endif + + else + %out <Error in CrtDefSeg: Unknown segment &seg.> + .err + endif + + endm ;; irp loop +endm ;; CrtDefSeg macro diff --git a/private/fp32/inc/i386/elem87.inc b/private/fp32/inc/i386/elem87.inc new file mode 100644 index 000000000..de620278b --- /dev/null +++ b/private/fp32/inc/i386/elem87.inc @@ -0,0 +1,223 @@ +;*** +;elem87.inc - +; +; Copyright (c) 19xx-1988, Microsoft Corporation. All rights reserved. +; +;Purpose: Include file for 8087 transcendental dunctions. +; Has jump table and common dispatch code definitions +; +; +;Revision History: +; +; 04/21/88 WAJ Added this header. Added wUser1/wUser2/bUser3 to +; common dispatch code stack frame. These are used +; for multi thread fortran. +; +; 08/25/88 WAJ 386 version +; +; 11/16/91 GDP put exception structure on stack frame +; +; 02/05/01 GDP modified DispLocals, added DOMAIN_QNAN +; +; 03/25/92 GDP added IEEE exception opcodes +; +;******************************************************************************* + + +SBUFSIZE EQU 108 ; length of buffer used by fsave + + +; local temps for common dispatch code + +DispLocals struc + +wUser1 dw ? +wUser2 dw ? +savCntrl dw ? +setCntrl dw ? +StatusWord dw ? +Fac dt ? + +ifdef I386 +Function dd ? +else +Function dw ? +endif + +ErrorType db ? + +ifdef MTHREAD +__cpower db ? +else +bUser3 db ? +endif + +typ dd ? +nam dd ? +arg1 dq ? +arg2 dq ? +retval dq ? +savebuf db SBUFSIZE dup (?) ; buffer used for fsave'ing + +DispLocals ends + +DSFISize equ ((size DispLocals) + ISIZE - 1) and (not (ISIZE-1)) + +DSF equ [rbp-DSFISize] ; Dispatch Stack Frame + +CondCode equ byte ptr [DSF.StatusWord+1] +savCntrlbyte equ byte ptr [DSF.savCntrl+1] + + +ifdef MTHREAD +_cpower equ <DSF.__cpower> +endif + + + +; 8087 control word structure + +tranCntrl= 13h ; 64 bits, round even, affine inf +Affine= 10h ; affine inf + + +; error types for matherr + +CHECKRANGE= -2 ; check for possible overflow or underflow +CHECKOVER= -1 ; check for possible overflow +DOMAIN= 1 +SING= 2 +OVERFLOW= 3 +UNDERFLOW= 4 +TLOSS= 5 +PLOSS= 6 +DOMAIN_QNAN= 7 ; should be changed to DOMAIN + ; before calling matherr +INEXACT= 8 + + +; function jump table structure + +funtab struc + fnamlen db ? + fnam db 6 dup (?) + fuseval db 6 dup (?) + fnumarg db ? + fnumber db ? + db ? ;; padding +funtab ends + + +jmptab macro nam,namlen,strnam,useval,numarg +labelW _&nam&jmptab, PUBLIC + db namlen ;; 1 + db strnam ;; 6 + db useval ;; 6 + db numarg ;; 1 + db nam ;; 1 + db ? ;; 1 + endm + + +Z = 1*ISIZE ; 0.0 +H = 2*ISIZE ; HUGE +mH = 3*ISIZE ; -HUGE + + +Pentry macro pnam,nam,narg +extrn __&nam&jmptab:word +extrn __ptrandisp&narg&s:near +extrn __ptrandisp&narg&d"near +labelP pnam&SRQQ, PUBLIC + mov rdx, dataoffset __&nam&jmptab + jmp __ptrandisp&narg&s +labelP pnam&DRQQ, PUBLIC + mov rdx, dataoffset __&nam&jmptab + jmp __ptrandisp&narg&d + endm + +; This macro is used to generate the necessary code and declarations +; for Fortran intrinsics with one argument + +F1ArgEntry macro nam +extrn _&nam&jmptab:word +extrn _ctrand1:near +labelP _FI&nam, PUBLIC + mov rdx, dataoffset _&nam&jmptab + jmp _ctrand1 + endm + +; This macro is used to generate the necessary code and declarations +; for Fortran intrinsics with two arguments + +F2ArgEntry macro nam +extrn _&nam&jmptab:word +extrn _ctrand2:near +labelP _FI&nam, PUBLIC + mov rdx, dataoffset _&nam&jmptab + jmp _ctrand2 + endm + + +; C/FORTRAN/BASIC floatcall macro + +fc macro JSS,JDS,FCNAM,INS,SIZE,WAITI +;; JSS - SS:BX and ES:BX offset +;; JDS - DS:BX offset +;; FCNAM - float call name suffix +;; INS - 8087 instruction. +;; SIZE - byte,word,dword,qword,tbyte +;; WAIT - 8087 wait. +;; +;; if large model, then frame floatcalls for BASIC error recovery + +ifnb <JSS> + labelP _s&FCNAM, PUBLIC + push ss + pop es + _e&FCNAM proc + f&ins&size& ptr es:[rbx] ;; Perform instruction. + waiti + ret + _e&FCNAM endp +endif ;;JES + +ifnb <JDS> + _f&FCNAM proc + ifnb <SIZE> + f&ins&size& ptr [rbx] ;; Perform instruction. + waiti + else + ifnb <ins> + f&ins + waiti + endif + endif ;;SIZE + + _f&FCNAM endp + +endif ;;JDS +endm + + +; IEEE exception opcodes +; should be in sync with fpieee.h, ntxcapi.h + +OP_SQRT = 05h +OP_ACOS = 0dh +OP_ASIN = 0eh +OP_ATAN = 0fh +OP_ATAN2 = 10h +OP_COS = 12h +OP_COSH = 13h +OP_EXP = 14h +OP_FMOD = 16h +OP_LOG = 1ah +OP_LOG10 = 1bh +OP_POW = 1dh +OP_SIN = 1eh + + +OP_SINH = 1fh +OP_TAN = 20h +OP_TANH = 21h diff --git a/private/fp32/inc/i386/errno.inc b/private/fp32/inc/i386/errno.inc new file mode 100644 index 000000000..4666f2882 --- /dev/null +++ b/private/fp32/inc/i386/errno.inc @@ -0,0 +1,59 @@ +;*** +;errno.inc - defines standard C error codes +; +; Copyright (c) 1987-1988, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file contains definitions for the standard C error codes +; used by XENIX (many of these have little significance for +; MS-DOS). +; +;Revision History: +; +;******************************************************************************* + + +err MACRO num,name,text +name equ num + ENDM + + +err 1, EPERM, <Not owner> +err 2, ENOENT, <No such file or directory> +err 3, ESRCH, <No such process> +err 4, EINTR, <Interrupted system call> +err 5, EIO, <I/O error> +err 6, ENXIO, <No such device or address> +err 7, E2BIG, <Arg list too long> +err 8, ENOEXEC, <Exec format error> +err 9, EBADF, <Bad file number> +err 10, ECHILD, <No child processes> +err 11, EAGAIN, <No more processes> +err 12, ENOMEM, <Not enough space> +err 13, EACCES, <Permission denied> +err 14, EFAULT, <Bad address> +err 15, ENOTBLK, <Block device required> +err 16, EBUSY, <Mount device busy> +err 17, EEXIST, <File exists> +err 18, EXDEV, <Cross-device link> +err 19, ENODEV, <No such device> +err 20, ENOTDIR, <Not a directory> +err 21, EISDIR, <Is a directory> +err 22, EINVAL, <Invalid argument> +err 23, ENFILE, <File table overflow> +err 24, EMFILE, <Too many open files> +err 25, ENOTTY, <Not a typewriter> +err 26, ETXTBSY, <Text file busy> +err 27, EFBIG, <File too large> +err 28, ENOSPC, <No space left on device> +err 29, ESPIPE, <Illegal seek> +err 30, EROFS, <Read-only file system> +err 31, EMLINK, <Too many links> +err 32, EPIPE, <Broken pipe> +err 33, EDOM, <Math argument> +err 34, ERANGE, <Result too large> +err 35, EUCLEAN, <file system needs cleaning> +err 36, EDEADLOCK, <would deadlock> + +; end of errno.inc +;----------------------------------------------------------------------- diff --git a/private/fp32/inc/i386/mathmac.inc b/private/fp32/inc/i386/mathmac.inc new file mode 100644 index 000000000..e3610db12 --- /dev/null +++ b/private/fp32/inc/i386/mathmac.inc @@ -0,0 +1,762 @@ +;--------------- Standard MATHMAC.INC +; +; Standard Math Macro Definition File +; +; Gregory F. Whitten +; 07/28/83 +; +; Copyright (C) 1983 by Microsoft Corp. +; +; +; Revision History +; +; 10/18/83 Greg Whitten +; changed LCL FLT option to have 2 values for IEEE +; +; 05/02/84 Greg Whitten +; added CSconst switch support +; +; 07/23/84 Greg Whitten +; changed public/extrn to lower case +; +; 09/03/84 Greg Whitten +; added movcnp macro for constant pointers (with CSconst) +; fixed CSconst bug in f_movcs (only used 1 place) +; +; 10/29/84 Greg Whitten +; added debugger switch for fout changes +; +; 06/17/87 Jamie Bariteau +; changed outif macro for MASM 5.0 compatibility +; +; 02/22/88 Bill johnston +; outif now checks to see if QUIET was defined. +; +;--------------- + + +if1 ; Pass 1 only + +; Helper macros for undefined symbols + + +;*** OUTIF name,msg +; +; Function: +; Output msg if name is non-zero. If name is undefined, set name = 0. +; + +outif MACRO name,msg +ifndef Name + Name= 0 +else + if Name + if1 + ifndef QUIET + %out ! msg + endif + endif + endif + Name=Name +endif + ENDM + + +;*** ERROR msg +; +; Function: +; Output msg and generate an assembly time error +; + +error MACRO msg + bug + %OUT E r r o r ----- msg + ENDM + +endif ; Pass 1 + +; Define standard math package switches + +ifdef DEBUG + %out <+++++++++++++++++++++++> + %out <+++ DEBUG version +++> + %out <+++++++++++++++++++++++> +endif ;DEBUG + +outif XENIX3, <+++ XENIX 3.0 and later version +++> + +outif BASIC, <BASIC Interpreter> +outif BASCOM, <BASIC Compiler> +outif CC, <C Compiler> +outif FORTRAN, <FORTRAN Compiler> +outif PASCAL, <PASCAL Compiler> +outif Frontends, < compiler front-end version> +outif LOGO, <LOGO Interpreter> +outif IBMASM, <IBM Assembler Library> +outif ASSEMBLER, <Macro Assembler> +outif debugger, <Symbolic Assembly Debugger> + +outif DecFmt, <- Decimal Microsoft Format> +outif MBinFmt, <- Binary Microsoft Format> +outif IEEEFmt, <- IEEE Format> +if IEEEFmt and (Frontends or debugger) +Denormal= 1 ; front-ends need denormals +endif +outif Denormal, <- denormal number support> +outif Use8087, <- 8087 instructions> + +outif Single, <- Single precision> +outif Double, <- Double precision> +outif CSconst, <- Constants in CS> + + + +if DecFmt+IEEEFmt+MBinFmt ne 1 + error <Improper math format specified> +endif + +if Single+Double ne 1 + error <Improper math size specified> +endif + + +poly1 = 8000h ; flag for leading 1 in poly + +if Single ; Defined on both passes + DefTyp= 4 + DefWrd= 2 +else ;Double + DefTyp= 8 + DefWrd= 4 +endif + + +; offsets to sign and exponent fields + +if IEEEFmt + if single +expmask= 07F80h +expbias= 03F80h +expshft= 7 +manbits= 24 +of_exp= 2 +of_sgn= 3 + else ;double +expmask= 07FF0h +expbias= 03FF0h +expshft= 4 +manbits= 53 +of_exp= 6 +of_sgn= 7 + endif +endif ;IEEEFmt + +if MBinFmt + if single +manbits= 24 +of_exp= 3 +of_sgn= 2 + else ;double +manbits= 56 +of_exp= 7 +of_sgn= 6 + endif +endif ;MBinFmt + +if DecFmt +of_exp= 0 +of_sgn= 0 +endif ;DecFmt + + + +if1 ; Pass 1 only + +; Helper macros for elementary functions + + +;*** LCL name,type,value +; +; Function: +; LCL declares data with the specified name, type, and value. +; If the type is FLT for IEEE numbers, then either DD or DQ is +; substituted depending on the size of the variable. +; + +lcl MACRO name,type,value,value2 +ifidn <type>,<FLT> + if IEEEFmt + if Single + name DD value + else ;;Double + name DQ value2 + endif + else + error <FLT not implemented for this type> + endif +else + name type value +endif + ENDM + + +;*** GENHELP typ,siz +; +; Function: +; GENHELP generates the following macros with the typ and siz +; information embedded in the macro. +; +; PUB name +; PUB4 name +; PUB8 name +; GBL name,type,value +; GBL4 name,type,value +; GBL8 name,type,value +; EXT name,type +; EXT4 name,type +; EXT8 name,type +; F_DW rout +; F4_DW rout +; F8_DW rout +; F_JMP rout +; F4_JMP rout +; F8_JMP rout +; F_CALL rout +; F4_CALL rout +; F8_CALL rout +; +; Global names are considered to be names with the type and size prefix. +; Local names have no prefix. I.e., $I8_ONE and ONE, respectively. +; +; Macros with a size in the name create local names with the size at the +; end. I.e., RESULT4 +; + +genhelp MACRO typ,siz + + +;*** PUB name +; +; Function: +; PUB declares both the global and local names as labels. +; + +pub &MACRO name + public $&typ&&siz&_&&name +$&typ&&siz&_&&name: +name: + &ENDM + + +pub4 &MACRO name + public $&typ&4_&&name +$&typ&4_&&name: +name&&4: + &ENDM + + +pub8 &MACRO name + public $&typ&8_&&name +$&typ&8_&&name: +name&&8: + &ENDM + +;*** PUBX name +; +; Function: +; PUBX declares both the global and local names as labels. +; Added for MASM 5.0 compatibility. Adds leading underscore +; to local names to avoid conflict with MASM 5.0 reserved words. +; +pubx &MACRO name + public $&typ&&siz&_&&name +$&typ&&siz&_&&name: +_&&name: + &ENDM + +pub4x &MACRO name + public $&typ&4_&&name +$&typ&4_&&name: +_name&&4: + &ENDM + + +pub8x &MACRO name + public $&typ&8_&&name +$&typ&8_&&name: +_name&&8: + &ENDM + +;*** GLB name,type,value +; +; Function: +; GLB declares the global name for the data value and aliases the local +; name to it. +; + +glb &MACRO name,type,value + public $&typ&&siz&_&&name + lcl $&typ&&siz&_&&name,type,<value> +name equ $&typ&&siz&_&&name + &ENDM + + +glb4 &MACRO name,type,value + public $&typ&4_&&name + lcl $&typ&4_&&name,type,<value> +name&&4 equ $&typ&4_&&name + &ENDM + + +glb8 &MACRO name,type,value + public $&typ&8_&&name + lcl $&typ&8_&&name,type,<value> +name&&8 equ $&typ&8_&&name + &ENDM + + +;*** EXT name,type +; +; Function: +; EXT declares the global name to be external with the specified type. +; It also aliases the local name to the global name. +; + +ext &MACRO name,type + extrn $&typ&&siz&_&&name:type +name equ $&typ&&siz&_&&name + &ENDM + + +ext4 &MACRO name,type + extrn $&typ&4_&&name:type +name&&4 equ $&typ&8_&&name + &ENDM + + +ext8 &MACRO name,type + extrn $&typ&8_&&name:type +name&&8 equ $&typ&8_&&name + &ENDM + + +;*** F_DW name +; +; Function: +; F_DW declares the code address of the global name +; + +f_dw &MACRO name + dwcp $&typ&&siz&_&&name + &ENDM + + +f4_dw &MACRO name + dwcp $&typ&4_&&name + &ENDM + + +f8_dw &MACRO name + dwcp $&typ&8_&&name + &ENDM + + +;*** F_CALL name +; +; Function: +; F_CALL declares the global name to be external and issues a call. +; + +f_call &MACRO name + extrn $&typ&&siz&_&&name:near + call $&typ&&siz&_&&name + &ENDM + + +f4_call &MACRO name + extrn $&typ&4_&&name:near + call $&typ&4_&&name + &ENDM + + +f8_call &MACRO name + extrn $&typ&8_&&name:near + call $&typ&8_&&name + &ENDM + + +;*** F_JMP name +; +; Function: +; F_JMP declares the global name to be external and issues a jmp. +; + +f_jmp &MACRO name + extrn $&typ&&siz&_&&name:near + jmp $&typ&&siz&_&&name + &ENDM + + +f4_jmp &MACRO name + extrn $&typ&4_&&name:near + jmp $&typ&4_&&name + &ENDM + + +f8_jmp &MACRO name + extrn $&typ&8_&&name:near + jmp $&typ&8_&&name + &ENDM + + + ENDM ;; End of genhelp + + + +; Invoke GENHELP with the appropriate type and size information. + +if DecFmt + if Single + genhelp d,4 + else ;;Double + genhelp d,8 + endif +endif + +if IEEEFmt + if Single + genhelp i,4 + else ;;Double + genhelp i,8 + endif +endif + +if MBinFmt + if Single + genhelp m,4 + else ;;Double + genhelp m,8 + endif +endif + + purge genhelp ; toss genhelp macro + + +; cs mover macros - generate code iff CSconst nonzero + +movcssi macro +if CSconst + f_call mvcssi +endif + endm + +movcsdi macro +if CSconst + f_call mvcsdi +endif + endm + + +movcnp macro dest,name,off +if CSconst + movcp dest,name,off +else + movp dest,name,off +endif + endm + + +; f_movcs macro +; f_mov macro +; f4_mov macro +; f8_mov macro +; +; Special forms f_mov +; +; op1/DI op2/SI Routine Res SI Use +; +; ARG AC movarg_ac AC DI +; ARG <> movarg SI DI +; AC ARG movac_arg AC DI +; AC <> movac AC DI +; TEMP <> movtemp SI DI +; any any gen code used SI.DI +; +; Special forms f_movcs +; +; op1/DI op2/SI Routine Res SI Use +; +; AC any gencode AC DI +; any any gen code used SI.DI + +f_movcs macro op1,op2 +if CSconst + movp di,op1 + movcp si,op2 ;; op2 is source + &rept DefWrd + movs word ptr es:[di],word ptr cs:[si] + &endm + ifidn <op1>,<AC> + movp si,AC + endif +else + f_mov op1,op2 +endif + endm + +f_mov MACRO op1,op2 +ifidn <op1>,<ARG> + ifidn <op2>,<AC> + f_call movarg_ac + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f_call movarg + endif +else + ifidn <op1>,<AC> + ifidn <op2>,<ARG> + f_call movac_arg + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f_call movac + endif + else + ifidn <op1>,<TEMP> + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f_call movtemp + else + ifnb <op1> + movp di,op1 ;; op1 is dest + endif + ifnb <op2> + movp si,op2 ;; op2 is source + endif + &rept DefWrd + movsw + &endm + endif + endif +endif + ENDM + + +f4_mov MACRO op1,op2 +ifidn <op1>,<ARG> + ifidn <op2>,<AC> + f4_call movarg_ac + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f4_call movarg + endif +else + ifidn <op1>,<AC> + ifidn <op2>,<ARG> + f4_call movac_arg + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f4_call movac + endif + else + ifidn <op1>,<TEMP> + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f4_call movtemp + else + ifnb <op1> + movp di,op1 ;; op1 is dest + endif + ifnb <op2> + movp si,op2 ;; op2 is source + endif + movsw + movsw + endif + endif +endif + ENDM + + +f8_mov MACRO op1,op2 +ifidn <op1>,<ARG> + ifidn <op2>,<AC> + f8_call movarg_ac + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f8_call movarg + endif +else + ifidn <op1>,<AC> + ifidn <op2>,<ARG> + f8_call movac_arg + else + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f8_call movac + endif + else + ifidn <op1>,<TEMP> + ifnb <op2> + movp si,op2 ;; op2 is source + endif + f8_call movtemp + else + ifnb <op1> + movp di,op1 ;; op1 is dest + endif + ifnb <op2> + movp si,op2 ;; op2 is source + endif + movsw + movsw + movsw + movsw + endif + endif +endif + ENDM + + +; f_push macro +; +; Special forms +; +; op1 Routine Res SI Use +; +; AC pushac AC AX +; ARG pusharg ARG AX +; SI pushsi SI AX +; any gen code SI --- + +f_push MACRO op1 +ifidn <op1>,<AC> + f_call pshac +else + ifidn <op1>,<ARG> + f_call psharg + else + ifidn <op1>,<SI> + f_call pshsi + else + if Double + push [op1+6] + push [op1+4] + endif + push [op1+2] + push [op1] + endif + endif +endif + ENDM + + +; f_pop macro +; +; Special forms +; +; op1 Routine Res SI Use +; +; AC popac AC AX +; ARG poparg ARG AX +; SI popsi SI AX +; any gen code SI --- + +f_pop MACRO op1 +ifidn <op1>,<AC> + f_call popac +else + ifidn <op1>,<ARG> + f_call poparg + else + ifidn <op1>,<SI> + f_call popsi + else + pop [op1] + pop [op1+2] + if Double + pop [op1+4] + pop [op1+6] + endif + endif + endif +endif + ENDM + + +; f_opr macro +; +; Special forms +; +; op1/SI op2/DI routine operations +; +; AC ARG xxxf add,sub,mul,div,cmp +; <> ARG xxxfsi add,sub,mul,div,cmp +; ARG AC xxxr sub,div,cmp +; ARG <> xxxrdi sub,div,cmp +; any any gen moves and call + +genoper MACRO op +f_&op &MACRO op1,op2 +ifidn <op2>,<ARG> + ifidn <op1>,<AC> + f_call op&f + else + ifb <op1> + f_call op&fsi + else + movp si,op1 + movp di,ARG + f_call op + endif + endif +else + ifidn <op1>,<ARG> + ifidn <op2>,<AC> + f_call op&r + else + ifb <op2> + f_call op&rdi + else + movp si,ARG + movp di,op2 + f_call op + endif + endif + else + ifnb <op1> + movp si,op1 + endif + ifnb <op2> + movp di,op2 + endif + f_call op + endif +endif + &ENDM + ENDM + +genoper add +genoper sub +genoper mul +genoper div +genoper cmp + + purge genoper + + +endif ; Pass 1 + +;--------------- End of Standard MATHMAC.INC diff --git a/private/fp32/inc/i386/mathver.inc b/private/fp32/inc/i386/mathver.inc new file mode 100644 index 000000000..0b4887828 --- /dev/null +++ b/private/fp32/inc/i386/mathver.inc @@ -0,0 +1,88 @@ +;*** +;mathver.inc - defines current math model +; +; Copyright (c) 1987-1988, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file defines what math version is being built. +; +;Revision History: +; 02-21-88 WAJ Combined all mathver.* into this file. +; 09-14-88 WAJ Added to 386 tree +; +;******************************************************************************* + + + +; This file contains special version specific information - +; +; Floating point format definition - +; +; MBinFmt Microsoft Binary Format +; DecFmt Microsoft Decimal Format +; IEEEFmt IEEE Binary Format +; +; Size definition - +; +; Single Single Precision Format +; Double Double Precision Format +; +; Language definition - +; +; BASIC BASIC Interpreter +; BASCOM BASIC Compiler +; CC C Compiler +; FORTRAN FORTRAN Compiler +; PASCAL PASCAL Compiler +; +; The above switches should be defined in this include file only if +; they are being set. They should only be set to 1. + + +ifdef MATHVERDBL + +IEEEFmt= 1 ; IEEE +Double= 1 ; Double + +FORTRAN= 1 +PASCAL= 1 + +endif ;MATHVERDBL + + +ifdef MATHVERU87 + +IEEEFmt= 1 ; IEEE +Double= 1 ; Double + +FORTRAN= 1 +PASCAL= 1 +Denormal= 1 +Use8087= 1 + +endif ;MATHVERU87 + + +ifdef MATHVERSNG + + +IEEEFmt= 1 ; IEEE +Single= 1 ; Single + +FORTRAN= 1 +PASCAL= 1 + + +endif ;MATHVERSNG + + +ifdef MATHVERDEC + +DecFmt= 1 ; Decimal +Double= 1 ; Double + +FORTRAN= 1 +PASCAL= 1 + + +endif ;MATHVERDEC diff --git a/private/fp32/inc/i386/memmacs.inc b/private/fp32/inc/i386/memmacs.inc new file mode 100644 index 000000000..9967027d7 --- /dev/null +++ b/private/fp32/inc/i386/memmacs.inc @@ -0,0 +1,87 @@ +;*** +;memmacs.inc - defines macros to to handle different memory models easier. +; +; Copyright (c) 1989-89, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file defines the macros that handle different memory models. +; +;Revision History: +; +; 01-23-89 WAJ Initial version copied from cruntime.inc +; 11-01-89 WAJ Added IWORD/ISIZE +; 11-01-89 WAJ Added PCS +; 11-01-89 WAJ Added short condition jmps for 386 code +; +;******************************************************************************* + + + +;******************************************************************************* +;* +;* Define load pointer and segment override macros. +;* +;******************************************************************************* + +if sizeD + LPES equ <les> + LPDS equ <lds> + PDS equ <ds:> + PES equ <es:> +else + LPES equ <mov> + LPDS equ <mov> + PDS equ <> + PES equ <> +endif + +ifdef I386 + LFPES equ <mov> + LFPDS equ <mov> + FPES equ <> + FPDS equ <> +else + LFPES equ <les> + LFPDS equ <lds> + FPES equ <es:> + FPDS equ <ds:> +endif + +ifdef SS_NEQ_DGROUP + PSS equ <ss:> ; SS != DS +else + PSS equ <> ; SS == DS +endif + +ifdef I386 + PCS equ <> +else + PCS equ <cs:> +endif + + +;******************************************************************************* +;* +;* Define IWORD and ISIZE. +;* +;******************************************************************************* + +ifdef I386 + IWORD equ <dword> + ISIZE equ 4 +else + IWORD equ <word> + ISIZE equ 2 +endif + + +;******************************************************************************* +;* +;* Define short conditional jumps for 386 code. +;* +;******************************************************************************* + + irp x,<Z,E,S,C,P,PE,PO,A,AE,B,BE,G,GE,L,LE,O> +JS&x equ <j&x short> +JSN&x equ <jn&x short> + endm diff --git a/private/fp32/inc/i386/mrt386.inc b/private/fp32/inc/i386/mrt386.inc new file mode 100644 index 000000000..90046f494 --- /dev/null +++ b/private/fp32/inc/i386/mrt386.inc @@ -0,0 +1,102 @@ +;*** +;mrt386.inc - include to aid in generating 386 code +; +; Copyright (c) 1988-1988, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file reproduces a few of the cMacro macros. +; +;Revision History: +; 08-24-88 WAJ Initial version. +; +;******************************************************************************* + + + +ifdef I386 ; Define Near Code Pointer + DNCPTR equ <dd> +else + DNCPTR equ <dw> +endif + + +ifdef I386 + if sizeD + DPWORD equ <fword> + else + DPWORD equ <dword> + endif + DFPWORD equ <fword> + DNPWORD equ <dword> + +else ;not I386 + if sizeD + DPWORD equ <dword> + else + DPWORD equ <word> + endif + DFPWORD equ <dword> + DNPWORD equ <word> +endif ;not I386 + + +ifdef I386 + if sizeC + CPWORD equ <fword> + else + CPWORD equ <dword> + endif + CFPWORD equ <fword> + CNPWORD equ <dword> + +else ;not I386 + if sizeC + CPWORD equ <dword> + else + CPWORD equ <word> + endif + CFPWORD equ <dword> + CNPWORD equ <word> +endif ;not I386 + + + +MOVRW macro reg, word ;; move a word into a register +ifdef I386 +% movzx reg, (word) +else +% mov reg, (word) +endif + endm + + +ifdef I386 +RBXONLY equ <rbx> +RBXUSED equ <rbx> + +else ;not I386 +RBXONLY equ <nothing> +RBXUSED equ <> +endif ;not I386 + + +ifdef I386 + ife sizeC + ife sizeD + FLAT386 equ <DEFINED> + endif + endif +endif + +ifflat macro code +ifdef FLAT386 + code +endif + endm + + +noflat macro code +ifndef FLAT386 + code +endif + endm diff --git a/private/fp32/inc/i386/os2dll.inc b/private/fp32/inc/i386/os2dll.inc new file mode 100644 index 000000000..df0eedb88 --- /dev/null +++ b/private/fp32/inc/i386/os2dll.inc @@ -0,0 +1,325 @@ +;*** +;os2dll.inc - DynaLib/Multi-thread parameter definitions +; +; Copyright (c) 1987-1991, Microsoft Corporation. All rights reserved. +; +;Purpose: +; +;Revision History: +; 10-22-87 JCR Module created +; 11-13-87 SKS Added _HEAP_LOCK +; 11-16-87 JCR Added _mlock/_munlock macros +; 12-15-87 JCR Added _EXIT_LOCK +; 01-07-88 BCM Added _SIGNAL_LOCK; upped MAXTHREADID from 16 to 32 +; 02-01-88 JCR Added _dll_mlock/_dll_munlock macros +; 05-03-88 JCR Added _BHEAP_LOCK +; 08-03-88 JCR Bumped maximum file count to 256 +; 08-09-88 GJF Added _lock_str/_unlock_str macros +; 08-12-88 JCR 386 version +; 06-02-89 JCR 386 mthread support +; 06-09-89 JCR 386: Added values to _tiddata struc (for _beginthread) +; 07-11-89 JCR 386mt: Corrected _DEBUG_LOCK_SIZE value +; 07-13-89 JCR 386: Added _LOCKTAB_LOCK and some macros +; 09-14-90 GJF Added __pxcptacttab, __pxcptinfoptr and __fpecode +; fields to _tiddata struc. +; 10-03-91 JCR Added _cvtbuf to _tiddata structure +; +;******************************************************************************* + +_NFILE_ = 256 ;maximum number of streams + +;Lock table offsets +;------------------ +;[NOTE: These values must coincide with the values in os2dll.h.] + +;[NOTE: do not change _SIGNAL_LOCK without changing emulator's os2dll.inc.] +_SIGNAL_LOCK equ 1 ; lock for signal() and emulator SignalAddress + ; emulator uses \math\include\os2dll.inc + +_IOB_SCAN_LOCK equ 2 ; _iob[] table lock +_TMPNAM_LOCK equ 3 ; lock global tempnam variables +_INPUT_LOCK equ 4 ; lock for _input() routine +_OUTPUT_LOCK equ 5 ; lock for _output() routine +_CSCANF_LOCK equ 6 ; lock for _cscanf() routine +_CPRINTF_LOCK equ 7 ; lock for _cprintf() routine +_CONIO_LOCK equ 8 ; lock for conio routines +_HEAP_LOCK equ 9 ; lock for heap allocator routines +_BHEAP_LOCK equ 10 ; lock for based heap routines +_TIME_LOCK equ 11 ; lock for time functions +_ENV_LOCK equ 12 ; lock for environment variables +_EXIT_LOCK1 equ 13 ; lock #1 for exit code +_EXIT_LOCK2 equ 14 ; lock #2 for exit code +_THREADDATA_LOCK equ 15 ; lock for thread data table +_POPEN_LOCK equ 16 ; lock for _popen/_pclose database +_SSCANF_LOCK equ 17 ; lock for sscanf() iob +_SPRINTF_LOCK equ 18 ; lock for sprintf() iob +_VSPRINTF_LOCK equ 19 ; lock for vsprintf() iob +_LOCKTAB_LOCK equ 20 ; lock to protect semaphore lock table +_OSFHND_LOCK equ 21 ; lock to protect _osfhnd array +_STREAM_LOCKS equ 21 ; Table of stream locks + +_LAST_STREAM_LOCK equ (_STREAM_LOCKS+_NFILE_-1) ; Last stream lock + +_FH_LOCKS equ (_LAST_STREAM_LOCK+1) ; Table of fh locks +_LAST_FH_LOCK equ (_FH_LOCKS+_NFILE_-1) ; Last fh lock + +_TOTAL_LOCKS equ _LAST_FH_LOCK+1 ; Total number of locks + +_LOCK_BIT_INTS equ (_TOTAL_LOCKS/(ISIZE*8))+1; # of ints to hold lock bits + +;*** THE FH-LOCK TABLE SHOULD REALLY BASED ON __nfile NOT _NFILE_ *** + + +IFDEF DEBUG +;General multi-thread values +;--------------------------- + +MAXTHREADID EQU 32 ; max thread id supported +THREADINTS EQU (MAXTHREADID/(ISIZE*8)) ; # of ints to hold thread bits + +;Semaphore debugging lock structure +;---------------------------------- + +debug_lock struc +% holder DINT THREADINTS dup (0) ;bit set for thread holding lock +% waiters DINT THREADINTS dup (0) ;bit(s) set for threads waiting +% lockcnt DINT 0 ;total # of times lock has been aquired +% collcnt DINT 0 ;total # of lock collisions +debug_lock ends + +DEBUG_LOCK_SIZE EQU (2*(THREADINTS*ISIZE))+(2*ISIZE) ;size of debug_lock struct +ENDIF + + +; Tid Table Definitions +; --------------------- + +IFDEF MTHREAD + +; Structure for each thread's data +; [NOTE: Tid structure and data must agree with os2dll.h. In addition, +; startup depends on __stkhqq being the third entry in the structure.] + +_tiddata struc +% __terrno DINT 0 ; errno value +% __tdoserrno DINT 0 ; _doserrno value +% __stkhqq DINT 0 ; stack limit +% __fpds DINT 0 ; Floating Point data segment +% __holdrand DLONG 0 ; rand() seed value +% __token DDPTR 0 ; * to strtok() token + ;following pointers get malloc'd at runtime +% __errmsg DDPTR 0 ; * to strerror()/_strerror() buffer +% __namebuf DDPTR 0 ; * to tmpfile() buffer +% __asctimebuf DDPTR 0 ; * to asctime() buffer +% __gmtimebuf DDPTR 0 ; * to gmtime() structure +% __cvtbuf DDPTR 0 ; * to ecvt()/fcvt() buffer + ;following three values needed by _beginthread code +% __initaddr DCPTR 0 ; initial user thread address +% __initarg DDPTR 0 ; initial user thread argument +% __initstksz DINT 0 ; initial stack size (specified by user) + ;following three fields are needed to support signal handling and + ;runtime errors +% __pxcptacttab DDPTR 0 ; * to exception-action table +% __tpxcptinfoptrs DDPTR 0 ; * to exception info pointers +% __tfpecode DINT 0 ; * float point exception code +_tiddata ends + +_TIDDATASIZE = (size _tiddata) ; size of _tiddata entry (bytes) +_TIDSHIFT = 6 ; # of bits to shift to get _TIDSIZE +_TIDSIZE = (1 shl _TIDSHIFT) ; # of bytes in each tid entry (rounded + ; to next power of two). + +_TID_INCSIZE equ 1000h ; grow thread table a page at a time +_TIDTABGROW = (_TID_INCSIZE/_TIDSIZE) ; table increment (# of tid's per page) + ; (must be power of 2!!!) +_TIDTABMASK = (NOT (_TIDTABGROW -1)) ; mask off grow bits + +_TID_MAXTID = 1024 ; max # of tids supported +_TID_REGSIZE = (_TID_MAXTID * _TIDSIZE) ; size of tid table region + +.ERRE (_TIDSIZE GE _TIDDATASIZE) ; make sure _TIDSIZE >= _TIDDATASIZE +.ERRE ((_TIDSIZE SHR 1) LT _TIDDATASIZE); make sure _TIDSIZE/2 < _TIDDATASIZE +.ERRE (_TIDTABGROW GE 1) ; must be at least 1 !!! +.ERRE (_TID_MAXTID GE _TIDTABGROW) ; make sure _TID_MAXTID >= _TIDTABGROW + +ENDIF ;MTHREAD + +;Declarations +;------------ + +;Multi-thread Macros +;------------------- + +;_mlock -- Aquire a lock +;Arg = lock number + +IFDEF MTHREAD +_mlock MACRO locknum + push locknum + call _lock + add rsp,ISIZE + ENDM +ELSE +_mlock MACRO locknum + ENDM +ENDIF + +;_munlock -- Release a lock +;Arg = lock number + +IFDEF MTHREAD +_munlock MACRO locknum + push locknum + call _unlock + add rsp,ISIZE + ENDM +ELSE +_munlock MACRO locknum + ENDM +ENDIF + +;_mwait -- Wait for a lock but don't aquire it +;Arg = lock number + +IFDEF MTHREAD +_mwait MACRO locknum + push locknum + call _waitlock + add rsp,ISIZE + ENDM +ELSE +_mwait MACRO locknum + ENDM +ENDIF + +;_lock_fh -- Lock file handle +;Arg = file handle + +IFDEF MTHREAD +_lock_fh MACRO fh + push fh + call _lock_file + add rsp,ISIZE + ENDM +ELSE +_lock_fh MACRO fh + ENDM +ENDIF + +;_unlock_fh -- Unlock file handle +;Arg = register containing file handle + +IFDEF MTHREAD +_unlock_fh MACRO fh + push fh + call _unlock_file + add rsp,ISIZE + ENDM +ELSE +_unlock_fh MACRO fh + ENDM +ENDIF + +;_unlock_fh_check -- Unlock file handle if flag is set +;Args: reg = register to use in the macro (e.g., ax) +; file = file handle (e.g., bx or [fh]) +; flag = lock/unlock flag (e.g., [lock_flag]) +; (0 = no lock/unlock operation) + +IFDEF MTHREAD +_unlock_fh_check MACRO reg,file,flag + LOCAL no_unlock ;; local label + mov reg,flag ;; get lock flag + or reg,reg ;; test it + jz no_unlock ;; 0 = no lock/unlock + push file ;; unlock file + call _unlock_file + add rsp,ISIZE +no_unlock: + ENDM +ELSE +_unlock_fh_check MACRO reg,file,flag + ENDM +ENDIF + +;_lock_fh_check -- Lock file handle if flag is set +;Args: reg = register to use in the macro (e.g., ax) +; file = file handle (e.g., bx or [fh]) +; flag = lock/unlock flag (e.g., [lock_flag]) +; (0 = no lock/unlock operation) + +IFDEF MTHREAD +_lock_fh_check MACRO reg,file,flag + LOCAL no_lock ;; local label + mov reg,flag ;; get lock flag + or reg,reg ;; test it + jz no_unlock ;; 0 = no lock/unlock + push file ;; lock the file + call _lock_file + add rsp,ISIZE +no_lock: + ENDM +ELSE +_lock_fh_check MACRO reg,file,flag + ENDM +ENDIF + +;_lock_str -- Acquire stream lock +;Arg: str = index of stream in _iob[] + +IFDEF MTHREAD +_lock_str MACRO str + push str + call _lock_stream + add rsp,ISIZE + ENDM +ELSE +_lock_str MACRO str + ENDM +ENDIF + +;_unlock_str -- Unlock stream +;Arg: str = index of stream in _iob[] + +IFDEF MTHREAD +_unlock_str MACRO str + push str + call _unlock_stream + add rsp,ISIZE + ENDM +ELSE +_unlock_str MACRO str + ENDM +ENDIF + +;_dll_mlock -- Aquire a lock (set/clear DS) +;Arg = lock number + +IFDEF _LOAD_DGROUP +IFDEF MTHREAD +_dll_mlock MACRO locknum + push locknum + call _dll_lock + add rsp,ISIZE + ENDM +ELSE +_dll_mlock MACRO locknum + ENDM +ENDIF +ENDIF + +;_dll_munlock -- Release a lock (set/clear DS) +;Arg = lock number + +IFDEF _LOAD_DGROUP +IFDEF MTHREAD +_dll_munlock MACRO locknum + push locknum + call _dll_unlock + add rsp,ISIZE + ENDM +ELSE +_dll_munlock MACRO locknum + ENDM +ENDIF +ENDIF diff --git a/private/fp32/inc/i386/os2supp.inc b/private/fp32/inc/i386/os2supp.inc new file mode 100644 index 000000000..2a962ff27 --- /dev/null +++ b/private/fp32/inc/i386/os2supp.inc @@ -0,0 +1,92 @@ +;*** +;os2supp.inc - helper macros for OS/2, MTHREAD, and DLL support +; +; Copyright (c) 1987-88, Microsoft Corporation +; +;Purpose: +; Support for OS/2, MTHREAD, and DLL support +; +;Revision History: +; +; 08-25-88 WAJ Added this header. Added to 386 tree. +; +;******************************************************************************* + + +ifdef MTHREAD + OS2SUPP equ 1 +else + OS2SUPP equ 0 +endif + +__MsgString equ <> + +ifdef MTHREAD +__MsgString catstr __MsgString, < MTHREAD> +endif + +ifdef SS_NEQ_DGROUP +__MsgString catstr __MsgString, < SS_NEQ_DGROUP> +endif + +ifdef DLL +__MsgString catstr __MsgString, < DLL> +endif + +__SMsgString sizestr __MsgString + +if __SMsgString + if OS2SUPP + __MsgString catstr <OS/2 Support Lib ->, __MsgString + else + __MsgString catstr <Support Lib ->, __MsgString + endif + ifndef QUIET +% display __MsgString + endif +endif + + +_loadds macro ;; code for loading ds with DGROUP + push ds + mov ax,DGROUP + mov ds,ax +endm + +_reloadds macro ;; code for resetting ds + pop ds +endm + + + + +ifmt macro code + ifdef MTHREAD + code + endif +endm + +nomt macro code + ifndef MTHREAD + code + endif +endm + +ifdll macro code + ifdef DLL + code + endif +endm + + +if_LOAD_DGROUP macro code + ifdef _LOAD_DGROUP + code + endif +endm + +no_LOAD_DGROUP macro code + ifndef _LOAD_DGROUP + code + endif +endm diff --git a/private/fp32/inc/i386/version.inc b/private/fp32/inc/i386/version.inc new file mode 100644 index 000000000..b2d15ab23 --- /dev/null +++ b/private/fp32/inc/i386/version.inc @@ -0,0 +1,49 @@ +;*** +;version.inc - defines current memory model +; +; Copyright (c) 1987-1989, Microsoft Corporation. All rights reserved. +; +;Purpose: +; This file defines the current memory model being used. +; +;Revision History: +; 10-22-87 JCR Single version to handle all four memory models +; 02-11-88 WAJ Added to math project +; 12-02-88 WAJ Added definition of CallDist +; 09-22-89 WAJ Removed CallDist. Added "ifdef WIN". +; 10-16-89 WAJ Added "?WIN = 1". +; 11-20-89 WAJ Added ?QUIET +; +;******************************************************************************* + +?PLM = 0 + +ifdef WIN + ?WIN = 1 +else + ?WIN = 0 +endif + +ifdef mem_s + memS equ 1 ; small model +endif + +ifdef mem_c + memC equ 1 ; compact model +endif + +ifdef mem_m + memM equ 1 ; medium model +endif + +ifdef mem_l + memL equ 1 ; large model +endif + +ifdef I386 + memS32 equ 1 ; small 32 bit model +endif + +ifdef QUIET + ?QUIET equ 1 +endif diff --git a/private/fp32/include/cv.h b/private/fp32/include/cv.h new file mode 100644 index 000000000..0455b424d --- /dev/null +++ b/private/fp32/include/cv.h @@ -0,0 +1,531 @@ +/*** +*cv.h - definitions for floating point conversion +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* define types, macros, and constants used in floating point +* conversion routines +* +*Revision History: +* 7-17-91 GDP initial version +* 9-21-91 GDP restructured 'ifdef' directives +* 10-29-91 GDP MIPS port: new defs for ALIGN and DOUBLE +* 3-03-92 GDP removed os2 16-bit stuff +* 4-30-92 GDP support intrncvt.c --cleanup and reorganize +* 5-13-92 XY fixed B_END macros +* 6-16-92 GDP merged changes from \\orville and \\vangogh trees +* 9-05-92 GDP included fltintrn.h, new calling convention macros +* 07-16-93 SRW ALPHA Merge +* 10-02-94 BWT PPC merge +* +*******************************************************************************/ +#ifndef _INC_CV + +#ifdef __cplusplus +extern "C" { +#endif + +#include <cruntime.h> + +/* + * Conditional macro definition for function calling type and variable type + * qualifiers. + */ +#if ( (_MSC_VER >= 800) && (_M_IX86 >= 300) ) + +/* + * Definitions for MS C8-32 (386/486) compiler + */ +#define _CRTAPI1 __cdecl +#define _CRTAPI2 __cdecl + +#elif ( _MSC_VER == 600 ) + +/* + * Definitions for old MS C6-386 compiler + */ +#define _CRTAPI1 _cdecl +#define _CRTAPI2 _cdecl +#define _CRTVAR1 _cdecl +#define _M_IX86 300 + +#else + +/* + * Other compilers (e.g., MIPS) + */ +#define _CRTAPI1 +#define _CRTAPI2 +#define _CRTVAR1 + +#endif + +/* + * For MIPS and Alpha, define DOUBLE as 'double' + * so that the definition in fltintrn.h is not used. + * This is done because floating point arguments are passed in the + * fp register. + */ + +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) +#define DOUBLE double +#endif +#include <fltintrn.h> + + + + +/* define little endian or big endian memory */ + +#ifdef i386 +#define L_END +#endif + +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) +#define L_END +#endif + +typedef unsigned char u_char; /* should have 1 byte */ +typedef char s_char; /* should have 1 byte */ +typedef unsigned short u_short; /* should have 2 bytes */ +typedef signed short s_short; /* should have 2 bytes */ +typedef unsigned int u_long; /* sholuld have 4 bytes */ +typedef int s_long; /* sholuld have 4 bytes */ + +/* calling conventions */ +#define _CALLTYPE5 + + +/* + * defining _LDSUPPORT enables using long double computations + * for string conversion. We do not do this even for i386, + * since we want to avoid using floating point code that + * may generate IEEE exceptions. + * + * Currently our string conversion routines do not conform + * to the special requirements of the IEEE standard for + * floating point conversions + */ + + +#ifndef _LDSUPPORT + +#pragma pack(4) +typedef struct { + u_char ld[10]; +} _LDOUBLE; +#pragma pack() + +#define PTR_LD(x) ((u_char *)(&(x)->ld)) + +#else + +typedef long double _LDOUBLE; + +#define PTR_LD(x) ((u_char *)(x)) + +#endif + + +#pragma pack(4) +typedef struct { + u_char ld12[12]; +} _LDBL12; +#pragma pack() + +typedef struct { + float f; +} FLOAT; + + + +/* + * return values for internal conversion routines + * (12-byte to long double, double, or float) + */ + +typedef enum { + INTRNCVT_OK, + INTRNCVT_OVERFLOW, + INTRNCVT_UNDERFLOW +} INTRNCVT_STATUS; + + +/* + * return values for strgtold12 routine + */ + +#define SLD_UNDERFLOW 1 +#define SLD_OVERFLOW 2 +#define SLD_NODIGITS 4 + +#define MAX_MAN_DIGITS 21 + + +/* specifies '%f' format */ + +#define SO_FFORMAT 1 + +typedef struct _FloatOutStruct { + short exp; + char sign; + char ManLen; + char man[MAX_MAN_DIGITS+1]; + } FOS; + + + +#define PTR_12(x) ((u_char *)(&(x)->ld12)) + +#define MAX_USHORT ((u_short)0xffff) +#define MSB_USHORT ((u_short)0x8000) +#define MAX_ULONG ((u_long)0xffffffff) +#define MSB_ULONG ((u_long)0x80000000) + +#define TMAX10 5200 /* maximum temporary decimal exponent */ +#define TMIN10 -5200 /* minimum temporary decimal exponent */ +#define LD_MAX_EXP_LEN 4 /* maximum number of decimal exponent digits */ +#define LD_MAX_MAN_LEN 24 /* maximum length of mantissa (decimal)*/ +#define LD_MAX_MAN_LEN1 25 /* MAX_MAN_LEN+1 */ + +#define LD_BIAS 0x3fff /* exponent bias for long double */ +#define LD_BIASM1 0x3ffe /* LD_BIAS - 1 */ +#define LD_MAXEXP 0x7fff /* maximum biased exponent */ + +#define D_BIAS 0x3ff /* exponent bias for double */ +#define D_BIASM1 0x3fe /* D_BIAS - 1 */ +#define D_MAXEXP 0x7ff /* maximum biased exponent */ + + +#ifdef M68K +typedef struct _fltl /* used by _fltinl */ + { + int flags; + int nbytes; /* number of characters read */ + long lval; + _LDOUBLE ldval; /* the returned floating point number */ + } + *FLTL; +#endif + + +#ifdef M68K +char *_clftole(long double *, char *, int, int); +char *_clftolf(long double *, char *, int); +char * _CALLTYPE2 _clftolg(long double *, char *, int, int); +void _CALLTYPE2 _cldcvt( long double *, char *, int, int, int); +#endif + + +#ifndef MTHREAD +#ifdef M68K +FLTL _CALLTYPE2 _fltinl( const char *, int, int, int); +STRFLT _CALLTYPE2 _lfltout(long double); + +#define _IS_MAN_IND(signbit, manhi, manlo) \ + ((signbit) && (manhi)==0xc0000000 && (manlo)==0) + +#define _IS_MAN_QNAN(signbit, manhi, manlo) \ + ( (manhi)&NAN_BIT ) + +#define _IS_MAN_SNAN(signbit, manhi, manlo) \ + (!( _IS_MAN_INF(signbit, manhi, manlo) || \ + _IS_MAN_QNAN(signbit, manhi, manlo) )) + +#endif +#endif + + +/* + * end of definitions from crt32\h\fltintrn.h + */ + + + + +/* Recognizing special patterns in the mantissa field */ +#define _EXP_SP 0x7fff +#define NAN_BIT (1<<30) + +#define _IS_MAN_INF(signbit, manhi, manlo) \ + ( (manhi)==MSB_ULONG && (manlo)==0x0 ) + + +// i386 and Alpha use same NaN format + +#if defined(_M_IX86) || defined(_M_ALPHA) + +#define _IS_MAN_IND(signbit, manhi, manlo) \ + ((signbit) && (manhi)==0xc0000000 && (manlo)==0) + +#define _IS_MAN_QNAN(signbit, manhi, manlo) \ + ( (manhi)&NAN_BIT ) + +#define _IS_MAN_SNAN(signbit, manhi, manlo) \ + (!( _IS_MAN_INF(signbit, manhi, manlo) || \ + _IS_MAN_QNAN(signbit, manhi, manlo) )) + + +#elif defined(_M_MRX000) || defined(_M_PPC) + +#define _IS_MAN_IND(signbit, manhi, manlo) \ + (!(signbit) && (manhi)==0xbfffffff && (manlo)==0xfffff800) + +#define _IS_MAN_SNAN(signbit, manhi, manlo) \ + ( (manhi)&NAN_BIT ) + +#define _IS_MAN_QNAN(signbit, manhi, manlo) \ + (!( _IS_MAN_INF(signbit, manhi, manlo) || \ + _IS_MAN_SNAN(signbit, manhi, manlo) )) +#endif + + + +#if defined (L_END) && !( defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) ) +/* "little endian" memory */ +/* Note: MIPS and Alpha have alignment requirements and have different macros */ +/* + * Manipulation of a 12-byte long double number (an ordinary + * 10-byte long double plus two extra bytes of mantissa). + */ +/* + * byte layout: + * + * +-----+--------+--------+-------+ + * |XT(2)|MANLO(4)|MANHI(4)|EXP(2) | + * +-----+--------+--------+-------+ + * |<-UL_LO->|<-UL_MED->|<-UL_HI ->| + * (4) (4) (4) + */ + +/* a pointer to the exponent/sign portion */ +#define U_EXP_12(p) ((u_short *)(PTR_12(p)+10)) + +/* a pointer to the 4 hi-order bytes of the mantissa */ +#define UL_MANHI_12(p) ((u_long *)(PTR_12(p)+6)) + +/* a pointer to the 4 lo-order bytes of the ordinary (8-byte) mantissa */ +#define UL_MANLO_12(p) ((u_long *)(PTR_12(p)+2)) + +/* a pointer to the 2 extra bytes of the mantissa */ +#define U_XT_12(p) ((u_short *)PTR_12(p)) + +/* a pointer to the 4 lo-order bytes of the extended (10-byte) mantissa */ +#define UL_LO_12(p) ((u_long *)PTR_12(p)) + +/* a pointer to the 4 mid-order bytes of the extended (10-byte) mantissa */ +#define UL_MED_12(p) ((u_long *)(PTR_12(p)+4)) + +/* a pointer to the 4 hi-order bytes of the extended long double */ +#define UL_HI_12(p) ((u_long *)(PTR_12(p)+8)) + +/* a pointer to the byte of order i (LSB=0, MSB=9)*/ +#define UCHAR_12(p,i) ((u_char *)PTR_12(p)+(i)) + +/* a pointer to a u_short with offset i */ +#define USHORT_12(p,i) ((u_short *)((u_char *)PTR_12(p)+(i))) + +/* a pointer to a u_long with offset i */ +#define ULONG_12(p,i) ((u_long *)((u_char *)PTR_12(p)+(i))) + +/* a pointer to the 10 MSBytes of a 12-byte long double */ +#define TEN_BYTE_PART(p) ((u_char *)PTR_12(p)+2) + +/* + * Manipulation of a 10-byte long double number + */ +#define U_EXP_LD(p) ((u_short *)(PTR_LD(p)+8)) +#define UL_MANHI_LD(p) ((u_long *)(PTR_LD(p)+4)) +#define UL_MANLO_LD(p) ((u_long *)PTR_LD(p)) + +/* + * Manipulation of a 64bit IEEE double + */ +#define U_SHORT4_D(p) ((u_short *)(p) + 3) +#define UL_HI_D(p) ((u_long *)(p) + 1) +#define UL_LO_D(p) ((u_long *)(p)) + +#endif + +/* big endian */ +#if defined (B_END) + +/* + * byte layout: + * + * +------+-------+---------+------+ + * |EXP(2)|MANHI(4)|MANLO(4)|XT(2) | + * +------+-------+---------+------+ + * |<-UL_HI->|<-UL_MED->|<-UL_LO ->| + * (4) (4) (4) + */ + + +#define U_EXP_12(p) ((u_short *)PTR_12(p)) +#define UL_MANHI_12(p) ((u_long *)(PTR_12(p)+2)) +#define UL_MANLO_12(p) ((u_long *)(PTR_12(p)+6)) +#define U_XT_12(p) ((u_short *)(PTR_12(p)+10)) + +#define UL_LO_12(p) ((u_long *)(PTR_12(p)+8)) +#define UL_MED_12(p) ((u_long *)(PTR_12(p)+4)) +#define UL_HI_12(p) ((u_long *)PTR_12(p)) + +#define UCHAR_12(p,i) ((u_char *)PTR_12(p)+(11-(i))) +#define USHORT_12(p,i) ((u_short *)((u_char *)PTR_12(p)+10-(i))) +#define ULONG_12(p,i) ((u_long *)((u_char *)PTR_12(p)+8-(i))) +#define TEN_BYTE_PART(p) (u_char *)PTR_12(p) + +#define U_EXP_LD(p) ((u_short *)PTR_LD(p)) +#define UL_MANHI_LD(p) ((u_long *)(PTR_LD(p)+2)) +#define UL_MANLO_LD(p) ((u_long *)(PTR_LD(p)+6)) + +/* + * Manipulation of a 64bit IEEE double + */ +#define U_SHORT4_D(p) ((u_short *)(p)) +#define UL_HI_D(p) ((u_long *)(p)) +#define UL_LO_D(p) ((u_long *)(p) + 1) + +#endif + +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) + +#define ALIGN(x) ( (unsigned long __unaligned *) (x)) + +#define U_EXP_12(p) ((u_short *)(PTR_12(p)+10)) + +#define UL_MANHI_12(p) ((u_long __unaligned *) (PTR_12(p)+6) ) +#define UL_MANLO_12(p) ((u_long __unaligned *) (PTR_12(p)+2) ) + + +#define U_XT_12(p) ((u_short *)PTR_12(p)) +#define UL_LO_12(p) ((u_long *)PTR_12(p)) +#define UL_MED_12(p) ((u_long *)(PTR_12(p)+4)) +#define UL_HI_12(p) ((u_long *)(PTR_12(p)+8)) + +/* the following 3 macros do not take care of proper alignment */ +#define UCHAR_12(p,i) ((u_char *)PTR_12(p)+(i)) +#define USHORT_12(p,i) ((u_short *)((u_char *)PTR_12(p)+(i))) +#define ULONG_12(p,i) ((u_long *) ((u_char *)PTR_12(p)+(i) )) + +#define TEN_BYTE_PART(p) ((u_char *)PTR_12(p)+2) + +/* + * Manipulation of a 10-byte long double number + */ +#define U_EXP_LD(p) ((u_short *)(PTR_LD(p)+8)) + +#define UL_MANHI_LD(p) ((u_long *) (PTR_LD(p)+4) ) +#define UL_MANLO_LD(p) ((u_long *) PTR_LD(p) ) + +/* + * Manipulation of a 64bit IEEE double + */ +#define U_SHORT4_D(p) ((u_short *)(p) + 3) +#define UL_HI_D(p) ((u_long *)(p) + 1) +#define UL_LO_D(p) ((u_long *)(p)) + +#endif + + +#define PUT_INF_12(p,sign) \ + *UL_HI_12(p) = (sign)?0xffff8000:0x7fff8000; \ + *UL_MED_12(p) = 0; \ + *UL_LO_12(p) = 0; + +#define PUT_ZERO_12(p) *UL_HI_12(p) = 0; \ + *UL_MED_12(p) = 0; \ + *UL_LO_12(p) = 0; + +#define ISZERO_12(p) ((*UL_HI_12(p)&0x7fffffff) == 0 && \ + *UL_MED_12(p) == 0 && \ + *UL_LO_12(p) == 0 ) + +#define PUT_INF_LD(p,sign) \ + *U_EXP_LD(p) = (sign)?0xffff:0x7fff; \ + *UL_MANHI_LD(p) = 0x8000; \ + *UL_MANLO_LD(p) = 0; + +#define PUT_ZERO_LD(p) *U_EXP_LD(p) = 0; \ + *UL_MANHI_LD(p) = 0; \ + *UL_MANLO_LD(p) = 0; + +#define ISZERO_LD(p) ((*U_EXP_LD(p)&0x7fff) == 0 && \ + *UL_MANHI_LD(p) == 0 && \ + *UL_MANLO_LD(p) == 0 ) + + +/********************************************************* + * + * Function Prototypes + * + *********************************************************/ + +/* from mantold.c */ +void _CALLTYPE5 __mtold12(char *manptr, unsigned manlen,_LDBL12 *ld12); +int _CALLTYPE5 __addl(u_long x, u_long y, u_long *sum); +void _CALLTYPE5 __shl_12(_LDBL12 *ld12); +void _CALLTYPE5 __shr_12(_LDBL12 *ld12); +void _CALLTYPE5 __add_12(_LDBL12 *x, _LDBL12 *y); + +/* from tenpow.c */ +void _CALLTYPE5 __multtenpow12(_LDBL12 *pld12,int pow, unsigned mult12); +void _CALLTYPE5 __ld12mul(_LDBL12 *px, _LDBL12 *py); + +/* from strgtold.c */ +unsigned int __strgtold12(_LDBL12 *pld12, + const char * *p_end_ptr, + const char * str, + int mult12, + int scale, + int decpt, + int implicit_E); + +unsigned _CALLTYPE5 __STRINGTOLD(_LDOUBLE *pld, + const char * *p_end_ptr, + const char *str, + int mult12); + + +/* from x10fout.c */ +/* this is defined as void in convert.h + * After porting the asm files to c, we need a return value for + * i10_output, that used to reside in reg. ax + */ +int _CALLTYPE5 $I10_OUTPUT(_LDOUBLE ld, int ndigits, + unsigned output_flags, FOS *fos); + + +/* for cvt.c and fltused.c */ +/* The following functions are #defined as macros in fltintrn.h */ +#undef _cfltcvt +#undef _cropzeros +#undef _fassign +#undef _forcdecpt +#undef _positive + +void _CALLTYPE2 _cfltcvt(double *arg, char *buffer, + int format, int precision, + int caps); +void _CALLTYPE2 _cropzeros(char *buf); +void _CALLTYPE2 _fassign(int flag, char *argument, char *number); +void _CALLTYPE2 _forcdecpt(char *buf); +int _CALLTYPE2 _positive(double *arg); + +/* from intrncvt.c */ +void _atodbl(DOUBLE *d, char *str); +void _atoldbl(_LDOUBLE *ld, char *str); +void _atoflt(FLOAT *f, char *str); +INTRNCVT_STATUS _ld12tod(_LDBL12 *ifp, DOUBLE *d); +INTRNCVT_STATUS _ld12tof(_LDBL12 *ifp, FLOAT *f); +INTRNCVT_STATUS _ld12told(_LDBL12 *ifp, _LDOUBLE *ld); + + + +#ifdef __cplusplus +} +#endif + +#define _INC_CV +#endif /* _INC_CV */ diff --git a/private/fp32/include/trans.h b/private/fp32/include/trans.h new file mode 100644 index 000000000..3e5c66f95 --- /dev/null +++ b/private/fp32/include/trans.h @@ -0,0 +1,490 @@ +/*** +*trans.h - definitions for computing transcendentals +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: Define constants and macros that are used for computing +* transcendentals. Some of the definitions are machine dependent. +* Double is assumed to conform to the IEEE 754 std format. +* +*Revision History: +* 08-14-91 GDP written +* 10-29-91 GDP removed unused prototypes, added _frnd +* 01-20-92 GDP significant changes -- IEEE exc. support +* 03-27-92 GDP put IEEE definitions in fpieee.h +* 03-31-92 GDP add internal constants for _ctrlfp, _statfp +* 05-08-92 PLM added M68K switch +* 05-18-92 XY added exception macro under M68K switch +* 06-23-92 GDP added macro for negative zero +* 09-06-92 GDP include cruntime.h, calling convention macros +* 07-16-93 SRW ALPHA Merge +* 01-13-94 RDL Added #ifndef _LANGUAGE_ASSEMBLY for asm includes. +* 10-02-94 BWT PPC merge +* +*******************************************************************************/ +#ifndef _INC_TRANS + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _LANGUAGE_ASSEMBLY + +#include <cruntime.h> + +/* + * Conditional macro definition for function calling type and variable type + * qualifiers. + */ +#if ( (_MSC_VER >= 800) && (_M_IX86 >= 300) ) + +/* + * Definitions for MS C8-32 (386/486) compiler + */ +#define _CRTAPI1 __cdecl +#define _CRTAPI2 __cdecl + +#elif ( _MSC_VER == 600 ) + +/* + * Definitions for old MS C6-386 compiler + */ +#define _CRTAPI1 _cdecl +#define _CRTAPI2 _cdecl +#define _CRTVAR1 _cdecl +#define _M_IX86 300 + +#else + +/* + * Other compilers (e.g., MIPS) + */ +#define _CRTAPI1 +#define _CRTAPI2 +#define _CRTVAR1 + +#endif + +#include <fpieee.h> + +#define D_BIASM1 0x3fe /* off by one to compensate for the implied bit */ + +#ifdef B_END +/* big endian */ +#define D_EXP(x) ((unsigned short *)&(x)) +#define D_HI(x) ((unsigned long *)&(x)) +#define D_LO(x) ((unsigned long *)&(x)+1) +#else +#define D_EXP(x) ((unsigned short *)&(x)+3) +#define D_HI(x) ((unsigned long *)&(x)+1) +#define D_LO(x) ((unsigned long *)&(x)) +#endif + +/* return the int representation of the exponent + * if x = .f * 2^n, 0.5<=f<1, return n (unbiased) + * e.g. INTEXP(3.0) == 2 + */ +#define INTEXP(x) ((signed short)((*D_EXP(x) & 0x7ff0) >> 4) - D_BIASM1) + + +/* check for infinity, NAN */ +#define D_ISINF(x) ((*D_HI(x) & 0x7fffffff) == 0x7ff00000 && *D_LO(x) == 0) +#define IS_D_SPECIAL(x) ((*D_EXP(x) & 0x7ff0) == 0x7ff0) +#define IS_D_NAN(x) (IS_D_SPECIAL(x) && !D_ISINF(x)) + +#ifdef _M_MRX000 + +#define IS_D_SNAN(x) ((*D_EXP(x) & 0x7ff8) == 0x7ff8) +#define IS_D_QNAN(x) ((*D_EXP(x) & 0x7ff8) == 0x7ff0 && \ + (*D_HI(x) << 13 || *D_LO(x))) +#else + +#define IS_D_QNAN(x) ((*D_EXP(x) & 0x7ff8) == 0x7ff8) +#define IS_D_SNAN(x) ((*D_EXP(x) & 0x7ff8) == 0x7ff0 && \ + (*D_HI(x) << 13 || *D_LO(x))) +#endif + +#define IS_D_DENORM(x) ((*D_EXP(x) & 0x7ff0) == 0 && \ + (*D_HI(x) << 12 || *D_LO(x))) + + +#define IS_D_INF(x) (*D_HI(x) == 0x7ff00000 && *D_LO(x) == 0) +#define IS_D_MINF(x) (*D_HI(x) == 0xfff00000 && *D_LO(x) == 0) + +#ifdef _M_MRX000 +#define D_IND_HI 0x7ff7ffff +#define D_IND_LO 0xffffffff +#else +#define D_IND_HI 0xfff80000 +#define D_IND_LO 0x0 +#endif + +typedef union { + long lng[2]; + double dbl; + } _dbl; + +extern _dbl _d_inf; +extern _dbl _d_ind; +extern _dbl _d_max; +extern _dbl _d_min; +extern _dbl _d_mzero; + +#define D_INF (_d_inf.dbl) +#define D_IND (_d_ind.dbl) +#define D_MAX (_d_max.dbl) +#define D_MIN (_d_min.dbl) +#define D_MZERO (_d_mzero.dbl) /* minus zero */ + +/* min and max exponents for normalized numbers in the + * form: 0.xxxxx... * 2^exp (NOT 1.xxxx * 2^exp !) + */ +#define MAXEXP 1024 +#define MINEXP -1021 + +#endif /* _LANGUAGE_ASSEMBLY */ + + +#if defined(_M_IX86) + + +/* Control word for computation of transcendentals */ +#define ICW 0x133f + +#define IMCW 0xffff + +#define IMCW_EM 0x003f /* interrupt Exception Masks */ +#define IEM_INVALID 0x0001 /* invalid */ +#define IEM_DENORMAL 0x0002 /* denormal */ +#define IEM_ZERODIVIDE 0x0004 /* zero divide */ +#define IEM_OVERFLOW 0x0008 /* overflow */ +#define IEM_UNDERFLOW 0x0010 /* underflow */ +#define IEM_INEXACT 0x0020 /* inexact (precision) */ + + +#define IMCW_RC 0x0c00 /* Rounding Control */ +#define IRC_CHOP 0x0c00 /* chop */ +#define IRC_UP 0x0800 /* up */ +#define IRC_DOWN 0x0400 /* down */ +#define IRC_NEAR 0x0000 /* near */ + +#define ISW_INVALID 0x0001 /* invalid */ +#define ISW_DENORMAL 0x0002 /* denormal */ +#define ISW_ZERODIVIDE 0x0004 /* zero divide */ +#define ISW_OVERFLOW 0x0008 /* overflow */ +#define ISW_UNDERFLOW 0x0010 /* underflow */ +#define ISW_INEXACT 0x0020 /* inexact (precision) */ + +#define IMCW_PC 0x0300 /* Precision Control */ +#define IPC_24 0x0000 /* 24 bits */ +#define IPC_53 0x0200 /* 53 bits */ +#define IPC_64 0x0300 /* 64 bits */ + +#define IMCW_IC 0x1000 /* Infinity Control */ +#define IIC_AFFINE 0x1000 /* affine */ +#define IIC_PROJECTIVE 0x0000 /* projective */ + + +#elif defined(_M_MRX000) + + +#define ICW 0x00000f80 /* Internal CW for transcendentals */ +#define IMCW 0xffffff83 /* Internal CW Mask */ + +#define IMCW_EM 0x00000f80 /* interrupt Exception Masks */ +#define IEM_INVALID 0x00000800 /* invalid */ +#define IEM_ZERODIVIDE 0x00000400 /* zero divide */ +#define IEM_OVERFLOW 0x00000200 /* overflow */ +#define IEM_UNDERFLOW 0x00000100 /* underflow */ +#define IEM_INEXACT 0x00000080 /* inexact (precision) */ + + +#define IMCW_RC 0x00000003 /* Rounding Control */ +#define IRC_CHOP 0x00000001 /* chop */ +#define IRC_UP 0x00000002 /* up */ +#define IRC_DOWN 0x00000003 /* down */ +#define IRC_NEAR 0x00000000 /* near */ + + +#define ISW_INVALID (1<<6) /* invalid */ +#define ISW_ZERODIVIDE (1<<5) /* zero divide */ +#define ISW_OVERFLOW (1<<4) /* overflow */ +#define ISW_UNDERFLOW (1<<3) /* underflow */ +#define ISW_INEXACT (1<<2) /* inexact (precision) */ + + +#elif defined(_M_ALPHA) + + +// +// ICW is the Internal Control Word for transcendentals: all five exceptions +// are masked and round to nearest mode is set. IMCW is the mask: all bits +// are set, except for the ISW bits. +// + +#define ICW (IEM_INEXACT | IEM_UNDERFLOW | IEM_OVERFLOW | IEM_ZERODIVIDE | IEM_INVALID | IRC_NEAR) +#define ISW (ISW_INEXACT | ISW_UNDERFLOW | ISW_OVERFLOW | ISW_ZERODIVIDE | ISW_INVALID) +#define IMCW (0xffffffff ^ ISW) + +// +// The defines for the internal control word match the format of the Alpha +// AXP software FPCR except for the rounding mode which is obtained from the +// Alpha AXP hardware FPCR and shifted right 32 bits. +// + +// +// Internal Exception Mask bits. +// Each bit _disables_ an exception (they are not _enable_ bits). +// + +#define IMCW_EM 0x0000003e /* interrupt Exception Masks */ + +#define IEM_INEXACT 0x00000020 /* inexact (precision) */ +#define IEM_UNDERFLOW 0x00000010 /* underflow */ +#define IEM_OVERFLOW 0x00000008 /* overflow */ +#define IEM_ZERODIVIDE 0x00000004 /* zero divide */ +#define IEM_INVALID 0x00000002 /* invalid */ + +// +// Internal Rounding Control values. +// + +#define IMCW_RC (0x3 << 26) /* Rounding Control */ + +#define IRC_CHOP (0x0 << 26) /* chop */ +#define IRC_DOWN (0x1 << 26) /* down */ +#define IRC_NEAR (0x2 << 26) /* near */ +#define IRC_UP (0x3 << 26) /* up */ + +// +// Internal Status Word bits. +// + +#define ISW_INEXACT 0x00200000 /* inexact (precision) */ +#define ISW_UNDERFLOW 0x00100000 /* underflow */ +#define ISW_OVERFLOW 0x00080000 /* overflow */ +#define ISW_ZERODIVIDE 0x00040000 /* zero divide */ +#define ISW_INVALID 0x00020000 /* invalid */ + + +#elif defined(_M_PPC) + + +#define IMCW_EM 0x000000f8 /* Exception Enable Mask */ + +#define IEM_INVALID 0x00000080 /* invalid */ +#define IEM_OVERFLOW 0x00000040 /* overflow */ +#define IEM_UNDERFLOW 0x00000020 /* underflow */ +#define IEM_ZERODIVIDE 0x00000010 /* zero divide */ +#define IEM_INEXACT 0x00000008 /* inexact (precision) */ + + +#define IMCW_RC 0x00000003 /* Rounding Control Mask */ + +#define IRC_NEAR 0x00000000 /* near */ +#define IRC_CHOP 0x00000001 /* chop */ +#define IRC_UP 0x00000002 /* up */ +#define IRC_DOWN 0x00000003 /* down */ + + +#define IMCW_SW 0x3E000000 /* Status Mask */ + +#define ISW_INVALID 0x20000000 /* invalid summary */ +#define ISW_OVERFLOW 0x10000000 /* overflow */ +#define ISW_UNDERFLOW 0x08000000 /* underflow */ +#define ISW_ZERODIVIDE 0x04000000 /* zero divide */ +#define ISW_INEXACT 0x02000000 /* inexact (precision) */ + + +#define IMCW_VX 0x01F80100 /* Invalid Cause Mask */ + +#define IVX_SNAN 0x01000000 /* SNaN */ +#define IVX_ISI 0x00800000 /* infinity - infinity */ +#define IVX_IDI 0x00400000 /* infinity / infinity */ +#define IVX_ZDZ 0x00200000 /* zero / zero */ +#define IVX_IMZ 0x00100000 /* infinity * zero */ +#define IVX_VC 0x00080000 /* inv flpt compare */ +#define IVX_CVI 0x00000100 /* inv integer convert */ + + +/* Internal CW for transcendentals */ + +#define ICW (IMCW_EM) + +/* Internal CW Mask (non-status bits) */ + +#define IMCW (0xffffffff & (~(IMCW_SW|IMCW_VX))) + +#endif + + +#ifndef _LANGUAGE_ASSEMBLY + +#define RETURN(fpcw,result) return _rstorfp(fpcw),(result) + +#define RETURN_INEXACT1(op,arg1,res,cw) \ + if (cw & IEM_INEXACT) { \ + _rstorfp(cw); \ + return res; \ + } \ + else { \ + return _except1(FP_P, op, arg1, res, cw); \ + } + + +#define RETURN_INEXACT2(op,arg1,arg2,res,cw) \ + if (cw & IEM_INEXACT) { \ + _rstorfp(cw); \ + return res; \ + } \ + else { \ + return _except2(FP_P, op, arg1, arg2, res, cw); \ + } + +#ifdef _M_ALPHA + +// +// Since fp32 is not compiled in IEEE exception mode perform Alpha NaN +// propagation in software to avoid hardware/kernel trap involvement. +// + +extern double _nan2qnan(double); + +#define _d_snan2(x,y) _nan2qnan(y) +#define _s2qnan(x) _nan2qnan(x) + +#else +//handle NaN propagation +#define _d_snan2(x,y) ((x)+(y)) +#define _s2qnan(x) ((x)+1.0) +#endif + + +#define _maskfp() _ctrlfp(ICW, IMCW) +#ifdef _ALPHA_ +#define _rstorfp(cw) 0 +#else +#define _rstorfp(cw) _ctrlfp(cw, IMCW) +#endif + + +#define ABS(x) ((x)<0 ? -(x) : (x) ) + + +int _d_inttype(double); + +#endif /* _LANGUAGE_ASSEMBLY */ + +#define _D_NOINT 0 +#define _D_ODD 1 +#define _D_EVEN 2 + + +// IEEE exceptions +#define FP_O 0x01 +#define FP_U 0x02 +#define FP_Z 0x04 +#define FP_I 0x08 +#define FP_P 0x10 + +// An extra flag for matherr support +// Set together with FP_I from trig functions when the argument is too large +#define FP_TLOSS 0x20 + + +#ifndef _LANGUAGE_ASSEMBLY +#ifdef B_END +#define SET_DBL(msw, lsw) msw, lsw +#else +#define SET_DBL(msw, lsw) lsw, msw +#endif +#endif /* _LANGUAGE_ASSEMBLY */ + + +// special types +#define T_PINF 1 +#define T_NINF 2 +#define T_QNAN 3 +#define T_SNAN 4 + + +// exponent adjustment for IEEE overflow/underflow exceptions +// used before passing the result to the trap handler + +#define IEEE_ADJUST 1536 + +// QNAN values + +#define INT_NAN (~0) + +#define QNAN_SQRT D_IND +#define QNAN_LOG D_IND +#define QNAN_LOG10 D_IND +#define QNAN_POW D_IND +#define QNAN_SINH D_IND +#define QNAN_COSH D_IND +#define QNAN_TANH D_IND +#define QNAN_SIN1 D_IND +#define QNAN_SIN2 D_IND +#define QNAN_COS1 D_IND +#define QNAN_COS2 D_IND +#define QNAN_TAN1 D_IND +#define QNAN_TAN2 D_IND +#define QNAN_ACOS D_IND +#define QNAN_ASIN D_IND +#define QNAN_ATAN2 D_IND +#define QNAN_CEIL D_IND +#define QNAN_FLOOR D_IND +#define QNAN_MODF D_IND +#define QNAN_LDEXP D_IND +#define QNAN_FMOD D_IND +#define QNAN_FREXP D_IND + + +/* + * Function prototypes + */ + +#ifndef _LANGUAGE_ASSEMBLY + +double _copysign(double x, double y); +double _set_exp(double x, int exp); +double _set_bexp(double x, int exp); +double _add_exp(double x, int exp); +double _frnd(double); +double _fsqrt(double); +/* Must have these declarations (or a suitable macro) or the C implementations + * of a great many FP functions fail badly when there is an exception */ +#ifndef _except1 +double _except1(int flags, int opcode, double arg, double res, unsigned int cw); +#endif +#ifndef _except2 +double _except2(int flags, int opcode, double arg1, double arg2, double res, unsigned int cw); +#endif +int _sptype(double); +int _get_exp(double); +double _decomp(double, int *); +int _powhlp(double x, double y, double * result); +extern unsigned int _fpstatus; +double _frnd(double); +double _exphlp(double, int *); +double _handle_qnan1(unsigned int op, double arg, unsigned int cw); +double _handle_qnan2(unsigned int op,double arg1,double arg2,unsigned int cw); +unsigned int _clhwfp(void); +unsigned int _setfpcw(unsigned int); +int _errcode(unsigned int flags); +void _set_errno(int matherrtype); +int _handle_exc(unsigned int flags, double * presult, unsigned int cw); +unsigned int _clrfp(void); +unsigned int _ctrlfp(unsigned int,unsigned int); +unsigned int _statfp(void); +void _set_statfp(unsigned int); + +#endif /* _LANGUAGE_ASSEMBLY */ + +#ifdef __cplusplus +} +#endif + +#define _INC_TRANS +#endif /* _INC_TRANS */ diff --git a/private/fp32/makefile b/private/fp32/makefile new file mode 100644 index 000000000..c88bf0d71 --- /dev/null +++ b/private/fp32/makefile @@ -0,0 +1,108 @@ +#### +#fp32\makefile - makefile for fp32 NT build +# +# Copyright (c) 1991-1992, Microsoft Corporation. All rights reserved. +# +#Purpose: +# +#Revision History: +# 03-04-92 GDP File created. +# 03-11-93 CFW Change coff to link32. +# +################################################################################ + +!INCLUDE fp32.def + +!IFNDEF 386 +386=0 +!ENDIF +!IFNDEF MIPS +MIPS=0 +!ENDIF +!IFNDEF ALPHA +ALPHA=0 +!ENDIF +!IFNDEF PPC +PPC=0 +!ENDIF + +# +# +# Default to building for i386 target, if no targets specified. +# + +!IF !$(386) +!IF !$(MIPS) +!IF !$(ALPHA) +!IF !$(PPC) +!IFDEF NTMIPSDEFAULT +MIPS=1 +!ELSE +!IFDEF NTALPHADEFAULT +ALPHA=1 +!ELSE +!IFDEF NTPPCDEFAULT +PPC=1 +!ELSE +386=1 +!ENDIF +!ENDIF +!ENDIF +!ENDIF +!ENDIF +!ENDIF +!ENDIF + +!IF $(386) +TARGETLIB=$(LIBFP:*=i386) +COMPONENTLIBS=$(LIBTRAN:*=i386) \ + $(LIBCONV:*=i386) +!ENDIF +!IF $(MIPS) +TARGETLIB=$(LIBFP:*=mips) +COMPONENTLIBS=$(LIBTRAN:*=mips) \ + $(LIBCONV:*=mips) +!ENDIF + +!IF $(ALPHA) +TARGETLIB=$(LIBFP:*=alpha) +COMPONENTLIBS=$(LIBTRAN:*=alpha) \ + $(LIBCONV:*=alpha) +!ENDIF + +!IF $(PPC) +TARGETLIB=$(LIBFP:*=ppc) +COMPONENTLIBS=$(LIBTRAN:*=ppc) \ + $(LIBCONV:*=ppc) +!ENDIF + +LIBFP=obj\*\fp$(TARGETNAMESUFFIX).lib +LIBTRAN=obj\*\tran$(TARGETNAMESUFFIX).lib +!IF "$(CRTLIBTYPE)" == "NT" +LIBCONV= +!ELSE +LIBCONV=obj\*\conv$(TARGETNAMESUFFIX).lib +!ENDIF + +all: $(TARGETLIB) +!IF "$(BUILDMSG)" != "" + echo $(BUILDMSG) +!ENDIF + +clean: cleanlib all + +cleanlib: + -erase $(TARGETLIB) + + +!IF $(ALPHA) +$(TARGETLIB): $(COMPONENTLIBS) makefile + lib -machine:alpha -out:$@ @<< +$(COMPONENTLIBS) +<< +!ELSE +$(TARGETLIB): $(COMPONENTLIBS) makefile + lib -out:$@ @<< +$(COMPONENTLIBS) +<< +!ENDIF diff --git a/private/fp32/nmake.mak b/private/fp32/nmake.mak new file mode 100644 index 000000000..585a76510 --- /dev/null +++ b/private/fp32/nmake.mak @@ -0,0 +1,40 @@ +################################################################################ +# +# +# Makefile for portable floating point lib +# +#Revision History +# +# 8-23-91 GDP written +# 3-04-92 GDP support only i386 OMF libs +# +################################################################################ +!INCLUDE fp32.def + +TCPU=i386 +NMAKE=$(MAKE) -f mkf +LIBEXE=tools.mak\i386os2\lib.exe + +OBJDIR=obj$(CRTLIBTYPE).mak\$(TCPU) +COMPONENTLIBS= $(OBJDIR)\conv$(TARGETNAMESUFFIX).lib \ + $(OBJDIR)\tran$(TARGETNAMESUFFIX).lib + +$(OBJDIR)\fp$(TARGETNAMESUFFIX).lib: makeobj $(COMPONENTLIBS) + if exist $@ erase $@ + $(LIBEXE) @<< +$@ +y +$(COMPONENTLIBS) +$(OBJDIR)\fp$(TARGETNAMESUFFIX).map; +<< + + +makeobj: + -md obj$(CRTLIBTYPE).mak + -md obj$(CRTLIBTYPE).mak\$(TCPU) + cd conv + $(NMAKE) /nologo + cd .. + cd tran + $(NMAKE) /nologo + cd .. diff --git a/private/fp32/readme b/private/fp32/readme new file mode 100644 index 000000000..e615ce240 --- /dev/null +++ b/private/fp32/readme @@ -0,0 +1,18 @@ +#### +#Revision History: +# 9-26-91 GDP written +# 9-28-91 GDP updated +# 3-05-91 GDP updated -- dropped old nmake build scheme +################################################################################ + + +This is the tree for the portable floating point code. +It contains string conversion routines and transcendentals. + +Currently two different build procedures are supported: + 1. nt 'build' + + 2. 'nmake' for building i386 libs in OMF format only + (Multiple host/target builds no longer supported) + All *.mak files are used for this purpose. + Use 'nmake -f nmake.mak CRTLIBTYPE=[ST | MT | DLL]' diff --git a/private/fp32/tran/alpha/asinacos.s b/private/fp32/tran/alpha/asinacos.s new file mode 100644 index 000000000..9b5e48382 --- /dev/null +++ b/private/fp32/tran/alpha/asinacos.s @@ -0,0 +1,1220 @@ +// TITLE("Alpha AXP Arc Sine and Cosine") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// asincos.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format arcsine and arccosine. +// +// Author: +// +// Bob Hanek (rtl::gs) 01-Jan-1992 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 12-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// The coefficient tables are indexed by the high-order bits of x. +// For ease-of-access, define offsets to the three tables. +// + +#define SQRT_OFFSET 1024 // Table of m and sqrt values +#define ASIN_OFFSET 1536 // Table of asins of m +#define POLY_OFFSET 2048 // Polynomial coefficients and other constants + + SBTTL("ArcCosine") + +//++ +// +// double acos (double x) +// +// Routine Description: +// +// This function returns the arccosine of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double arccosine result is returned in f0. +// +//-- + + NESTED_ENTRY(acos, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, t7 // save return address + + PROLOGUE_END + + stt f16, Temp(sp) // First, compute the table index + ldl v0, Temp + HighPart(sp) + ornot zero, zero, t0 + srl t0, 33 t0 + ldah t1, 0x10(zero) + and v0, t0, v0 + zapnot v0, 0xf, t0 + lda t1, -POLY_OFFSET + 0(t1) + srl t0, 10 t0 + subl t0, t1, t0 + lda t1, 0x6319(t0) // Br if index implies x >= 1/2 + bge t0, c_larger_than_half + + blt t1, c_small_x // Br if index implies x is 'tiny' + +// +// Compute the basic polynomial. +// Note: Start x^2 ASAP, and don't bother reusing registers +// (excpet those that are obviusly 'consumed' by the instruction), +// since that makes the (re)scheduling easier. FWIW, this code +// scheduling was bummed from the sin code in dpml_sincos.s before +// it was changed to use tables. +// + + mult f16, f16, f0 // X^2 + lda t1, __inv_trig_t_table + ldt f10, POLY_OFFSET + 0(t1) // 9 + ldt f12, POLY_OFFSET - 8(t1) // 8 + ldt f15, POLY_OFFSET + 16(t1) // 11 + mult f10, f0, f10 + ldt f17, POLY_OFFSET + 32(t1) // 13 + mult f0, f0, f1 // X^4 + ldt f20, POLY_OFFSET - 24(t1) // 6 + mult f15, f0, f15 + ldt f19, POLY_OFFSET - 48(t1) // 3 + mult f17, f0, f17 + ldt f21, POLY_OFFSET - 32(t1) // 5 + ldt f22, POLY_OFFSET + 8(t1) // 10 + mult f19, f0, f19 + addt f10, f12, f10 + ldt f12, POLY_OFFSET - 16(t1) // 7 + mult f1, f1, f11 // X^8 + mult f21, f0, f21 + mult f12, f0, f12 + addt f15, f22, f15 + ldt f22, POLY_OFFSET - 40(t1) // 4 + mult f11, f11, f13 // X^16, top of the line + mult f1, f11, f14 + addt f12, f20, f12 + ldt f20, POLY_OFFSET + 24(t1) // 12 + addt f21, f22, f21 + ldt f22, POLY_OFFSET - 72(t1) // 0 + mult f10, f13, f10 + mult f14, f11, f13 + mult f14, f14, f18 + addt f17, f20, f17 + ldt f20, POLY_OFFSET - 56(t1) // 2 + mult f12, f14, f12 + ldt f14, POLY_OFFSET - 64(t1) // 1 + mult f21, f11, f11 + addt f19, f20, f19 + mult f15, f13, f13 + ldt f20, POLY_OFFSET + 48(t1) // 15 + mult f14, f0, f14 + ldt f15, POLY_OFFSET + 40(t1) // 14 + mult f17, f18, f17 + addt f12, f10, f10 + mult f16, f0, f0 + mult f19, f1, f1 + addt f14, f22, f14 + addt f13, f17, f13 + addt f1, f11, f1 + addt f10, f13, f10 + addt f1, f10, f1 + addt f14, f1, f1 + mult f0, f1, f0 + addt f16, f0, f0 + subt f20, f0, f0 + addt f0, f15, f0 // Put result offinal add into f0 + br zero, c_done // And branch to adjust sp & return + +// +// The input is too small -- the poly evaluation could underflow +// and/or produce additional errors that we can easily avoid. +// +c_small_x: + ldah t1, -0x10(zero) + cpys f31, f31, f18 // Assume adjustment is 0 + lda t1, 0xc00(t1) + cmplt t0, t1, t0 // Are we close enoung? + bne t0, c_linear + + cpys f16, f16, f18 // Nope, use x instead + +// +// Use the linear approximation (in high and low parts!) +// +c_linear: + lda t1, __inv_trig_t_table + ldt f21, POLY_OFFSET + 48(t1) + ldt f19, POLY_OFFSET + 40(t1) + subt f21, f18, f18 + addt f18, f19, f0 // Add in high to give f0 + br zero, c_done // Done + +// +// Come here if the index indicates a large argument. +// First, determine _how_ large. +// +c_larger_than_half: + lda t1, -SQRT_OFFSET + 0(t0) + bge t1, c_out_of_range_or_one + + lda v0, __inv_trig_t_table + fblt f16, c_negative_x // Branch if negative + // + // Do the large region acos, using Bob's suggestion for the + // reduction, to maintain accuracy. + // + ldt f12, POLY_OFFSET + 72(v0) + ldt f17, POLY_OFFSET + 80(v0) + ldt f0, POLY_OFFSET + 0(v0) + subt f12, f16, f12 + ldt f13, One + lda t3, __sqrt_t_table + ldt f23, POLY_OFFSET + 8(v0) + mult f12, f17, f12 + stt f12, Temp(sp) + ldl t2, Temp + HighPart(sp) + cpyse f13, f12, f22 + ldt f12, POLY_OFFSET - 8(v0) + sra t2, 13 t1 + and t1, 0xff, t1 + addl t1, t1, t1 + s8addl t1, zero, t1 + mult f22, f22, f14 + addl t3, t1, t1 + ldah t3, -0x7fe0(zero) + lds f11, 4(t1) // Do the sqrt calculation in-line + lda t3, -1(t3) + lds f10, 0(t1) + and t2, t3, t3 + ldt f1, 8(t1) + ldah t1, 0x3fe0(zero) + mult f11, f22, f11 + bis t3, t1, t4 + mult f14, f10, f10 + xor t2, t3, t2 // Determine what the sign will be + stl t4, Temp + HighPart(sp) + addl t2, t1, t1 + ldt f20, Temp(sp) + zapnot t1, 0xf, t1 + sll t1, 31 t1 + stq t1, Temp(sp) + addt f1, f11, f1 + ldt f15, Temp(sp) + // + // Now start fetching constants for the polynomial. + // + ldt f11, POLY_OFFSET - 16(v0) + addt f10, f1, f1 // sqrt ... + ldt f10, POLY_OFFSET + 16(v0) + mult f20, f1, f20 // ... times Temp + // + // Now feed it into the polynoimial + // + mult f20, f1, f1 + mult f20, f15, f15 + subt f13, f1, f1 + ldt f13, POLY_OFFSET + 32(v0) + addt f15, f15, f21 + mult f15, f1, f1 + addt f21, f1, f1 + ldt f21, POLY_OFFSET - 48(v0) + mult f1, f1, f18 + mult f18, f18, f19 + mult f0, f18, f0 + mult f11, f18, f11 + mult f10, f18, f10 + mult f13, f18, f13 + mult f21, f18, f21 + mult f19, f19, f17 + addt f0, f12, f0 + ldt f12, POLY_OFFSET - 24(v0) + addt f10, f23, f10 + ldt f23, POLY_OFFSET - 40(v0) + addt f11, f12, f11 + ldt f12, POLY_OFFSET + 24(v0) + mult f17, f17, f22 + mult f19, f17, f14 + addt f13, f12, f12 + ldt f13, POLY_OFFSET - 56(v0) + addt f21, f13, f13 + mult f0, f22, f0 + ldt f22, POLY_OFFSET - 32(v0) + mult f14, f17, f20 + mult f14, f14, f15 + mult f11, f14, f11 + mult f22, f18, f22 + ldt f14, POLY_OFFSET - 64(v0) + mult f13, f19, f13 + mult f10, f20, f10 + mult f12, f15, f12 + addt f11, f0, f0 + addt f22, f23, f22 + ldt f23, POLY_OFFSET - 72(v0) + mult f14, f18, f14 + mult f1, f18, f18 + addt f10, f12, f10 + mult f22, f17, f17 + addt f14, f23, f14 + addt f0, f10, f0 + addt f13, f17, f13 + addt f13, f0, f0 + addt f14, f0, f0 + mult f18, f0, f0 + addt f1, f0, f0 + addt f0, f0, f0 + br zero, c_done + +// +// Take the absolute value, evaluate the difference of the sqrts, +// then take a lower-degree polynomial to compute the arccosine. +// +c_negative_x: + cpys f31, f16, f21 + lda t6, __inv_trig_t_table + addl t6, t0, t0 + ldt f20, POLY_OFFSET + 72(t6) + ldq_u t3, 0(t0) + ldt f12, One + lda t5, __sqrt_t_table + subt f20, f21, f15 + ldah t1, 0x3fe0(zero) + addt f20, f21, f20 + extbl t3, t0, t0 + addl t0, t0, t2 + s8addl t2, zero, t2 + addl t6, t2, t2 + s8addl t0, t6, t0 + mult f15, f20, f15 + ldt f22, SQRT_OFFSET + 0(t2) + ldt f19, SQRT_OFFSET + 8(t2) + ldah t2, -0x7fe0(zero) + lda t2, -1(t2) + ldt f20, POLY_OFFSET - 104(t6) + subt f21, f22, f0 + stt f15, Temp(sp) + ldl t4, Temp + HighPart(sp) + mult f21, f19, f19 + cpyse f12, f15, f17 + ldt f15, POLY_OFFSET - 80(t6) + sra t4, 13, v0 + and t4, t2, t2 + and v0, 0xff, v0 + addl v0, v0, v0 + s8addl v0, zero, v0 + mult f17, f17, f23 + addl t5, v0, v0 + bis t2, t1, t5 + lds f11, 4(v0) + xor t4, t2, t2 + lds f10, 0(v0) + addl t2, t1, t1 + ldt f13, 8(v0) + zapnot t1, 0xf, t1 + mult f11, f17, f11 + mult f23, f10, f10 + stl t5, Temp + HighPart(sp) + sll t1, 31, t1 + ldt f14, Temp(sp) + stq t1, Temp(sp) + ldt f18, Temp(sp) + addt f13, f11, f11 + ldt f23, POLY_OFFSET - 96(t6) + ldt f13, POLY_OFFSET - 112(t6) + addt f10, f11, f10 + ldt f11, POLY_OFFSET - 88(t6) + mult f14, f10, f14 + mult f14, f10, f10 + mult f14, f18, f14 + ldt f18, ASIN_OFFSET + 96(t0) + subt f12, f10, f10 + ldt f12, POLY_OFFSET + 48(t6) + addt f14, f14, f1 + mult f14, f10, f10 + ldt f14, POLY_OFFSET + 40(t6) + addt f1, f10, f1 + mult f22, f1, f1 + addt f21, f22, f22 + addt f1, f19, f1 + mult f0, f22, f0 + divt f0, f1, f0 + mult f0, f0, f16 + mult f16, f16, f17 + mult f16, f20, f20 + mult f15, f16, f15 + mult f23, f17, f23 + addt f20, f13, f13 + mult f17, f16, f17 + addt f15, f11, f11 + mult f0, f16, f16 + addt f13, f23, f13 + mult f17, f11, f11 + addt f13, f11, f11 + mult f16, f11, f11 + addt f0, f11, f0 + addt f0, f18, f0 + addt f12, f0, f0 + addt f0, f14, f0 +// br zero, c_done // Fall thru +// +// Return with result in f0. +// + +c_done: lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (t7) // return through saved ra in t7 + +// +// Check for infinity or NaN +// +c_out_of_range_or_one: + ldah t5, 0x7ff0(zero) + and v0, t5, v0 + lda t3, __inv_trig_t_table + xor v0, t5, v0 + beq v0, c_nan_or_inf + ldt f10, POLY_OFFSET + 72(t3) + cmpteq f16, f10, f21 // x == 1? + fbeq f21, c_not_one + cpys f31, f31, f0 // x == 1, so return 0 + br zero, c_done +c_not_one: + cpysn f10, f10, f10 + cmpteq f16, f10, f10 + fbeq f10, c_out_of_range // x == -1? + ldt f0, POLY_OFFSET + 64(t3) // return pi or 180 + br zero, c_done + +// +// Fill the exception record and call dpml_exception +// +c_out_of_range: +c_infinity: + lda t4, acosName + stl t4, ExRec + ErName(sp) + ldah t0, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + stl t0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, c_done + +// +// Classify NaNs and infinities. +// +c_nan_or_inf: + stt f16, Temp(sp) + ldl t2, Temp + HighPart(sp) + and t2, t5, t4 + cmpeq t4, t5, t4 + beq t4, c_out_of_range + + ldl t1, Temp(sp) + ldah t0, 0x10(zero) + lda t0, -1(t0) + and t2, t0, t0 + bis t0, t1, t0 + cmpult zero, t0, t0 + and t4, t0, t4 + beq t4, c_infinity + + cpys f16, f16, f0 // Just return the NaN + br zero, c_done + + .end acos + + SBTTL("ArcSine") + +//++ +// +// double asin (double x) +// +// Routine Description: +// +// This function returns the arcsine of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double arcsine result is returned in f0. +// +//-- + + NESTED_ENTRY(asin, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, t7 // save return address + + PROLOGUE_END + + stt f16, Temp(sp) // Get the high bits of x ... + ldl v0, Temp + HighPart(sp) // ... into a register + ornot zero, zero, t0 // Now compute the index + srl t0, 33 t0 + ldah t1, 0x10(zero) + and v0, t0, v0 + zapnot v0, 0xf, t0 + lda t1, -POLY_OFFSET + 0(t1) + srl t0, 10 t0 + subl t0, t1, t0 + lda t1, 0x6319(t0) // Br if index implies x >= 1/2 + bge t0, s_larger_than_half + + blt t1, s_tiny_x // Br if index implies x is 'tiny' + +// Polynomial region: +// +// The function is computed as +// +// x + x^3 P(x) +// +// where P(x) is approximately +// +// 1/6 + 3/40 x^2 + .... +// +// Compute the polynomial. The string of leading ldts don't matter..., +// it's probably the x^2 term that's on the critical path. +// NB: because we quickly run out of issue slots in the second tier +// (from the bottom), it's not clear whether this is still optimal. +// + mult f16, f16, f0 // x^2 + lda t1, __inv_trig_t_table + ldt f10, POLY_OFFSET + 0(t1) + ldt f12, POLY_OFFSET - 8(t1) + ldt f15, POLY_OFFSET + 16(t1) + mult f10, f0, f10 + ldt f17, POLY_OFFSET + 32(t1) + mult f0, f0, f1 // x^4 + ldt f20, POLY_OFFSET - 24(t1) + mult f15, f0, f15 + ldt f19, POLY_OFFSET - 48(t1) + mult f17, f0, f17 + ldt f21, POLY_OFFSET - 32(t1) + ldt f22, POLY_OFFSET + 8(t1) + mult f19, f0, f19 + addt f10, f12, f10 + ldt f12, POLY_OFFSET - 16(t1) + mult f1, f1, f11 // x^8 + mult f21, f0, f21 + mult f12, f0, f12 + addt f15, f22, f15 + ldt f22, POLY_OFFSET - 40(t1) + mult f11, f11, f13 // x^16 is it + mult f1, f11, f14 // x^12 + addt f12, f20, f12 + ldt f20, POLY_OFFSET + 24(t1) + addt f21, f22, f21 + ldt f22, POLY_OFFSET - 72(t1) + mult f10, f13, f10 + mult f14, f11, f13 // x^20 + mult f14, f14, f18 // x^24 + addt f17, f20, f17 + ldt f20, POLY_OFFSET - 56(t1) + mult f12, f14, f12 + ldt f14, POLY_OFFSET - 64(t1) + mult f21, f11, f11 + addt f19, f20, f19 + mult f15, f13, f13 + mult f14, f0, f14 + mult f17, f18, f17 + addt f12, f10, f10 + mult f16, f0, f0 + mult f19, f1, f1 + addt f14, f22, f14 + addt f13, f17, f13 + addt f1, f11, f1 + addt f10, f13, f10 + addt f1, f10, f1 + addt f14, f1, f1 + mult f0, f1, f0 + addt f16, f0, f0 // Whew! + br zero, s_done + + +// Small: asin(x) = x (x < small) +// +// Within the "small" region the Arcsine function is approximated as +// asin(x) = x. This is a very quick approximation but it may only be +// applied to small input values. There is effectively no associated +// storage costs. By limiting the magnitude of x the error bound can +// be limited to <= 1/2 lsb. +// +s_tiny_x: + cpys f16, f16, f0 + br zero, s_done + + +// +// Come here if the index indicates a large argument. +// First, determine _how_ large. +// +s_larger_than_half: + lda t1, -SQRT_OFFSET + 0(t0) + bge t1, s_out_of_range_or_one + + cpys f31, f16, f20 + lda v0, __inv_trig_t_table + // + // Do the large region asin. The same reduction is used here as in + // acos, except that the sign of the surds is reversed. +// +// Reduction: +// asin(x) = asin(x0) + asin(x*sqrt(1-x0^2)-x0*sqrt(1-x^2)) = +// asin(x0) + asin((x^2 - x0^2) / (x0*sqrt(1-x^2) + x*sqrt(1-x0^2))) +// + cpys f16, f16, f12 // Save sign for test below + addl v0, t0, t0 + ldt f15, POLY_OFFSET + 72(v0) + ldq_u t1, 0(t0) + subt f15, f20, f18 + addt f15, f20, f15 + extbl t1, t0, t0 + ldt f11, One + lda t3, __sqrt_t_table + addl t0, t0, t1 + s8addl t1, zero, t1 + addl v0, t1, t1 + mult f18, f15, f15 + ldt f21, SQRT_OFFSET + 0(t1) + ldt f19, SQRT_OFFSET + 8(t1) + stt f15, Temp(sp) + ldl t2, Temp + HighPart(sp) + cpyse f11, f15, f13 + subt f20, f21, f15 + sra t2, 13, t1 + mult f20, f19, f19 + and t1, 0xff, t1 + addl t1, t1, t1 + s8addl t1, zero, t1 + mult f13, f13, f10 + addl t3, t1, t1 + ldah t3, -0x7fe0(zero) + lds f17, 4(t1) + lda t3, -1(t3) + lds f22, 0(t1) + and t2, t3, t3 + ldt f14, 8(t1) + ldah t1, 0x3fe0(zero) + mult f17, f13, f13 + bis t3, t1, t4 + mult f10, f22, f10 + xor t2, t3, t2 + stl t4, Temp + HighPart(sp) + addl t2, t1, t1 + ldt f1, Temp(sp) + zapnot t1, 0xf, t1 + ldt f22, POLY_OFFSET - 104(v0) + sll t1, 31, t1 + addt f14, f13, f13 + ldt f14, POLY_OFFSET - 80(v0) + stq t1, Temp(sp) + ldt f0, Temp(sp) + addt f10, f13, f10 + mult f1, f10, f1 + mult f1, f10, f10 + mult f1, f0, f0 + ldt f1, POLY_OFFSET - 96(v0) + subt f11, f10, f10 + addt f0, f0, f18 + ldt f11, POLY_OFFSET - 112(v0) + mult f0, f10, f0 + ldt f10, POLY_OFFSET - 88(v0) + s8addl t0, v0, v0 + addt f18, f0, f0 + ldt f18, ASIN_OFFSET + 96(v0) + mult f21, f0, f0 + addt f20, f21, f21 + addt f0, f19, f0 + mult f15, f21, f15 + divt f15, f0, f0 + mult f0, f0, f17 + mult f17, f17, f13 + mult f17, f22, f22 + mult f14, f17, f14 + mult f1, f13, f1 + addt f22, f11, f11 + mult f13, f17, f13 + addt f14, f10, f10 + mult f0, f17, f17 + addt f11, f1, f1 + mult f13, f10, f10 + addt f1, f10, f1 + mult f17, f1, f1 + addt f0, f1, f0 + addt f0, f18, f0 + fbge f12, s_done // Skip if sign is fine + + cpysn f0, f0, f0 +// br zero, s_done // Fall thru +// +// Return with result in f0. +// +s_done: lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (t7) // return through saved ra in t7 + +// +// Check for infinity or NaN +// +s_out_of_range_or_one: + ldah t5, 0x7ff0(zero) + and v0, t5, v0 + lda t6, __inv_trig_t_table + xor v0, t5, v0 + beq v0, s_nan_or_inf + ldt f20, POLY_OFFSET + 72(t6) // Check x == 1 ? + cmpteq f16, f20, f21 + fbeq f21, s_not_one + ldt f0, POLY_OFFSET + 56(t6) // Return asin(1) + br zero, s_done +s_not_one: + cpysn f20, f20, f20 + cmpteq f16, f20, f20 // Check x == -1 ? + fbeq f20, s_out_of_range // must be oor + ldt f19, POLY_OFFSET + 56(t6) + cpysn f19, f19, f0 // Return -asin(1) = asin(-1) + br zero, s_done + +// +// Fill the exception record and call dpml_exception +// +s_out_of_range: +s_infinity: + lda t3, asinName + ldah t4, 0x800(zero) + stl t3, ExRec + ErName(sp) + stt f16, ExRec + ErArg0(sp) + lda t4, 3(t4) + stl t4, ExRec + ErErr(sp) + // + // Report the exception + // + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, s_done + +// +// Classify NaNs and infinities. +// +s_nan_or_inf: + stt f16, Temp(sp) + ldl t2, Temp + HighPart(sp) + and t2, t5, t3 + cmpeq t3, t5, t3 + beq t3, s_out_of_range + + ldl t1, Temp(sp) + ldah t0, 0x10(zero) + lda t0, -1(t0) + and t2, t0, t0 + bis t0, t1, t0 + cmpult zero, t0, t0 + and t3, t0, t3 + beq t3, s_infinity + + cpys f16, f16, f0 // Just return the NaN + br zero, s_done + + .end asin + + .rdata + .align 3 + +One: .double 1.0 + +// +// Function names for __dpml_exception. +// + +acosName: + .ascii "acos\0" + +asinName: + .ascii "asin\0" + +// +// The indirection table is indexed by the high 10 bits of x, +// giving an index into the following tables. +// + + .align 3 + +__inv_trig_t_table: + .long 0x00000000 + .long 0x00000000 + .long 0x00000000 + .long 0x00000000 + .long 0x00000000 + .long 0x00000000 + .long 0x00000000 + .long 0x01010100 + .long 0x01010101 + .long 0x01010101 + .long 0x01010101 + .long 0x01010101 + .long 0x01010101 + .long 0x01010101 + .long 0x02020101 + .long 0x02020202 + .long 0x02020202 + .long 0x02020202 + .long 0x02020202 + .long 0x02020202 + .long 0x02020202 + .long 0x02020202 + .long 0x03030303 + .long 0x03030303 + .long 0x03030303 + .long 0x03030303 + .long 0x03030303 + .long 0x03030303 + .long 0x03030303 + .long 0x04040303 + .long 0x04040404 + .long 0x04040404 + .long 0x04040404 + .long 0x04040404 + .long 0x04040404 + .long 0x04040404 + .long 0x04040404 + .long 0x05050504 + .long 0x05050505 + .long 0x05050505 + .long 0x05050505 + .long 0x05050505 + .long 0x05050505 + .long 0x05050505 + .long 0x05050505 + .long 0x06060605 + .long 0x06060606 + .long 0x06060606 + .long 0x06060606 + .long 0x06060606 + .long 0x06060606 + .long 0x06060606 + .long 0x06060606 + .long 0x07070706 + .long 0x07070707 + .long 0x07070707 + .long 0x07070707 + .long 0x07070707 + .long 0x07070707 + .long 0x07070707 + .long 0x07070707 + .long 0x08080707 + .long 0x08080808 + .long 0x08080808 + .long 0x08080808 + .long 0x08080808 + .long 0x08080808 + .long 0x08080808 + .long 0x08080808 + .long 0x09080808 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x09090909 + .long 0x0a0a0a09 + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0a0a0a0a + .long 0x0b0a0a0a + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0b0b0b0b + .long 0x0c0c0c0b + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0c0c0c0c + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0d0d0d0d + .long 0x0e0d0d0d + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0e0e0e0e + .long 0x0f0f0e0e + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x0f0f0f0f + .long 0x1010100f + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x10101010 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x11111111 + .long 0x12111111 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x12121212 + .long 0x13131212 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x13131313 + .long 0x14141413 + .long 0x14141414 + .long 0x14141414 + .long 0x14141414 + .long 0x14141414 + .long 0x14141414 + .long 0x14141414 + .long 0x14141414 + .long 0x15141414 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x15151515 + .long 0x16161615 + .long 0x16161616 + .long 0x16161616 + .long 0x16161616 + .long 0x16161616 + .long 0x16161616 + .long 0x16161616 + .long 0x16161616 + .long 0x17171616 + .long 0x17171717 + .long 0x17171717 + .long 0x17171717 + .long 0x17171717 + .long 0x17171717 + .long 0x17171717 + .long 0x17171717 + .long 0x18181717 + .long 0x18181818 + .long 0x18181818 + .long 0x18181818 + .long 0x18181818 + .long 0x18181818 + .long 0x18181818 + .long 0x18181818 + .long 0x19191918 + .long 0x19191919 + .long 0x19191919 + .long 0x19191919 + .long 0x19191919 + .long 0x19191919 + .long 0x19191919 + .long 0x1a191919 + .long 0x1a1a1a1a + .long 0x1a1a1a1a + .long 0x1a1a1a1a + .long 0x1a1a1a1a + .long 0x1a1a1a1a + .long 0x1a1a1a1a + .long 0x1b1a1a1a + .long 0x1b1b1b1b + .long 0x1b1b1b1b + .long 0x1b1b1b1b + .long 0x1b1b1b1b + .long 0x1b1b1b1b + .long 0x1b1b1b1b + .long 0x1c1c1c1b + .long 0x1c1c1c1c + .long 0x1c1c1c1c + .long 0x1c1c1c1c + .long 0x1c1c1c1c + .long 0x1c1c1c1c + .long 0x1d1d1d1c + .long 0x1d1d1d1d + .long 0x1d1d1d1d + .long 0x1d1d1d1d + .long 0x1d1d1d1d + .long 0x1e1d1d1d + .long 0x1e1e1e1e + .long 0x1e1e1e1e + .long 0x1e1e1e1e + .long 0x1e1e1e1e + .long 0x1f1e1e1e + .long 0x1f1f1f1f + .long 0x1f1f1f1f + .long 0x1f1f1f1f + .long 0x1f1f1f1f + .long 0x20202020 + .long 0x20202020 + .long 0x20202020 + .long 0x21212120 + .long 0x21212121 + .long 0x22212121 + .long 0x22222222 + .long 0x24232322 + // + // Table of m and sqrt values + // + .double 5.0706834168761639e-001 + .double 8.6190585150477472e-001 + .double 5.2137893902388344e-001 + .double 8.5332526151657417e-001 + .double 5.3568455639492574e-001 + .double 8.4441817604784630e-001 + .double 5.5047793238782239e-001 + .double 8.3484971459181090e-001 + .double 5.6526534783047766e-001 + .double 8.2490913835530344e-001 + .double 5.8053903635241511e-001 + .double 8.1423241600356910e-001 + .double 5.9629789885182627e-001 + .double 8.0276323771389602e-001 + .double 6.1204861405331246e-001 + .double 7.9082014013011792e-001 + .double 6.2828241971242693e-001 + .double 7.7798534760000315e-001 + .double 6.4450630561276323e-001 + .double 7.6459899426129740e-001 + .double 6.6121067125093991e-001 + .double 7.5020027207665119e-001 + .double 6.7790283072811974e-001 + .double 7.3515151641739962e-001 + .double 6.9458178227957712e-001 + .double 7.1941375280524500e-001 + .double 7.1173630303037350e-001 + .double 7.0244674883485392e-001 + .double 7.2887423014480368e-001 + .double 6.8464761493108250e-001 + .double 7.4599395561707982e-001 + .double 6.6595271467483508e-001 + .double 7.6309361601341208e-001 + .double 6.4628796460987514e-001 + .double 7.8017102961507767e-001 + .double 6.2556627510548357e-001 + .double 7.9722361208624026e-001 + .double 6.0368411634907859e-001 + .double 8.1424826092086988e-001 + .double 5.8051681249326359e-001 + .double 8.3124119321203460e-001 + .double 5.5591193430923302e-001 + .double 8.4771379548405590e-001 + .double 5.3045388201616195e-001 + .double 8.6414659725883325e-001 + .double 5.0324015981038306e-001 + .double 8.8005137328121386e-001 + .double 4.7487849012757949e-001 + .double 8.9542402068439086e-001 + .double 4.4521435644125357e-001 + .double 9.1025912069088977e-001 + .double 4.1403904791583146e-001 + .double 9.2454927177753021e-001 + .double 3.8106251987782608e-001 + .double 9.3781185698639746e-001 + .double 3.4714394837837054e-001 + .double 9.5004241465364447e-001 + .double 3.1212082653849399e-001 + .double 9.6123413668593594e-001 + .double 2.7573344822426499e-001 + .double 9.7137596294032880e-001 + .double 2.3754733974883613e-001 + .double 9.8044825498366817e-001 + .double 1.9677708021891172e-001 + .double 9.8797603760528618e-001 + .double 1.5460707977889673e-001 + .double 9.9351955820522586e-001 + .double 1.1366128392593858e-001 + .double 9.9750136815748058e-001 + .double 7.0647155101634357e-002 + .double 9.9953688091941839e-001 + .double 3.0430637224356950e-002 + .double 9.9999999999999001e-001 + .double 1.4136482746161692e-007 + .double 1.0000000000000000e+000 + .double 0.0000000000000000e+000 + // + // Table of asins of m + // + .double 5.3178000646702150e-001 + .double 5.4846611388311073e-001 + .double 5.6531822297603329e-001 + .double 5.8293660704549022e-001 + .double 6.0075488730339055e-001 + .double 6.1939055235381923e-001 + .double 6.3888146881507923e-001 + .double 6.5864849519564295e-001 + .double 6.7934350983258240e-001 + .double 7.0037741676519327e-001 + .double 7.2243141289503865e-001 + .double 7.4490616531340070e-001 + .double 7.6783840625167599e-001 + .double 7.9196691740187719e-001 + .double 8.1667620264088137e-001 + .double 8.4202612785445319e-001 + .double 8.6808642746433240e-001 + .double 8.9493917031620063e-001 + .double 9.2268207581200223e-001 + .double 9.5143306842063147e-001 + .double 9.8133668714908417e-001 + .double 1.0116604321555875e+000 + .double 1.0434520784684196e+000 + .double 1.0759703715622875e+000 + .double 1.1093826892149530e+000 + .double 1.1439094683009035e+000 + .double 1.1798510719874449e+000 + .double 1.2162723883933650e+000 + .double 1.2533717611875035e+000 + .double 1.2914436801658071e+000 + .double 1.3309561925115592e+000 + .double 1.3727266870827943e+000 + .double 1.4155665880147674e+000 + .double 1.4568888795401558e+000 + .double 1.5000902724115686e+000 + .double 1.5403609910305571e+000 + .double 1.5707961854300692e+000 + .double 1.5707963267948966e+000 + // + // poly for accurate table range + // + .double 1.6666666666666666e-001 + .double 7.4999999999998457e-002 + .double 4.4642857155497803e-002 + .double 3.0381908199656704e-002 + .double 2.2414568383463437e-002 + // + // poly for basic interval [-1/2, 1/2] + // + .double 1.6666666666666666e-001 + .double 7.5000000000001246e-002 + .double 4.4642857142535047e-002 + .double 3.0381944477026079e-002 + .double 2.2372157374774295e-002 + .double 1.7352818420738318e-002 + .double 1.3963747019875614e-002 + .double 1.1566847170089092e-002 + .double 9.6187294249636349e-003 + .double 9.3367202446762564e-003 + .double 2.9810775552463675e-003 + .double 1.9707463673752881e-002 + .double -1.9455097598214267e-002 + .double 2.9743706165166042e-002 + // + // hi and lo parts of pi over 2, and pi + // + .double 1.5707963267948966e+000 + .double 6.1232339957367660e-017 + .double 1.5707963267948966e+000 + .double 3.1415926535897931e+000 + // + // 1.0 and 0.5 + // + .double 1.0000000000000000e+000 + .double 5.0000000000000000e-001 + .double 0.0000000000000000e+000 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/atan2s.s b/private/fp32/tran/alpha/atan2s.s new file mode 100644 index 000000000..13a553aff --- /dev/null +++ b/private/fp32/tran/alpha/atan2s.s @@ -0,0 +1,515 @@ +// TITLE("Alpha AXP ArcTangent2") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// atan2.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format arctangent2. +// +// Author: +// +// Andy Garside +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +SaveS0: .space 8 // +SaveS1: .space 8 // +SaveRa: .space 8 // +SaveF2: .space 8 // +SaveF3: .space 8 // +Temp: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Define offsets into atan_t_table. +// + +#define ATAN_INF 0xf18 +#define TWICE_ATAN_INF 0xf28 + + SBTTL("ArcTangent2") + +//++ +// +// double +// atan2 ( +// IN double y +// IN double x +// ) +// +// Routine Description: +// +// This function returns the arctangent of the given double arguments. +// It returns atan(y/x) in range [-pi,pi]. +// +// Arguments: +// +// y (f16) - Supplies the argument value. +// +// x (f17) - Supplies the argument value. +// +// Return Value: +// +// The double arctangent2 result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(atan2, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + stq s0, SaveS0(sp) + stq s1, SaveS1(sp) + stq ra, SaveRa(sp) + stt f2, SaveF2(sp) + stt f3, SaveF3(sp) + + PROLOGUE_END + + cpys f16, f16, f2 // y + ldah s0, 0x7ff0(zero) + cpys f17, f17, f3 // x + stt f2, Temp(sp) + ldl v0, Temp + HighPart(sp) + and v0, s0, v0 + mov v0, t0 + xor t0, s0, t1 + beq t1, spec_y + beq t0, spec_y + + stt f3, Temp(sp) + ldl t2, Temp + HighPart(sp) + and t2, s0, t2 + xor t2, s0, t1 + beq t1, class_y + bne t2, calc_atan2 + br zero, class_y + +// +// Abnormal inputs +// + +spec_y: stt f3, Temp(sp) + ldl t2, Temp + HighPart(sp) + and t2, s0, t2 + +// +// Classify y according to type +// + +class_y: + stt f2, Temp(sp) + ldl t3, Temp + HighPart(sp) + zapnot t3, 0xf, t1 + and t3, s0, t4 + srl t1, 31, t1 + and t1, 1, t1 + beq t4, LL00d0 + cmpult t4, s0, t4 + beq t4, LL0098 + addl t1, 4, t5 + br zero, class_x +LL0098: ldah t6, 0x10(zero) + ldl t4, Temp(sp) + lda t6, -1(t6) + and t3, t6, t6 + stl t6, Temp + HighPart(sp) + bis t6, t4, t4 + srl t6, 19, t6 + beq t4, LL00c8 + and t6, 1, t6 + mov t6, t5 + br zero, class_x +LL00c8: addl t1, 2, t5 + br zero, class_x +LL00d0: ldl t7, Temp(sp) + ldah t4, 0x10(zero) + lda t4, -1(t4) + and t3, t4, t3 + bis t3, t7, t7 + stl t3, Temp + HighPart(sp) + mov 6, t6 + cmoveq t7, 8, t6 + addl t1, t6, t5 + +// +// Classify x according to type +// + +class_x: + stt f3, Temp(sp) + ldl t3, Temp + HighPart(sp) + zapnot t3, 0xf, t4 + and t3, s0, t1 + srl t4, 31, t4 + and t4, 1, t4 + beq t1, LL0158 + cmpult t1, s0, t1 + beq t1, LL0120 + addl t4, 4, t6 + br zero, switch +LL0120: ldah t1, 0x10(zero) + ldl t7, Temp(sp) + lda t1, -1(t1) + and t3, t1, t1 + bis t1, t7, t7 + stl t1, Temp + HighPart(sp) + beq t7, LL0150 + srl t1, 19, t1 + and t1, 1, t1 + mov t1, t6 + br zero, switch +LL0150: addl t4, 2, t6 + br zero, switch +LL0158: ldl a0, Temp(sp) + ldah t7, 0x10(zero) + lda t7, -1(t7) + and t3, t7, t3 + bis t3, a0, a0 + stl t3, Temp + HighPart(sp) + mov 6, t1 + cmoveq a0, 8, t1 + addl t4, t1, t6 + +// +// switch on class(y) and class(x) +// + +switch: sra t5, 1, a0 + sra t6, 1, t3 + s4addl a0, a0, a0 + addl a0, t3, t3 + cmpule t3, 24, t12 + beq t12, cpys_y_class + + lda t12, Switch_table + s4addl t3, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +ret_y: cpys f2, f2, f0 + br zero, done + +ret_x: cpys f3, f3, f0 + br zero, done + +infs: + lda t1, atan2Name + stl t1, ExRec + ErName(sp) + ldah t3, 0x800(zero) + stt f2, ExRec + ErArg0(sp) + stt f3, ExRec + ErArg1(sp) + lda t3, 9(t3) + stl t3, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +zeros: + lda t6, atan2Name + stl t6, ExRec + ErName(sp) + ldah a0, 0x800(zero) + stt f2, ExRec + ErArg0(sp) + stt f3, ExRec + ErArg1(sp) + lda a0, 8(a0) + stl a0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +ret_inf: + ldt f0, __atan_t_table + ATAN_INF + +cpys_y_class: + blbc t5, done + cpysn f0, f0, f0 + br zero, done + +ret_tw_inf: + blbc t6, x_pos + + ldt f16, __atan_t_table + TWICE_ATAN_INF + cpys f16, f16, f0 + + blbc t5, done + cpysn f0, f0, f0 + br zero, done + +x_pos: cpys f31, f31, f16 + cpys f16, f16, f0 + + blbc t5, done + cpysn f0, f0, f0 + br zero, done + +de_o_norm: + ldah t4, 0x4350(zero) // underflow check + cmpult t2, t4, t4 + bne t4, scale_up_denorm + br zero, underflow + +n_o_de: ldah t1, 0x360(zero) // check for const range + cmplt t0, t1, t1 + beq t1, const_range + + +// Scale x and y up by 2^F_PRECISION and adjust exp_x and exp_y accordingly. +// With x and y scaled into the normal range, we can rejoin the main logic +// flow for computing atan(y/x) + +scale_up_denorm: + + beq t0, LL02c0 + stt f2, Temp(sp) + ldl ra, Temp + HighPart(sp) + ldah v0, 0x4330(zero) + ldah t3, -0x7ff0(zero) + addl t0, v0, v0 + lda t3, -1(t3) + and ra, t3, t3 + mov v0, t0 + bis t3, t0, t3 + stl t3, Temp + HighPart(sp) + ldt f2, Temp(sp) + br zero, LL02e4 +LL02c0: ldt f17, Two53 + cpys f2, f17, f16 + cpyse f16, f2, f0 + subt f0, f16, f2 + stt f2, Temp(sp) + ldl t4, Temp + HighPart(sp) + and t4, s0, t4 + mov t4, t0 +LL02e4: beq t2, LL0318 + stt f3, Temp(sp) + ldl a0, Temp + HighPart(sp) + ldah v0, -0x7ff0(zero) + ldah ra, 0x4330(zero) + lda v0, -1(v0) + addl t2, ra, t2 + and a0, v0, v0 + bis v0, t2, v0 + stl v0, Temp + HighPart(sp) + ldt f3, Temp(sp) + br zero, calc_atan2 +LL0318: ldt f17, Two53 + cpys f3, f17, f0 + cpyse f0, f3, f16 + subt f16, f0, f3 + stt f3, Temp(sp) + ldl t1, Temp + HighPart(sp) + and t1, s0, t1 + mov t1, t2 + +// +// OK. Calculate atan2. +// + +calc_atan2: + subl t0, t2, s1 + ldah t4, 0x360(zero) // check for const range + ldah t5, -0x1c0(zero) // check for identity range + cmplt s1, t4, t4 + cmple s1, t5, t5 + beq t4, const_range + bne t5, ident_range + divt f2, f3, f16 + bsr ra, atan + cpys f0, f0, f1 + cmptlt f31, f3, f3 + cpys f1, f1, f0 + fbeq f3, post_proc + br zero, done + +ident_range: + ldah v0, -0x360(zero) // check for possible underflow + cmpult s1, v0, v0 + fbge f3, poss_under + beq v0, poss_under + + ldt f10, __atan_t_table + TWICE_ATAN_INF + br zero, fix_sign + +poss_under: + ldah t1, -0x3fe0(zero) // check for certain underflow or denorm + cmpule s1, t1, t1 + bne t1, under_or_de + + divt f2, f3, f1 + cmptlt f31, f3, f3 + fbeq f3, post_proc + cpys f1, f1, f0 + br zero, done + +post_proc: + ldt f11, __atan_t_table + TWICE_ATAN_INF + cpys f2, f11, f12 + addt f1, f12, f0 + br zero, done + +under_or_de: + ldah t3, -0x4350(zero) // check for underflow + cmpult s1, t3, t3 + bne t3, underflow + + ldah t6, 0x350(zero) // fixup denorm check + cpys f2, f2, f13 + stt f13, Temp(sp) + ldl t5, Temp + HighPart(sp) + addl t5, t6, t5 + stl t5, Temp + HighPart(sp) + ldt f14, Temp(sp) + divt f14, f3, f14 + stt f14, Temp(sp) + ldl a2, Temp + HighPart(sp) + and a2, s0, s0 + subl s0, t6, t6 + ble t6, underflow + + stt f14, Temp(sp) + ldl a4, Temp + HighPart(sp) + ldah a5, -0x7ff0(zero) + lda a5, -1(a5) + and a4, a5, a4 + bis a4, t6, t6 + stl t6, Temp + HighPart(sp) + ldt f0, Temp(sp) + br zero, done + +// +// quotient underflows +// + +underflow: + lda t10, atan2Name + ldah v0, 0x800(zero) + stl t10, ExRec + ErName(sp) + stt f2, ExRec + ErArg0(sp) + lda v0, 0xa(v0) + stt f3, ExRec + ErArg1(sp) + stl v0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +const_range: + ldt f10, __atan_t_table + ATAN_INF + +fix_sign: + cpys f2, f10, f0 + +// +// Restore registers and return with result in f0. +// + +done: + ldq s0, SaveS0(sp) + ldq s1, SaveS1(sp) + ldq ra, SaveRa(sp) + ldt f2, SaveF2(sp) + ldt f3, SaveF3(sp) + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end atan2 + + .rdata + .align 3 + +// +// Define floating point constants. +// + +One: .double 1.0 + +Two53: .quad 0x4340000000000000 // 2^53 (9007199254740992) + +// +// switch on class of y and x +// +Switch_table: + .long ret_y + .long ret_y + .long ret_y + .long ret_y + .long ret_y + .long ret_x + .long infs + .long ret_inf + .long ret_inf + .long ret_inf + .long ret_x + .long ret_tw_inf + .long cpys_y_class + .long n_o_de + .long ret_inf + .long ret_x + .long ret_tw_inf + .long de_o_norm + .long scale_up_denorm + .long ret_inf + .long ret_x + .long ret_tw_inf + .long ret_tw_inf + .long ret_tw_inf + .long zeros + +// +// Function name for dpml_exception. +// + +atan2Name: + .ascii "atan2\0" diff --git a/private/fp32/tran/alpha/atans.s b/private/fp32/tran/alpha/atans.s new file mode 100644 index 000000000..9b8e90f28 --- /dev/null +++ b/private/fp32/tran/alpha/atans.s @@ -0,0 +1,882 @@ +// TITLE("Alpha AXP ArcTangent") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// atan.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format arctangent. +// +// Author: +// +// Andy Garside +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 12-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Define offsets into atan table. +// + +#define ATAN_INF 0xf18 +#define ATAN_INF_LO 0xf20 +#define POLY_COEF_1 0xf30 +#define POLY_COEF_2 0xf38 +#define POLY_COEF_3 0xf40 +#define POLY_COEF_4 0xf48 +#define POLY_COEF_5 0xf50 +#define POLY_COEF_6 0xf58 +#define POLY_COEF_7 0xf60 +#define REDUCE_COEF_1 0xf68 +#define REDUCE_COEF_2 0xf70 +#define REDUCE_COEF_3 0xf78 +#define LARGE_COEF_1 0xf80 +#define LARGE_COEF_2 0xf88 +#define LARGE_COEF_3 0xf90 + + SBTTL("Arc Tangent") + +//++ +// +// double +// atan ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the arctangent of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double arctangent result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(atan, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + + PROLOGUE_END + + cpys f31, f16, f1 + ldah v0, 1(zero) + lda v0, -0xf2(v0) + stt f1, Temp(sp) + ldl t0, Temp + HighPart(sp) + lda t1, __atan_t_table + sra t0, 14, t0 + subl t0, v0, v0 + lda t0, -0x244(v0) // MIN_LARGE_INDEX + blt v0, poly + + bge t0, large + +// +// reduce range +// + + addl t1, v0, t0 + ldt f14, REDUCE_COEF_1(t1) + ldt f13, REDUCE_COEF_3(t1) + ldq_u t2, 0(t0) + ldt f11, One + extbl t2, t0, t0 + addl t0, t0, t0 + s8addl t0, t1, t0 + ldt f0, 0x248(t0) // ATAN_TABLE + lda t0, 0x248(t0) + mult f1, f0, f10 + subt f1, f0, f0 + addt f10, f11, f10 + divt f0, f10, f0 + ldt f10, REDUCE_COEF_2(t1) + mult f0, f0, f11 + mult f11, f11, f12 + mult f10, f11, f10 + mult f0, f11, f11 + mult f13, f12, f12 + ldt f13, 8(t0) + addt f10, f14, f10 + addt f12, f10, f10 + mult f11, f10, f10 + addt f0, f10, f0 + addt f0, f13, f0 + cpys f16, f0, f14 + cpys f14, f14, f0 + br zero, done + +// +// large range +// + +large: lda t1, -0xdd1(v0) // MIN_CONSTANT_INDEX + bge t1, const + + ldt f12, One + lda t0, __atan_t_table + ldt f10, LARGE_COEF_2(t0) + divt f12, f1, f1 + ldt f0, LARGE_COEF_1(t0) + ldt f14, LARGE_COEF_3(t0) + ldt f12, ATAN_INF_LO(t0) + mult f1, f1, f11 + mult f11, f11, f13 + mult f10, f11, f10 + mult f1, f11, f11 + subt f1, f12, f1 + mult f14, f13, f13 + ldt f14, ATAN_INF(t0) + addt f10, f0, f0 + addt f13, f0, f0 + mult f11, f0, f0 + addt f1, f0, f0 + subt f14, f0, f0 + cpys f16, f0, f10 + cpys f10, f10, f0 + br zero, done + +// +// constant range +// + +const: ldah t2, 1(zero) + lda t2, 0xb2(t2) // see if index too big + cmplt v0, t2, v0 + bne v0, retinf + + stt f16, Temp(sp) + ldl t0, Temp + HighPart(sp) + ldl t2, Temp(sp) + ldah v0, 0x10(zero) + lda v0, -1(v0) + and t0, v0, v0 + bis v0, t2, v0 + bne v0, retarg + +// +// return_atan_of_inf +// + +retinf: ldt f13, __atan_t_table + ATAN_INF + cpys f16, f13, f0 + br zero, done + +// +// poly range +// + +poly: lda v0, 0x5e0(v0) // MAX_SMALL_INDEX + blt v0, retarg + + mult f16, f16, f12 + lda t0, __atan_t_table + ldt f1, POLY_COEF_2(t0) + ldt f14, POLY_COEF_4(t0) + ldt f10, POLY_COEF_5(t0) + mult f12, f12, f11 + ldt f15, POLY_COEF_1(t0) + mult f1, f12, f1 + ldt f17, POLY_COEF_3(t0) + mult f14, f12, f14 + ldt f13, POLY_COEF_6(t0) + ldt f0, POLY_COEF_7(t0) + mult f13, f12, f13 + mult f10, f11, f10 + mult f0, f11, f0 + addt f1, f15, f1 + mult f17, f11, f17 + mult f11, f11, f15 + mult f16, f12, f12 + addt f14, f10, f10 + addt f13, f0, f0 + addt f1, f17, f1 + mult f10, f11, f10 + mult f0, f15, f0 + addt f1, f10, f1 + addt f0, f1, f0 + mult f12, f0, f0 + addt f16, f0, f0 + br zero, done + + +// +// Return original argument as result. +// + +retarg: cpys f16, f16, f0 + +// +// Return with result in f0. +// + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end atan + + .rdata + .align 3 + +// +// Define floating point constants. +// + +One: .double 1.0 + +// +// This table is exported since it is also used by atan2. +// + + .align 3 + .globl __atan_t_table + +__atan_t_table: + +// +// Indices +// + + .long 0x03020100 + .long 0x07060504 + .long 0x0b0a0908 + .long 0x0f0e0d0c + .long 0x13121110 + .long 0x17161514 + .long 0x1b1a1918 + .long 0x1f1e1d1c + .long 0x23222120 + .long 0x27262524 + .long 0x2b2a2928 + .long 0x2e2d2d2c + .long 0x31302f2e + .long 0x35343332 + .long 0x39383736 + .long 0x3d3c3b3a + .long 0x41403f3e + .long 0x45444342 + .long 0x49484746 + .long 0x4d4c4b4a + .long 0x51504f4e + .long 0x55545352 + .long 0x59585756 + .long 0x5d5c5b5a + .long 0x61605f5e + .long 0x63626261 + .long 0x65646463 + .long 0x67666665 + .long 0x6a696867 + .long 0x6e6d6c6b + .long 0x7271706f + .long 0x76757473 + .long 0x7a797877 + .long 0x7e7d7c7b + .long 0x8281807f + .long 0x86858483 + .long 0x89888887 + .long 0x8b8a8a89 + .long 0x8d8c8c8b + .long 0x8f8e8e8d + .long 0x9190908f + .long 0x93929291 + .long 0x95949493 + .long 0x96969695 + .long 0x99989797 + .long 0x9d9c9b9a + .long 0xa1a09f9e + .long 0xa3a2a2a1 + .long 0xa5a4a4a3 + .long 0xa7a6a6a5 + .long 0xa9a8a8a7 + .long 0xabaaaaa9 + .long 0xacacabab + .long 0xadadadac + .long 0xafaeaeae + .long 0xb0b0afaf + .long 0xb1b1b0b0 + .long 0xb2b2b1b1 + .long 0xb3b3b2b2 + .long 0xb4b3b3b3 + .long 0xb5b4b4b4 + .long 0xb6b6b6b5 + .long 0xb8b7b7b7 + .long 0xb9b9b8b8 + .long 0xbabab9b9 + .long 0xbbbbbaba + .long 0xbcbbbbbb + .long 0xbcbcbcbc + .long 0xbdbdbdbd + .long 0xbebebdbd + .long 0xbebebebe + .long 0xbfbfbfbe + .long 0xbfbfbfbf + .long 0xc0c0c0bf + .long 0xc0c0c0c0 + .long 0xc1c1c0c0 + .long 0xc1c1c1c1 + .long 0xc2c2c2c1 + .long 0xc3c2c2c2 + .long 0xc3c3c3c3 + .long 0xc4c3c3c3 + .long 0xc4c4c4c4 + .long 0xc4c4c4c4 + .long 0xc5c5c5c4 + .long 0xc5c5c5c5 + .long 0xc5c5c5c5 + .long 0xc6c6c5c5 + .long 0xc6c6c6c6 + .long 0xc6c6c6c6 + .long 0xc6c6c6c6 + .long 0xc6c6c6c6 + .long 0xc7c7c7c7 + .long 0xc7c7c7c7 + .long 0xc7c7c7c7 + .long 0xc8c7c7c7 + .long 0xc8c8c8c8 + .long 0xc8c8c8c8 + .long 0xc8c8c8c8 + .long 0xc8c8c8c8 + .long 0xc9c9c8c8 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xc9c9c9c9 + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcacacaca + .long 0xcbcbcaca + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcbcbcbcb + .long 0xcccccccc + .long 0xcccccccc + .long 0xcccccccc + .long 0xcccccccc + .long 0xcccccccc + .long 0x00000000 + +// +// table of m, atan(m) +// + + .double 1.5355248400422339e-001 + .double 1.5236243595325524e-001 + .double 1.5552158817919209e-001 + .double 1.5428561088659756e-001 + .double 1.5749071068375525e-001 + .double 1.5620765397022107e-001 + .double 1.5945985174286212e-001 + .double 1.5812855223376682e-001 + .double 1.6142901158120504e-001 + .double 1.6004829275559573e-001 + .double 1.6339819042320866e-001 + .double 1.6196686266324070e-001 + .double 1.6536738849303570e-001 + .double 1.6388424913386970e-001 + .double 1.6733660601457914e-001 + .double 1.6580043939473066e-001 + .double 1.6930584321147152e-001 + .double 1.6771542072360776e-001 + .double 1.7127510030705803e-001 + .double 1.6962918044923750e-001 + .double 1.7324437752441801e-001 + .double 1.7154170595176652e-001 + .double 1.7521367508635091e-001 + .double 1.7345298466316975e-001 + .double 1.7718299321537423e-001 + .double 1.7536300406767497e-001 + .double 1.7915233213372153e-001 + .double 1.7727175170218212e-001 + .double 1.8112169206334361e-001 + .double 1.7917921515668045e-001 + .double 1.8309107322589921e-001 + .double 1.8108538207465014e-001 + .double 1.8506047584275731e-001 + .double 1.8299024015346987e-001 + .double 1.8702990013499132e-001 + .double 1.8489377714481120e-001 + .double 1.8899934632336557e-001 + .double 1.8679598085502003e-001 + .double 1.9096881462840698e-001 + .double 1.8869683914557503e-001 + .double 1.9293830527024586e-001 + .double 1.9059633993331765e-001 + .double 1.9490781846877406e-001 + .double 1.9249447119098312e-001 + .double 1.9687735444355606e-001 + .double 1.9439122094748745e-001 + .double 1.9884691341385727e-001 + .double 1.9628657728832211e-001 + .double 2.0081649559861969e-001 + .double 1.9818052835589239e-001 + .double 2.0278610121648225e-001 + .double 2.0007306234989339e-001 + .double 2.0475573048576073e-001 + .double 2.0196416752764149e-001 + .double 2.0672538362445764e-001 + .double 2.0385383220442907e-001 + .double 2.0869506085024936e-001 + .double 2.0574204475385199e-001 + .double 2.1066476238049245e-001 + .double 2.0762879360814970e-001 + .double 2.1263448843221744e-001 + .double 2.0951406725852789e-001 + .double 2.1460423922213120e-001 + .double 2.1139785425548374e-001 + .double 2.1657401496660342e-001 + .double 2.1328014320911035e-001 + .double 2.1854381588167510e-001 + .double 2.1516092278941668e-001 + .double 2.2051364218304903e-001 + .double 2.1704018172662459e-001 + .double 2.2248349408610146e-001 + .double 2.1891790881148054e-001 + .double 2.2445337180585717e-001 + .double 2.2079409289552671e-001 + .double 2.2642327555699829e-001 + .double 2.2266872289139872e-001 + .double 2.2839320555389561e-001 + .double 2.2454178777313910e-001 + .double 2.3036316201052537e-001 + .double 2.2641327657639609e-001 + .double 2.3233314514054382e-001 + .double 2.2828317839876702e-001 + .double 2.3430315515726335e-001 + .double 2.3015148240004227e-001 + .double 2.3627319227362892e-001 + .double 2.3201817780244399e-001 + .double 2.3824325670223515e-001 + .double 2.3388325389089790e-001 + .double 2.4021334865532848e-001 + .double 2.3574670001328502e-001 + .double 2.4218346834479032e-001 + .double 2.3760850558066995e-001 + .double 2.4612379177854776e-001 + .double 2.4132715301206165e-001 + .double 2.5006422869130918e-001 + .double 2.4503911274623447e-001 + .double 2.5203449022822305e-001 + .double 2.4689255893254244e-001 + .double 2.5597510051142724e-001 + .double 2.5059433291856392e-001 + .double 2.5991582846749378e-001 + .double 2.5428921511209990e-001 + .double 2.6385667576350508e-001 + .double 2.5797712569279457e-001 + .double 2.6779764406045437e-001 + .double 2.6165798588799988e-001 + .double 2.7173873501326334e-001 + .double 2.6533171797836824e-001 + .double 2.7567995027061964e-001 + .double 2.6899824530292393e-001 + .double 2.7962129147501918e-001 + .double 2.7265749226397429e-001 + .double 2.8356276026265581e-001 + .double 2.7630938433152924e-001 + .double 2.8750435826340059e-001 + .double 2.7995384804745721e-001 + .double 2.9144608710072656e-001 + .double 2.8359081102924566e-001 + .double 2.9538794839168914e-001 + .double 2.8722020197347103e-001 + .double 2.9932994374684735e-001 + .double 2.9084195065887453e-001 + .double 3.0327207477022777e-001 + .double 2.9445598794914085e-001 + .double 3.0721434305929912e-001 + .double 2.9806224579535306e-001 + .double 3.1115675020490474e-001 + .double 3.0166065723807844e-001 + .double 3.1509929779121809e-001 + .double 3.0525115640914857e-001 + .double 3.1904198739572476e-001 + .double 3.0883367853313998e-001 + .double 3.2298482058916428e-001 + .double 3.1240815992849819e-001 + .double 3.2692779893548751e-001 + .double 3.1597453800835984e-001 + .double 3.3087092399184592e-001 + .double 3.1953275128109132e-001 + .double 3.3481419730851864e-001 + .double 3.2308273935046272e-001 + .double 3.3875762042890090e-001 + .double 3.2662444291557313e-001 + .double 3.4270119488946582e-001 + .double 3.3015780377045167e-001 + .double 3.4664492221974780e-001 + .double 3.3368276480338221e-001 + .double 3.5058880394226560e-001 + .double 3.3719926999588284e-001 + .double 3.5453284157253667e-001 + .double 3.4070726442148014e-001 + .double 3.5847703661905733e-001 + .double 3.4420669424417044e-001 + .double 3.6242139058320622e-001 + .double 3.4769750671653560e-001 + .double 3.6636590495929278e-001 + .double 3.5117965017771474e-001 + .double 3.7031058123450267e-001 + .double 3.5465307405101643e-001 + .double 3.7425542088887304e-001 + .double 3.5811772884129434e-001 + .double 3.7820042539523707e-001 + .double 3.6157356613203800e-001 + .double 3.8214559621934541e-001 + .double 3.6502053858236522e-001 + .double 3.8609093481959766e-001 + .double 3.6845859992342517e-001 + .double 3.9003644264724363e-001 + .double 3.7188770495496976e-001 + .double 3.9398212114627240e-001 + .double 3.7530780954141402e-001 + .double 3.9792797175340849e-001 + .double 3.7871887060775766e-001 + .double 4.0187399589809220e-001 + .double 3.8212084613526559e-001 + .double 4.0582019500246991e-001 + .double 3.8551369515693501e-001 + .double 4.0976657048137899e-001 + .double 3.8889737775274108e-001 + .double 4.1371312374235236e-001 + .double 3.9227185504468803e-001 + .double 4.1765985618556611e-001 + .double 3.9563708919160545e-001 + .double 4.2160676920387552e-001 + .double 3.9899304338381947e-001 + .double 4.2555386418276547e-001 + .double 4.0233968183755547e-001 + .double 4.2950114250038729e-001 + .double 4.0567696978922430e-001 + .double 4.3344860552750974e-001 + .double 4.0900487348945086e-001 + .double 4.3739625462752746e-001 + .double 4.1232336019697230e-001 + .double 4.4134409115647377e-001 + .double 4.1563239817236564e-001 + .double 4.4529211646299088e-001 + .double 4.1893195667157090e-001 + .double 4.5318873876639293e-001 + .double 4.2550251720217980e-001 + .double 4.6108613217920819e-001 + .double 4.3203481548843342e-001 + .double 4.6898430722472922e-001 + .double 4.3852864071650399e-001 + .double 4.7688327430812050e-001 + .double 4.4498379732613808e-001 + .double 4.8478304371653724e-001 + .double 4.5140010476930359e-001 + .double 4.9268362561922469e-001 + .double 4.5777739726103206e-001 + .double 5.0058503006778432e-001 + .double 4.6411552352325064e-001 + .double 5.0453604385581541e-001 + .double 4.6726985594612491e-001 + .double 5.1243870071325748e-001 + .double 4.7354898030362186e-001 + .double 5.2034220473268367e-001 + .double 4.7978862196590671e-001 + .double 5.2824656555629101e-001 + .double 4.8598867832338383e-001 + .double 5.3615179271017821e-001 + .double 4.9214906004903236e-001 + .double 5.4405789560483941e-001 + .double 4.9826969081401601e-001 + .double 5.5196488353568340e-001 + .double 5.0435050700000683e-001 + .double 5.5987276568356159e-001 + .double 5.1039145740874092e-001 + .double 5.6778155111542927e-001 + .double 5.1639250296941264e-001 + .double 5.7569124878490574e-001 + .double 5.2235361644422107e-001 + .double 5.8360186753303733e-001 + .double 5.2827478213277490e-001 + .double 5.9151341608890706e-001 + .double 5.3415599557554083e-001 + .double 5.9942590307042987e-001 + .double 5.3999726325702790e-001 + .double 6.0733933698512588e-001 + .double 5.4579860230896726e-001 + .double 6.1525372623089158e-001 + .double 5.5156004021390115e-001 + .double 6.2316907909682573e-001 + .double 5.5728161450960134e-001 + .double 6.3108540376411404e-001 + .double 5.6296337249467798e-001 + .double 6.3900270830686567e-001 + .double 5.6860537093564145e-001 + .double 6.4692100069300273e-001 + .double 5.7420767577581244e-001 + .double 6.5484028878522904e-001 + .double 5.7977036184639996e-001 + .double 6.6276058034191265e-001 + .double 5.8529351257991435e-001 + .double 6.7068188301809273e-001 + .double 5.9077721972633057e-001 + .double 6.7860420436639635e-001 + .double 5.9622158307209971e-001 + .double 6.8652755183808556e-001 + .double 6.0162671016239366e-001 + .double 6.9445193278400708e-001 + .double 6.0699271602663973e-001 + .double 7.0237735445565164e-001 + .double 6.1231972290768610e-001 + .double 7.1030382400613368e-001 + .double 6.1760785999465095e-001 + .double 7.1823134849130543e-001 + .double 6.2285726315976664e-001 + .double 7.2615993487071206e-001 + .double 6.2806807469917436e-001 + .double 7.3408959000871321e-001 + .double 6.3324044307802407e-001 + .double 7.4202032067552715e-001 + .double 6.3837452267984052e-001 + .double 7.4995213354821244e-001 + .double 6.4347047356027798e-001 + .double 7.6581903216136837e-001 + .double 6.5354865629575443e-001 + .double 7.8169033744456895e-001 + .double 6.6347637610648558e-001 + .double 7.9756609957470270e-001 + .double 6.7325509358925351e-001 + .double 8.1344636734328057e-001 + .double 6.8288633806304899e-001 + .double 8.2933118819147211e-001 + .double 6.9237170082828436e-001 + .double 8.4522060824488088e-001 + .double 7.0171282873931906e-001 + .double 8.6111467234814298e-001 + .double 7.1091141809055203e-001 + .double 8.7701342409900629e-001 + .double 7.1996920881421278e-001 + .double 8.9291690588282036e-001 + .double 7.2888797898706914e-001 + .double 9.0882515890560212e-001 + .double 7.3766953964025872e-001 + .double 9.2473822322778765e-001 + .double 7.4631572986747752e-001 + .double 9.4065613779674118e-001 + .double 7.5482841222342667e-001 + .double 9.5657894047909242e-001 + .double 7.6320946840524795e-001 + .double 9.7250666809235886e-001 + .double 7.7146079520806365e-001 + .double 9.9640757179857808e-001 + .double 7.8359871904813783e-001 + .double 1.0123477662106497e+000 + .double 7.9153408654898494e-001 + .double 1.0282930065975109e+000 + .double 7.9934643006362416e-001 + .double 1.0442433254321561e+000 + .double 8.0703767115598279e-001 + .double 1.0601987543019136e+000 + .double 8.1460972871741655e-001 + .double 1.0761593239359200e+000 + .double 8.2206451646712742e-001 + .double 1.0921250642322042e+000 + .double 8.2940394065219647e-001 + .double 1.1080960042837751e+000 + .double 8.3662989793684506e-001 + .double 1.1240721724040819e+000 + .double 8.4374427347116965e-001 + .double 1.1400535961519611e+000 + .double 8.5074893912966976e-001 + .double 1.1560403023557917e+000 + .double 8.5764575190995740e-001 + .double 1.1880296659320808e+000 + .double 8.7112316388210831e-001 + .double 1.2200404640247038e+000 + .double 8.8419101620372653e-001 + .double 1.2520728872705937e+000 + .double 8.9686349887653072e-001 + .double 1.2841271167089126e+000 + .double 9.0915445688978058e-001 + .double 1.3162033243984250e+000 + .double 9.2107736858648426e-001 + .double 1.3483016739966118e+000 + .double 9.3264532972485459e-001 + .double 1.3804223213026274e+000 + .double 9.4387104245016884e-001 + .double 1.4125654147650204e+000 + .double 9.5476680847240347e-001 + .double 1.4447310959575739e+000 + .double 9.6534452582212138e-001 + .double 1.4769195000229485e+000 + .double 9.7561568862694747e-001 + .double 1.5252449924236093e+000 + .double 9.9047179921019246e-001 + .double 1.5736223129440838e+000 + .double 1.0046987952523838e+000 + .double 1.6220518482036179e+000 + .double 1.0183304846548173e+000 + .double 1.6705339614358272e+000 + .double 1.0313987782295579e+000 + .double 1.7190689946814175e+000 + .double 1.0439337627979282e+000 + .double 1.7838652477841976e+000 + .double 1.0598662298339001e+000 + .double 1.8487568705321733e+000 + .double 1.0749634150447689e+000 + .double 1.9137445321043374e+000 + .double 1.0892830552328896e+000 + .double 1.9951151143237842e+000 + .double 1.1061698282655272e+000 + .double 2.0766378750592405e+000 + .double 1.1220193029527641e+000 + .double 2.1419663687581529e+000 + .double 1.1340098135689110e+000 + .double 2.2401441155497896e+000 + .double 1.1509403173125212e+000 + .double 2.3385453087436905e+000 + .double 1.1667117718174038e+000 + .double 2.4371714724301077e+000 + .double 1.1814322708247920e+000 + .double 2.5690254496685805e+000 + .double 1.1995890899144255e+000 + .double 2.7012851246360814e+000 + .double 1.2162456309906018e+000 + .double 2.8671848136736733e+000 + .double 1.2352139791159169e+000 + .double 3.0337286825933503e+000 + .double 1.2523848414028640e+000 + .double 3.2344390781066932e+000 + .double 1.2709456743012855e+000 + .double 3.4697948808564751e+000 + .double 1.2901987053917314e+000 + .double 3.7403633162267851e+000 + .double 1.3095526166650662e+000 + .double 4.0468035358478183e+000 + .double 1.3285408170404023e+000 + .double 4.3898707100328336e+000 + .double 1.3468211249075761e+000 + .double 4.8051827486338796e+000 + .double 1.3656163032815085e+000 + .double 5.3651964370400229e+000 + .double 1.3865243098539592e+000 + .double 6.0754885617421879e+000 + .double 1.4076632048189699e+000 + .double 7.0163874932087893e+000 + .double 1.4292262718246274e+000 + .double 8.3530110964301372e+000 + .double 1.4516460893659640e+000 + .double 1.0199215339979428e+001 + .double 1.4730619487601273e+000 + .double 1.3282700889621619e+001 + .double 1.4956521911510898e+000 + .double 1.9351193695758720e+001 + .double 1.5191658543189086e+000 + .double 3.5680505204950208e+001 + .double 1.5427771524815408e+000 + .double 7.1244387842511500e+001 + .double 1.5567610551550020e+000 + +// +// hi and lo pieces of pi/2 +// + + .double 1.5707963267948966e+000 + .double 6.1232339957367660e-017 + +// +// pi +// + + .double 3.1415926535897931e+000 + +// +// reduce range coefs +// + + .double -3.3333333333333259e-001 + .double 1.9999999999930743e-001 + .double -1.4285714261780827e-001 + .double 1.1111107045818812e-001 + .double -9.0905335038737295e-002 + .double 7.6730212431403905e-002 + .double -6.1467058390548932e-002 + +// +// reduce range coefs +// + + .double -3.3333333333324405e-001 + .double 1.9999999697587109e-001 + .double -1.4282435376697181e-001 + +// +// large range coefs +// + + .double -3.3333333333324200e-001 + .double 1.9999999692877043e-001 + .double -1.4282409942371077e-001 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/ceils.s b/private/fp32/tran/alpha/ceils.s new file mode 100644 index 000000000..cbab107e0 --- /dev/null +++ b/private/fp32/tran/alpha/ceils.s @@ -0,0 +1,156 @@ +// TITLE("Alpha AXP ceil") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// ceil.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format ceil. +// +// Author: +// +// Bill Gray +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Apr-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("ceil") + +//++ +// +// double +// ceil ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the ceil of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double ceil result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(ceil, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + + PROLOGUE_END + + ldah t0, 0x7ff0(zero) // exp mask + ldt f0, two_to_52 // get big + ldah t1, 0x10(zero) // one in exp field + stt f16, Temp(sp) + ldl v0, Temp + HighPart(sp) + cpys f16, f0, f1 // fix sign of big + cpys f16, f16, f0 + and v0, t0, t0 + subl t0, t1, t0 + ldt f10, one + ldah t1, 0x4320(zero) // cutoff value + cmpult t0, t1, t0 + beq t0, quick_out + +// Add big, sub big to round to int. + + addt f16, f1, f11 + subt f11, f1, f1 + cmptlt f1, f0, f0 + fbeq f0, it_rounded_up + +// It rounded down so add one. + + addt f1, f10, f1 + +it_rounded_up: + cpys f1, f1, f0 + br zero, done + + +// Value is abnormal (or too big). +// If it is zero or denorm, figure out +// whether to return 0.0 or 1.0 -- if +// value is too big, just return it. + +quick_out: + ldah t1, 0x7ff0(zero) + ldah t2, -0x8000(zero) + and v0, t1, t0 + and v0, t2, v0 + bne t0, ret_arg + ldah t0, 0x10(zero) + bne v0, ret_zero + stt f16, Temp(sp) + ldl v0, Temp(sp) + lda t0, -1(t0) + ldl t2, Temp + HighPart(sp) + cpys f10, f10, f16 + and t2, t0, t0 + bis t0, v0, v0 + and t2, t1, t1 + cmpult zero, v0, v0 + cmpeq t1, zero, t1 + beq t1, ret_zero + and t1, v0, t1 + beq t1, ret_zero + br zero, ret_arg + +ret_zero: + cpys f31, f31, f16 + +ret_arg: + cpys f16, f16, f0 + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) + + .end ceil + + .align 3 + .rdata + +one: + .quad 0x3ff0000000000000 // 1.0 + +two_to_52: + .quad 0x4330000000000000 // 2^52 (4503599627370496.0) diff --git a/private/fp32/tran/alpha/chopt.s b/private/fp32/tran/alpha/chopt.s new file mode 100644 index 000000000..abd433ef6 --- /dev/null +++ b/private/fp32/tran/alpha/chopt.s @@ -0,0 +1,80 @@ +// TITLE("Floating Point Chopped Arithmetic") +//++ +// +// Copyright (c) 1993 Digital Equipment Corporation +// +// Module Name: +// +// chopt.s +// +// Abstract: +// +// This module implements routines for performing floating point arithmetic +// using the chopped rounding mode. These can be used to replace instances +// where (e.g., NT/Mips) the global rounding mode is set to chopped. For +// Alpha, the dynamic rounding mode has no effect on floating point code +// emitted by the current compilers. +// +// Author: +// +// Thomas Van Baak (tvb) 22-Feb-1993 +// +// Environment: +// +// Any mode. +// +// Revision History: +// +//-- + +#include "ksalpha.h" + + SBTTL("Chopped Arithmetic") + +// +// Add chopped with software completion. +// + + LEAF_ENTRY(_addtc) + + addtsuc f16, f17, f0 // add operands - chopped + trapb // wait for possible trap + ret zero, (ra) // return + + .end _addtc + +// +// Divide chopped with software completion. +// + + LEAF_ENTRY(_divtc) + + divtsuc f16, f17, f0 // divide operands - chopped + trapb // wait for possible trap + ret zero, (ra) // return + + .end _divtc + +// +// Multiply chopped with software completion. +// + + LEAF_ENTRY(_multc) + + multsuc f16, f17, f0 // multiply operands - chopped + trapb // wait for possible trap + ret zero, (ra) // return + + .end _multc + +// +// Subtract chopped with software completion. +// + + LEAF_ENTRY(_subtc) + + subtsuc f16, f17, f0 // subtract operands - chopped + trapb // wait for possible trap + ret zero, (ra) // return + + .end _subtc diff --git a/private/fp32/tran/alpha/coss.s b/private/fp32/tran/alpha/coss.s new file mode 100644 index 000000000..874566a54 --- /dev/null +++ b/private/fp32/tran/alpha/coss.s @@ -0,0 +1,995 @@ +// TITLE("Alpha AXP Cosine") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// cos.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format cosine +// +// Author: +// +// Bob Hanek (rtl::hanek) 1-Oct-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 13-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +SaveS0: .space 8 // +SaveS1: .space 8 // +SaveRa: .space 8 // +SaveF2: .space 8 // +SaveF3: .space 8 // +SaveF4: .space 8 // +SaveF5: .space 8 // +SaveF6: .space 8 // +SaveF7: .space 8 // +SaveF8: .space 8 // +SaveF9: .space 8 // +Temp0: .space 8 // +Temp1: .space 8 // +Temp2: .space 8 // +Temp3: .space 8 // +Temp4: .space 8 // +Temp5: .space 8 // +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Define argument range values. +// + +#define BIG_X_HI 0x4169 // upper bound of medium argument range +#define BIG_X_LO 0x21fb +#define SMALL_X_HI 0x3e40 // lower bound of medium argument range +#define SMALL_X_LO 0x0000 + +#define EXP_WORD_OF_TWO_PI_HI 0x4019 +#define EXP_WORD_OF_TWO_PI_LO 0x21fb + +// +// Define table offset values. +// + +#define D_2_POW_K_OVER_PI_OVER_4 0x0 +#define PI_OVER_4_OVER_2_POW_K_0 0x08 +#define PI_OVER_4_OVER_2_POW_K_1 0x10 +#define PI_OVER_4_OVER_2_POW_K_2 0x18 +#define PI_OVER_4_OVER_2_POW_K_3 0x20 +#define PI_OVER_4_OVER_2_POW_K_4 0x28 + +#define PI_OVER_2_HI 0x30 +#define PI_OVER_2_LO 0x38 +#define PI_HI 0x40 +#define PI_LO 0x48 +#define THREE_PI_OVER_2_HI 0x50 +#define THREE_PI_OVER_2_LO 0x58 +#define TWO_PI_HI 0x60 +#define TWO_PI_LO 0x68 +#define TWO_POW_K_OVER_PI_OVER_4 0x70 + +#define C_POLY0 0xb8 +#define C_POLY1 C_POLY0 + 8 +#define C_POLY2 C_POLY1 + 8 +#define C_POLY3 C_POLY2 + 8 +#define C_POLY4 C_POLY3 + 8 + +#define P_POLY0 0x78 +#define P_POLY1 P_POLY0 + 8 + +#define Q_POLY0 0x88 +#define Q_POLY1 Q_POLY0 + 8 + +#define S_POLY0 0x98 +#define S_POLY1 S_POLY0 + 8 +#define S_POLY2 S_POLY1 + 8 +#define S_POLY3 S_POLY2 + 8 + +#define SINCOS 0xe0 +#define SIN_A 0xe8 +#define COS_A 0xf0 + + SBTTL("Cosine") + +//++ +// +// double +// cos ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the cosine of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double cosine result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(cos, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + stq s0, SaveS0(sp) + stq s1, SaveS1(sp) + stq ra, SaveRa(sp) + stt f2, SaveF2(sp) + stt f3, SaveF3(sp) + stt f4, SaveF4(sp) + stt f5, SaveF5(sp) + stt f6, SaveF6(sp) + stt f7, SaveF7(sp) + stt f8, SaveF8(sp) + stt f9, SaveF9(sp) + + PROLOGUE_END + + ornot zero, zero, t0 + srl t0, 33, t0 + ldah t1, SMALL_X_HI(zero) + stt f16, Temp2(sp) + cpys f31, f16, f2 + ldl v0, Temp2 + HighPart(sp) + ldah t2, BIG_X_HI - SMALL_X_HI(zero) + lda t2, BIG_X_LO - SMALL_X_LO(t2) + ldt f3, One + and v0, t0, v0 // the exponent field of the argument + subl v0, t1, t0 // if v0 - small <= big - small + cmpult t0, t2, t0 // an abnormal argument + stt f3, Temp2(sp) + beq t0, abnormal_argument + + ldah t2, EXP_WORD_OF_TWO_PI_HI(zero) // if (j >= EXP_WORD_OF_TWO_PI) + lda t2, EXP_WORD_OF_TWO_PI_LO(t2) // medium argument + cmplt v0, t2, t2 + beq t2, medium_argument + +// +// small argument reduction +// +// reduce the argument X to ( 8 * N + I ) * pi / 4 + y +// and let the reduced argument be y' where +// y' = X - floor( ( 8 * N + I + 1 ) / 2 ) * pi / 4 +// the low 3 bits of I are the octant +// + + lda t0, __trig_cons + ldt f16, TWO_POW_K_OVER_PI_OVER_4(t0) + mult f2, f16, f16 + cvttqc f16, f0 + stt f0, Temp3(sp) + ldl t2, Temp3(sp) + sra t2, 7, t4 + cmpule t4, 7, t12 + beq t12, small_cos // if octant > 7; shouldn't happen + +// dispatch on octant + + lda t12, Switch2 + s4addl t4, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute cos(y') +// +Switch20: + and t2, 127, t5 + subl t5, 27, t5 + blt t5, small_cos + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f0, SINCOS(t5) + ldt f7, SIN_A(t5) + ldt f6, COS_A(t5) + subt f0, f2, f5 + br zero, pos_tab_eval + +// +// 2nd octant; compute -sin(y') +// +Switch21: + ldt f1, PI_OVER_2_HI(t0) + ornot zero, t2, t4 + and t4, 127, t4 + ldt f16, PI_OVER_2_LO(t0) + subl t4, 27, t4 + subt f2, f1, f1 + blt t4, neg_sin_1 + + s4subl t4, t4, t4 + s8addl t4, t0, t4 + ldt f0, SINCOS(t4) + ldt f6, SIN_A(t4) + ldt f7, COS_A(t4) + addt f1, f0, f0 + subt f16, f0, f5 + br zero, pos_tab_eval + +// +// 3rd octant; compute -sin(y') +// +Switch22: + ldt f0, PI_OVER_2_HI(t0) + and t2, 127, t3 + ldt f16, PI_OVER_2_LO(t0) + subl t3, 27, t3 + subt f2, f0, f1 + blt t3, neg_sin_1 + + s4subl t3, t3, t3 + s8addl t3, t0, t3 + ldt f7, SINCOS(t3) + ldt f6, SIN_A(t3) + subt f1, f7, f7 + subt f7, f16, f5 + ldt f7, COS_A(t3) + br zero, neg_tab_eval + +neg_sin_1: + subt f1, f16, f0 + ldt f5, S_POLY3(t0) + ldt f7, S_POLY1(t0) + ldt f10, S_POLY2(t0) + ldt f12, S_POLY0(t0) + ldt f25, Temp2(sp) + mult f0, f0, f2 + mult f5, f2, f5 + mult f2, f2, f11 + mult f7, f2, f7 + mult f0, f2, f0 + addt f5, f10, f5 + addt f7, f12, f7 + mult f5, f11, f5 + addt f5, f7, f5 + mult f0, f5, f0 + addt f16, f0, f0 + subt f0, f1, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// 4th octant; compute -cos(y') +// +Switch23: + ldt f10, PI_HI(t0) + ornot zero, t2, t5 + and t5, 127, t5 + ldt f12, PI_LO(t0) + subl t5, 27, t5 + subt f2, f10, f10 + blt t5, neg_cos_1 + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f11, SINCOS(t5) + ldt f7, SIN_A(t5) + ldt f6, COS_A(t5) + addt f10, f11, f11 + subt f11, f12, f5 + br zero, neg_tab_eval + +// +// 5th octant; compute -cos(y') +// +Switch24: + ldt f16, PI_HI(t0) + and t2, 127, t4 + ldt f12, PI_LO(t0) + subl t4, 27, t4 + subt f2, f16, f10 + blt t4, neg_cos_1 + + s4subl t4, t4, t4 + s8addl t4, t0, t4 + ldt f0, SINCOS(t4) + ldt f7, SIN_A(t4) + ldt f6, COS_A(t4) + subt f10, f0, f0 + subt f12, f0, f5 + br zero, neg_tab_eval + +neg_cos_1: + subt f10, f12, f10 + ldt f1, C_POLY1(t0) + ldt f11, C_POLY4(t0) + ldt f16, C_POLY2(t0) + ldt f8, C_POLY0(t0) + ldt f0, C_POLY3(t0) + ldt f25, Temp2(sp) + mult f10, f10, f10 + mult f10, f10, f2 + mult f10, f1, f1 + mult f11, f10, f11 + mult f16, f2, f16 + addt f1, f8, f1 + mult f2, f10, f2 + addt f11, f0, f0 + addt f1, f16, f1 + mult f2, f0, f0 + addt f1, f0, f0 + mult f10, f0, f0 + subt f0, f3, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// 6th octant; compute sin(y') +// +Switch25: + ldt f7, THREE_PI_OVER_2_HI(t0) + ornot zero, t2, t3 + and t3, 127, t3 + ldt f11, THREE_PI_OVER_2_LO(t0) + subl t3, 27, t3 + subt f2, f7, f12 + blt t3, pos_sin_1 + + s4subl t3, t3, t3 + s8addl t3, t0, t3 + ldt f16, SINCOS(t3) + ldt f6, SIN_A(t3) + ldt f7, COS_A(t3) + addt f12, f16, f16 + subt f11, f16, f5 + br zero, neg_tab_eval + +// +// 7th octant; compute sin(y') +// +Switch26: + ldt f1, THREE_PI_OVER_2_HI(t0) + and t2, 127, t5 + ldt f11, THREE_PI_OVER_2_LO(t0) + subl t5, 27, t5 + subt f2, f1, f12 + blt t5, pos_sin_1 + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f10, SINCOS(t5) + ldt f6, SIN_A(t5) + ldt f7, COS_A(t5) + subt f12, f10, f10 + subt f10, f11, f5 + br zero, pos_tab_eval + +pos_sin_1: + subt f12, f11, f0 + ldt f1, S_POLY3(t0) + ldt f2, S_POLY1(t0) + ldt f8, S_POLY2(t0) + ldt f5, S_POLY0(t0) + ldt f25, Temp2(sp) + mult f0, f0, f16 + mult f1, f16, f1 + mult f16, f16, f10 + mult f2, f16, f2 + mult f0, f16, f0 + addt f1, f8, f1 + addt f2, f5, f2 + mult f1, f10, f1 + addt f1, f2, f1 + mult f0, f1, f0 + addt f11, f0, f0 + subt f12, f0, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// 8th octant; compute cos(y') +// +Switch27: + ldt f7, TWO_PI_HI(t0) + ornot zero, t2, t2 + and t2, 127, t2 + ldt f10, TWO_PI_LO(t0) + subl t2, 27, t2 + subt f2, f7, f2 + blt t2, pos_cos_1 + + s4subl t2, t2, t2 + s8addl t2, t0, t2 + ldt f16, SINCOS(t2) + ldt f7, SIN_A(t2) + ldt f6, COS_A(t2) + addt f2, f16, f16 + subt f16, f10, f5 + br zero, pos_tab_eval + +pos_cos_1: + subt f2, f10, f2 + ldt f1, C_POLY1(t0) + ldt f11, C_POLY4(t0) + ldt f12, C_POLY2(t0) + ldt f8, C_POLY0(t0) + ldt f16, C_POLY3(t0) + ldt f25, Temp2(sp) + mult f2, f2, f2 + mult f2, f2, f0 + mult f2, f1, f1 + mult f11, f2, f11 + mult f12, f0, f12 + addt f1, f8, f1 + mult f0, f2, f0 + addt f11, f16, f11 + addt f1, f12, f1 + mult f0, f11, f0 + addt f1, f0, f0 + mult f2, f0, f0 + subt f3, f0, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +small_cos: + mult f2, f2, f7 + ldt f10, C_POLY1(t0) + ldt f16, C_POLY4(t0) + ldt f11, C_POLY2(t0) + ldt f1, C_POLY0(t0) + ldt f0, C_POLY3(t0) + ldt f25, Temp2(sp) + mult f7, f7, f12 + mult f7, f10, f10 + mult f16, f7, f16 + mult f11, f12, f11 + addt f10, f1, f1 + mult f12, f7, f12 + addt f16, f0, f0 + addt f1, f11, f1 + mult f12, f0, f0 + addt f1, f0, f0 + mult f7, f0, f0 + subt f3, f0, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// a medium argument +// +medium_argument: + lda t5, __trig_cons // reduce the argument with extra precision + ldt f6, D_2_POW_K_OVER_PI_OVER_4(t5) + mult f2, f6, f6 + cvttqc f6, f10 + stt f10, Temp3(sp) + ldl s0, Temp3(sp) + addl s0, 0x80, t2 + bic t2, 0xff, t2 + stq t2, Temp4(sp) + ldt f11, Temp4(sp) + ldt f1, PI_OVER_4_OVER_2_POW_K_0(t5) + ldt f7, PI_OVER_4_OVER_2_POW_K_1(t5) + cvtqt f11, f11 + ldt f6, PI_OVER_4_OVER_2_POW_K_2(t5) + mult f11, f1, f1 + mult f11, f7, f7 + mult f11, f6, f6 + subt f2, f1, f1 + subt f1, f7, f0 + subt f0, f1, f1 + addt f7, f1, f1 + addt f6, f1, f9 + subt f0, f9, f4 + cmpteq f4, f0, f10 + fbne f10, evaluate + + subt f4, f0, f0 + ldt f16, PI_OVER_4_OVER_2_POW_K_3(t5) + mult f11, f16, f16 + addt f9, f0, f0 + addt f16, f0, f9 + subt f4, f9, f12 + cmpteq f12, f4, f7 + fbne f7, evaluate + + subt f12, f4, f4 + ldt f1, PI_OVER_4_OVER_2_POW_K_4(t5) + mult f11, f1, f1 + addt f9, f4, f4 + addt f1, f4, f9 + subt f12, f9, f4 + cmpteq f4, f12, f6 + fbne f6, evaluate + + subt f4, f12, f12 + addt f9, f12, f9 + br zero, evaluate + +// +// process an abnormal argument +// it's either very small, very big, a NaN or an Inf +// +abnormal_argument: + cmple v0, t1, t1 + beq t1, big_NaN_or_Inf + + cpys f3, f3, f0 // very small argument; simply return it. + br zero, done + +// +// Process big arguments or NaNs or Infs +// +big_NaN_or_Inf: + ldah s1, 0x7ff0(zero) // screen out NaNs and Infs. + and v0, s1, v0 + xor v0, s1, v0 + beq v0, NaN_or_Inf // NaN or an infinity + + cpys f2, f2, f16 // a large argument + mov zero, a1 // reduce it accurately + lda a2, Temp1(sp) + lda a3, Temp0(sp) + bsr ra, __trig_reduce + mov v0, s0 // and then evaluate the + ldt f9, Temp0(sp) // the reduced argument + ldt f4, Temp1(sp) + +// +// evaluate the function +// +evaluate: + sra s0, 7, t2 + and t2, 7, t2 + cmpule t2, 7, t12 + beq t12, pos_sin_2 + + lda t12, Switch1 + s4addl t2, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute cos(y') +// +Switch10: + and s0, 127, t4 + subl t4, 27, t4 + blt t4, pos_cos_2 + + s4subl t4, t4, t4 + lda t5, __trig_cons + s8addl t4, t5, t4 + ldt f7, SINCOS(t4) + ldt f6, COS_A(t4) + subt f4, f7, f7 + subt f9, f7, f5 + ldt f7, SIN_A(t4) + br zero, pos_tab_eval + +// +// 2nd octant; compute -sin(y') +// +Switch11: + ornot zero, s0, t7 + and t7, 127, t7 + subl t7, 27, t7 + blt t7, neg_sin_2 + + s4subl t7, t7, t7 + lda a0, __trig_cons + s8addl t7, a0, t7 + ldt f0, SINCOS(t7) + ldt f6, SIN_A(t7) + ldt f7, COS_A(t7) + addt f4, f0, f0 + subt f9, f0, f5 + br zero, pos_tab_eval + +// +// 3rd octant; compute -sin(y') +// +Switch12: + and s0, 127, a2 + subl a2, 27, a2 + blt a2, neg_sin_2 + + s4subl a2, a2, a2 + lda a3, __trig_cons + s8addl a2, a3, a2 + ldt f1, SINCOS(a2) + ldt f6, SIN_A(a2) + ldt f7, COS_A(a2) + subt f4, f1, f1 + subt f1, f9, f5 + br zero, neg_tab_eval + +neg_sin_2: + mult f4, f4, f10 + lda a5, __trig_cons + ldt f25, Temp2(sp) + ldt f11, S_POLY3(a5) + ldt f12, S_POLY1(a5) + ldt f13, S_POLY2(a5) + mult f11, f10, f11 + mult f10, f10, f14 + ldt f15, S_POLY0(a5) + mult f12, f10, f12 + mult f4, f10, f10 + addt f11, f13, f11 + addt f12, f15, f12 + mult f11, f14, f11 + addt f11, f12, f11 + mult f10, f11, f10 + addt f9, f10, f9 + subt f9, f4, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// 4th octant; compute -cos(y') +// +Switch13: + ornot zero, s0, t8 + and t8, 127, t8 + subl t8, 27, t8 + blt t8, neg_cos_2 + + s4subl t8, t8, t8 + lda t9, __trig_cons + s8addl t8, t9, t8 + ldt f16, SINCOS(t8) + ldt f7, SIN_A(t8) + ldt f6, COS_A(t8) + addt f4, f16, f16 + subt f16, f9, f5 + br zero, neg_tab_eval + +// +// 5th octant; compute -cos(y') +// +Switch14: + and s0, 127, t11 + subl t11, 27, t11 + blt t11, neg_cos_2 + + s4subl t11, t11, t11 + lda ra, __trig_cons + s8addl t11, ra, t11 + ldt f17, SINCOS(t11) + ldt f7, SIN_A(t11) + ldt f6, COS_A(t11) + subt f4, f17, f17 + subt f9, f17, f5 + br zero, neg_tab_eval + +neg_cos_2: + mult f4, f4, f18 + lda v0, __trig_cons + ldt f25, Temp2(sp) + ldt f19, C_POLY1(v0) + ldt f20, C_POLY4(v0) + ldt f22, C_POLY2(v0) + mult f18, f18, f21 + mult f18, f19, f19 + ldt f23, C_POLY0(v0) + mult f20, f18, f20 + ldt f24, C_POLY3(v0) + mult f22, f21, f22 + addt f19, f23, f19 + mult f21, f18, f21 + addt f20, f24, f20 + addt f19, f22, f19 + mult f21, f20, f20 + addt f19, f20, f19 + mult f18, f19, f18 + subt f18, f3, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +// +// 6th octant; compute sin(y') +// +Switch15: + ornot zero, s0, t0 + and t0, 127, t0 + subl t0, 27, t0 + blt t0, pos_sin_2 + + s4subl t0, t0, t0 + lda t1, __trig_cons + s8addl t0, t1, t0 + ldt f25, SINCOS(t0) + ldt f6, SIN_A(t0) + ldt f7, COS_A(t0) + addt f4, f25, f25 + subt f9, f25, f5 + +neg_tab_eval: + ldt f26, MinusOne + stt f26, Temp2(sp) + br zero, pos_tab_eval + +// +// 7th octant; compute sin(y') +// +Switch16: + and s0, 127, t3 + subl t3, 27, t3 + blt t3, pos_sin_2 + + s4subl t3, t3, t3 + lda t5, __trig_cons + s8addl t3, t5, t3 + ldt f27, SINCOS(t3) + ldt f6, SIN_A(t3) + ldt f7, COS_A(t3) + subt f4, f27, f27 + subt f27, f9, f5 + br zero, pos_tab_eval + +// +// 8th octant; compute cos(y') +// +Switch17: + ornot zero, s0, s0 + and s0, 127, s0 + subl s0, 27, s0 + lda t6, __trig_cons + blt s0, pos_cos_2 + + s4subl s0, s0, s0 + s8addl s0, t6, t6 + ldt f28, SINCOS(t6) + ldt f7, SIN_A(t6) + ldt f6, COS_A(t6) + addt f4, f28, f28 + subt f28, f9, f5 + +pos_tab_eval: + mult f5, f5, f29 + lda t7, __trig_cons + ldt f25, Temp2(sp) + ldt f30, P_POLY1(t7) + ldt f1, P_POLY0(t7) + ldt f0, Q_POLY1(t7) + ldt f15, Q_POLY0(t7) + mult f30, f29, f30 + mult f0, f29, f0 + mult f5, f29, f13 + addt f30, f1, f1 + addt f0, f15, f0 + mult f13, f1, f1 + mult f29, f0, f0 + subt f5, f1, f1 + mult f6, f0, f0 + mult f7, f1, f1 + subt f0, f1, f0 + subt f6, f0, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +pos_cos_2: + mult f4, f4, f14 + lda a1, __trig_cons + ldt f25, Temp2(sp) + ldt f12, C_POLY1(a1) + ldt f11, C_POLY4(a1) + ldt f16, C_POLY2(a1) + mult f14, f14, f10 + mult f14, f12, f12 + ldt f17, C_POLY0(a1) + mult f11, f14, f11 + ldt f23, C_POLY3(a1) + mult f16, f10, f16 + addt f12, f17, f12 + mult f10, f14, f10 + addt f11, f23, f11 + addt f12, f16, f12 + mult f10, f11, f10 + addt f12, f10, f10 + mult f14, f10, f10 + subt f3, f10, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + cpys f8, f8, f0 + br zero, done + +pos_sin_2: + mult f4, f4, f24 + lda a3, __trig_cons + ldt f25, Temp2(sp) + ldt f22, S_POLY3(a3) + ldt f21, S_POLY1(a3) + ldt f20, S_POLY2(a3) + mult f22, f24, f22 + mult f24, f24, f19 + ldt f18, S_POLY0(a3) + mult f21, f24, f21 + mult f4, f24, f24 + addt f22, f20, f20 + addt f21, f18, f18 + mult f20, f19, f19 + addt f19, f18, f18 + mult f24, f18, f18 + addt f9, f18, f9 + subt f4, f9, f8 + fbge f25, adjust_sign + + cpysn f8, f8, f8 + +adjust_sign: + cpys f8, f8, f0 + br zero, done + +// +// Determine if we have a NaN or an Inf +// +NaN_or_Inf: + stt f2, Temp2(sp) + ldl a4, Temp3 + HighPart(sp) + and a4, s1, a5 + cmpeq a5, s1, s1 + beq s1, NaN_or_Inf1 + + ldl t9, Temp2(sp) + ldah t8, 0x10(zero) // mask = 0x000fffff + lda t8, -1(t8) + and a4, t8, a4 + bis a4, t9, a4 + cmpult zero, a4, a4 + and s1, a4, s1 + bne s1, done_1 + +// +// report an exception +// +NaN_or_Inf1: + lda t10, cosName + stl t10, ExRec + ErName(sp) + ldah t12, 0x800(zero) + stt f2, ExRec + ErArg0(sp) + lda t12, 0x11(t12) + stl t12, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f2, 0(v0) + +// +// return the argument +// +done_1: cpys f2, f2, f0 + +// +// Restore registers and return with result in f0. +// + +done: + ldq s0, SaveS0(sp) + ldq s1, SaveS1(sp) + ldq ra, SaveRa(sp) + ldt f2, SaveF2(sp) + ldt f3, SaveF3(sp) + ldt f4, SaveF4(sp) + ldt f5, SaveF5(sp) + ldt f6, SaveF6(sp) + ldt f7, SaveF7(sp) + ldt f8, SaveF8(sp) + ldt f9, SaveF9(sp) + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end cos.s + + .rdata + .align 3 + +// +// Define floating point constants. +// + +MinusOne: + .double -1.0 +One: + .double 1.0 + + .align 2 + +// +// dispatch on octant +// + +Switch1: + .long Switch10 + .long Switch11 + .long Switch12 + .long Switch13 + .long Switch14 + .long Switch15 + .long Switch16 + .long Switch17 + +// +// dispatch on octant +// + +Switch2: + .long Switch20 + .long Switch21 + .long Switch22 + .long Switch23 + .long Switch24 + .long Switch25 + .long Switch26 + .long Switch27 + +cosName: + .ascii "cos\0" diff --git a/private/fp32/tran/alpha/dpml_exc.c b/private/fp32/tran/alpha/dpml_exc.c new file mode 100644 index 000000000..d4cfd1b38 --- /dev/null +++ b/private/fp32/tran/alpha/dpml_exc.c @@ -0,0 +1,342 @@ +// +// Alpha AXP High-performance math library exception handler. +// +// This code maps the standard DPML exception record to calls to +// the appropriate NT libc private exception handlers. +// + +#include <stdarg.h> +#include <errno.h> +#include <windows.h> +#include <math.h> +#include <fpieee.h> + +#pragma function(acos, asin, atan, atan2, cos, cosh, exp, fabs, fmod, log, \ + log10, pow, sin, sinh, sqrt, tan, tanh) + +typedef struct { + unsigned int func:6, + fast_err:4, + fast_val:4, + ieee_err:4, + ieee_val:4; + } DPML_EXCEPTION_RESPONSE; + +typedef union { + signed int w; + float f; + double d; + long double ld; + } DPML_EXCEPTION_VALUE; + +typedef struct { + signed int func_error_code; + void * context; + signed int platform_specific_err_code; + signed int environment; + void * ret_val_ptr; + char * name; + char data_type; + char dpml_error; + char mode; + DPML_EXCEPTION_VALUE ret_val; + DPML_EXCEPTION_VALUE args[4]; + } DPML_EXCEPTION_RECORD; + +typedef struct { + signed int exception_code, + error, + fp_num; + } __NT_ERROR_MAP; + +typedef struct { char nt_prec, nt_format; } __NT_TYPE_MAP; + +extern void * __dpml_exception(DPML_EXCEPTION_RECORD *); +extern double _except1(signed int, signed int, double, double, signed int); +extern double _except2(signed int, signed int, double, double, double, signed int); +extern signed int _ctrlfp(signed int, signed int); +extern void ReceiveSComplexResult(float*, float*); +extern void ReceiveTComplexResult(double*, double*); + +const unsigned int __dpml_globals_table[] = { + 0x00000000, 0x00000000, 0x00000000, 0xffff8000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000, + 0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, + 0x00000000, 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff, 0x7ffeffff, + 0xffffffff, 0xffffffff, 0xffffffff, 0xfffeffff, 0x00000000, 0x00000000, + 0x00000000, 0x7fff0000, 0x00000000, 0x00000000, 0x00000000, 0xffff0000, + 0x00000000, 0x00000000, 0x00000000, 0x3f8f0000, 0x00000000, 0x00000000, + 0x00000000, 0xbf8f0000, 0x00000000, 0x00000000, 0x00000000, 0x3fff0000, + 0x00000000, 0x00000000, 0x00000000, 0xbfff0000, 0x00000000, 0xfff80000, + 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0x00000001, 0x00000000, + 0x00000001, 0x80000000, 0xffffffff, 0x7fefffff, 0xffffffff, 0xffefffff, + 0x00000000, 0x7ff00000, 0x00000000, 0xfff00000, 0x00000000, 0x3cb00000, + 0x00000000, 0xbcb00000, 0x00000000, 0x3ff00000, 0x00000000, 0xbff00000, + 0xffc00000, 0x00000000, 0x80000000, 0x00000001, 0x80000001, 0x7f7fffff, + 0xff7fffff, 0x7f800000, 0xff800000, 0x34000000, 0xb4000000, 0x3f800000, + 0xbf800000 + }; + +const unsigned int __dpml_globals_offset_table[] = { + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 216, 224, + 232, 240, 248, 256, 264, 272, 280, 288, 296, 304, 312, 316, 320, 324, 328, 332, + 336, 340, 344, 348, 352, 356, 360 + }; + +static DPML_EXCEPTION_RESPONSE dpml_response_table[] = { + {0, 1, 1, 1, 0}, {1, 1, 1, 1, 0}, {2, 1, 1, 1, 0}, {3, 1, 1, 1, 0}, + {4, 1, 1, 1, 0}, {8, 1, 1, 1, 0}, {8, 2, 5, 2, 7}, {8, 2, 6, 2, 8}, + {9, 1, 1, 1, 0}, {9, 1, 1, 1, 0}, {9, 4, 1, 4, 1}, {10, 1, 1, 1, 0}, + {10, 1, 1, 1, 0}, {10, 4, 1, 4, 1}, {11, 3, 5, 3, 7}, {39, 1, 1, 1, 0}, + {39, 3, 5, 3, 7}, {12, 1, 1, 1, 0}, {13, 1, 1, 1, 0}, {14, 3, 5, 3, 7}, + {33, 4, 1, 4, 1}, {33, 3, 5, 3, 7}, {33, 3, 6, 3, 8}, {33, 1, 1, 1, 0}, + {33, 2, 5, 2, 7}, {34, 4, 1, 4, 1}, {34, 3, 5, 3, 7}, {34, 3, 6, 3, 8}, + {34, 1, 1, 1, 0}, {34, 2, 5, 2, 7}, {16, 3, 5, 3, 7}, {16, 4, 1, 4, 1}, + {16, 0, 7, 0, 7}, {16, 0, 1, 0, 1}, {17, 3, 5, 3, 7}, {17, 0, 7, 0, 7}, + {17, 0, 12, 0, 12}, {38, 3, 5, 3, 7}, {38, 3, 6, 3, 8}, {38, 4, 1, 4, 1}, + {47, 3, 5, 3, 7}, {47, 3, 6, 3, 8}, {47, 4, 1, 4, 1}, {47, 0, 7, 0, 7}, + {47, 0, 8, 0, 8}, {47, 0, 1, 0, 1}, {47, 1, 1, 1, 0}, {37, 2, 6, 2, 8}, + {18, 1, 1, 1, 0}, {18, 2, 6, 2, 8}, {19, 1, 1, 1, 0}, {19, 2, 6, 2, 8}, + {20, 1, 1, 1, 0}, {20, 2, 6, 2, 8}, {45, 1, 1, 1, 0}, {45, 2, 6, 2, 8}, + {21, 4, 1, 4, 1}, {21, 0, 1, 0, 1}, {21, 1, 1, 1, 0}, {40, 3, 5, 3, 7}, + {40, 4, 1, 4, 1}, {22, 3, 5, 3, 7}, {22, 3, 6, 3, 8}, {22, 4, 1, 4, 1}, + {22, 1, 1, 1, 0}, {22, 2, 6, 2, 8}, {22, 1, 1, 1, 0}, {22, 1, 1, 1, 0}, + {22, 3, 6, 3, 8}, {22, 1, 1, 1, 0}, {22, 0, 7, 0, 7}, {22, 0, 7, 0, 7}, + {22, 0, 8, 0, 8}, {22, 0, 7, 0, 7}, {22, 0, 1, 0, 1}, {22, 0, 1, 0, 1}, + {41, 3, 5, 3, 7}, {41, 4, 1, 4, 1}, {41, 1, 1, 1, 0}, {41, 1, 1, 1, 0}, + {48, 3, 5, 3, 7}, {48, 1, 1, 1, 0}, {23, 4, 1, 4, 1}, {23, 0, 1, 0, 1}, + {23, 1, 1, 1, 0}, {24, 1, 1, 1, 0}, {31, 1, 1, 1, 0}, {32, 1, 1, 1, 0}, + {32, 4, 1, 4, 1}, {25, 1, 1, 1, 0}, {25, 4, 1, 4, 1}, {26, 3, 5, 3, 7}, + {26, 3, 6, 3, 8}, {26, 4, 1, 4, 1}, {27, 1, 1, 1, 0}, {28, 1, 1, 1, 0}, + {29, 4, 1, 4, 1}, {29, 3, 5, 3, 7}, {29, 1, 1, 1, 0}, {29, 2, 5, 2, 7}, + {30, 3, 5, 3, 7}, {30, 4, 1, 4, 1}, {35, 1, 1, 1, 0}, {36, 1, 1, 1, 0}, + {36, 4, 1, 4, 1}, {49, 0, 1, 0, 1}, {50, 0, 1, 0, 1}, {51, 0, 1, 0, 1}, + {42, 0, 1, 0, 1}, {43, 0, 1, 0, 1}, {44, 0, 1, 0, 1}, {42, 1, 1, 1, 0}, + {42, 2, 6, 2, 8}, {43, 1, 1, 1, 0}, {43, 2, 6, 2, 8}, {43, 3, 6, 3, 8}, + {44, 1, 1, 1, 0}, {44, 2, 6, 2, 8}, {44, 3, 6, 3, 8}, {46, 3, 5, 3, 7}, + {46, 0, 7, 0, 7}, {46, 1, 1, 1, 0}, {46, 2, 5, 2, 7}, {46, 2, 5, 2, 7}, + {53, 4, 1, 4, 1} + }; + +static __NT_ERROR_MAP __nt_errors[] = { + { STATUS_FLOAT_INVALID_OPERATION, _DOMAIN, 8 }, + { STATUS_FLOAT_INVALID_OPERATION, _SING, 4 }, + { STATUS_FLOAT_OVERFLOW, _OVERFLOW, 1 }, + { STATUS_FLOAT_UNDERFLOW, _UNDERFLOW, 2 }, + { STATUS_FLOAT_INEXACT_RESULT, _PLOSS, 16 } + }; + +static __NT_TYPE_MAP __nt_type[] = { + {_FpPrecisionFull, _FpFormatFp128 }, + {_FpPrecision53, _FpFormatFp64 }, + {_FpPrecision24, _FpFormatFp32 } + }; + +static const unsigned int __nt_func_codes[] = { + + ((signed int)(_FpCodeAcos) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeAsin) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeAtan) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeAtan2) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeCabs) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeCos) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeCosh) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (2 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeExp) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeLog) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeLog10) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeFmod) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodePow) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeRemainder) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeSin) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeSinh) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeSquareRoot) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeTan) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeTanh) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (2 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (2 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (2 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (2 << 3) | (1 << 6) ), + ((signed int)(_FpCodeLogb) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeLdexp) << 18) | (2 | (1 << 3) | (1 << 6) | (0 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (4 | (2 << 3) | (1 << 6) | (1 << 9) | (1 << 12) | (1 << 15)), + ((signed int)(_FpCodeNextafter) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (1 << 3) | (1 << 6) | (0 << 9) ), + ((signed int)(_FpCodeY0) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeY1) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeYn) << 18) | (2 | (1 << 3) | (0 << 6) | (1 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (0 << 3) | (0 << 6) | (0 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (2 | (1 << 3) | (0 << 6) | (1 << 9) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeUnspecified) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeTruncate) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeFloor) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeCeil) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeFabs) << 18) | (1 | (1 << 3) | (1 << 6) ), + ((signed int)(_FpCodeFrexp) << 18) | (2 | (1 << 3) | (1 << 6) | (3 << 9) ), + ((signed int)(_FpCodeHypot) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9) ), + ((signed int)(_FpCodeModf) << 18) | (2 | (1 << 3) | (1 << 6) | (1 << 9 )) + }; + + +double +__to_double(DPML_EXCEPTION_VALUE *p, int generic_type, int float_type) +{ + double d; + switch (generic_type) { + case 1: + switch (float_type) { + case 0: d = (double) p->ld; break; + case 1: d = (double) p->d; break; + case 2: d = (double) p->f; break; + } + break; + + case 0: + case 3: + case 4: + case 5: + *((signed int *) &d) = p->w; + break; + + case 2: + default: + d = (double) 0.0; + break; + } + return d; +} + +static void +DPML_DO_SIDE_EFFECTS_NAME(DPML_EXCEPTION_RECORD *p) +{ + signed int nt_fpe_num, nt_op, operand_desc, func_code, float_type; + signed int fpcsr; + double arg1, arg2, result; + + func_code = dpml_response_table[p->func_error_code].func; + nt_op = (__nt_func_codes[func_code] >> 18); + nt_fpe_num = __nt_errors[(p->dpml_error)-1].fp_num; + fpcsr = _ctrlfp(-1, -1); + if (nt_fpe_num & (2 | 1)) { + nt_fpe_num |= 16; + } + + float_type = p->data_type; + operand_desc = (__nt_func_codes[func_code] & ((((unsigned int)1 << (18)) - 1) << 0)); + result = __to_double((DPML_EXCEPTION_VALUE *) p->ret_val_ptr, (((operand_desc) >> 3) & 0x7), float_type); + arg1 = __to_double(&p->args[0], (((operand_desc) >> 6) & 0x7), float_type); + + if (((operand_desc) & 0x7) == 1) { + result = _except1(nt_fpe_num, nt_op, arg1, result, fpcsr); + + } else { + arg2 = __to_double(&p->args[1], (((operand_desc) >> 9) & 0x7), float_type); + result = _except2(nt_fpe_num, nt_op, arg1, arg2, result, fpcsr); + } + + if ((((operand_desc) >> 3) & 0x7) == 1) { + switch (float_type) { + case 0: p->ret_val.ld = ((long double) result); break; + case 1: p->ret_val.d = ((double) result); break; + case 2: p->ret_val.f = ((float) result); break; + } + p->ret_val_ptr = ((void *) &(p->ret_val)); + } + + return; +} + + +#if _NTSUBSET_ + +// +// Cannot call _controlfp in ntdll subset. +// + +static void +dpml_get_environment(DPML_EXCEPTION_RECORD *p) +{ + signed int fpcsr; + + p->environment = 0; + + return; +} + +#else + +static void +dpml_get_environment(DPML_EXCEPTION_RECORD *p) +{ + signed int _controlfp(signed int, signed int); + signed int fpcsr; + + fpcsr = _controlfp(0, 0); + /* bit reverse sticky, exception, status bytes */ + (fpcsr) = (((fpcsr) & 0xf0f0f0f0) >> 4) | (((fpcsr) & 0x0f0f0f0f) << 4); + (fpcsr) = (((fpcsr) & 0xcccccccc) >> 2) | (((fpcsr) & 0x33333333) << 2); + (fpcsr) = (((fpcsr) & 0xaaaaaaaa) >> 1) | (((fpcsr) & 0x55555555) << 1); + p->environment = ((fpcsr >> 3) | ((unsigned int)1 << (7))); + + return; +} + +#endif + + +void * +__dpml_exception(DPML_EXCEPTION_RECORD *p) +{ + signed int err = p->func_error_code; + p->data_type = (((err) >> (32 - 5))); + p->func_error_code = + (((err) & ~((((unsigned int)1 << (5)) - 1) << ((32 - 5))))); + dpml_get_environment(p); + if (err < 0) { + return (void *) p->environment; + } + + { + signed int e, v, type; + e = p->func_error_code; + if (((unsigned int)1 << (7)) & p->environment) { + v = dpml_response_table[e].ieee_val; + e = dpml_response_table[e].ieee_err; + + } else { + v = dpml_response_table[e].fast_val; + e = dpml_response_table[e].fast_err; + } + p->dpml_error = e; + type = p->data_type; + p->ret_val_ptr = + (((void *) ((char *) __dpml_globals_table + + __dpml_globals_offset_table[type*13 + v]))); + } + + if (p->dpml_error != 0) { + DPML_DO_SIDE_EFFECTS_NAME(p); + } + + return p->ret_val_ptr; +} diff --git a/private/fp32/tran/alpha/exph.c b/private/fp32/tran/alpha/exph.c new file mode 100644 index 000000000..2714a8bd0 --- /dev/null +++ b/private/fp32/tran/alpha/exph.c @@ -0,0 +1,100 @@ +/*** +*exp.c - exponential +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Compute exp(x) +* +*Revision History: +* 8-15-91 GDP written +* 12-21-91 GDP support IEEE exceptions +* 02-03-92 GDP added _exphlp for use by exp, sinh, and cosh +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +double _exphlp(double, int *); + +/* + * Thresholds for over/underflow that results in an adjusted value + * too big/small to be represented as a double. + * OVFX: ln(XMAX * 2^IEEE_ADJ) + * UFLX: ln(XIN * 2^(-IEEE_ADJ) + */ + +static _dbl const ovfx = {SET_DBL(0x409bb9d3, 0xbeb8c86b)}; +static _dbl const uflx = {SET_DBL(0xc09bb448, 0x2df909dd)}; + +#define OVFX ovfx.dbl +#define UFLX uflx.dbl + + +static double const EPS = 5.16987882845642297e-26; /* 2^(-53) / 2 */ +static double const LN2INV = 1.442695040889634074; /* 1/ln(2) */ +static double const C1 = 0.693359375000000000; +static double const C2 = -2.1219444005469058277e-4; + +/* constants for the rational approximation */ +static double const p0 = 0.249999999999999993e+0; +static double const p1 = 0.694360001511792852e-2; +static double const p2 = 0.165203300268279130e-4; +static double const q0 = 0.500000000000000000e+0; +static double const q1 = 0.555538666969001188e-1; +static double const q2 = 0.495862884905441294e-3; + +#define P(z) ( (p2 * (z) + p1) * (z) + p0 ) +#define Q(z) ( (q2 * (z) + q1) * (z) + q0 ) + + + + + +/*** +*double _exphlp(double x, int * pnewexp) - exp helper routine +* +*Purpose: +* Provide the mantissa and the exponent of e^x +* +*Entry: +* x : a (non special) double precision number +* +*Exit: +* *newexp: the exponent of e^x +* return value: the mantissa m of e^x scaled by a factor +* (the value of this factor has no significance. +* The mantissa can be obtained with _set_exp(m, 0). +* +* _set_exp(m, *pnewexp) may be used for constructing the final +* result, if it is within the representable range. +* +*Exceptions: +* No exceptions are raised by this function +* +*******************************************************************************/ + + + +double _exphlp(double x, int * pnewexp) +{ + + double xn; + double g,z,gpz,qz,rg; + int n; + + xn = _frnd(x * LN2INV); + n = (int) xn; + + /* assume guard digit is present */ + g = (x - xn * C1) - xn * C2; + z = g*g; + gpz = g * P(z); + qz = Q(z); + rg = 0.5 + gpz/(qz-gpz); + + n++; + + *pnewexp = _get_exp(rg) + n; + return rg; +} diff --git a/private/fp32/tran/alpha/exps.s b/private/fp32/tran/alpha/exps.s new file mode 100644 index 000000000..c5470e1f7 --- /dev/null +++ b/private/fp32/tran/alpha/exps.s @@ -0,0 +1,586 @@ +// TITLE("Alpha AXP Exponential") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// exp.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format exponential. +// +// Author: +// +// Bob Hanek 30-Jun-1993 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 6-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("Exponential") + +//++ +// +// double +// exp ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the exponential of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double exponential result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(exp, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, t6 // save return address + + PROLOGUE_END + +// +// do some range checks and load some constants +// + + ornot zero, zero, t1 + stt f16, Temp(sp) + ldl v0, Temp + HighPart(sp) + srl t1, 33, t1 + ldah t2, 0x3c90(zero) // small_x + zapnot v0, 0xf, t0 + and t0, t1, t1 + lda v0, __exp_t_table + ldah t3, 0x3f6(zero) // big_x - small_x + subq t1, t2, t2 + lda t3, 0x232b(t3) // big_x - small_x + ldt f1, 0x10(v0) // two_pow_l_over_ln2 + cmpult t2, t3, t2 + beq t2, 10f + + mov zero, t3 + br zero, 30f + +// +// check for possible problems +// + +10: ldah t4, 0x4086(zero) // big_x + ldah t2, 0x7ff0(zero) // exp mask + lda t4, 0x232b(t4) + cmpult t1, t2, t3 + cmpult t1, t4, t1 + beq t3, 80f + + beq t1, 20f + + ldt f0, One + br zero, done + +// +// check for under/over flow +// + +20: lda t1, 1(zero) + ldah t4, 0x4086(zero) + sll t1, 31, t1 + mov t0, t3 + lda t4, 0x2e42(t4) // overflow_x + cmpule t3, t4, t4 + mov 1, t3 + bne t4, 30f + + cmpult t0, t1, t1 + bne t1, 70f + + ldq t4, Under // underflow_x + cmpult t0, t4, t0 + beq t0, 60f + +// +// rejoin normal path +// + +30: mult f16, f1, f1 + ldt f12, 0x48(v0) // load poly coef + ldt f0, Two52 + ldt f13, 0x38(v0) // load poly coef + ldt f15, 0x40(v0) // load poly coef + cpys f16, f0, f10 + ldt f0, 0(v0) + addt f1, f10, f1 + subt f1, f10, f1 // flt_int_N + ldt f10, 8(v0) + mult f1, f0, f0 + mult f1, f10, f10 + subt f16, f0, f0 + addt f0, f10, f11 + mult f11, f11, f14 + mult f12, f11, f12 + mult f13, f11, f11 + ldt f13, 0x30(v0) // load poly coef + mult f14, f14, f17 + addt f15, f12, f12 + addt f13, f11, f11 + cvttqc f1, f15 + mult f17, f12, f12 + mult f14, f11, f11 + stt f15, Temp(sp) + ldq t1, Temp(sp) + and t1, 0x3f, t0 + sll t0, 4, t0 + addt f11, f12, f11 + addl v0, t0, v0 + sra t1, 6, t1 + ldt f17, 0x50(v0) // powers of two + ldt f14, 0x58(v0) + lda t4, 0x3ff(t1) + addt f10, f11, f10 + addt f17, f14, f1 + sll t4, 20, t4 + stl t4, Temp + HighPart(sp) + stl zero, Temp(sp) + ldt f13, Temp(sp) + addt f0, f10, f0 + mult f1, f0, f0 + addt f14, f0, f0 + addt f17, f0, f17 + bne t3, 40f + + mult f17, f13, f0 + br zero, done + +// +// do check +// + +40: stt f17, Temp(sp) + ldl v0, Temp + HighPart(sp) + subq t1, 1, t1 + sll t1, 20, t1 + zapnot v0, 0xf, v0 + ldah t4, 0x7fe0(zero) + addq v0, t1, t0 + mov t0, t3 + cmpult t3, t4, t4 + beq t4, 50f + + ldah t5, 0x10(zero) + addq t3, t5, t5 + stl t5, Temp + HighPart(sp) + ldt f0, Temp(sp) + br zero, done + +// +// must check for abnormals +// + +50: bgt t1, 70f + + ldah t5, 0x350(zero) + addq t3, t5, t5 + blt t5, 60f + + subq v0, t3, v0 + stl zero, Temp(sp) + stl v0, Temp + HighPart(sp) + ldt f15, Temp(sp) + addt f17, f15, f17 + stt f17, Temp(sp) + cmpteq f17, f15, f15 + fbne f15, 60f + + ldl t4, Temp + HighPart(sp) + ldah t3, 0x7ff0(zero) + zapnot t4, 0xf, t4 + subq t4, v0, v0 + stl v0, Temp + HighPart(sp) + and v0, t3, v0 + ldt f16, Temp(sp) + bne v0, retarg + +// +// underflow +// + +60: lda t0, expName + ldah t5, 0x800(zero) + stl t0, ExRec + ErName(sp) + stt f16, ExRec + ErArg0(sp) + lda t5, 0x1f(t5) + stl t5, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// overflow +// + +70: lda t0, expName + ldah t3, 0x800(zero) + stl t0, ExRec + ErName(sp) + stt f16, ExRec + ErArg0(sp) + lda t3, 0x1e(t3) + stl t3, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// nan or inf check +// + +80: stt f16, Temp(sp) + ldl t1, Temp + HighPart(sp) + ldl ra, Temp(sp) + zapnot t1, 0xf, t3 + zapnot ra, 0xf, ra + and t3, t2, t5 + cmpeq t5, t2, t2 + beq t2, 90f + + zapnot t1, 0xf, t3 + ldq v0, Mask52 + and t3, v0, v0 + bis v0, ra, v0 + cmpult zero, v0, v0 + and t2, v0, t2 + bne t2, retarg + +// +// call exception dispatcher for inf +// + +90: lda t4, 1(zero) + sll t4, 31, t4 + mov 0x21, t5 + and t0, t4, t0 + cmoveq t0, 0x20, t5 + ldah t1, 0x800(zero) + bis t5, t1, t1 + stl t1, ExRec + ErErr(sp) + lda ra, expName + stl ra, ExRec + ErName(sp) + lda v0, ExRec(sp) + stt f16, ExRec + ErArg0(sp) + bsr ra, __dpml_exception + ldt f16, 0(v0) + +// +// just return x +// + +retarg: cpys f16, f16, f0 + +// +// Return with result in f0. +// + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (t6) // return through saved ra in t6 + + .end exp + + SBTTL("Exponential Helper") + +// +// exp special entry point (for sinh, cosh etc) +// + + NESTED_ENTRY(__dpml_exp_special_entry_point, 0x10, ra) + + lda sp, -0x10(sp) + + PROLOGUE_END + + lda t0, __exp_t_table + ldt f0, 0x10(t0) + ldt f1, Two52 + ldt f12, 0x38(t0) + mult f16, f0, f0 + ldt f11, 0x48(t0) + cpys f16, f1, f10 + ldt f1, 0(t0) + ldt f14, 0x40(t0) + addt f0, f10, f0 + subt f0, f10, f0 + ldt f10, 8(t0) + mult f0, f1, f1 + mult f0, f10, f10 + subt f16, f1, f1 + addt f1, f10, f16 + mult f16, f16, f13 + mult f11, f16, f11 + mult f12, f16, f12 + ldt f16, 0x30(t0) + mult f13, f13, f15 + addt f14, f11, f11 + addt f16, f12, f12 + cvttqc f0, f14 + mult f15, f11, f11 + stt f14, Temp(sp) + mult f13, f12, f12 + ldq v0, Temp(sp) + and v0, 0x3f, t1 + sll t1, 4, t1 + addt f12, f11, f11 + addl t0, t1, t0 + ldt f0, 0x50(t0) + ldt f15, 0x58(t0) + addt f10, f11, f10 + addt f0, f15, f13 + addt f1, f10, f1 + mult f13, f1, f1 + addt f15, f1, f1 + stt f1, 0(a2) + stq v0, 0(a1) + + lda sp, 0x10(sp) + ret zero, (ra) + + .end __dpml_exp_special_entry_point + + .align 3 + .rdata + +Under: .long 0xc0874386 // underflow_x + .long 0x0 + +Two52: .quad 0x4330000000000000 // 2^52 (4503599627370496.0) + +Mask52: .quad 0x000fffffffffffff // 52-bit mantissa mask + +One: .double 1.0 + +// +// Function name for _dpml_exception. +// + +expName: + .ascii "exp\0" + +// +// exp table data +// + + .align 3 + +__exp_t_table: + +// +// misc constants +// + + .quad 0x3f862e42fefa0000 + .quad 0xbd1cf79abc9e3b3a + .quad 0x40571547652b82fe + .quad 0x0000000042b8aa3b + .quad 0x3fe00001ebfbdb81 + .quad 0x3fc55555555551c2 + +// +// poly coefs +// + + .quad 0x3fdfffffffffe5bc + .quad 0x3fc5555555556bd8 + .quad 0x3fa555570aa6fd1d + .quad 0x3f81111111110f6f + +// +// 2^(j/2^L) for j = 0 to 2^L - 1 in hi and lo pieces +// + + .quad 0x3ff0000000000000 + .quad 0x0000000000000000 + .quad 0x3ff02c9a3e778040 + .quad 0x3d007737be56527b + .quad 0x3ff059b0d3158540 + .quad 0x3d0a1d73e2a475b4 + .quad 0x3ff0874518759bc0 + .quad 0x3ce01186be4bb284 + .quad 0x3ff0b5586cf98900 + .quad 0x3ceec5317256e308 + .quad 0x3ff0e3ec32d3d180 + .quad 0x3d010103a1727c57 + .quad 0x3ff11301d0125b40 + .quad 0x3cf0a4ebbf1aed93 + .quad 0x3ff1429aaea92dc0 + .quad 0x3cffb34101943b25 + .quad 0x3ff172b83c7d5140 + .quad 0x3d0d6e6fbe462875 + .quad 0x3ff1a35beb6fcb40 + .quad 0x3d0a9e5b4c7b4968 + .quad 0x3ff1d4873168b980 + .quad 0x3d053c02dc0144c8 + .quad 0x3ff2063b88628cc0 + .quad 0x3cf63b8eeb029509 + .quad 0x3ff2387a6e756200 + .quad 0x3d0c3360fd6d8e0a + .quad 0x3ff26b4565e27cc0 + .quad 0x3cfd257a673281d3 + .quad 0x3ff29e9df51fdec0 + .quad 0x3d009612e8afad12 + .quad 0x3ff2d285a6e40300 + .quad 0x3ce680123aa6da0e + .quad 0x3ff306fe0a31b700 + .quad 0x3cf52de8d5a46305 + .quad 0x3ff33c08b26416c0 + .quad 0x3d0fa64e43086cb3 + .quad 0x3ff371a7373aa9c0 + .quad 0x3ce54e28aa05e8a8 + .quad 0x3ff3a7db34e59fc0 + .quad 0x3d0b750de494cf05 + .quad 0x3ff3dea64c123400 + .quad 0x3d011ada0911f09e + .quad 0x3ff4160a21f72e00 + .quad 0x3d04fc2192dc79ed + .quad 0x3ff44e0860618900 + .quad 0x3d068189b7a04ef8 + .quad 0x3ff486a2b5c13cc0 + .quad 0x3cf013c1a3b69062 + .quad 0x3ff4bfdad5362a00 + .quad 0x3d038ea1cbd7f621 + .quad 0x3ff4f9b2769d2c80 + .quad 0x3d035699ec5b4d50 + .quad 0x3ff5342b569d4f80 + .quad 0x3cbdf0a83c49d86a + .quad 0x3ff56f4736b527c0 + .quad 0x3cfa66ecb004764e + .quad 0x3ff5ab07dd485400 + .quad 0x3d04ac64980a8c8f + .quad 0x3ff5e76f15ad2140 + .quad 0x3ce0dd37c9840732 + .quad 0x3ff6247eb03a5580 + .quad 0x3cd2c7c3e81bf4b6 + .quad 0x3ff6623882552200 + .quad 0x3d024893ecf14dc7 + .quad 0x3ff6a09e667f3bc0 + .quad 0x3ce921165f626cdd + .quad 0x3ff6dfb23c651a00 + .quad 0x3d0779107165f0dd + .quad 0x3ff71f75e8ec5f40 + .quad 0x3d09ee91b8797785 + .quad 0x3ff75feb564267c0 + .quad 0x3ce17edd35467491 + .quad 0x3ff7a11473eb0180 + .quad 0x3cdb5f54408fdb36 + .quad 0x3ff7e2f336cf4e40 + .quad 0x3d01082e815d0abc + .quad 0x3ff82589994cce00 + .quad 0x3cf28acf88afab34 + .quad 0x3ff868d99b4492c0 + .quad 0x3d0640720ec85612 + .quad 0x3ff8ace5422aa0c0 + .quad 0x3cfb5ba7c55a192c + .quad 0x3ff8f1ae99157700 + .quad 0x3d0b15cc13a2e397 + .quad 0x3ff93737b0cdc5c0 + .quad 0x3d027a280e1f92a0 + .quad 0x3ff97d829fde4e40 + .quad 0x3cef173d241f23d1 + .quad 0x3ff9c49182a3f080 + .quad 0x3cf01c7c46b071f2 + .quad 0x3ffa0c667b5de540 + .quad 0x3d02594d6d45c655 + .quad 0x3ffa5503b23e2540 + .quad 0x3cfc8b424491caf8 + .quad 0x3ffa9e6b5579fd80 + .quad 0x3d0fa1f5921deffa + .quad 0x3ffae89f995ad380 + .quad 0x3d06af439a68bb99 + .quad 0x3ffb33a2b84f15c0 + .quad 0x3d0d7b5fe873deca + .quad 0x3ffb7f76f2fb5e40 + .quad 0x3cdbaa9ec206ad4f + .quad 0x3ffbcc1e904bc1c0 + .quad 0x3cf2247ba0f45b3d + .quad 0x3ffc199bdd855280 + .quad 0x3cfc2220cb12a091 + .quad 0x3ffc67f12e57d140 + .quad 0x3ce694426ffa41e5 + .quad 0x3ffcb720dcef9040 + .quad 0x3d048a81e5e8f4a4 + .quad 0x3ffd072d4a078940 + .quad 0x3d0dc68791790d0a + .quad 0x3ffd5818dcfba480 + .quad 0x3cdc976816bad9b8 + .quad 0x3ffda9e603db3280 + .quad 0x3cd5c2300696db53 + .quad 0x3ffdfc97337b9b40 + .quad 0x3cfeb968cac39ed2 + .quad 0x3ffe502ee78b3fc0 + .quad 0x3d0b139e8980a9cc + .quad 0x3ffea4afa2a490c0 + .quad 0x3cf9858f73a18f5d + .quad 0x3ffefa1bee615a00 + .quad 0x3d03bb8fe90d496d + .quad 0x3fff50765b6e4540 + .quad 0x3c99d3e12dd8a18a + .quad 0x3fffa7c1819e90c0 + .quad 0x3cf82e90a7e74b26 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/fabss.s b/private/fp32/tran/alpha/fabss.s new file mode 100644 index 000000000..e9b8a416e --- /dev/null +++ b/private/fp32/tran/alpha/fabss.s @@ -0,0 +1,62 @@ +// TITLE("Alpha AXP fabs") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// fabs.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format fabs. +// +// Author: +// +// Bill Gray +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Apr-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + SBTTL("fabs") + +//++ +// +// double +// fabs ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the absolute value of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double fabs result is returned as the function value in f0. +// +//-- + + LEAF_ENTRY(fabs) + + cpys f31, f16, f0 // clear the sign bit + + ret zero, (ra) + + .end fabs diff --git a/private/fp32/tran/alpha/filter.c b/private/fp32/tran/alpha/filter.c new file mode 100644 index 000000000..13d28c706 --- /dev/null +++ b/private/fp32/tran/alpha/filter.c @@ -0,0 +1,673 @@ +/*++ + +Copyright (c) 1993 Digital Equipment Corporation + +Module Name: + + filter.c + +Abstract: + + This module implements the user interface for IEEE floating point + exception handling using structured exception handling. + +Author: + + Thomas Van Baak (tvb) 24-Aug-1993 + +Environment: + + User mode. + +Revision History: + +--*/ + +#include <fpieee.h> +#include <excpt.h> +#include <nt.h> +#include <alphaops.h> + +// +// Define assembly assist function prototypes. +// + +extern unsigned __int64 _get_fpcr(); +extern unsigned int _get_softfpcr(); +extern void _set_fpcr(unsigned __int64); +extern void _set_softfpcr(unsigned int); + +ULONGLONG +_GetRegisterValue ( + IN ULONG Register, + IN PCONTEXT Context + ); + +VOID +_SetRegisterValue ( + IN ULONG Register, + IN ULONGLONG Value, + IN OUT PCONTEXT Context + ); + +// +// Define forward referenced function prototypes. +// + +static +ULONGLONG +_ConvertSingleOperandToRegister ( + IN ULONG SingleValue + ); + +static +ULONG +_ConvertRegisterToSingleOperand ( + IN ULONGLONG DoubleValue + ); + +// +// The hardware recognizes the CVTST instruction by the kludged +// opcode function 16.2ac instead of the proper 16.00e (ECO #46). +// + +#define CVTST_FUNC_PROPER 0x00E + +// +// Define software FPCR exception enable mask bits. +// +// N.B. The kernel does not restore the TEB-based software FPCR on +// continuation from an exception. Thus if the software FPCR is +// to be updated, it must be set explicitly before returning. +// + +#define SW_FPCR_ENABLE_MASK 0x0000003e + +// +// Define table of IEEE floating point operations and operand formats. +// + +static +struct _OPERATION_TABLE { + ULONG Function; + _FP_OPERATION_CODE OperationCode : 12; + _FPIEEE_FORMAT FormatOperand1 : 4; + _FPIEEE_FORMAT FormatOperand2 : 4; + _FPIEEE_FORMAT FormatResult : 4; + ULONG ValidOperand1 : 1; + ULONG ValidOperand2 : 1; + ULONG ValidResult : 1; +} _OperationTable[] = { + + { ADDS_FUNC, _FpCodeAdd, + _FpFormatFp32, _FpFormatFp32, _FpFormatFp32, TRUE, TRUE, TRUE }, + { ADDT_FUNC, _FpCodeAdd, + _FpFormatFp64, _FpFormatFp64, _FpFormatFp64, TRUE, TRUE, TRUE }, + { SUBS_FUNC, _FpCodeSubtract, + _FpFormatFp32, _FpFormatFp32, _FpFormatFp32, TRUE, TRUE, TRUE }, + { SUBT_FUNC, _FpCodeSubtract, + _FpFormatFp64, _FpFormatFp64, _FpFormatFp64, TRUE, TRUE, TRUE }, + { MULS_FUNC, _FpCodeMultiply, + _FpFormatFp32, _FpFormatFp32, _FpFormatFp32, TRUE, TRUE, TRUE }, + { MULT_FUNC, _FpCodeMultiply, + _FpFormatFp64, _FpFormatFp64, _FpFormatFp64, TRUE, TRUE, TRUE }, + { DIVS_FUNC, _FpCodeDivide, + _FpFormatFp32, _FpFormatFp32, _FpFormatFp32, TRUE, TRUE, TRUE }, + { DIVT_FUNC, _FpCodeDivide, + _FpFormatFp64, _FpFormatFp64, _FpFormatFp64, TRUE, TRUE, TRUE }, + + { CVTLQ_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatI32, _FpFormatI64, FALSE, TRUE, TRUE }, + { CVTQL_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatI64, _FpFormatI32, FALSE, TRUE, TRUE }, + { CVTQS_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatI64, _FpFormatFp32, FALSE, TRUE, TRUE }, + { CVTQT_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatI64, _FpFormatFp64, FALSE, TRUE, TRUE }, + { CVTST_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatFp32, _FpFormatFp64, FALSE, TRUE, TRUE }, + { CVTTQ_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatFp64, _FpFormatI64, FALSE, TRUE, TRUE }, + { CVTTS_FUNC, _FpCodeConvert, + _FpFormatFp64, _FpFormatFp64, _FpFormatFp32, FALSE, TRUE, TRUE }, + + { CMPTEQ_FUNC, _FpCodeCompare, + _FpFormatFp64, _FpFormatFp64, _FpFormatCompare, TRUE, TRUE, TRUE }, + { CMPTLE_FUNC, _FpCodeCompare, + _FpFormatFp64, _FpFormatFp64, _FpFormatCompare, TRUE, TRUE, TRUE }, + { CMPTLT_FUNC, _FpCodeCompare, + _FpFormatFp64, _FpFormatFp64, _FpFormatCompare, TRUE, TRUE, TRUE }, + { CMPTUN_FUNC, _FpCodeCompare, + _FpFormatFp64, _FpFormatFp64, _FpFormatCompare, TRUE, TRUE, TRUE }, +}; +#define OPERATION_COUNT (sizeof(_OperationTable) / sizeof(_OperationTable[0])) +#define OperationTableLimit (&_OperationTable[OPERATION_COUNT]) + +int +_fpieee_flt ( + IN ULONG ExceptionCode, + IN PEXCEPTION_POINTERS ExceptionPointers, + IN int Handler(_FPIEEE_RECORD *) + ) + +/*++ + +Routine Description: + + This function is called from the exception filter of a try-except to + determine if the exception is a precise, resumable IEEE floating point + exception. If so, it invokes the user's trap handler with all information + necessary to analyze the trapping instruction and its operands. The user's + trap handler may choose to handle the exception or it may modify the result + value of the trapping instruction and request that execution be continued. + +Arguments: + + ExceptionCode - Supplies the exception code value that is obtained from + GetExceptionCode(). + + ExceptionPointers - Supplies a pointer to the exception pointers structure + that is obtained from GetExceptionInformation(). + + Handler - Supplies a pointer to the user supplied IEEE floating point + exception handler function. + +Return Value: + + If the exception is not a precise, resumable IEEE floating point exception + then EXCEPTION_CONTINUE_SEARCH is returned as the function value. + Otherwise, the disposition value returned by the user exception handler is + returned as the function value. + +--*/ + +{ + + PCONTEXT Context; + LONG Disposition; + PULONG ExceptionInformation; + PEXCEPTION_RECORD ExceptionRecord; + ULONG Function; + _FPIEEE_RECORD IeeeRecord; + ALPHA_INSTRUCTION Instruction; + LARGE_INTEGER Li; + ULONG Longword; + ULONG SoftFpcrValue; + PSW_FPCR SoftwareFpcr; + struct _OPERATION_TABLE *Table; + + // + // If the exception is not a floating point exception, then return the + // disposition to continue the search for another handler. + // + + if ((ExceptionCode != STATUS_FLOAT_DIVIDE_BY_ZERO) && + (ExceptionCode != STATUS_FLOAT_INEXACT_RESULT) && + (ExceptionCode != STATUS_FLOAT_INVALID_OPERATION) && + (ExceptionCode != STATUS_FLOAT_OVERFLOW) && + (ExceptionCode != STATUS_FLOAT_UNDERFLOW)) { + return EXCEPTION_CONTINUE_SEARCH; + } + + // + // If the exception record has one parameter and that parameter is + // nonzero, then assume the exception is a software generated exception. + // + // N.B. This convention is used to distinguish hardware generated + // exceptions from software generated exceptions. For Alpha AXP + // no hardware/kernel generated exceptions have exactly one + // parameter. + // + + Context = ExceptionPointers->ContextRecord; + ExceptionRecord = ExceptionPointers->ExceptionRecord; + ExceptionInformation = ExceptionRecord->ExceptionInformation; + if ((ExceptionRecord->NumberParameters == 1) && + (ExceptionInformation[0] != 0)) { + + // + // Mask all exceptions, call the user exception handler with the + // pointer to the software IEEE exception record, restore the + // exception mask, and return the handler disposition value. + // + + SoftFpcrValue = _get_softfpcr(); + _set_softfpcr(SoftFpcrValue & ~SW_FPCR_ENABLE_MASK); + + Disposition = Handler((_FPIEEE_RECORD *)(ExceptionInformation[0])); + + _set_softfpcr(SoftFpcrValue); + return Disposition; + } + + // + // If the exception record is not a 6 word IEEE exception record, then the + // floating point exception is probably a `high-performance' exception. + // Return the disposition to continue the search for another handler. + // + // A user handler function for operand or result fixup cannot be invoked + // for these exceptions because in general the trapping instruction of an + // imprecise exception cannot be precisely located. + // + // N.B. Code that requires precise exceptions is compiled with various + // IEEE options. This results in instructions with the /S qualifier + // bit set and instruction sequences that follow trap shadow rules. + // In this case imprecise exceptions are converted to precise + // exceptions by a kernel trap handler. + // + + if (ExceptionRecord->NumberParameters != 6) { + return EXCEPTION_CONTINUE_SEARCH; + } + + Instruction.Long = *((PULONG)(ExceptionRecord->ExceptionAddress)); + Function = Instruction.FpOp.Function; + if (Instruction.FpOp.Opcode == IEEEFP_OP) { + + // + // Adjust the function code if the instruction is CVTST. + // + + if (Function == CVTST_FUNC) { + Function = CVTST_FUNC_PROPER; + + } else if (Function == CVTST_S_FUNC) { + Function = CVTST_FUNC_PROPER | FP_TRAP_ENABLE_S; + } + } + + // + // Set floating point instruction operation and operand format codes. + // + + for (Table = &_OperationTable[0]; Table < OperationTableLimit; Table += 1) { + if ((Function & FP_FUNCTION_MASK) == Table->Function) { + IeeeRecord.Operation = Table->OperationCode; + IeeeRecord.Operand1.Format = Table->FormatOperand1; + IeeeRecord.Operand1.OperandValid = Table->ValidOperand1; + IeeeRecord.Operand2.Format = Table->FormatOperand2; + IeeeRecord.Operand2.OperandValid = Table->ValidOperand2; + IeeeRecord.Result.Format = Table->FormatResult; + IeeeRecord.Result.OperandValid = Table->ValidResult; + break; + } + } + if (Table == OperationTableLimit) { + + // + // The instruction was not recognized. This cannot happen if the + // operation table is complete and if the kernel is raising proper + // precise IEEE exceptions. Just set the unspecified operation code. + // + + IeeeRecord.Operation = _FpCodeUnspecified; + IeeeRecord.Operand1.Format = _FpFormatFp64; + IeeeRecord.Operand1.OperandValid = TRUE; + IeeeRecord.Operand2.Format = _FpFormatFp64; + IeeeRecord.Operand2.OperandValid = TRUE; + IeeeRecord.Result.Format = _FpFormatFp64; + IeeeRecord.Result.OperandValid = TRUE; + } + + // + // Set source operand values. + // + + if (IeeeRecord.Operand1.OperandValid != FALSE) { + Li.QuadPart = _GetRegisterValue(Instruction.FpOp.Fa + 32, Context); + switch (IeeeRecord.Operand1.Format) { + case _FpFormatFp32 : + case _FpFormatI32 : + case _FpFormatU32 : + Longword = _ConvertRegisterToSingleOperand(Li.QuadPart); + IeeeRecord.Operand1.Value.U32Value = Longword; + break; + + default : + IeeeRecord.Operand1.Value.U64Value.W[0] = Li.LowPart; + IeeeRecord.Operand1.Value.U64Value.W[1] = Li.HighPart; + break; + } + } + + if (IeeeRecord.Operand2.OperandValid != FALSE) { + Li.QuadPart = _GetRegisterValue(Instruction.FpOp.Fb + 32, Context); + switch (IeeeRecord.Operand2.Format) { + case _FpFormatFp32 : + case _FpFormatI32 : + case _FpFormatU32 : + Longword = _ConvertRegisterToSingleOperand(Li.QuadPart); + IeeeRecord.Operand2.Value.U32Value = Longword; + break; + + default : + IeeeRecord.Operand2.Value.U64Value.W[0] = Li.LowPart; + IeeeRecord.Operand2.Value.U64Value.W[1] = Li.HighPart; + break; + } + } + + // + // Set result operand value. + // + // The kernel generates the following IEEE exception record information: + // + // ExceptionInformation[0] 0 + // ExceptionInformation[1] Fir (Exception PC + 4) + // ExceptionInformation[2] \ + // ExceptionInformation[3] \ Computed IEEE masked result + // ExceptionInformation[4] / ( _FPIEEE_VALUE ) + // ExceptionInformation[5] / + // + + IeeeRecord.Result.Value.U64Value.W[0] = ExceptionInformation[2]; + IeeeRecord.Result.Value.U64Value.W[1] = ExceptionInformation[3]; + + // + // Set rounding mode. + // + + switch (((PFPCR)&Context->Fpcr)->DynamicRoundingMode) { + case ROUND_TO_NEAREST : + IeeeRecord.RoundingMode = _FpRoundNearest; + break; + + case ROUND_TO_PLUS_INFINITY : + IeeeRecord.RoundingMode = _FpRoundPlusInfinity; + break; + + case ROUND_TO_MINUS_INFINITY : + IeeeRecord.RoundingMode = _FpRoundMinusInfinity; + break; + + case ROUND_TO_ZERO : + IeeeRecord.RoundingMode = _FpRoundChopped; + break; + } + + // + // Set Precision (but not applicable to Alpha). + // + + IeeeRecord.Precision = _FpPrecision53; + + // + // Set IEEE sticky status bits. + // + + SoftwareFpcr = (PSW_FPCR)&Context->SoftFpcr; + + IeeeRecord.Status.Inexact = SoftwareFpcr->StatusInexact; + IeeeRecord.Status.Underflow = SoftwareFpcr->StatusUnderflow; + IeeeRecord.Status.Overflow = SoftwareFpcr->StatusOverflow; + IeeeRecord.Status.ZeroDivide = SoftwareFpcr->StatusDivisionByZero; + IeeeRecord.Status.InvalidOperation = SoftwareFpcr->StatusInvalid; + + // + // Set IEEE exception enable bits. + // + + IeeeRecord.Enable.Inexact = SoftwareFpcr->EnableInexact; + IeeeRecord.Enable.Underflow = SoftwareFpcr->EnableUnderflow; + IeeeRecord.Enable.Overflow = SoftwareFpcr->EnableOverflow; + IeeeRecord.Enable.ZeroDivide = SoftwareFpcr->EnableDivisionByZero; + IeeeRecord.Enable.InvalidOperation = SoftwareFpcr->EnableInvalid; + + // + // Set IEEE exception cause bits. + // + + IeeeRecord.Cause.Inexact = (ExceptionCode == STATUS_FLOAT_INEXACT_RESULT); + IeeeRecord.Cause.Underflow = (ExceptionCode == STATUS_FLOAT_UNDERFLOW); + IeeeRecord.Cause.Overflow = (ExceptionCode == STATUS_FLOAT_OVERFLOW); + IeeeRecord.Cause.ZeroDivide = (ExceptionCode == STATUS_FLOAT_DIVIDE_BY_ZERO); + IeeeRecord.Cause.InvalidOperation = (ExceptionCode == STATUS_FLOAT_INVALID_OPERATION); + + // + // Mask all exceptions, call the user exception handler with a pointer + // to the hardware IEEE exception record, and check the return disposition + // value. If execution is to be continued, then update the hardware + // register context from the exception record result operand value. The + // default IEEE exception result value is orginally computed by the kernel + // and may be altered the user handler. + // + + SoftFpcrValue = _get_softfpcr(); + _set_softfpcr(SoftFpcrValue & ~SW_FPCR_ENABLE_MASK); + + Disposition = Handler(&IeeeRecord); + + if (Disposition == EXCEPTION_CONTINUE_EXECUTION) { + + // + // Use the kernel calculated continuation address. + // + + Context->Fir = (ULONGLONG)(LONG)ExceptionInformation[1]; + + // + // Convert the updated result value based on its format and copy + // to the hardware result register. + // + + switch (IeeeRecord.Result.Format) { + + // + // Translate logical compare result values to canonical floating + // point truth values. + // + + case _FpFormatCompare : + switch (IeeeRecord.Result.Value.CompareValue) { + case _FpCompareEqual : + switch (Function & FP_FUNCTION_MASK) { + case CMPTEQ_FUNC : + case CMPTLE_FUNC : + Li.QuadPart = FP_COMPARE_TRUE; + break; + + default : + Li.QuadPart = FP_COMPARE_FALSE; + } + break; + + case _FpCompareLess : + switch (Function & FP_FUNCTION_MASK) { + case CMPTLT_FUNC : + Li.QuadPart = FP_COMPARE_TRUE; + break; + + default : + Li.QuadPart = FP_COMPARE_FALSE; + } + break; + + case _FpCompareGreater : + Li.QuadPart = FP_COMPARE_FALSE; + break; + + case _FpCompareUnordered : + switch (Function & FP_FUNCTION_MASK) { + case CMPTUN_FUNC : + Li.QuadPart = FP_COMPARE_TRUE; + break; + + default : + Li.QuadPart = FP_COMPARE_FALSE; + } + break; + } + break; + + // + // Convert 32-bit data formats to floating point register formats. + // + + case _FpFormatFp32 : + case _FpFormatI32 : + case _FpFormatU32 : + Longword = IeeeRecord.Result.Value.U32Value; + Li.QuadPart = _ConvertSingleOperandToRegister(Longword); + break; + + default : + Li.LowPart = IeeeRecord.Result.Value.U64Value.W[0]; + Li.HighPart = IeeeRecord.Result.Value.U64Value.W[1]; + break; + } + + _SetRegisterValue(Instruction.FpOp.Fc + 32, Li.QuadPart, Context); + + // + // Make changes in the floating point environment to take effect + // on continuation. The user is allowed to change the rounding mode, + // the exception mask, and the precision (not applicable to Alpha). + // + + switch (IeeeRecord.RoundingMode) { + case _FpRoundNearest : + ((PFPCR)&Context->Fpcr)->DynamicRoundingMode = ROUND_TO_NEAREST; + break; + + case _FpRoundChopped : + ((PFPCR)&Context->Fpcr)->DynamicRoundingMode = ROUND_TO_ZERO; + break; + + case _FpRoundPlusInfinity : + ((PFPCR)&Context->Fpcr)->DynamicRoundingMode = ROUND_TO_PLUS_INFINITY; + break; + + case _FpRoundMinusInfinity : + ((PFPCR)&Context->Fpcr)->DynamicRoundingMode = ROUND_TO_MINUS_INFINITY; + break; + } + + SoftwareFpcr->EnableInexact = IeeeRecord.Enable.Inexact; + SoftwareFpcr->EnableUnderflow = IeeeRecord.Enable.Underflow; + SoftwareFpcr->EnableOverflow = IeeeRecord.Enable.Overflow; + SoftwareFpcr->EnableDivisionByZero = IeeeRecord.Enable.ZeroDivide; + SoftwareFpcr->EnableInvalid = IeeeRecord.Enable.InvalidOperation; + + // + // Update the saved software FPCR with the new value. + // + + SoftFpcrValue = (ULONG)Context->SoftFpcr; + } + + _set_softfpcr(SoftFpcrValue); + return Disposition; +} + +// +// Define single and double IEEE floating point memory formats. +// + +typedef struct _DOUBLE_FORMAT { + ULONGLONG Mantissa : 52; + ULONG Exponent : 11; + ULONG Sign : 1; +} DOUBLE_FORMAT, *PDOUBLE_FORMAT; + +typedef struct _SINGLE_FORMAT { + ULONG Mantissa : 23; + ULONG Exponent : 8; + ULONG Sign : 1; +} SINGLE_FORMAT, *PSINGLE_FORMAT; + +ULONGLONG +_ConvertSingleOperandToRegister ( + IN ULONG SingleValue + ) + +/*++ + +Routine Description: + + This function converts a 32-bit single format floating point value to + the 64-bit, double format used within floating point registers. Alpha + floating point registers are 64-bits wide and single format values are + transformed to 64-bits when stored or loaded from memory. + +Arguments: + + SingleValue - Supplies the 32-bit single operand value as an integer. + +Return Value: + + The 64-bit register format operand value is returned as the function + value. + +--*/ + +{ + PDOUBLE_FORMAT DoubleFormat; + ULONGLONG Result; + PSINGLE_FORMAT SingleFormat; + + SingleFormat = (PSINGLE_FORMAT)&SingleValue; + DoubleFormat = (PDOUBLE_FORMAT)&Result; + + DoubleFormat->Sign = SingleFormat->Sign; + DoubleFormat->Mantissa = ((ULONGLONG)SingleFormat->Mantissa) << (52 - 23); + if (SingleFormat->Exponent == SINGLE_MAXIMUM_EXPONENT) { + DoubleFormat->Exponent = DOUBLE_MAXIMUM_EXPONENT; + + } else if (SingleFormat->Exponent == SINGLE_MINIMUM_EXPONENT) { + DoubleFormat->Exponent = DOUBLE_MINIMUM_EXPONENT; + + } else { + DoubleFormat->Exponent = SingleFormat->Exponent - SINGLE_EXPONENT_BIAS + + DOUBLE_EXPONENT_BIAS; + } + + return Result; +} + +ULONG +_ConvertRegisterToSingleOperand ( + IN ULONGLONG DoubleValue + ) + +/*++ + +Routine Description: + + This function converts the 64-bit, double format floating point value + used within the floating point registers to a 32-bit, single format + floating point value. + +Arguments: + + DoubleValue - Supplies the 64-bit double operand value as an integer. + +Return Value: + + The 32-bit register format operand value is returned as the function + value. + +--*/ + +{ + PDOUBLE_FORMAT DoubleFormat; + ULONG Result; + PSINGLE_FORMAT SingleFormat; + + SingleFormat = (PSINGLE_FORMAT)&Result; + DoubleFormat = (PDOUBLE_FORMAT)&DoubleValue; + + SingleFormat->Sign = DoubleFormat->Sign; + SingleFormat->Mantissa = DoubleFormat->Mantissa >> (52 - 23); + if (DoubleFormat->Exponent == DOUBLE_MAXIMUM_EXPONENT) { + SingleFormat->Exponent = SINGLE_MAXIMUM_EXPONENT; + + } else if (DoubleFormat->Exponent == DOUBLE_MINIMUM_EXPONENT) { + SingleFormat->Exponent = SINGLE_MINIMUM_EXPONENT; + + } else { + SingleFormat->Exponent = DoubleFormat->Exponent - DOUBLE_EXPONENT_BIAS + + SINGLE_EXPONENT_BIAS; + } + + return Result; +} diff --git a/private/fp32/tran/alpha/floors.s b/private/fp32/tran/alpha/floors.s new file mode 100644 index 000000000..8406e6b17 --- /dev/null +++ b/private/fp32/tran/alpha/floors.s @@ -0,0 +1,161 @@ +// TITLE("Alpha AXP floor") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// floor.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format floor. +// +// Author: +// +// Bill Gray +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Apr-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("floor") + +//++ +// +// double +// floor ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the floor of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double floor result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(floor, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + + PROLOGUE_END + + ldah t0, 0x7ff0(zero) // exp mask + ldt f0, two_to_52 // get big + ldah t1, 0x10(zero) // one in exp field + stt f16, Temp(sp) + ldl v0, Temp + HighPart(sp) + cpys f16, f0, f1 // fix sign of big + cpys f16, f16, f0 + and v0, t0, t0 + subl t0, t1, t0 + ldt f10, neg_one + ldah t1, 0x4320(zero) // cutoff value + cmpult t0, t1, t0 + beq t0, quick_out + +// Add big, sub big to round to int. + + addt f16, f1, f11 + subt f11, f1, f1 + cmptlt f0, f1, f0 + fbeq f0, it_rounded_down + +// It rounded up so subtract one. + + ldt f10, one + subt f1, f10, f1 + +it_rounded_down: + cpys f1, f1, f0 + br zero, done + + +// Value is abnormal (or too big). +// If it is zero or denorm, figure out +// whether to return 0.0 or -1.0 -- if +// value is too big, just return it. + +quick_out: + ldah t1, 0x7ff0(zero) + ldah t2, -0x8000(zero) + and v0, t1, t0 + and v0, t2, v0 + bne t0, ret_arg + ldah t0, 0x10(zero) + beq v0, ret_zero + stt f16, Temp(sp) + ldl v0, Temp(sp) + lda t0, -1(t0) + ldl t2, Temp + HighPart(sp) + cpys f10, f10, f16 + and t2, t0, t0 + bis t0, v0, v0 + and t2, t1, t1 + cmpult zero, v0, v0 + cmpeq t1, zero, t1 + beq t1, ret_zero + and t1, v0, t1 + beq t1, ret_zero + br zero, ret_arg + +ret_zero: + cpys f31, f31, f16 + +ret_arg: + cpys f16, f16, f0 + +done: + lda sp, FrameLength(sp) // deallocate stack frame + + ret zero, (ra) + + .end floor + + .align 3 + .rdata + +one: + .quad 0x3ff0000000000000 // 1.0 + +neg_one: + .quad 0xbff0000000000000 // -(1.0) + +two_to_52: + .quad 0x4330000000000000 // 2^52 (4503599627370496.0) diff --git a/private/fp32/tran/alpha/fmods.s b/private/fp32/tran/alpha/fmods.s new file mode 100644 index 000000000..f14a98bfc --- /dev/null +++ b/private/fp32/tran/alpha/fmods.s @@ -0,0 +1,518 @@ +// TITLE("Alpha AXP fmod") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// fmod.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format fmod. +// +// Author: +// +// (ajg) 1-Nov-1991. +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Apr-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +SaveRa: .space 8 +Temp0: .space 8 // to extract exponent from p, etc +Temp1: .space 8 // to extract exponent from q, etc +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("fmod") + +//++ +// +// double +// fmod ( +// IN double x +// IN double y +// ) +// +// Routine Description: +// +// This function returns the modulus of the two given double arguments. +// The fmod function is an exactly computable function defined by: +// +// mod(p, q) = p - trunc(p/q)*q +// +// Arguments: +// +// x (f16) - Supplies the dividend value. +// y (f17) - Supplies the divisor value. +// +// Return Value: +// +// The double modulus result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(fmod, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + stq ra, SaveRa(sp) + + PROLOGUE_END + + ldah t1, 0x7ff0(zero) // Load exponent mask + stq zero, Temp0(sp) + + stt f16, Temp0(sp) + stt f17, Temp1(sp) + ldl t0, Temp0 + HighPart(sp) // Get exp field of p + ldl v0, Temp1 + HighPart(sp) // Get exp field of q + and t0, t1, t0 + xor t0, t1, t2 + and v0, t1, v0 + beq t2, p_NaN_inf // Goto p_NaN_inf if all 1s + + xor v0, t1, t3 + cpys f16, f16, f0 // Copy to f0 for return + subl t0, v0, t0 + beq t3, q_NaN_inf // Goto q_NaN_inf if all 1s + + bge t0, not_quick // A quick check to see if |p| < |q| + + br zero, return // If so, just return + +not_quick: + ldah t2, 0x350(zero) + ldah t3, 0x340(zero) + cmple v0, t2, t2 + cmplt t0, t3, t3 + bne t2, small_q // If q is small, goto small_q + // to avoid underflow/denorms + + ldah t2, 0x1a0(zero) + beq t3, general_case // If exponent difference is not small enough, + // go do the looping general case + + cmplt t0, t2, t2 + divtc f16, f17, f0 // r = p/q + ldah t3, -0x400(zero) + beq t2, r_is_big // Can we do a quick extended mul & sub? + + stt f17, Temp0(sp) // Yes, the ratio fits in an integer + ldl t2, Temp0(sp) + and t2, t3, t2 + stl t2, Temp0(sp) + cvttqc f0, f1 // Integerize the ratio + cvtqt f1, f0 + ldt f1, Temp0(sp) + subt f17, f1, f10 // Split into high and low + mult f0, f1, f1 // Multiply by the integerized ratio + mult f0, f10, f0 + subt f16, f1, f1 // Subtract the pieces, in this order + subt f1, f0, f0 + br zero, return + +// +// We need to convert r to an extended precision integer. +// + +r_is_big: + stt f17, Temp0(sp) + ldt f10, int_con + ldl t3, Temp0(sp) + ldah t4, -0x400(zero) + cpys f0, f10, f11 + ldt f10, denorm_con + and t3, t4, t3 + stl t3, Temp0(sp) + addtc f0, f11, f1 // Add big + subt f1, f11, f1 // Subtract big + cpys f1, f10, f0 + ldt f10, Temp0(sp) + subt f17, f10, f17 // yhi = (y + 2^(p-1+k)) - 2^(p-1+k) + addt f1, f0, f11 + subt f11, f0, f0 + mult f0, f10, f11 // Extended precision multiply + subt f1, f0, f1 + mult f0, f17, f0 + subt f16, f11, f11 // ... and subtract + mult f1, f10, f10 + mult f1, f17, f1 + subt f11, f0, f0 + subt f0, f10, f0 + subt f0, f1, f0 // and done + br zero, return + +small_q: ble v0, quick_small_q // If exp_q is > 0, use the general case + +general_case: + cmptlt f16, f31, f11 // Set flag if p is negative + mov zero, t2 + fbeq f11, small_q_rejoin_general_case + mov 1, t2 + br zero, small_q_rejoin_general_case + +quick_small_q: + cmptlt f16, f31, f1 // Capture sign of p + mov zero, t2 + ldt f10, int_con + cpyse f10, f17, f0 // q, normalized + subt f0, f10, f17 + stt f17, Temp0(sp) + fbeq f1, 30f + mov 1, t2 // p is negative +30: ldl t4, Temp0 + HighPart(sp) + ldah t3, 0x4320(zero) + and t4, t1, t4 + subl t4, t3, t4 + mov t4, v0 + fbeq f17, q_is_zero // Can't divide by 0 + cpyse f10, f16, f11 + bne t0, small_q_done // If c == 0, q is denormal too + + subt f11, f10, f16 // Ayup, q is denormal, too + stt f16, Temp0(sp) + ldl t5, Temp0 + HighPart(sp) // Check for p == 0. (in case) + fbeq f16, done + and t5, t1, t5 + subl t5, t3, t3 + mov t3, t0 + cmplt t0, v0, t5 + beq t5, small_q_done + + stt f16, Temp0(sp) + ldl v0, Temp0 + HighPart(sp) + stt f31, Temp1(sp) + ldl t6, Temp1 + HighPart(sp) + ldah t3, 0x10(zero) + ldah t5, 0x4000(zero) + lda t3, -1(t3) + zapnot v0, 7, v0 + subl t5, t0, t0 + and t6, t3, t3 + ldah t5, 0x3ff0(zero) + bis t3, t0, t3 + bis v0, t5, v0 + stl t3, Temp1 + HighPart(sp) + stl v0, Temp0 + HighPart(sp) + ldt f1, Temp0(sp) + ldt f0, Temp1(sp) + addt f1, f0, f0 + beq t2, 40f // Need to negate p? + cpysn f0, f0, f0 +40: // Now rescale p + stt f0, Temp0(sp) + ldl t5, Temp0 + HighPart(sp) + subl t5, t0, t0 // Reduce c by q's exponent + stl t0, Temp0 + HighPart(sp) + ldt f16, Temp0(sp) + cpys f16, f16, f0 + br zero, return + +small_q_done: + subl t0, v0, t0 // Adjust c by q's exponent, + // and fall into ... + +small_q_rejoin_general_case: + stt f16, Temp0(sp) // Normalize p and q + stt f17, Temp1(sp) + ldl t3, Temp0 + HighPart(sp) + ldl t6, Temp1 + HighPart(sp) + ldah t4, 0x10(zero) + lda t4, -1(t4) + and t3, t4, t3 + ldah t5, 0x4330(zero) + and t6, t4, t6 + addl t3, t5, t3 + stl t3, Temp0 + HighPart(sp) + addl t6, t5, t6 + stl t6, Temp1 + HighPart(sp) + ldt f16, Temp0(sp) + ldt f10, Temp1(sp) + cmptle f10, f16, f11 // If p >= q, then p -= q + fbeq f11, 50f + subt f16, f10, f16 + fbeq f16, done // If that makes p == 0, goto done +50: stt f10, Temp0(sp) // Convert q to extended + ldl a0, Temp0(sp) + ldah t3, -0x400(zero) + and a0, t3, t3 + stl t3, Temp0(sp) + ldah a0, 0x340(zero) + ldt f1, Temp0(sp) + subt f10, f1, f0 // High and low + +// +// Here's the dreaded loop, bane of good fmod implemetors around the world, +// and all-too-often omitted by mediocre fmod implemetors. +// + +dread_loop: + subl t0, a0, t0 // Reduce c + blt t0, end_of_loop // End of loop? + + stt f16, Temp0(sp) // Scale p + ldl t7, Temp0 + HighPart(sp) + ldah t6, 0x340(zero) + addl t7, t6, t6 + stl t6, Temp0 + HighPart(sp) + ldt f11, Temp0(sp) + ldt f12, int_con + divtc f11, f10, f16 // r = p/q + addtc f16, f12, f13 // Add big + ldt f16, denorm_con + subt f13, f12, f12 // Subtract big + cpys f12, f16, f14 + addt f12, f14, f13 // Split q into hi & lo, too + subt f13, f14, f13 + mult f13, f1, f16 // Extended multiply + subt f12, f13, f12 + mult f13, f0, f13 + subt f11, f16, f11 // And subtract + mult f12, f1, f14 + mult f12, f0, f12 + subt f11, f13, f11 + subt f11, f14, f11 + subt f11, f12, f16 + fbne f16, dread_loop // Continue looping ... + cpys f16, f16, f0 // ... unless p == 0, in which case, ... + br zero, return // ... return p + +// +// We may need one additional iteration. Fortunately, this one can use +// the faster scaling for p. +// + +end_of_loop: + addl t0, a0, t0 // Bump c back up + beq t0, almost_done // And unless it was zero, ... + + stt f16, Temp0(sp) + ldl t6, Temp0 + HighPart(sp) + addl t6, t0, t0 + stl t0, Temp0 + HighPart(sp) + ldt f13, Temp0(sp) + ldt f11, int_con + divtc f13, f10, f14 + ldt f16, denorm_con + addtc f14, f11, f12 + subt f12, f11, f11 + cpys f11, f16, f14 + addt f11, f14, f12 + subt f12, f14, f12 + mult f12, f1, f16 + subt f11, f12, f11 + mult f12, f0, f12 + subt f13, f16, f13 + mult f11, f1, f1 + mult f11, f0, f0 + subt f13, f12, f12 + subt f12, f1, f1 + subt f1, f0, f16 + +// +// Here, manage the final niggling details: +// (a) Make a final check for p == 0, +// (b) Compute the exponent field of the result, +// (c) Check for underflow of the result, +// (d) And give it the sign of p. +almost_done: + fbeq f16, done // p == 0? + stt f16, Temp0(sp) + ldl t6, Temp0 + HighPart(sp) + ldah t0, -0x10(zero) + and t6, t0, t0 // Isolate exponent of p + addl t0, v0, v0 // Add in c + subl v0, t5, v0 // Subtract the 'biased denorm' + ble v0, gen_underflow_or_denormal // Branch if exponent <= 0 + + stt f16, Temp0(sp) + ldl a0, Temp0 + HighPart(sp) + and a0, t4, t4 + bis t4, v0, v0 + stl v0, Temp0 + HighPart(sp) + ldt f16, Temp0(sp) + beq t2, done // Was input p >= 0? + + cpysn f16, f16, f0 // No; so return -p. + br zero, return + +// +// Exceptions +// + +gen_underflow_or_denormal: + lda t0, fmodName + stl t0, ExRec + ErName(sp) + ldah t4, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + stt f10, ExRec + ErArg1(sp) + lda t4, 0x38(t4) // MOD_REM_UNDERFLOW + stl t4, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f16, 0(v0) + +// +// Return p, which is in f16 +// + +done: cpys f16, f16, f0 + br zero, return + + +q_is_zero: + lda t3, fmodName + stl t3, ExRec + ErName(sp) + ldah t5, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + stt f17, ExRec + ErArg1(sp) + lda t5, 0x39(t5) // MOD_REM_BY_ZERO + stl t5, ExRec + ErErr(sp) + br zero, call_exception + +// +// If q is a NaN, return q. +// If q is Inf, return p (p mod Inf = p). +// + +q_NaN_inf: + stt f17, Temp0(sp) + ldl t6, Temp0(sp) + ldah t2, 0x10(zero) + ldl t4, Temp0 + HighPart(sp) + lda t2, -1(t2) + and t4, t2, t2 + bis t2, t6, t2 + and t4, t1, t3 + cmpult zero, t2, t2 + cmpeq t3, t1, t3 + beq t3, 60f + and t3, t2, t3 +60: cpys f17, f17, f0 // Return q ... + bne t3, return // ... if it's a NaN + cpys f16, f16, f0 // Otherwise, return p + br zero, return + +// +// If p (or q) is a NaN, return p (or q) +// Otherwise, report an exception (MOD_REM_INF) +// + +p_NaN_inf: + stt f16, Temp0(sp) + ldl ra, Temp0(sp) + ldah v0, 0x10(zero) + ldl t5, Temp0 + HighPart(sp) + lda v0, -1(v0) + and t5, v0, v0 + bis v0, ra, v0 + and t5, t1, t7 + cmpult zero, v0, v0 + cmpeq t7, t1, t7 + beq t7, 70f + and t7, v0, t7 + beq t7, 70f + + cpys f16, f16, f0 // return p + br zero, return + +70: stt f17, Temp0(sp) + ldl t4, Temp0 + HighPart(sp) + and t4, t1, t6 + cmpeq t6, t1, t1 + beq t1, exc_mod_rem_of_inf + + ldl t2, Temp0(sp) + ldah t3, 0x10(zero) + lda t3, -1(t3) + and t4, t3, t3 + bis t3, t2, t2 + cmpult zero, t2, t2 + and t1, t2, t1 + beq t1, exc_mod_rem_of_inf + + cpys f17, f17, f0 // return q + br zero, return + +exc_mod_rem_of_inf: + lda t0, fmodName + stl t0, ExRec + ErName(sp) + ldah t7, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + stt f17, ExRec + ErArg1(sp) + lda t7, 0x3a(t7) // MOD_REM_OF_INF + stl t7, ExRec + ErErr(sp) +call_exception: + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + +// +// And return +// + +return: ldq ra, SaveRa(sp) + lda sp, FrameLength(sp) + + ret zero, (ra) + + .end fmod + + .align 2 + .data + +denorm_con: + .long 0x0 // Scale factor for normalization + .long 0x44d00000 + +int_con: + .long 0x0 // 2^TT, to integerize a number + .long 0x43300000 + +fmodName: + .ascii "fmod\0" diff --git a/private/fp32/tran/alpha/fpctrl.s b/private/fp32/tran/alpha/fpctrl.s new file mode 100644 index 000000000..15c1a722d --- /dev/null +++ b/private/fp32/tran/alpha/fpctrl.s @@ -0,0 +1,371 @@ +// TITLE("Floating Point Control") +//++ +// +// Copyright (c) 1992 Digital Equipment Corporation +// +// Module Name: +// +// fpctrl.s +// +// Abstract: +// +// This module implements routines that control floating point +// operations. +// +// Author: +// +// Thomas Van Baak (tvb) 31-Aug-1992 +// +// Environment: +// +// Any mode. +// +// Revision History: +// +//-- + +#include "ksalpha.h" + +// +// Define call frame used to exchange a floating point and integer register. +// + + .struct 0 +FpCr: .space 8 // fpcr value + .space 8 // ensure 16-byte stack alignment +FpFrameLength: // length of stack frame + + SBTTL("Get Hardware Floating Point Control Register") +//++ +// +// ULONGLONG +// _get_fpcr ( +// VOID +// ) +// +// Routine Description: +// +// This function obtains the current FPCR value. +// +// Arguments: +// +// None. +// +// Return Value: +// +// The current value of the FPCR is returned as the function value. +// +//-- + + NESTED_ENTRY(_get_fpcr, FpFrameLength, ra) + + lda sp, -FpFrameLength(sp) // allocate stack frame + + PROLOGUE_END + + excb // wait for all pending traps + mf_fpcr f0, f0, f0 // get current fpcr + excb // block against new traps + stt f0, FpCr(sp) // store floating register in order to + ldq v0, FpCr(sp) // load integer register with fpcr + + lda sp, FpFrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end _get_fpcr + + SBTTL("Set Hardware Floating Point Control Register") +//++ +// +// VOID +// _set_fpcr ( +// ULONGLONG FpcrValue +// ) +// +// Routine Description: +// +// This function sets a new value in the FPCR. +// +// Arguments: +// +// FpcrValue (a0) - Supplies the new value for the FPCR. +// +// Return Value: +// +// None. +// +//-- + + NESTED_ENTRY(_set_fpcr, FpFrameLength, ra) + + lda sp, -FpFrameLength(sp) // allocate stack frame + + PROLOGUE_END + + stq a0, FpCr(sp) // store integer register in order to + ldt f0, FpCr(sp) // load floating register with fpcr + excb // wait for all pending traps + mt_fpcr f0, f0, f0 // set new fpcr + excb // block against new traps + + lda sp, FpFrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end _set_fpcr + + SBTTL("Get Software Floating Point Control and Status Register") +//++ +// +// ULONG +// _get_softfpcr ( +// VOID +// ) +// +// Routine Description: +// +// This function obtains the current software FPCR value. +// +// Arguments: +// +// None. +// +// Return Value: +// +// The current value of the software FPCR is returned as the function value. +// +//-- + + LEAF_ENTRY(_get_softfpcr) + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + ldl v0, TeSoftFpcr(v0) // get current software fpcr value + ret zero, (ra) // return + + .end _get_softfpcr + + SBTTL("Set Software Floating Point Control and Status Register") +//++ +// +// VOID +// _set_softfpcr ( +// ULONG SoftFpcrValue +// ) +// +// Routine Description: +// +// This function sets a new value in the software FPCR. +// +// Arguments: +// +// SoftFpcrValue (a0) - Supplies the new value for the software FPCR. +// +// Return Value: +// +// None. +// +//-- + + LEAF_ENTRY(_set_softfpcr) + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + stl a0, TeSoftFpcr(v0) // store new software fpcr value + ret zero, (ra) // return + + .end _set_softfpcr + + SBTTL("Set New Floating Control Register Value") +//++ +// +// ULONG +// _ctrlfp ( +// IN ULONG newctrl, +// IN ULONG mask +// ) +// +// Routine Description: +// +// For Alpha AXP this function sets nothing. It returns the current +// rounding mode and the current IEEE exception disable mask in the +// fp32 internal control word format. +// +// Arguments: +// +// newctrl (a0) - Supplies the new control bits to be set. +// +// mask (a1) - Supplies the mask of bits to be set. +// +// Return Value: +// +// oldctrl (v0) - Returns the old value of the control bits. +// +// +//-- + + NESTED_ENTRY(_ctrlfp, FpFrameLength, ra) + + lda sp, -FpFrameLength(sp) // allocate stack frame + + PROLOGUE_END + +// +// Get the dynamic rounding mode from the FPCR. +// + + excb // wait for all pending traps + mf_fpcr f0, f0, f0 // get current fpcr + excb // block against new traps + stt f0, FpCr(sp) // store floating register in order to + ldq t0, FpCr(sp) // load integer register with fpcr + + srl t0, 58, t0 // shift rounding mode to low end + and t0, 0x3, t0 // isolate rounding mode bits + sll t0, 58 - 32, t0 // shift to internal cw format + +// +// Get the IEEE exception mask bits and status bits from the software FPCR. +// + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + ldl v0, TeSoftFpcr(v0) // get current software fpcr value + xor v0, 0x3e, v0 // convert enable bits to disable bits + or v0, t0, v0 // merge with current rounding mode + + lda sp, FpFrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end _ctrlfp + + SBTTL("Get IEEE Sticky Status Bits") +//++ +// +// ULONG +// _statfp ( +// VOID +// ) +// +// Routine Description: +// +// This function gets the IEEE sticky status bits from the software FPCR. +// +// Arguments: +// +// None. +// +// Return Value: +// +// The current value of the status word is returned as the function value. +// +//-- + + LEAF_ENTRY(_statfp) + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + ldl v0, TeSoftFpcr(v0) // get current software fpcr value + ret zero, (ra) // return + + .end _statfp + + SBTTL("Clear IEEE Sticky Status Bits") +//++ +// +// ULONG +// _clrfp ( +// VOID +// ) +// +// Routine Description: +// +// This function clears the IEEE sticky status bits in the software FPCR. +// +// Arguments: +// +// None. +// +// Return Value: +// +// The previous value of the status word is returned as the function value. +// +//-- + + LEAF_ENTRY(_clrfp) + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + ldl t0, TeSoftFpcr(v0) // get current software fpcr value + bic t0, 0x3e0000, t1 // clear status bits + stl t1, TeSoftFpcr(v0) // store new software fpcr value + mov t0, v0 // get previous value + ret zero, (ra) // return + + .end _clrfp + + SBTTL("Set IEEE Sticky Status Bits") +//++ +// +// VOID +// _set_statfp ( +// IN ULONG sw +// ) +// +// Routine Description: +// +// This function sets a IEEE sticky status bit in the software FPCR. +// +// Arguments: +// +// sw (a0) - Supplies the status bits to be set. +// +// Return Value: +// +// None. +// +//-- + + LEAF_ENTRY(_set_statfp) + + GET_THREAD_ENVIRONMENT_BLOCK // get Teb address in v0 + ldl t0, TeSoftFpcr(v0) // get current software fpcr value + or t0, a0, t0 // set status bit(s) + stl t0, TeSoftFpcr(v0) // store new software fpcr value + ret zero, (ra) // return + + .end _set_statfp + + SBTTL("Convert Signal NaN to Quiet NaN") +//++ +// +// double +// _nan2qnan ( +// IN double x +// ) +// +// Routine Description: +// +// This function converts a signaling NaN to a quiet NaN without causing +// a hardware trap. +// +// Arguments: +// +// x (f16) - Supplies the signal NaN value to be converted. +// +// Return Value: +// +// The quiet NaN value is returned as the function value. +// +//-- + NESTED_ENTRY(_nan2qnan, FpFrameLength, ra) + + lda sp, -FpFrameLength(sp) // allocate stack frame + + PROLOGUE_END + + stt f16, FpCr(sp) // store floating register in order to + ldq t0, FpCr(sp) // load integer register + ldiq t1, (1 << 51) // get NaN bit + or t0, t1, t0 // convert NaN to QNaN + stq t0, FpCr(sp) // store integer register in order to + ldt f0, FpCr(sp) // load floating register + + lda sp, FpFrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end _nan2qnan diff --git a/private/fp32/tran/alpha/fpint.s b/private/fp32/tran/alpha/fpint.s new file mode 100644 index 000000000..c8df077fd --- /dev/null +++ b/private/fp32/tran/alpha/fpint.s @@ -0,0 +1,176 @@ +// TITLE("Floating Point Integer Functions") +//++ +// +// Copyright (c) 1993 Digital Equipment Corporation +// +// Module Name: +// +// fpint.s +// +// Abstract: +// +// This module implements routines floor, ceiling, and integer fraction +// for normal, finite operands. +// +// Author: +// +// Thomas Van Baak (tvb) 30-Jul-1993 +// +// Environment: +// +// Any mode. +// +// Revision History: +// +//-- + +#include "ksalpha.h" + +// +// Define floating point constants. +// + + .align 3 + .rdata +__one: + .double 1.0 +__two_52: + .double 4503599627370496.0 // 2**52 + + SBTTL("Floating Point Integer Ceiling") +//++ +// +// double +// _finite_ceil ( +// double Value +// ) +// +// Routine Description: +// +// This function computes the smallest integer no less than the given +// finite normal floating point value. +// +// Arguments: +// +// Value (f16) - Supplies the value to be converted. +// +// Return Value: +// +// The integer value is returned as the function value. +// +//-- + + LEAF_ENTRY(_finite_ceil) + + fbeq f16, 10f // if zero, retain sign of zero + ldt f30, __two_52 // get largest fraction-free integer + fabs f16, f0 // get absolute value of argument + cmptlt f0, f30, f11 // is |x| < 2**52 ? + fbeq f11, 10f // no, all |x| >= 2**52 are integers + + cpys f16, f30, f30 // if argument < 0, use -2**52 instead + ldt f10, __one // get increment value + addt f16, f30, f0 // add 2**52 to drop fraction bits + subt f0, f30, f0 // subtract same to restore integer part + cmptlt f0, f16, f11 // is result less than argument? + fcmoveq f11, f31, f10 // if eq[no], then no adjustment + addt f0, f10, f0 // otherwise, must add 1 + ret zero, (ra) // return +10: + fmov f16, f0 // argument is return value + ret zero, (ra) // return + + .end _finite_ceil + + SBTTL("Floating Point Integer Floor") +//++ +// +// double +// _finite_floor ( +// double Value +// ) +// +// Routine Description: +// +// This function computes the largest integer no greater than the given +// finite normal floating point value. +// +// Arguments: +// +// Value (f16) - Supplies the value to be converted. +// +// Return Value: +// +// The integer value is returned as the function value. +// +//-- + + LEAF_ENTRY(_finite_floor) + + fbeq f16, 10f // if zero, retain sign of zero + ldt f30, __two_52 // get largest fraction-free integer + fabs f16, f0 // get absolute value of argument + cmptlt f0, f30, f11 // is |x| < 2**52 ? + fbeq f11, 10f // no, all |x| >= 2**52 are integers + + cpys f16, f30, f30 // if argument < 0, use -2**52 instead + ldt f10, __one // get increment value + addt f16, f30, f0 // add 2**52 to drop fraction bits + subt f0, f30, f0 // subtract same to restore integer part + cmptle f0, f16, f11 // is result less or equal to argument? + fcmovne f11, f31, f10 // if ne[yes], then no adjustment + subt f0, f10, f0 // otherwise, must subtract 1 + ret zero, (ra) // return +10: + fmov f16, f0 // argument is return value + ret zero, (ra) // return + + .end _finite_floor + + SBTTL("Floating Point Integer and Fraction") +//++ +// +// double +// _finite_modf ( +// double Value, +// double *IntegerPart +// ) +// +// Routine Description: +// +// This function computes the integer and fractional parts of the given +// finite normal floating point value. +// +// Arguments: +// +// Value (f16) - Supplies the value to be converted. +// +// IntegerPart (a1) - Supplies a pointer to the location that is to +// receive the integer part of the operation. +// +// Return Value: +// +// The fractional value is returned as the function value. +// +//-- + + LEAF_ENTRY(_finite_modf) + + fbeq f16, 10f // if zero, retain sign of zero + ldt f30, __two_52 // get largest fraction-free integer + fabs f16, f0 // get absolute value of argument + cmptlt f0, f30, f11 // is |x| < 2**52 ? + fbeq f11, 10f // no fraction when |x| >= 2**52 + + cpys f16, f30, f30 // if argument < 0, use -2**52 instead + addtc f16, f30, f0 // add +-2**52 to drop fraction bits + subtc f0, f30, f10 // subtract same to restore integer part + subt f16, f10, f0 // calculate fractional part + stt f10, 0(a1) // store integer part + ret zero, (ra) // return +10: + fmov f31, f0 // set fraction to zero + stt f16, 0(a1) // set integer part to argument + ret zero, (ra) // return + + .end _finite_modf diff --git a/private/fp32/tran/alpha/frnd.s b/private/fp32/tran/alpha/frnd.s new file mode 100644 index 000000000..7e8a2f791 --- /dev/null +++ b/private/fp32/tran/alpha/frnd.s @@ -0,0 +1,89 @@ +// TITLE("Floating Point Round") +//++ +// +// Copyright (c) 1992 Digital Equipment Corporation +// +// Module Name: +// +// frnd.s +// +// Abstract: +// +// This module implements the floating round to integer function. +// +// Author: +// +// Thomas Van Baak (tvb) 07-Sep-1992 +// +// Environment: +// +// Any mode. +// +// Revision History: +// +//-- + +#include "ksalpha.h" + + SBTTL("Floating Round to Integer") +//++ +// +// DOUBLE +// _frnd ( +// IN DOUBLE x +// ) +// +// Routine Description: +// +// This function rounds the given finite floating point argument to +// an integer using nearest rounding. +// +// Arguments: +// +// x (f16) - Supplies the floating point value to be rounded. +// +// Return Value: +// +// The integer rounded floating point value is returned in f0. +// +// Implementation Notes: +// +//-- + + LEAF_ENTRY(_frnd) + +// +// If the absolute value of the argument is greater than or equal to 2^52, +// then the argument is already an integer and it can be returned as the +// function value. Note that 2^52 - 1 is the largest integer representable +// by T-format (double) floating point because the mantissa (without the +// hidden bit) is 52 bits wide. +// + + fbeq f16, 10f // return if argument is 0.0 + ldt f10, Two52 // get 2^52 magic constant + fabs f16, f11 // get absolute value of argument + cmptlt f10, f11, f12 // is 2^52 < arg? + fbeq f12, 20f // if eq[false], then do rounding + +10: cpys f16, f16, f0 // argument is return value + ret zero, (ra) // return + +20: cpys f16, f10, f10 // if argument < 0, use -2^52 instead + addt f16, f10, f0 // add [+-]2^52 (nearest rounding) + subt f0, f10, f0 // subtract [+-]2^52 (nearest rounding) + ret zero, (ra) // return + + .end _frnd + +// +// Define floating point constants. +// +// (avoid ldit with floating point literal due to bug in acc/as that +// creates writable .rdata sections - tvb) +// + + .align 3 + .rdata +Two52: + .double 4503599627370496.0 // 2^52 diff --git a/private/fp32/tran/alpha/getsetrg.c b/private/fp32/tran/alpha/getsetrg.c new file mode 100644 index 000000000..37be99a50 --- /dev/null +++ b/private/fp32/tran/alpha/getsetrg.c @@ -0,0 +1,1076 @@ +/*++ + +Copyright (c) 1991 Microsoft Corporation +Copyright (c) 1992 Digital Equipment Corporation + +Module Name: + + getsetrg.c + +Abstract: + + This module implement the code necessary to get and set register values. + These routines are used during the emulation of unaligned data references + and floating point exceptions. + +Author: + + David N. Cutler (davec) 17-Jun-1991 + +Environment: + + Kernel mode only. + +Revision History: + + Thomas Van Baak (tvb) 14-Jul-1992 + + Adapted for Alpha AXP. + +--*/ + +#include "nt.h" +#include "ntalpha.h" + +#ifndef ULONGLONG +#ifndef DWORDLONG +#define ULONGLONG unsigned __int64 +#else +#define ULONGLONG DWORDLONG +#endif +#endif + +ULONGLONG +_GetRegisterValue ( + IN ULONG Register, + IN PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to get the value of a register from the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + returned. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Context - Supplies a pointer to a context record. + +Return Value: + + The value of the specified register is returned as the function value. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // Integer register V0. + // + + case 0: + return Context->IntV0; + + // + // Integer register T0. + // + + case 1: + return Context->IntT0; + + // + // Integer register T1. + // + + case 2: + return Context->IntT1; + + // + // Integer register T2. + // + + case 3: + return Context->IntT2; + + // + // Integer register T3. + // + + case 4: + return Context->IntT3; + + // + // Integer register T4. + // + + case 5: + return Context->IntT4; + + // + // Integer register T5. + // + + case 6: + return Context->IntT5; + + // + // Integer register T6. + // + + case 7: + return Context->IntT6; + + // + // Integer register T7. + // + + case 8: + return Context->IntT7; + + // + // Integer register S0. + // + + case 9: + return Context->IntS0; + + // + // Integer register S1. + // + + case 10: + return Context->IntS1; + + // + // Integer register S2. + // + + case 11: + return Context->IntS2; + + // + // Integer register S3. + // + + case 12: + return Context->IntS3; + + // + // Integer register S4. + // + + case 13: + return Context->IntS4; + + // + // Integer register S5. + // + + case 14: + return Context->IntS5; + + // + // Integer register Fp/S6. + // + + case 15: + return Context->IntFp; + + // + // Integer register A0. + // + + case 16: + return Context->IntA0; + + // + // Integer register A1. + // + + case 17: + return Context->IntA1; + + // + // Integer register A2 + // + + case 18: + return Context->IntA2; + + // + // Integer register A3. + // + + case 19: + return Context->IntA3; + + // + // Integer register A4. + // + + case 20: + return Context->IntA4; + + // + // Integer register A5. + // + + case 21: + return Context->IntA5; + + // + // Integer register T8. + // + + case 22: + return Context->IntT8; + + // + // Integer register T9. + // + + case 23: + return Context->IntT9; + + // + // Integer register T10. + // + + case 24: + return Context->IntT10; + + // + // Integer register T11. + // + + case 25: + return Context->IntT11; + + // + // Integer register Ra. + // + + case 26: + return Context->IntRa; + + // + // Integer register T12. + // + + case 27: + return Context->IntT12; + + // + // Integer register At. + // + + case 28: + return Context->IntAt; + + // + // Integer register Gp. + // + + case 29: + return Context->IntGp; + + // + // Integer register Sp. + // + + case 30: + return Context->IntSp; + + // + // Integer register Zero. + // + + case 31: + return 0; + + // + // Floating register F0. + // + + case 32: + return Context->FltF0; + + // + // Floating register F1. + // + + case 33: + return Context->FltF1; + + // + // Floating register F2. + // + + case 34: + return Context->FltF2; + + // + // Floating register F3. + // + + case 35: + return Context->FltF3; + + // + // Floating register F4. + // + + case 36: + return Context->FltF4; + + // + // Floating register F5. + // + + case 37: + return Context->FltF5; + + // + // Floating register F6. + // + + case 38: + return Context->FltF6; + + // + // Floating register F7. + // + + case 39: + return Context->FltF7; + + // + // Floating register F8. + // + + case 40: + return Context->FltF8; + + // + // Floating register F9. + // + + case 41: + return Context->FltF9; + + // + // Floating register F10. + // + + case 42: + return Context->FltF10; + + // + // Floating register F11. + // + + case 43: + return Context->FltF11; + + // + // Floating register F12. + // + + case 44: + return Context->FltF12; + + // + // Floating register F13. + // + + case 45: + return Context->FltF13; + + // + // Floating register F14. + // + + case 46: + return Context->FltF14; + + // + // Floating register F15. + // + + case 47: + return Context->FltF15; + + // + // Floating register F16. + // + + case 48: + return Context->FltF16; + + // + // Floating register F17. + // + + case 49: + return Context->FltF17; + + // + // Floating register F18. + // + + case 50: + return Context->FltF18; + + // + // Floating register F19. + // + + case 51: + return Context->FltF19; + + // + // Floating register F20. + // + + case 52: + return Context->FltF20; + + // + // Floating register F21. + // + + case 53: + return Context->FltF21; + + // + // Floating register F22. + // + + case 54: + return Context->FltF22; + + // + // Floating register F23. + // + + case 55: + return Context->FltF23; + + // + // Floating register F24. + // + + case 56: + return Context->FltF24; + + // + // Floating register F25. + // + + case 57: + return Context->FltF25; + + // + // Floating register F26. + // + + case 58: + return Context->FltF26; + + // + // Floating register F27. + // + + case 59: + return Context->FltF27; + + // + // Floating register F28. + // + + case 60: + return Context->FltF28; + + // + // Floating register F29. + // + + case 61: + return Context->FltF29; + + // + // Floating register F30. + // + + case 62: + return Context->FltF30; + + // + // Floating register F31 (Zero). + // + + case 63: + return 0; + } +} + +VOID +_SetRegisterValue ( + IN ULONG Register, + IN ULONGLONG Value, + OUT PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to set the value of a register in the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + stored. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Value - Supplies the value to be stored in the specified register. + + Context - Supplies a pointer to a context record. + +Return Value: + + None. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // Integer register V0. + // + + case 0: + Context->IntV0 = Value; + return; + + // + // Integer register T0. + // + + case 1: + Context->IntT0 = Value; + return; + + // + // Integer register T1. + // + + case 2: + Context->IntT1 = Value; + return; + + // + // Integer register T2. + // + + case 3: + Context->IntT2 = Value; + return; + + // + // Integer register T3. + // + + case 4: + Context->IntT3 = Value; + return; + + // + // Integer register T4. + // + + case 5: + Context->IntT4 = Value; + return; + + // + // Integer register T5. + // + + case 6: + Context->IntT5 = Value; + return; + + // + // Integer register T6. + // + + case 7: + Context->IntT6 = Value; + return; + + // + // Integer register T7. + // + + case 8: + Context->IntT7 = Value; + return; + + // + // Integer register S0. + // + + case 9: + Context->IntS0 = Value; + return; + + // + // Integer register S1. + // + + case 10: + Context->IntS1 = Value; + return; + + // + // Integer register S2. + // + + case 11: + Context->IntS2 = Value; + return; + + // + // Integer register S3. + // + + case 12: + Context->IntS3 = Value; + return; + + // + // Integer register S4. + // + + case 13: + Context->IntS4 = Value; + return; + + // + // Integer register S5. + // + + case 14: + Context->IntS5 = Value; + return; + + // + // Integer register Fp/S6. + // + + case 15: + Context->IntFp = Value; + return; + + // + // Integer register A0. + // + + case 16: + Context->IntA0 = Value; + return; + + // + // Integer register A1. + // + + case 17: + Context->IntA1 = Value; + return; + + // + // Integer register A2. + // + + case 18: + Context->IntA2 = Value; + return; + + // + // Integer register A3. + // + + case 19: + Context->IntA3 = Value; + return; + + // + // Integer register A4. + // + + case 20: + Context->IntA4 = Value; + return; + + // + // Integer register A5. + // + + case 21: + Context->IntA5 = Value; + return; + + // + // Integer register T8. + // + + case 22: + Context->IntT8 = Value; + return; + + // + // Integer register T9. + // + + case 23: + Context->IntT9 = Value; + return; + + // + // Integer register T10. + // + + case 24: + Context->IntT10 = Value; + return; + + // + // Integer register T11. + // + + case 25: + Context->IntT11 = Value; + return; + + // + // Integer register Ra. + // + + case 26: + Context->IntRa = Value; + return; + + // + // Integer register T12. + // + + case 27: + Context->IntT12 = Value; + return; + + // + // Integer register At. + // + + case 28: + Context->IntAt = Value; + return; + + // + // Integer register Gp. + // + + case 29: + Context->IntGp = Value; + return; + + // + // Integer register Sp. + // + + case 30: + Context->IntSp = Value; + return; + + // + // Integer register Zero. + // + + case 31: + return; + + // + // Floating register F0. + // + + case 32: + Context->FltF0 = Value; + return; + + // + // Floating register F1. + // + + case 33: + Context->FltF1 = Value; + return; + + // + // Floating register F2. + // + + case 34: + Context->FltF2 = Value; + return; + + // + // Floating register F3. + // + + case 35: + Context->FltF3 = Value; + return; + + // + // Floating register F4. + // + + case 36: + Context->FltF4 = Value; + return; + + // + // Floating register F5. + // + + case 37: + Context->FltF5 = Value; + return; + + // + // Floating register F6. + // + + case 38: + Context->FltF6 = Value; + return; + + // + // Floating register F7. + // + + case 39: + Context->FltF7 = Value; + return; + + // + // Floating register F8. + // + + case 40: + Context->FltF8 = Value; + return; + + // + // Floating register F9. + // + + case 41: + Context->FltF9 = Value; + return; + + // + // Floating register F10. + // + + case 42: + Context->FltF10 = Value; + return; + + // + // Floating register F11. + // + + case 43: + Context->FltF11 = Value; + return; + + // + // Floating register F12. + // + + case 44: + Context->FltF12 = Value; + return; + + // + // Floating register F13. + // + + case 45: + Context->FltF13 = Value; + return; + + // + // Floating register F14. + // + + case 46: + Context->FltF14 = Value; + return; + + // + // Floating register F15. + // + + case 47: + Context->FltF15 = Value; + return; + + // + // Floating register F16. + // + + case 48: + Context->FltF16 = Value; + return; + + // + // Floating register F17. + // + + case 49: + Context->FltF17 = Value; + return; + + // + // Floating register F18. + // + + case 50: + Context->FltF18 = Value; + return; + + // + // Floating register F19. + // + + case 51: + Context->FltF19 = Value; + return; + + // + // Floating register F20. + // + + case 52: + Context->FltF20 = Value; + return; + + // + // Floating register F21. + // + + case 53: + Context->FltF21 = Value; + return; + + // + // Floating register F22. + // + + case 54: + Context->FltF22 = Value; + return; + + // + // Floating register F23. + // + + case 55: + Context->FltF23 = Value; + return; + + // + // Floating register F24. + // + + case 56: + Context->FltF24 = Value; + return; + + // + // Floating register F25. + // + + case 57: + Context->FltF25 = Value; + return; + + // + // Floating register F26. + // + + case 58: + Context->FltF26 = Value; + return; + + // + // Floating register F27. + // + + case 59: + Context->FltF27 = Value; + return; + + // + // Floating register F28. + // + + case 60: + Context->FltF28 = Value; + return; + + // + // Floating register F29. + // + + case 61: + Context->FltF29 = Value; + return; + + // + // Floating register F30. + // + + case 62: + Context->FltF30 = Value; + return; + + // + // Floating register F31 (Zero). + // + + case 63: + return; + } +} diff --git a/private/fp32/tran/alpha/huge.s b/private/fp32/tran/alpha/huge.s new file mode 100644 index 000000000..859413750 --- /dev/null +++ b/private/fp32/tran/alpha/huge.s @@ -0,0 +1,28 @@ +// +// Define the maximum floating point value (+Infinity). +// + + .data + .align 3 + +#ifdef CRTDLL + .globl _HUGE_dll +_HUGE_dll: +#else + .globl _HUGE +_HUGE: +#endif + +// +// N.B. 0x7feff...ff is the maximum finite floating point value and +// 0x7ff00...00 is IEEE Plus Infinity. +// + + .quad 0x7ff0000000000000 + +// +// The following are alternate representations of HUGE. +// +// .double 0x1.0h0x7ff +// #define HUGE 1.7976931348623158e+308 +// diff --git a/private/fp32/tran/alpha/hypoth.c b/private/fp32/tran/alpha/hypoth.c new file mode 100644 index 000000000..e6d5650a5 --- /dev/null +++ b/private/fp32/tran/alpha/hypoth.c @@ -0,0 +1,99 @@ +/*** +*hypot.c - hypotenuse and complex absolute value +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 08-15-91 GDP written +* 10-20-91 GDP removed inline assembly for calling sqrt +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _hypothlp(double x, double y, int who); + + +/*** +*double _cabs(struct _complex z) - absolute value of a complex number +* +*Purpose: +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ +double _cabs(struct _complex z) +{ + return( _hypothlp(z.x, z.y, OP_CABS ) ); +} + + + +static double _hypothlp(double x, double y, int who) +{ + double max; + double result, sum; + unsigned int savedcw; + int exp1, exp2, newexp; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){ + if (IS_D_SNAN(x) || IS_D_SNAN(y)){ + return _except2(FP_I,who,x,y,_d_snan2(x,y),savedcw); + } + if (IS_D_QNAN(x) || IS_D_QNAN(y)){ + return _handle_qnan2(who,x,y,savedcw); + } + /* there is at least one infinite argument ... */ + RETURN(savedcw,D_INF); + } + + + /* take the absolute values of x and y, compute the max, and then scale by + max to prevent over or underflowing */ + + if ( x < 0.0 ) + x = - x; + + if ( y < 0.0 ) + y = - y; + + max = ( ( y > x ) ? y : x ); + + if ( max == 0.0 ) + RETURN(savedcw, 0.0 ); + + x /= max; //this may pollute the fp status word (underflow flag) + y /= max; + + sum = x*x + y*y; + + result = _decomp(sqrt(sum),&exp1) * _decomp(max,&exp2); + newexp = exp1 + exp2 + _get_exp(result); + + // in case of overflow or underflow + // adjusting exp by IEEE_ADJUST will certainly + // bring the result in the representable range + + if (newexp > MAXEXP) { + result = _set_exp(result, newexp - IEEE_ADJUST); + return _except2(FP_O | FP_P, who, x, y, result, savedcw); + } + if (newexp < MINEXP) { + result = _set_exp(result, newexp + IEEE_ADJUST); + return _except2(FP_U | FP_P, who, x, y, result, savedcw); + } + + result = _set_exp(result, newexp); + // fix needed: P exception is raised even if the result is exact + + RETURN_INEXACT2(who, x, y, result, savedcw); +} diff --git a/private/fp32/tran/alpha/hypots.s b/private/fp32/tran/alpha/hypots.s new file mode 100644 index 000000000..f7b4ebbbd --- /dev/null +++ b/private/fp32/tran/alpha/hypots.s @@ -0,0 +1,574 @@ +// TITLE("Alpha AXP Hypotenuse") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// hypot.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format hypotenuse. +// +// Author: +// +// Bill Gray (rtl::gray) 30-Jun-1993 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp0: .space 8 // save argument +Temp1: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("Hypotenuse") + +//++ +// +// double +// _hypot ( +// IN double x +// IN double y +// ) +// +// Routine Description: +// +// This function returns the hypotenuse for the given x, y values: +// double hypot(double x, double y) = sqrt(x*x + y*y). +// +// Arguments: +// +// x (f16) - Supplies the x argument value. +// +// y (f17) - Supplies the y argument value. +// +// Return Value: +// +// The double result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(_hypot, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, a1 // save return address + + PROLOGUE_END + +// This implementation first check for special cases: infinities, nans, zeros +// and denormalized numbers. Then it scales both number to avoid intermediate +// underflow and overflow. Once the scaled result of Hypot(x, y) is +// calculated, it checks for possible overflow before scaling up the result. + + ldah t1, 0x7ff0(zero) + stt f17, Temp0(sp) + stt f16, Temp1(sp) + ldl t0, Temp1 + HighPart(sp) // Get exp of x + ldah t2, 0x2000(zero) // bias - 1 + ldl v0, Temp0 + HighPart(sp) // Get exp of y + ldah t5, 0x3fd0(zero) // exponent mask + mov zero, t4 + and t0, t1, t0 // mask + subl t0, t2, t3 // subtract bias + and v0, t1, v0 // mask + subl v0, t2, t2 // subtract bias + cmpult t3, t5, t3 + cmpult t2, t5, t2 + beq t3, scale_input + bne t2, calculate_hypot + + +// +// We get here if simply squaring and adding will cause an intermediate +// overflow or underflow. Consequently we need to scale the arguments +// before preceeding. In the IEEE case: NaN's, Inf's and denorms come +// through here. Split them out at special cases here +// +scale_input: + and t0, t1, t3 + ldah t5, 0x10(zero) + and v0, t1, t4 + subl t3, t5, t3 + ldah t2, 0x7fe0(zero) + subl t4, t5, t4 + cmpult t3, t2, t3 + cmpult t4, t2, t2 + beq t3, classify // exp_x abnormal? goto classify + beq t2, classify // exp_y abnormal? goto classify + + subl t0, v0, t3 // diff = exp_x - exp_y + ldah t5, 0x360(zero) + blt t3, 10f // if diff < 0, goto 10 + + ldah t2, 0x4000(zero) + cmple t3, t5, t5 // if (diff > scale) goto return_abs_x + subl t0, t2, t0 // precompute exp_x - SCALE_ADJUST + beq t5, return_abs_x + + mov t0, t4 // scale = exp_x - SCALE_ADJUST + br zero, 20f +10: + ldah t5, -0x360(zero) + ldah t2, 0x4000(zero) + cmplt t3, t5, t3 // if (diff < -scale) goto return_abs_y + subl v0, t2, v0 + bne t3, return_abs_y + mov v0, t4 // scale = exp_y - SCALE_ADJUST +20: + // + // Make floats for the scale factor and unscale factor + // + ldah t0, 0x3ff0(zero) + subl t0, t4, t0 + stl t0, Temp0 + HighPart(sp) + ldah v0, 0x4000(zero) + stl zero, Temp0(sp) + addl t4, v0, v0 + ldt f0, Temp0(sp) + stl v0, Temp0 + HighPart(sp) + stl zero, Temp0(sp) + ldt f1, Temp0(sp) + mult f0, f16, f16 // x *= scale_factor + mult f0, f17, f17 // y *= scale_factor + br zero, calculate_hypot + +classify: +// +// Classify x +// +classify_x: + stt f16, Temp0(sp) + ldl t5, Temp0 + HighPart(sp) + zapnot t5, 0xf, t3 + and t5, t1, t4 + srl t3, 31, t3 + and t3, 1, t3 + beq t4, 30f + cmpult t4, t1, t4 + beq t4, 10f + addl t3, 4, t2 + br zero, classify_y +10: + ldah t6, 0x10(zero) + ldl t4, Temp0(sp) + lda t6, -1(t6) + and t5, t6, t6 + stl t6, Temp0 + HighPart(sp) + bis t6, t4, t4 + srl t6, 19, t6 + beq t4, 20f + and t6, 1, t6 + mov t6, t2 + br zero, classify_y +20: + addl t3, 2, t2 + br zero, classify_y +30: + ldl t7, Temp0(sp) + ldah t4, 0x10(zero) + lda t4, -1(t4) + and t5, t4, t4 + bis t4, t7, t7 + stl t4, Temp0 + HighPart(sp) + mov 6, t6 + cmoveq t7, 8, t6 + addl t3, t6, t2 + +// +// Classify y +// +classify_y: + stt f17, Temp0(sp) + ldl t4, Temp0 + HighPart(sp) + zapnot t4, 0xf, t5 + and t4, t1, t3 + srl t5, 31, t5 + and t5, 1, t5 + beq t3, 30f + cmpult t3, t1, t3 + beq t3, 10f + + addl t5, 4, t6 + br zero, special_args +10: + ldah t3, 0x10(zero) + ldl t7, Temp0(sp) + lda t3, -1(t3) + and t4, t3, t3 + bis t3, t7, t7 + stl t3, Temp0 + HighPart(sp) + beq t7, 20f + srl t3, 19, t3 + and t3, 1, t3 + mov t3, t6 + br zero, special_args +20: + addl t5, 2, t6 + br zero, special_args +30: + ldl a0, Temp0(sp) + ldah t7, 0x10(zero) + lda t7, -1(t7) + and t4, t7, t4 + bis t4, a0, a0 + stl t4, Temp0 + HighPart(sp) + mov 6, t3 + cmoveq a0, 8, t3 + addl t5, t3, t6 + +// +// If we get to here we know that x is a NaN, Inf, denorm or zero. +// We don't necessarily know anything about y. +// +special_args: + sra t2, 1, t2 // Classify x + sra t6, 1, t6 // Classify y + s4addl t2, t2, t2 + addl t2, t6, t2 // Combine + cmpule t2, 24, t12 // Sanity check + beq t12, scale_up_denorm_input + + lda t12, Switch + s4addl t2, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// x and y are zero -- return 0 +// +ret_zero: + cpys f31, f31, f0 + br zero, done + +// +// x is a NaN - return x +// +ret_x: cpys f16, f16, f0 + br zero, done + +// +// y is a NaN, but x isn't - return y +// +ret_y: cpys f17, f17, f0 + br zero, done + +// +// y is a denorm; if |x| is large enough, just return |x| +// +y_denorm: + ldah t4, 0x1c0(zero) // if (exp_x >= LARGE) + cmpult t0, t4, t4 + beq t4, return_abs_x // goto return_abs_x + br zero, scale_up_denorm_input + +// +// x is a denorm; if |y| is large enough, just return |y| +// +x_denorm: + ldah t7, 0x1c0(zero) // if (exp_y >= LARGE) + cmpult v0, t7, t7 + beq t7, return_abs_y // goto return_abs_y + + +// +// Scale x and y up by 2^F_PRECISION and adjust exp_x and exp_y +// accordingly. With x and y scaled into the normal range, we can +// rejoin the main logic flow for computing hypot(x, y) +// +scale_up_denorm_input: + // + // if (exp_x is non-zero) put exp_x - scale in x's exponent field + // + ldah t4, -0x4000(zero) + beq t0, 10f + stt f16, Temp0(sp) + ldl t5, Temp0 + HighPart(sp) + ldah t2, -0x7ff0(zero) + ldah t6, 0x4000(zero) + lda t2, -1(t2) + addl t0, t6, t0 + and t5, t2, t2 + bis t2, t0, t0 + stl t0, Temp0 + HighPart(sp) + ldt f16, Temp0(sp) + br zero, 20f +10: // + // else `denorm-to-norm' + // + ldt f0, Four + cpyse f0, f16, f10 + subt f10, f0, f16 +20: + // + // if (exp_y is non-zero) put exp_y - scale in y's exponent field + // + beq v0, 30f + stt f17, Temp0(sp) + ldl t6, Temp0 + HighPart(sp) + ldah t2, -0x7ff0(zero) + ldah t5, 0x4000(zero) + lda t2, -1(t2) + addl v0, t5, v0 + and t6, t2, t2 + bis t2, v0, v0 + stl v0, Temp0 + HighPart(sp) + ldt f17, Temp0(sp) + br zero, 40f +30: + ldt f0, Four + cpyse f0, f17, f10 + subt f10, f0, f17 +40: +calculate_hypot: +// +// Compute z = sqrt(x*x + y*y) directly +// + mult f16, f16, f0 // x^2 + mult f17, f17, f10 // y^2 + ldt f11, One + lda t6, __sqrt_t_table // We compute sqrt(x) inline + ldah t2, -0x7fe0(zero) + lda t2, -1(t2) + ldah v0, 0x3fe0(zero) // Half bias + addt f0, f10, f0 // x^2 + y^2 + stt f0, Temp0(sp) // To mem and back ... + ldl t3, Temp0 + HighPart(sp) // ... for exp & mantissa bits + cpyse f11, f0, f12 + sra t3, 13, t5 // low exp + high mantissa bits + and t3, t2, t2 + and t5, 0xff, t5 // masked + addl t5, t5, t5 + s8addl t5, zero, t5 // table index + mult f12, f12, f14 + addl t6, t5, t5 // address of coefficients + bis t2, v0, t0 + lds f10, 4(t5) + xor t3, t2, t2 + lds f13, 0(t5) + addl t2, v0, v0 + ldt f15, 8(t5) + zapnot v0, 0xf, v0 + mult f10, f12, f10 // evaluate poly + mult f14, f13, f13 + stl t0, Temp0 + HighPart(sp) // check for overflow below + sll v0, 31, v0 + ldt f12, Temp0(sp) + stq v0, Temp0(sp) + ldt f14, Temp0(sp) + addt f15, f10, f10 + addt f13, f10, f10 + ldt f13, Lsb // To check for correct rounding + // + // Perform a Newton's iteration + // + mult f10, f12, f12 + mult f12, f10, f10 + mult f12, f14, f12 + subt f11, f10, f10 + addt f12, f12, f15 + mult f12, f10, f10 + mult f12, f13, f12 + addt f15, f10, f10 + // + // Check for correctly rounded results + // + ldt f15, Half + subt f10, f12, f14 + addt f10, f12, f12 + multc f10, f14, f11 + multc f10, f12, f13 + cmptle f0, f11, f11 + cmptlt f13, f0, f0 + fcmoveq f11, f10, f14 + fcmoveq f0, f14, f12 + bne t4, start_unscale + + cpys f12, f12, f0 // Return result in f0 + br zero, done + +// +// +// +start_unscale: + ldah t0, 0x3fd0(zero) // exponent mask + mult f12, f15, f15 // w = TWO_POW_M_T * z + // + // if ((scale > MAX_SCALE) && (z >= MAX_Z)) then overflow + // + cmple t4, t0, t0 + bne t0, no_overflow + ldt f13, Four + lda a0, hypotName + ldah v0, 0x800(zero) + lda v0, 14(v0) + cmptle f13, f12, f13 + fbeq f13, no_overflow +// +// Report overflow (800/14) +// + stl a0, ExRec + ErName(sp) + stt f16, ExRec + ErArg0(sp) + stt f17, ExRec + ErArg1(sp) + stl v0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// +// +no_overflow: + ldah t5, -0x3ff0(zero) + cmplt t4, t5, t4 // if (scale >= MIN_SCALE) + beq t4, do_unscale // goto do_unscale; + + stt f12, Temp0(sp) + ldl t7, Temp0 + HighPart(sp) + ldah a0, 0x4000(zero) + and t7, t1, t1 + subl t1, a0, a0 + xor t7, t1, t7 + ble a0, 10f + + bis t7, a0, t7 + stl t7, Temp0 + HighPart(sp) + ldt f0, Temp0(sp) + br zero, done + +10f: subl t1, a0, t1 + stl zero, Temp0(sp) + ldah t6, 0x10(zero) + addl t1, t6, t1 + bis t7, t1, t7 + ldah v0, -0x10(zero) + and t7, v0, v0 + stl v0, Temp0 + HighPart(sp) + ldt f10, Temp0(sp) + addt f10, f12, f10 + stt f10, Temp0(sp) + ldl t3, Temp0 + HighPart(sp) + subl t3, t1, t1 + stl t1, Temp0 + HighPart(sp) + ldt f0, Temp0(sp) + br zero, done + +do_unscale: + mult f1, f15, f0 // return unscale_factor * w + br zero, done + +return_abs_y: + cpys f31, f17, f0 + br zero, done + +return_abs_x: + cpys f31, f16, f0 +// br zero, done + +// +// Return with result in f0. +// +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (a1) // return through saved ra in a1 + + .end _hypot + + .rdata + .align 3 + +// +// Define floating point constants. +// + +Lsb: .quad 0x3cb4000000000000 // lsb factor: 5*2^-54 + +Half: .double 0.5 + +One: .double 1.0 + +Four: .double 4.0 + +// +// Switch table indexed by class(x)*5 + class(y) +// + +Switch: + .long ret_x + .long ret_x + .long ret_x + .long ret_x + .long ret_x + .long ret_y + .long return_abs_y + .long return_abs_x + .long return_abs_x + .long return_abs_x + .long ret_y + .long return_abs_y + .long scale_up_denorm_input + .long y_denorm + .long return_abs_x + .long ret_y + .long return_abs_y + .long x_denorm + .long scale_up_denorm_input + .long return_abs_x + .long ret_y + .long return_abs_y + .long return_abs_y + .long return_abs_y + .long ret_zero + +// +// Function name for dpml_exception. +// + +hypotName: + .ascii "_hypot\0" diff --git a/private/fp32/tran/alpha/ieee.c b/private/fp32/tran/alpha/ieee.c new file mode 100644 index 000000000..362bf5b97 --- /dev/null +++ b/private/fp32/tran/alpha/ieee.c @@ -0,0 +1,506 @@ +/*** +*ieee.c - ieee control and status routines +* +* Copyright (c) 1985-91, Microsoft Corporation +* Copyright (c) 1993, Digital Equipment Corporation +* +*Purpose: +* IEEE control and status routines. +* +*Revision History: +* +* 04-01-02 GDP Rewritten to use abstract control and status words +* 10-30-92 GDP fpreset now modifies the saved fp context if called +* from a signal handler +* 07-14-93 TVB Adapted for Alpha AXP. +* 04-20-95 haslock Modifications to support EV4.5 and EV5 +* +*/ + +// #include <trans.h> +#include <float.h> +#include <nt.h> +#include <signal.h> + +// +// Define forward referenced function prototypes. +// + +static unsigned int _abstract_sw(unsigned int sw); +static unsigned int _abstract_cw(unsigned int cw, __int64 fpcr); +static unsigned __int64 _hw_cw(unsigned int abstr); +static unsigned int _soft_cw(unsigned int abstr); + +// +// Define assembly assist function prototypes. +// + +extern unsigned __int64 _get_fpcr(); +extern unsigned int _get_softfpcr(); +extern void _set_fpcr(unsigned __int64); +extern void _set_softfpcr(unsigned int); + +// +// Define Alpha AXP (hardware) FPCR bits. +// + +#define FPCR_ROUND_CHOP ((__int64)0x0000000000000000) +#define FPCR_ROUND_DOWN ((__int64)0x0400000000000000) +#define FPCR_ROUND_NEAR ((__int64)0x0800000000000000) +#define FPCR_ROUND_UP ((__int64)0x0c00000000000000) + +#define FPCR_ROUND_MASK ((__int64)0x0c00000000000000) + +#define FPCR_DISABLE_INVALID ((__int64)0x0002000000000000) +#define FPCR_DISABLE_DIVISION_BY_ZERO ((__int64)0x0004000000000000) +#define FPCR_DISABLE_OVERFLOW ((__int64)0x0008000000000000) + +#define FPCR_DISABLE_UNDERFLOW ((__int64)0x2000000000000000) +#define FPCR_DISABLE_INEXACT ((__int64)0x4000000000000000) + +#define FPCR_UNDERFLOW_TO_ZERO_ENABLE ((__int64)0x1000000000000000) + +#define FPCR_STATUS_INVALID ((__int64)0x0010000000000000) +#define FPCR_STATUS_DIVISION_BY_ZERO ((__int64)0x0020000000000000) +#define FPCR_STATUS_OVERFLOW ((__int64)0x0040000000000000) +#define FPCR_STATUS_UNDERFLOW ((__int64)0x0080000000000000) +#define FPCR_STATUS_INEXACT ((__int64)0x0100000000000000) +#define FPCR_STATUS_SUMMARY ((__int64)0x8000000000000000) + +#define FPCR_STATUS_MASK ((__int64)0x81f0000000000000) +#define FPCR_DISABLE_MASK ((__int64)0x700c000000000000) + +// +// Define Alpha AXP Software FPCR bits (NT version). +// + +#define SW_FPCR_ARITHMETIC_TRAP_IGNORE 0x00000001 + +#define SW_FPCR_ENABLE_INVALID 0x00000002 +#define SW_FPCR_ENABLE_DIVISION_BY_ZERO 0x00000004 +#define SW_FPCR_ENABLE_OVERFLOW 0x00000008 +#define SW_FPCR_ENABLE_UNDERFLOW 0x00000010 +#define SW_FPCR_ENABLE_INEXACT 0x00000020 + +#define SW_FPCR_DENORMAL_RESULT_ENABLE 0x00001000 +#define SW_FPCR_NO_SOFTWARE_EMULATION 0x00002000 +#define SW_FPCR_EMULATION_OCCURRED 0x00010000 + +#define SW_FPCR_STATUS_INVALID 0x00020000 +#define SW_FPCR_STATUS_DIVISION_BY_ZERO 0x00040000 +#define SW_FPCR_STATUS_OVERFLOW 0x00080000 +#define SW_FPCR_STATUS_UNDERFLOW 0x00100000 +#define SW_FPCR_STATUS_INEXACT 0x00200000 + +#define SW_FPCR_ENABLE_MASK 0x0000003e +#define SW_FPCR_STATUS_MASK 0x003e0000 + +/*** +* _statusfp() - +* +*Purpose: +* return abstract fp status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statusfp() +{ + return _abstract_sw(_get_softfpcr()); +} + + +/*** +*_clearfp() - +* +*Purpose: +* return abstract status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clearfp() +{ + unsigned __int64 oldFpcr; + unsigned int newstatus; + unsigned int oldstatus; + + oldstatus = _get_softfpcr(); + newstatus = oldstatus & (~SW_FPCR_STATUS_MASK); + + oldFpcr = _get_fpcr() & (FPCR_ROUND_MASK|FPCR_UNDERFLOW_TO_ZERO_ENABLE); + _set_fpcr (oldFpcr); + _set_softfpcr(newstatus); + + return _abstract_sw(oldstatus); +} + + + +/***_controlfp +* () - +* +*Purpose: +* return and set abstract user fp control word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _controlfp(unsigned int newctrl, unsigned int mask) +{ + unsigned __int64 oldFpcr; + unsigned __int64 newFpcr; + unsigned int oldCw; + unsigned int newCw; + unsigned int oldabs; + unsigned int newabs; + + oldCw = _get_softfpcr(); + oldFpcr = _get_fpcr(); + + oldabs = _abstract_cw(oldCw, oldFpcr); + + newabs = (newctrl & mask) | (oldabs & ~mask); + + if (mask & (_MCW_DN | _MCW_EM)) { + newCw = _soft_cw(newabs); + _set_softfpcr(newCw); + } + + if (mask & (_MCW_RC | _MCW_EM | _MCW_DN)) { + newFpcr = _hw_cw(newabs); + +// Fix hardware denormal control to match software bit + if ((newCw & SW_FPCR_DENORMAL_RESULT_ENABLE) == 0 ) + newFpcr |= FPCR_UNDERFLOW_TO_ZERO_ENABLE; + else + newFpcr &= ~FPCR_UNDERFLOW_TO_ZERO_ENABLE; + +// Only disable a trap if the trap is signaled in the status bit +// and the trap is not enabled. + if (newabs & _MCW_EM) { + if ((newCw & SW_FPCR_STATUS_INVALID) && + (newCw & SW_FPCR_ENABLE_INVALID == 0 )) + newFpcr |= FPCR_DISABLE_INVALID; + if ((newCw & SW_FPCR_STATUS_DIVISION_BY_ZERO) && + (newCw & SW_FPCR_ENABLE_DIVISION_BY_ZERO == 0 )) + newFpcr |= FPCR_DISABLE_DIVISION_BY_ZERO; + if ((newCw & SW_FPCR_STATUS_OVERFLOW) && + (newCw & SW_FPCR_ENABLE_OVERFLOW == 0 )) + newFpcr |= FPCR_DISABLE_OVERFLOW; + if ((newCw & SW_FPCR_STATUS_UNDERFLOW) && + (newCw & SW_FPCR_ENABLE_UNDERFLOW == 0 )) + newFpcr |= FPCR_DISABLE_UNDERFLOW; + if ((newCw & SW_FPCR_STATUS_INEXACT) && + (newCw & SW_FPCR_ENABLE_INEXACT == 0 )) + newFpcr |= FPCR_DISABLE_INEXACT; + } + _set_fpcr(newFpcr); + } + + return newabs; +} /* _controlfp() */ + +/*** +* _fpreset() - reset fp system +* +*Purpose: +* reset fp environment to the default state +* Also reset saved fp environment if invoked from a user's +* signal handler +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _fpreset() +{ + PEXCEPTION_POINTERS excptrs = (PEXCEPTION_POINTERS) _pxcptinfoptrs; + unsigned int status; + unsigned __int64 newFpcr; + + // + // Clear IEEE status bits. Clear software IEEE trap enable bits. + // Clear denormal enable bit. + // + + status = _get_softfpcr(); + status &= ~(SW_FPCR_STATUS_MASK | SW_FPCR_ENABLE_MASK | + SW_FPCR_DENORMAL_RESULT_ENABLE); + _set_softfpcr(status); + + // + // Set round to nearest mode. Clear FPCR status bits. + // Set Denormal Flush to Zero + // + + // Exceptions enabled so first instance can set the status bit + // Exception handler will disable further exceptions if the + // exceptions mask bit is set. + + _set_fpcr(FPCR_ROUND_NEAR | FPCR_UNDERFLOW_TO_ZERO_ENABLE); + + if (excptrs && + excptrs->ContextRecord->ContextFlags & CONTEXT_FLOATING_POINT) { + // _fpreset has been invoked by a signal handler which in turn + // has been invoked by the CRT filter routine. In this case + // the saved fp context should be cleared, so that the change take + // effect on continuation. + + excptrs->ContextRecord->Fpcr = _get_fpcr(); + excptrs->ContextRecord->SoftFpcr = _get_softfpcr(); + } +} + + + +/*** +* _abstract_cw() - abstract control word +* +*Purpose: +* produce a fp control word in abstracted (machine independent) form +* +*Entry: +* cw: machine control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_cw(unsigned int cw, __int64 fpcr) +{ + unsigned int abstr = 0; + + + // + // Set exception mask bits + // + + if ((cw & SW_FPCR_ENABLE_INVALID) == 0) + abstr |= _EM_INVALID; + if ((cw & SW_FPCR_ENABLE_DIVISION_BY_ZERO) == 0) + abstr |= _EM_ZERODIVIDE; + if ((cw & SW_FPCR_ENABLE_OVERFLOW) == 0) + abstr |= _EM_OVERFLOW; + if ((cw & SW_FPCR_ENABLE_UNDERFLOW) == 0) + abstr |= _EM_UNDERFLOW; + if ((cw & SW_FPCR_ENABLE_INEXACT) == 0) + abstr |= _EM_INEXACT; + + // + // Set rounding mode + // + // N.B. switch requires 32-bits, so scale quadwords. + // + +#define HIGHPART(q) ( (ULONG)((q) >> 32) ) + + switch ( HIGHPART(fpcr & FPCR_ROUND_MASK) ) { + case HIGHPART(FPCR_ROUND_NEAR) : + abstr |= _RC_NEAR; + break; + + case HIGHPART(FPCR_ROUND_UP) : + abstr |= _RC_UP; + break; + + case HIGHPART(FPCR_ROUND_DOWN) : + abstr |= _RC_DOWN; + break; + + case HIGHPART(FPCR_ROUND_CHOP) : + abstr |= _RC_CHOP; + break; + } + + // Precision mode is ignored + + // + // Set denormal control + // + + if ((cw & SW_FPCR_DENORMAL_RESULT_ENABLE) == 0) { + abstr |= _DN_FLUSH; + } + + return abstr; +} + + + +/*** +* _hw_cw() - h/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned __int64 _hw_cw(unsigned int abstr) +{ + + unsigned __int64 cw = 0; + + // + // Set exception mask bits (future chip support). + // + + if ((abstr & _EM_INVALID) != 0) + cw |= FPCR_DISABLE_INVALID; + if ((abstr & _EM_ZERODIVIDE) != 0) + cw |= FPCR_DISABLE_DIVISION_BY_ZERO; + if ((abstr & _EM_OVERFLOW) != 0) + cw |= FPCR_DISABLE_OVERFLOW; + if ((abstr & _EM_UNDERFLOW) != 0) + cw |= FPCR_DISABLE_UNDERFLOW; + if ((abstr & _EM_INEXACT) != 0) + cw |= FPCR_DISABLE_INEXACT; + + // + // Set rounding mode + // + + switch (abstr & _MCW_RC) { + case _RC_NEAR: + cw |= FPCR_ROUND_NEAR; + break; + case _RC_UP: + cw |= FPCR_ROUND_UP; + break; + case _RC_DOWN: + cw |= FPCR_ROUND_DOWN; + break; + case _RC_CHOP: + cw |= FPCR_ROUND_CHOP; + break; + } + + // + // Set denormal control + // + + if ((abstr & _DN_FLUSH) != 0) { + cw |= FPCR_UNDERFLOW_TO_ZERO_ENABLE; + } + + return cw; +} + +/*** +* _soft_cw() - s/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _soft_cw(unsigned int abstr) +{ + + unsigned int cw = 0; + + // + // Set exception mask bits + // + + if ((abstr & _EM_INVALID) == 0) + cw |= SW_FPCR_ENABLE_INVALID; + if ((abstr & _EM_ZERODIVIDE) == 0) + cw |= SW_FPCR_ENABLE_DIVISION_BY_ZERO; + if ((abstr & _EM_OVERFLOW) == 0) + cw |= SW_FPCR_ENABLE_OVERFLOW; + if ((abstr & _EM_UNDERFLOW) == 0) + cw |= SW_FPCR_ENABLE_UNDERFLOW; + if ((abstr & _EM_INEXACT) == 0) + cw |= SW_FPCR_ENABLE_INEXACT; + + // + // Set denormal control + // + + if ((abstr & _DN_FLUSH) == 0) { + cw |= SW_FPCR_DENORMAL_RESULT_ENABLE; + } + + return cw; +} + + +/*** +* _abstract_sw() - abstract fp status word +* +*Purpose: +* produce an abstract (machine independent) fp status word +* +* +*Entry: +* sw: machine status word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_sw(unsigned int sw) +{ + unsigned int abstr = 0; + + + if (sw & SW_FPCR_STATUS_INVALID) + abstr |= _EM_INVALID; + if (sw & SW_FPCR_STATUS_DIVISION_BY_ZERO) + abstr |= _EM_ZERODIVIDE; + if (sw & SW_FPCR_STATUS_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (sw & SW_FPCR_STATUS_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (sw & SW_FPCR_STATUS_INEXACT) + abstr |= _EM_INEXACT; + + return abstr; +} + + + + + + + + diff --git a/private/fp32/tran/alpha/log10s.s b/private/fp32/tran/alpha/log10s.s new file mode 100644 index 000000000..187999142 --- /dev/null +++ b/private/fp32/tran/alpha/log10s.s @@ -0,0 +1,912 @@ +// TITLE("Alpha AXP Logarithm Base 10") +//++ +// +// Copyright (c) 1991, 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// log10.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format logarithm base 10. +// +// Author: +// +// Martha Jaffe 1-May-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 9-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("Logarithm Base 10") + +//++ +// +// double +// log10 ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the logarithm base 10 of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double logarithm base 10 result is returned as the function value +// in f0. +// +//-- + + NESTED_ENTRY(log10, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, t7 // save return address + + PROLOGUE_END +// +// Fetch the sign, exponent, and highest fraction bits as an integer. +// + stt f16, Temp(sp) + ldl t1, Temp + HighPart(sp) + lda t0, log10_table + ldah v0, 0x3fee(zero) + subl t1, v0, v0 // screen = hi_x - T1 + ldt f1, 0(t0) // Load 1.0 as early as possible + ldah t2, 3(zero) // T2_MINUS_T1 + cmpult v0, t2, v0 // if screen < T2_MINUS_T1 + bne v0, near_1 // then goto near_1 + + sra t1, 20, v0 + sra t1, 8, t2 + cpyse f1, f16, f10 // Create a scaled-down x + subl v0, 1, t4 + lda t5, 0x7fe(zero) + lda t3, 0xfe0(zero) + cmpult t4, t5, t4 // Screen out bad x + and t2, t3, t2 + beq t4, getf // Branch if denorm + + lda t6, 0x3ff(zero) // Get the unbiased, ... + subl v0, t6, t6 // ... IEEE-style exponent m. + br zero, denorms_rejoin + +// +// Isolate the fraction field f of x, where 1 <= f < 2. +// +getf: ldah t5, -0x8000(zero) + ldah t4, 0x7ff0(zero) + and t1, t5, t5 + and t1, t4, v0 + beq t5, eval_poly_1 // Screen infs and NaNs + bne v0, eval_poly_0 // Skip if normal + + // Report either 0x834 or 0x835, depending on whether it's an inf or NaN + + ldt f10, Two53 + cpyse f10, f16, f0 + subt f0, f10, f0 + fbne f0, x0834 // Oops, NaN + +x0835: ldah v0, 0x800(zero) + lda v0, 0x35(v0) + br zero, x08xx +x0834: + ldah v0, 0x800(zero) + lda v0, 0x34(v0) +x08xx: stt f16, ExRec + ErArg0(sp) + lda t6, log10Name + stl t6, ExRec + ErName(sp) + stl v0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// Get index bits +// +eval_poly_0: + stt f16, Temp(sp) // f->int thru memory + ldl ra, Temp(sp) + ldah v0, 0x10(zero) + ldl t2, Temp + HighPart(sp) + lda v0, -1(v0) + and t2, v0, v0 + bis v0, ra, v0 + and t2, t4, t6 + cmpult zero, v0, v0 // materialize sign bit + cmpeq t6, t4, t4 + beq t4, x0834 // Again, check the range + + and t4, v0, t4 + beq t4, x0834 + +retarg: cpys f16, f16, f0 + br zero, done + + +// +// Prepare variable for the far poly +// +eval_poly_1: + ldah t4, 0x7ff0(zero) + and t1, t4, t1 + bne t1, retarg + + ldt f10, Two53 + cpyse f10, f16, f0 + subt f0, f10, f11 + fbeq f11, x0835 + + stt f11, Temp(sp) + cpyse f1, f11, f10 + ldl t1, Temp + HighPart(sp) + lda t2, 0x832(zero) + sra t1, 8, t5 + sra t1, 20, t1 // Shift the high mantissa bits + and t5, t3, t3 // And isolate them + subl t1, t2, t6 // Remove `bias' from n + mov t3, t2 // We'll index the table with t2 + +denorms_rejoin: + addl t0, t2, t2 // Index into the table + ldt f1, 0x98(t0) // Load c4 + ldt f16, 0xf8(t2) // Load Fj after t2 is available + ldt f0, 0x100(t2) // Load 1/Fj + stq t6, Temp(sp) // Store n + subt f10, f16, f10 // Subtract Fj + ldt f16, Temp(sp) // Load n + ldt f12, 0x90(t0) // Load c3 + ldt f15, 0x88(t0) // Load c2 + cvtqt f16, f16 // Convert n back to float + ldt f17, 0xa0(t0) // Load c5 + mult f10, f0, f0 // Multiply by 1/Fj -> z + ldt f10, 0xa8(t0) // Load c6 + mult f0, f0, f11 // z^2 + mult f1, f0, f1 // c4 z + mult f10, f0, f10 // c z + mult f11, f11, f13 // z^4 + mult f11, f0, f14 // z^3 + addt f12, f1, f1 // c3 + c4 z + ldt f12, 0xd0(t0) // Load log(2)_lo + mult f11, f15, f11 // z^2 c2 + addt f17, f10, f10 // c5 + c6 z + ldt f15, 0x110(t2) + ldt f17, 0xc8(t0) // Load log(2)_hi + mult f12, f16, f12 // n*log(2)_lo + mult f13, f0, f13 // z^5 + mult f14, f1, f1 // z^3 (c3 + c4 z) + ldt f14, 0xd8(t0) + mult f16, f17, f16 // n*log(2)_hi + addt f12, f15, f12 // n*log(2)_lo + log(F)_lo + ldt f15, 0x108(t2) + mult f0, f14, f0 + addt f11, f1, f1 // z^2 c2 + z^3 (c3 + c4 z) + mult f13, f10, f10 // z^5 (c5 + c6 z) + addt f16, f15, f15 // m*log(2)_hi + log(F)_hi + addt f12, f0, f0 + addt f1, f10, f1 // z^2 c2 + ... z^6 c6 + addt f0, f1, f0 // n*log(2)_lo + log(F)_lo + poly + addt f0, f15, f0 // n*log(2) + log(F) + poly + br zero, done + +// +// Near 1, m = 0, so we drop the m*log(2) terms. +// But to maintain accuracy, if no backup precision is available, +// split z into hi and lo parts. +// +near_1: + subt f16, f1, f1 // Subtract 1 (exact) + ldt f11, 0x18(t0) // Load odd coefficients + ldt f13, Two29 + ldt f17, 0x28(t0) + ldt f16, 0x10(t0) + ldt f18, 0x20(t0) + cpys f1, f13, f12 + ldt f13, 0x38(t0) + cpys f1, f1, f15 // z^2 + ldt f19, 0x30(t0) + mult f1, f1, f14 + ldt f20, 0x58(t0) + mult f1, f11, f11 + mult f1, f17, f17 + mult f1, f13, f13 + mult f1, f20, f20 + addt f15, f12, f15 + mult f14, f1, f0 + mult f14, f14, f10 + addt f11, f16, f11 + addt f17, f18, f17 + addt f13, f19, f13 + subt f15, f12, f12 + ldt f19, 0x48(t0) + mult f14, f0, f16 + mult f0, f10, f18 + mult f0, f11, f0 + mult f10, f1, f15 + mult f10, f14, f14 + mult f12, f12, f11 + mult f16, f17, f16 + ldt f17, Half + mult f1, f19, f19 + mult f18, f13, f13 + ldt f18, 0x40(t0) + mult f15, f10, f15 + mult f14, f1, f14 + mult f11, f17, f11 + addt f0, f16, f0 + subt f1, f12, f16 + addt f19, f18, f18 + ldt f19, 0x50(t0) + addt f1, f12, f12 + mult f14, f10, f10 + subt f1, f11, f1 + ldt f14, 0xe0(t0) + addt f0, f13, f0 + ldt f13, 0xf0(t0) + addt f20, f19, f19 + mult f15, f18, f15 + mult f12, f16, f12 + ldt f18, 0xd8(t0) + cvtts f1, f11 // Do the mult in high and low parts + mult f10, f19, f10 + addt f0, f15, f0 + mult f12, f17, f12 + subt f1, f11, f1 // The low part + mult f11, f13, f13 // Mult hi + mult f11, f14, f11 + addt f0, f10, f0 + mult f12, f18, f12 + mult f1, f18, f1 // Mult lo + subt f0, f12, f0 + addt f0, f1, f0 // Add lo product + addt f0, f13, f0 // _Now_ add high product + addt f0, f11, f0 // The rest is fine +// +// Done! +// +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (t7) // return through saved ra in t7 + + .end log10 + + .rdata + .align 3 + +// +// Define floating point constants. +// + +Half: .double 0.5 + +Two29: .quad 0x41c0000000000000 // 2^29 (536870912) + +Two53: .quad 0x4340000000000000 // 2^53 (9007199254740992) + +// +// Function name for dpml_exception. +// + +log10Name: + .ascii "log10\0" + +// +// Lookup table for log10. +// + + .align 4 + +log10_table: + // 1.0 in working precision + .double 1.0000000000000000e+000 + + // poly coeffs for TWO_PATH, near 1 + .double -2.1714724095162591e-001 + .double 1.4476482730108503e-001 + .double -1.0857362047581537e-001 + .double 8.6858896377427067e-002 + .double -7.2382413645518701e-002 + .double 6.2042072361751348e-002 + .double -5.4286814693113541e-002 + .double 4.8253207196292662e-002 + .double -4.3427532690713110e-002 + .double 3.9875334541624938e-002 + .double -3.6585409973116101e-002 + .double 3.6191206825271258e-002 + .double 5.4286810235891743e-003 + .double 9.6940738065545891e-004 + .double 1.8848909038419727e-004 + .double 3.8940762182296921e-005 + + // poly coeffs for TWO_PATH, away from 1 + .double -2.1714724095162594e-001 + .double 1.4476482729295831e-001 + .double -1.0857362046531732e-001 + .double 8.6860316430547854e-002 + .double -7.2383833702936592e-002 + .double 3.6191206825271674e-002 + .double 5.4286810097452865e-003 + .double 9.6949937116583870e-004 + + // log of 2 in hi and lo parts + .double 3.0102999566406652e-001 + .double -8.5323443170571066e-014 + + // log of e, in hi and lo parts + .double 4.3429448190325182e-001 + .double 4.3429448455572128e-001 + .double 1.0983196502167651e-017 + .double -2.6524694553078553e-009 + + // Table of F, 1/F, and hi and lo log of F + .double 1.0039062500000000e+000 // row 0 + .double 9.9610894941634243e-001 + .double 1.6931580194068374e-003 + .double 3.8138041759466050e-014 + .double 1.0117187500000000e+000 // row 1 + .double 9.8841698841698844e-001 + .double 5.0597987694800395e-003 + .double -7.7773671249150176e-014 + .double 1.0195312500000000e+000 + .double 9.8084291187739459e-001 + .double 8.4005420264929853e-003 + .double -6.1585101698164946e-014 + .double 1.0273437500000000e+000 + .double 9.7338403041825095e-001 + .double 1.1715783177805861e-002 + .double 1.0244650263202214e-013 + .double 1.0351562500000000e+000 + .double 9.6603773584905661e-001 + .double 1.5005908624971198e-002 + .double -1.2909441545846259e-014 + .double 1.0429687500000000e+000 + .double 9.5880149812734083e-001 + .double 1.8271296052716934e-002 + .double 8.7259536842955739e-015 + .double 1.0507812500000000e+000 + .double 9.5167286245353155e-001 + .double 2.1512314690653511e-002 + .double -9.5092829302083433e-014 + .double 1.0585937500000000e+000 + .double 9.4464944649446492e-001 + .double 2.4729325562475424e-002 + .double 8.0744017029277640e-014 + .double 1.0664062500000000e+000 + .double 9.3772893772893773e-001 + .double 2.7922681728796306e-002 + .double 1.1016967045376735e-013 + .double 1.0742187500000000e+000 + .double 9.3090909090909091e-001 + .double 3.1092728518387958e-002 + .double 2.5131020316238211e-014 + .double 1.0820312500000000e+000 + .double 9.2418772563176899e-001 + .double 3.4239803752598164e-002 + .double 8.2851246893013341e-016 + .double 1.0898437500000000e+000 + .double 9.1756272401433692e-001 + .double 3.7364237961810431e-002 + .double -6.2438896306063030e-014 + .double 1.0976562500000000e+000 + .double 9.1103202846975084e-001 + .double 4.0466354593263532e-002 + .double -3.3200415384730550e-014 + .double 1.1054687500000000e+000 + .double 9.0459363957597172e-001 + .double 4.3546470212504573e-002 + .double -6.3899433744960902e-014 + .double 1.1132812500000000e+000 + .double 8.9824561403508774e-001 + .double 4.6604894696656629e-002 + .double 4.0128137886518359e-015 + .double 1.1210937500000000e+000 + .double 8.9198606271777003e-001 + .double 4.9641931422229391e-002 + .double -8.6627079666029577e-014 + .double 1.1289062500000000e+000 // row 32 + .double 8.8581314878892736e-001 + .double 5.2657877444744372e-002 + .double -4.6076671467354740e-014 + .double 1.1367187500000000e+000 + .double 8.7972508591065290e-001 + .double 5.5653023674040014e-002 + .double 1.7713212406847493e-014 + .double 1.1445312500000000e+000 + .double 8.7372013651877134e-001 + .double 5.8627655042300830e-002 + .double -4.0935712101973142e-014 + .double 1.1523437500000000e+000 + .double 8.6779661016949150e-001 + .double 6.1582050666402210e-002 + .double -8.8777144458530814e-014 + .double 1.1601562500000000e+000 + .double 8.6195286195286192e-001 + .double 6.4516484005253005e-002 + .double 1.0978561820475473e-013 + .double 1.1679687500000000e+000 + .double 8.5618729096989965e-001 + .double 6.7431223012590635e-002 + .double -1.0548970701762625e-014 + .double 1.1757812500000000e+000 + .double 8.5049833887043191e-001 + .double 7.0326530282045496e-002 + .double -5.1700871049188391e-014 + .double 1.1835937500000000e+000 + .double 8.4488448844884489e-001 + .double 7.3202663190386374e-002 + .double 6.9075379282565748e-014 + .double 1.1914062500000000e+000 + .double 8.3934426229508197e-001 + .double 7.6059874034854147e-002 + .double 8.2130077344161123e-014 + .double 1.1992187500000000e+000 + .double 8.3387622149837137e-001 + .double 7.8898410165265886e-002 + .double 7.1036582403713468e-014 + .double 1.2070312500000000e+000 + .double 8.2847896440129454e-001 + .double 8.1718514112935736e-002 + .double 4.9344562866294779e-014 + .double 1.2148437500000000e+000 + .double 8.2315112540192925e-001 + .double 8.4520423715048310e-002 + .double -6.0365688882138538e-014 + .double 1.2226562500000000e+000 + .double 8.1789137380191690e-001 + .double 8.7304372234711991e-002 + .double -1.1306794275205778e-013 + .double 1.2304687500000000e+000 + .double 8.1269841269841270e-001 + .double 9.0070588477829006e-002 + .double -7.8057190597576649e-014 + .double 1.2382812500000000e+000 + .double 8.0757097791798105e-001 + .double 9.2819296905872761e-002 + .double 2.9171571423880465e-014 + .double 1.2460937500000000e+000 + .double 8.0250783699059558e-001 + .double 9.5550717745254587e-002 + .double 7.6978874642083809e-014 + .double 1.2539062500000000e+000 // row 64 + .double 7.9750778816199375e-001 + .double 9.8265067093052494e-002 + .double -2.9977454325636676e-014 + .double 1.2617187500000000e+000 + .double 7.9256965944272451e-001 + .double 1.0096255701932932e-001 + .double -7.5995881654636293e-014 + .double 1.2695312500000000e+000 + .double 7.8769230769230769e-001 + .double 1.0364339566694980e-001 + .double 7.5016853454684971e-014 + .double 1.2773437500000000e+000 + .double 7.8287461773700306e-001 + .double 1.0630778734844171e-001 + .double -5.1960682886781393e-015 + .double 1.2851562500000000e+000 + .double 7.7811550151975684e-001 + .double 1.0895593263808223e-001 + .double 4.2502137993246911e-014 + .double 1.2929687500000000e+000 + .double 7.7341389728096677e-001 + .double 1.1158802846375693e-001 + .double 1.1224937633010701e-013 + .double 1.3007812500000000e+000 + .double 7.6876876876876876e-001 + .double 1.1420426819449858e-001 + .double -2.8271890359716029e-014 + .double 1.3085937500000000e+000 + .double 7.6417910447761195e-001 + .double 1.1680484172507022e-001 + .double -7.4541446562998513e-014 + .double 1.3164062500000000e+000 + .double 7.5964391691394662e-001 + .double 1.1938993555941124e-001 + .double 7.7822341509243432e-014 + .double 1.3242187500000000e+000 + .double 7.5516224188790559e-001 + .double 1.2195973289112771e-001 + .double 1.0488384232834887e-013 + .double 1.3320312500000000e+000 + .double 7.5073313782991202e-001 + .double 1.2451441368057203e-001 + .double 7.6125080034185725e-014 + .double 1.3398437500000000e+000 + .double 7.4635568513119532e-001 + .double 1.2705415473101311e-001 + .double -9.2183206060576902e-014 + .double 1.3476562500000000e+000 + .double 7.4202898550724639e-001 + .double 1.2957912976139596e-001 + .double 2.8597843690125436e-014 + .double 1.3554687500000000e+000 + .double 7.3775216138328525e-001 + .double 1.3208950947910125e-001 + .double -7.7095683834893825e-014 + .double 1.3632812500000000e+000 + .double 7.3352435530085958e-001 + .double 1.3458546164724794e-001 + .double 8.2395576103574202e-014 + .double 1.3710937500000000e+000 + .double 7.2934472934472938e-001 + .double 1.3706715115404222e-001 + .double -6.7702224100030562e-014 + .double 1.3789062500000000e+000 // row 96 + .double 7.2521246458923516e-001 + .double 1.3953474007598743e-001 + .double -1.4425221661004675e-014 + .double 1.3867187500000000e+000 + .double 7.2112676056338032e-001 + .double 1.4198838774314027e-001 + .double 1.0426394002127905e-013 + .double 1.3945312500000000e+000 + .double 7.1708683473389356e-001 + .double 1.4442825080027433e-001 + .double 6.9307789402367274e-014 + .double 1.4023437500000000e+000 + .double 7.1309192200557103e-001 + .double 1.4685448326645201e-001 + .double 1.7578878109644966e-014 + .double 1.4101562500000000e+000 + .double 7.0914127423822715e-001 + .double 1.4926723659391428e-001 + .double -1.0591509184191785e-013 + .double 1.4179687500000000e+000 + .double 7.0523415977961434e-001 + .double 1.5166665972424198e-001 + .double 2.0975919101771430e-014 + .double 1.4257812500000000e+000 + .double 7.0136986301369864e-001 + .double 1.5405289914451714e-001 + .double 1.0800275641969783e-013 + .double 1.4335937500000000e+000 + .double 6.9754768392370570e-001 + .double 1.5642609894030102e-001 + .double -6.1240859626408548e-014 + .double 1.4414062500000000e+000 + .double 6.9376693766937669e-001 + .double 1.5878640084724793e-001 + .double -3.7125298776595440e-014 + .double 1.4492187500000000e+000 + .double 6.9002695417789761e-001 + .double 1.6113394430317385e-001 + .double 2.2466323880587632e-014 + .double 1.4570312500000000e+000 + .double 6.8632707774798929e-001 + .double 1.6346886649694170e-001 + .double -1.0365666433479815e-013 + .double 1.4648437500000000e+000 + .double 6.8266666666666664e-001 + .double 1.6579130241598250e-001 + .double -1.1321000759665903e-013 + .double 1.4726562500000000e+000 + .double 6.7904509283819625e-001 + .double 1.6810138489404380e-001 + .double -1.0050896766270149e-013 + .double 1.4804687500000000e+000 + .double 6.7546174142480209e-001 + .double 1.7039924465620970e-001 + .double 1.3077268785973317e-014 + .double 1.4882812500000000e+000 + .double 6.7191601049868765e-001 + .double 1.7268501036369344e-001 + .double 7.6303457216343226e-014 + .double 1.4960937500000000e+000 // row 127 + .double 6.6840731070496084e-001 + .double 1.7495880865681102e-001 + .double -3.7837011297347936e-014 + // + .double 1.5039062500000000e+000 // row 128 + .double 6.6493506493506493e-001 + .double 1.7722076419659061e-001 + .double 6.0506785742672106e-014 + .double 1.5117187500000000e+000 + .double 6.6149870801033595e-001 + .double 1.7947099970706404e-001 + .double -2.1990901597542469e-015 + .double 1.5195312500000000e+000 + .double 6.5809768637532129e-001 + .double 1.8170963601392032e-001 + .double -6.2142519690429475e-014 + .double 1.5273437500000000e+000 + .double 6.5473145780051156e-001 + .double 1.8393679208406866e-001 + .double -5.1410851753162693e-014 + .double 1.5351562500000000e+000 + .double 6.5139949109414763e-001 + .double 1.8615258506360988e-001 + .double -3.2740274772316513e-014 + .double 1.5429687500000000e+000 + .double 6.4810126582278482e-001 + .double 1.8835713031467094e-001 + .double -6.0268625781366657e-014 + .double 1.5507812500000000e+000 + .double 6.4483627204030225e-001 + .double 1.9055054145132999e-001 + .double -6.4486938075512777e-014 + .double 1.5585937500000000e+000 + .double 6.4160401002506262e-001 + .double 1.9273293037485928e-001 + .double 3.9388579215085728e-014 + .double 1.5664062500000000e+000 + .double 6.3840399002493764e-001 + .double 1.9490440730828595e-001 + .double 4.6790775628349175e-014 + .double 1.5742187500000000e+000 + .double 6.3523573200992556e-001 + .double 1.9706508082936125e-001 + .double -1.0136276227059958e-013 + .double 1.5820312500000000e+000 + .double 6.3209876543209875e-001 + .double 1.9921505790284755e-001 + .double -2.8555409708142505e-014 + .double 1.5898437500000000e+000 + .double 6.2899262899262898e-001 + .double 2.0135444391326018e-001 + .double 1.1029208598053272e-013 + .double 1.5976562500000000e+000 + .double 6.2591687041564792e-001 + .double 2.0348334269556290e-001 + .double -7.0654651972775882e-014 + .double 1.6054687500000000e+000 + .double 6.2287104622871048e-001 + .double 2.0560185656427166e-001 + .double -5.2012531877479611e-014 + .double 1.6132812500000000e+000 + .double 6.1985472154963683e-001 + .double 2.0771008634460486e-001 + .double -5.3401379032096916e-014 + .double 1.6210937500000000e+000 + .double 6.1686746987951813e-001 + .double 2.0980813140022292e-001 + .double 2.0227726258388914e-014 + .double 1.6289062500000000e+000 // row 160 + .double 6.1390887290167862e-001 + .double 2.1189608966187734e-001 + .double 3.0615637233164826e-014 + .double 1.6367187500000000e+000 + .double 6.1097852028639621e-001 + .double 2.1397405765446820e-001 + .double -2.2449001876879735e-014 + .double 1.6445312500000000e+000 + .double 6.0807600950118768e-001 + .double 2.1604213052387422e-001 + .double -5.5474936976441291e-014 + .double 1.6523437500000000e+000 + .double 6.0520094562647753e-001 + .double 2.1810040206310077e-001 + .double 9.2003918882681055e-014 + .double 1.6601562500000000e+000 + .double 6.0235294117647054e-001 + .double 2.2014896473842782e-001 + .double 3.4154972403284904e-014 + .double 1.6679687500000000e+000 + .double 5.9953161592505855e-001 + .double 2.2218790971328417e-001 + .double -1.0986783267263705e-013 + .double 1.6757812500000000e+000 + .double 5.9673659673659674e-001 + .double 2.2421732687280382e-001 + .double 7.0861752240218193e-014 + .double 1.6835937500000000e+000 + .double 5.9396751740139209e-001 + .double 2.2623730484883708e-001 + .double 4.4954903465882407e-014 + .double 1.6914062500000000e+000 + .double 5.9122401847575057e-001 + .double 2.2824793104155106e-001 + .double -3.5174398507191329e-014 + .double 1.6992187500000000e+000 + .double 5.8850574712643677e-001 + .double 2.3024929164284913e-001 + .double -6.1362153270813425e-014 + .double 1.7070312500000000e+000 + .double 5.8581235697940504e-001 + .double 2.3224147165865361e-001 + .double -8.1330061940444252e-014 + .double 1.7148437500000000e+000 + .double 5.8314350797266512e-001 + .double 2.3422455493027883e-001 + .double -7.0187452689804572e-015 + .double 1.7226562500000000e+000 + .double 5.8049886621315194e-001 + .double 2.3619862415603166e-001 + .double -4.2681425171142758e-014 + .double 1.7304687500000000e+000 + .double 5.7787810383747173e-001 + .double 2.3816376091122038e-001 + .double -3.8053714699203643e-016 + .double 1.7382812500000000e+000 + .double 5.7528089887640455e-001 + .double 2.4012004566907308e-001 + .double 8.9489021116470891e-015 + .double 1.7460937500000000e+000 + .double 5.7270693512304249e-001 + .double 2.4206755782006439e-001 + .double 2.2519806172357142e-014 + .double 1.7539062500000000e+000 // row 192 + .double 5.7015590200445432e-001 + .double 2.4400637569146966e-001 + .double 3.9563235136158044e-015 + .double 1.7617187500000000e+000 + .double 5.6762749445676275e-001 + .double 2.4593657656600953e-001 + .double 1.0143867756365332e-013 + .double 1.7695312500000000e+000 + .double 5.6512141280353201e-001 + .double 2.4785823670094942e-001 + .double 3.2893280777775821e-014 + .double 1.7773437500000000e+000 + .double 5.6263736263736264e-001 + .double 2.4977143134515245e-001 + .double 1.1039261888111886e-013 + .double 1.7851562500000000e+000 + .double 5.6017505470459517e-001 + .double 2.5167623475795153e-001 + .double 4.9127105345619272e-014 + .double 1.7929687500000000e+000 + .double 5.5773420479302838e-001 + .double 2.5357272022552024e-001 + .double -1.0856410515143064e-013 + .double 1.8007812500000000e+000 + .double 5.5531453362255967e-001 + .double 2.5546096007769847e-001 + .double 1.0010989541644841e-013 + .double 1.8085937500000000e+000 + .double 5.5291576673866094e-001 + .double 2.5734102570618234e-001 + .double -7.8761888363386944e-014 + .double 1.8164062500000000e+000 + .double 5.5053763440860215e-001 + .double 2.5921298757816658e-001 + .double -6.2215947878711509e-014 + .double 1.8242187500000000e+000 + .double 5.4817987152034264e-001 + .double 2.6107691525430710e-001 + .double -4.4494514587443047e-014 + .double 1.8320312500000000e+000 + .double 5.4584221748400852e-001 + .double 2.6293287740327287e-001 + .double -3.9165681136564615e-014 + .double 1.8398437500000000e+000 + .double 5.4352441613588109e-001 + .double 2.6478094181697998e-001 + .double 6.6636790574456454e-014 + .double 1.8476562500000000e+000 + .double 5.4122621564482032e-001 + .double 2.6662117542605301e-001 + .double -9.1008789262737551e-014 + .double 1.8554687500000000e+000 + .double 5.3894736842105262e-001 + .double 2.6845364431301277e-001 + .double 4.2357622160033502e-015 + .double 1.8632812500000000e+000 + .double 5.3668763102725370e-001 + .double 2.7027841372819239e-001 + .double 7.1968104770021779e-014 + .double 1.8710937500000000e+000 + .double 5.3444676409185798e-001 + .double 2.7209554810269765e-001 + .double 1.6009813385998178e-014 + .double 1.8789062500000000e+000 // row 224 + .double 5.3222453222453225e-001 + .double 2.7390511106204940e-001 + .double -6.7195756324252579e-014 + .double 1.8867187500000000e+000 + .double 5.3002070393374745e-001 + .double 2.7570716543959861e-001 + .double 6.3973609116559328e-014 + .double 1.8945312500000000e+000 + .double 5.2783505154639176e-001 + .double 2.7750177329039616e-001 + .double 1.7936160834199008e-014 + .double 1.9023437500000000e+000 + .double 5.2566735112936347e-001 + .double 2.7928899590278888e-001 + .double -4.1127874854455486e-015 + .double 1.9101562500000000e+000 + .double 5.2351738241308798e-001 + .double 2.8106889381183464e-001 + .double -6.3958157821339126e-014 + .double 1.9179687500000000e+000 + .double 5.2138492871690423e-001 + .double 2.8284152681112573e-001 + .double -6.8208667990807859e-015 + .double 1.9257812500000000e+000 + .double 5.1926977687626774e-001 + .double 2.8460695396529445e-001 + .double 8.6002826729130499e-014 + .double 1.9335937500000000e+000 + .double 5.1717171717171717e-001 + .double 2.8636523362160915e-001 + .double 1.1000856663210625e-013 + .double 1.9414062500000000e+000 + .double 5.1509054325955739e-001 + .double 2.8811642342157029e-001 + .double -8.7733969995519119e-014 + .double 1.9492187500000000e+000 + .double 5.1302605210420837e-001 + .double 2.8986058031159700e-001 + .double -5.6652309650791293e-014 + .double 1.9570312500000000e+000 + .double 5.1097804391217561e-001 + .double 2.9159776055530529e-001 + .double 9.0870189902074631e-014 + .double 1.9648437500000000e+000 + .double 5.0894632206759438e-001 + .double 2.9332801974396716e-001 + .double 1.1067434010922574e-013 + .double 1.9726562500000000e+000 + .double 5.0693069306930694e-001 + .double 2.9505141280674252e-001 + .double 6.9298327709917256e-014 + .double 1.9804687500000000e+000 + .double 5.0493096646942803e-001 + .double 2.9676799402159304e-001 + .double -1.0662609006456711e-013 + .double 1.9882812500000000e+000 + .double 5.0294695481335949e-001 + .double 2.9847781702483189e-001 + .double 7.7291935644916260e-014 + .double 1.9960937500000000e+000 + .double 5.0097847358121328e-001 + .double 3.0018093482294717e-001 + .double -8.3995153597100334e-014 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/logs.s b/private/fp32/tran/alpha/logs.s new file mode 100644 index 000000000..bd6c33d7a --- /dev/null +++ b/private/fp32/tran/alpha/logs.s @@ -0,0 +1,956 @@ +// TITLE("Alpha AXP Natural Logarithm") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// log.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format natural logarithm. +// +// Author: +// +// Martha Jaffe +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 7-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("Natural Log") + +//++ +// +// double +// log ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the natural log of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double log result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(log, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, t7 // save return address + + PROLOGUE_END + + stt f16, Temp(sp) + ldl t1, Temp + HighPart(sp) + lda t0, _log_table + ldah v0, 0x3fee(zero) + subl t1, v0, v0 + ldt f1, 0(t0) + ldah t2, 3(zero) + cmpult v0, t2, v0 + bne v0, 80f + + sra t1, 20, v0 + sra t1, 8, t2 + cpyse f1, f16, f10 + subl v0, 1, t4 + lda t5, 0x7fe(zero) + lda t3, 0xfe0(zero) + cmpult t4, t5, t4 + and t2, t3, t2 + beq t4, 10f + + lda t6, 0x3ff(zero) + subl v0, t6, t6 + br zero, 70f + +// +// abnormal x +// + +10: ldah t5, -0x8000(zero) + ldah t4, 0x7ff0(zero) + and t1, t5, t5 + and t1, t4, v0 + beq t5, 50f + + lda t6, logName + bne v0, 30f + + ldah v0, 0x800(zero) + ldt f10, Two53 + lda v0, 0x31(v0) + cpyse f10, f16, f0 + subt f0, f10, f0 + fbne f0, 20f + +// +// call exception dispatcher log(zero) +// + + stt f16, ExRec + ErArg0(sp) + stl t6, ExRec + ErName(sp) + stl v0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// call exception dispatcher log(neg) +// + +20: ldah ra, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + lda t6, logName + stl t6, ExRec + ErName(sp) + lda ra, 0x30(ra) + stl ra, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// check for nan +// + +30: stt f16, Temp(sp) + ldl ra, Temp(sp) + ldah v0, 0x10(zero) + ldl t2, Temp + HighPart(sp) + lda v0, -1(v0) + and t2, v0, v0 + bis v0, ra, v0 + and t2, t4, t6 + cmpult zero, v0, v0 + cmpeq t6, t4, t4 + beq t4, 40f + + and t4, v0, t4 + bne t4, retarg + +// +// call exception dispatcher log(neg) +// + +40: ldah ra, 0x800(zero) + stt f16, ExRec + ErArg0(sp) + lda t6, logName + stl t6, ExRec + ErName(sp) + lda ra, 0x30(ra) + stl ra, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f16, 0(v0) + + +retarg: cpys f16, f16, f0 + br zero, done + + +// +// check for denorm +// + +50: ldah t4, 0x7ff0(zero) + and t1, t4, t1 + bne t1, retarg + + ldah t2, 0x800(zero) + ldt f10, Two53 + lda t2, 0x31(t2) + lda ra, logName + cpyse f10, f16, f0 + lda v0, ExRec(sp) + subt f0, f10, f11 + fbne f11, 60f + +// +// call exception dispatcher log(zero) +// + + stt f16, ExRec + ErArg0(sp) + stl t2, ExRec + ErErr(sp) + stl ra, ExRec + ErName(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// fix up denorms +// + +60: stt f11, Temp(sp) + cpyse f1, f11, f10 + ldl t1, Temp + HighPart(sp) + lda t2, 0x832(zero) + sra t1, 8, t5 + sra t1, 20, t1 + and t5, t3, t3 + subl t1, t2, t6 + mov t3, t2 + +// +// rejoin normal path +// + +70: addl t0, t2, t2 + ldt f1, 0x98(t0) // load away from 1 coefs + ldt f16, 0xd8(t2) // LOG_F_TABLE_TWOP + ldt f0, 0xe0(t2) + stq t6, Temp(sp) + subt f10, f16, f10 + ldt f16, Temp(sp) + ldt f12, 0x90(t0) + ldt f15, 0x88(t0) // POLY_ADDRESS_TWOP_AWAY + cvtqt f16, f16 + ldt f17, 0xa0(t0) + mult f10, f0, f0 + ldt f10, 0xa8(t0) + mult f0, f0, f11 + mult f1, f0, f1 + mult f10, f0, f10 + mult f11, f11, f13 + mult f11, f0, f14 + addt f12, f1, f1 + ldt f12, 0xd0(t0) // LOG2_LO_TWOP + mult f11, f15, f11 + addt f17, f10, f10 + ldt f15, 0xf0(t2) + ldt f17, 0xe8(t2) + mult f12, f16, f12 + mult f13, f0, f13 + mult f14, f1, f1 + ldt f14, 0xc8(t0) // LOG2_HI_TWOP + addt f12, f15, f12 + mult f13, f10, f10 + addt f11, f1, f1 + mult f16, f14, f14 + addt f12, f0, f0 + addt f1, f10, f1 + addt f14, f17, f14 + addt f0, f1, f0 + addt f0, f14, f0 + br zero, done + +// +// near one case +// + +80: subt f16, f1, f1 + ldt f10, 0x18(t0) // load near 1 poly coefs + ldt f14, 0x28(t0) + ldt f21, 0x20(t0) + ldt f16, Two29 + ldt f19, 0x38(t0) // LOG2_LO_ONEP + mult f1, f1, f15 + mult f1, f10, f10 + mult f1, f14, f14 + cpys f1, f16, f18 + ldt f16, 0x10(t0) + cpys f1, f1, f20 + mult f1, f19, f19 + mult f15, f1, f13 + mult f15, f15, f11 + addt f10, f16, f10 + addt f14, f21, f14 + ldt f16, 0x30(t0) // LOG2_HI_ONEP + ldt f21, 0x48(t0) + addt f20, f18, f20 + mult f15, f13, f17 + mult f11, f1, f12 + mult f13, f11, f0 + mult f11, f15, f15 + mult f13, f10, f10 + ldt f13, 0x58(t0) + addt f19, f16, f16 + ldt f19, 0x40(t0) // LOG_F_TABLE_ONEP + mult f1, f21, f21 + mult f17, f14, f14 + mult f12, f11, f12 + ldt f17, 0x50(t0) + mult f15, f1, f15 + mult f1, f13, f13 + subt f20, f18, f18 + mult f0, f16, f0 + addt f21, f19, f19 + ldt f21, Half + addt f10, f14, f10 + mult f15, f11, f11 + addt f13, f17, f13 + subt f1, f18, f20 + addt f1, f18, f16 + mult f12, f19, f12 + addt f10, f0, f0 + mult f18, f18, f18 + mult f11, f13, f11 + mult f16, f20, f16 + addt f0, f12, f0 + mult f18, f21, f18 + mult f16, f21, f16 + addt f0, f11, f0 + subt f1, f18, f1 + subt f0, f16, f0 + addt f0, f1, f0 + +// +// Return with result in f0. +// + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (t7) // return through saved ra in t7 + + .end log + + .align 3 + .rdata + +// +// Define floating point constants. +// + +Half: .double 0.5 + +One: .double 1.0 + +Two29: .double 536870912.0 // 2^29 + +Two53: .double 9007199254740992.0 // 2^53 + +// +// Function name for dpml_exception. +// + +logName: + .ascii "log\0" + + +// +// log data table +// + + .align 3 + +_log_table: + + // 1.0 in working precision + + .double 1.0000000000000000e+000 + + // poly coeffs near 1 + + .double -5.0000000000000000e-001 + .double 3.3333333333333581e-001 + .double -2.5000000000000555e-001 + .double 1.9999999999257809e-001 + .double -1.6666666665510016e-001 + .double 1.4285715095862653e-001 + .double -1.2500001025849336e-001 + .double 1.1110711557933650e-001 + .double -9.9995589399147614e-002 + .double 9.1816350893696136e-002 + .double -8.4241019625172817e-002 + + // poly coeffs quotient, near 1 + + .double 8.3333333333333953e-002 + .double 1.2499999999536091e-002 + .double 2.2321429837356640e-003 + .double 4.3401216971065997e-004 + .double 8.9664418510783172e-005 + + // poly coeffs away from 1 + + .double -5.0000000000000000e-001 + .double 3.3333333331462339e-001 + .double -2.4999999997583292e-001 + .double 2.0000326978572527e-001 + .double -1.6666993645814179e-001 + + // poly coeffs quotient, away from 1 + + .double 8.3333333333334911e-002 + .double 1.2499999967659360e-002 + .double 2.2323547997135616e-003 + + // log of 2 in hi and lo parts + + .double 6.9314718055989033e-001 + .double 5.4979230187083712e-014 + + // Table of F, 1/F, and hi and lo log of F; (128 * 4 entries) + + .double 1.0039062500000000e+000 + .double 9.9610894941634243e-001 + .double 3.8986404156275967e-003 + .double 2.9726346900928951e-014 + .double 1.0117187500000000e+000 + .double 9.8841698841698844e-001 + .double 1.1650617220084314e-002 + .double -1.0903974971735932e-013 + .double 1.0195312500000000e+000 + .double 9.8084291187739459e-001 + .double 1.9342962843211353e-002 + .double -8.0418538505225864e-014 + .double 1.0273437500000000e+000 + .double 9.7338403041825095e-001 + .double 2.6976587698300136e-002 + .double -9.8060505168431766e-014 + .double 1.0351562500000000e+000 + .double 9.6603773584905661e-001 + .double 3.4552381506728125e-002 + .double -6.8391397423287774e-014 + .double 1.0429687500000000e+000 + .double 9.5880149812734083e-001 + .double 4.2071213920735318e-002 + .double -4.8263140005511282e-014 + .double 1.0507812500000000e+000 + .double 9.5167286245353155e-001 + .double 4.9533935122326511e-002 + .double -4.9880309107981426e-014 + .double 1.0585937500000000e+000 + .double 9.4464944649446492e-001 + .double 5.6941376400118315e-002 + .double 2.0109399435564958e-014 + .double 1.0664062500000000e+000 + .double 9.3772893772893773e-001 + .double 6.4294350705495162e-002 + .double -9.7905185119902161e-014 + .double 1.0742187500000000e+000 + .double 9.3090909090909091e-001 + .double 7.1593653186937445e-002 + .double 7.1373082253431780e-014 + .double 1.0820312500000000e+000 + .double 9.2418772563176899e-001 + .double 7.8840061707751374e-002 + .double 2.4650189061766119e-014 + .double 1.0898437500000000e+000 + .double 9.1756272401433692e-001 + .double 8.6034337341743594e-002 + .double 5.9559229876256426e-014 + .double 1.0976562500000000e+000 + .double 9.1103202846975084e-001 + .double 9.3177224854116503e-002 + .double 6.6787085171628983e-014 + .double 1.1054687500000000e+000 + .double 9.0459363957597172e-001 + .double 1.0026945316371894e-001 + .double -4.3786376170783979e-014 + .double 1.1132812500000000e+000 + .double 8.9824561403508774e-001 + .double 1.0731173578915332e-001 + .double -6.5266788027310712e-014 + .double 1.1210937500000000e+000 + .double 8.9198606271777003e-001 + .double 1.1430477128010352e-001 + .double -4.4889533522386993e-014 + .double 1.1289062500000000e+000 + .double 8.8581314878892736e-001 + .double 1.2124924363297396e-001 + .double -1.0427241278273008e-013 + .double 1.1367187500000000e+000 + .double 8.7972508591065290e-001 + .double 1.2814582269197672e-001 + .double -4.6680314039457961e-014 + .double 1.1445312500000000e+000 + .double 8.7372013651877134e-001 + .double 1.3499516453748583e-001 + .double 1.8996158041578768e-014 + .double 1.1523437500000000e+000 + .double 8.6779661016949150e-001 + .double 1.4179791186029433e-001 + .double -3.6984595066970968e-014 + .double 1.1601562500000000e+000 + .double 8.6195286195286192e-001 + .double 1.4855469432313839e-001 + .double -1.2491548980751600e-015 + .double 1.1679687500000000e+000 + .double 8.5618729096989965e-001 + .double 1.5526612891108016e-001 + .double 4.3792508292406054e-014 + .double 1.1757812500000000e+000 + .double 8.5049833887043191e-001 + .double 1.6193282026938505e-001 + .double -7.1793900192956773e-014 + .double 1.1835937500000000e+000 + .double 8.4488448844884489e-001 + .double 1.6855536102980295e-001 + .double 3.7143977541704719e-015 + .double 1.1914062500000000e+000 + .double 8.3934426229508197e-001 + .double 1.7513433212775453e-001 + .double 9.4615165806650815e-014 + .double 1.1992187500000000e+000 + .double 8.3387622149837137e-001 + .double 1.8167030310769405e-001 + .double -5.9375063333847015e-014 + .double 1.2070312500000000e+000 + .double 8.2847896440129454e-001 + .double 1.8816383241824042e-001 + .double -5.7430783932007560e-014 + .double 1.2148437500000000e+000 + .double 8.2315112540192925e-001 + .double 1.9461546769957749e-001 + .double 9.4165381457182504e-014 + .double 1.2226562500000000e+000 + .double 8.1789137380191690e-001 + .double 2.0102574606062262e-001 + .double -3.1881849375437737e-014 + .double 1.2304687500000000e+000 + .double 8.1269841269841270e-001 + .double 2.0739519434596332e-001 + .double 1.0726867577289733e-013 + .double 1.2382812500000000e+000 + .double 8.0757097791798105e-001 + .double 2.1372432939779173e-001 + .double -7.3595801864405143e-014 + .double 1.2460937500000000e+000 + .double 8.0250783699059558e-001 + .double 2.2001365830533359e-001 + .double -5.1496672341414078e-014 + .double 1.2539062500000000e+000 + .double 7.9750778816199375e-001 + .double 2.2626367865041175e-001 + .double 4.1641267302872263e-014 + .double 1.2617187500000000e+000 + .double 7.9256965944272451e-001 + .double 2.3247487874300532e-001 + .double 8.8745072979746316e-014 + .double 1.2695312500000000e+000 + .double 7.8769230769230769e-001 + .double 2.3864773785021498e-001 + .double -3.9970509095301341e-014 + .double 1.2773437500000000e+000 + .double 7.8287461773700306e-001 + .double 2.4478272641772492e-001 + .double -3.3999811083618331e-014 + .double 1.2851562500000000e+000 + .double 7.7811550151975684e-001 + .double 2.5088030628580782e-001 + .double 1.5973663463624904e-015 + .double 1.2929687500000000e+000 + .double 7.7341389728096677e-001 + .double 2.5694093089759917e-001 + .double -9.8748030159663917e-014 + .double 1.3007812500000000e+000 + .double 7.6876876876876876e-001 + .double 2.6296504550077771e-001 + .double 1.0364636459896663e-013 + .double 1.3085937500000000e+000 + .double 7.6417910447761195e-001 + .double 2.6895308734560786e-001 + .double -1.0389630784002988e-013 + .double 1.3164062500000000e+000 + .double 7.5964391691394662e-001 + .double 2.7490548587275043e-001 + .double 4.8816703646769986e-014 + .double 1.3242187500000000e+000 + .double 7.5516224188790559e-001 + .double 2.8082266290084590e-001 + .double 4.1886091378637011e-014 + .double 1.3320312500000000e+000 + .double 7.5073313782991202e-001 + .double 2.8670503280386583e-001 + .double 8.8481096040068212e-014 + .double 1.3398437500000000e+000 + .double 7.4635568513119532e-001 + .double 2.9255300268641804e-001 + .double -4.0599978860151284e-014 + .double 1.3476562500000000e+000 + .double 7.4202898550724639e-001 + .double 2.9836697255177569e-001 + .double 2.1592693741973491e-014 + .double 1.3554687500000000e+000 + .double 7.3775216138328525e-001 + .double 3.0414733546740536e-001 + .double -1.0863828679707913e-013 + .double 1.3632812500000000e+000 + .double 7.3352435530085958e-001 + .double 3.0989447772276435e-001 + .double 1.0033796982039214e-013 + .double 1.3710937500000000e+000 + .double 7.2934472934472938e-001 + .double 3.1560877898641593e-001 + .double -1.1259274624680829e-013 + .double 1.3789062500000000e+000 + .double 7.2521246458923516e-001 + .double 3.2129061245382218e-001 + .double -8.7885427699715446e-014 + .double 1.3867187500000000e+000 + .double 7.2112676056338032e-001 + .double 3.2694034499581903e-001 + .double 3.4288400126669462e-014 + .double 1.3945312500000000e+000 + .double 7.1708683473389356e-001 + .double 3.3255833730004269e-001 + .double 3.3906861336722287e-014 + .double 1.4023437500000000e+000 + .double 7.1309192200557103e-001 + .double 3.3814494400871808e-001 + .double -1.6869501228130390e-015 + .double 1.4101562500000000e+000 + .double 7.0914127423822715e-001 + .double 3.4370051385326406e-001 + .double 5.4388883298990648e-014 + .double 1.4179687500000000e+000 + .double 7.0523415977961434e-001 + .double 3.4922538978526063e-001 + .double 2.7672711265736626e-014 + .double 1.4257812500000000e+000 + .double 7.0136986301369864e-001 + .double 3.5471990910286877e-001 + .double 6.0259386391812782e-014 + .double 1.4335937500000000e+000 + .double 6.9754768392370570e-001 + .double 3.6018440357497639e-001 + .double 3.1410128435793507e-014 + .double 1.4414062500000000e+000 + .double 6.9376693766937669e-001 + .double 3.6561919956102429e-001 + .double -5.9577094649293112e-014 + .double 1.4492187500000000e+000 + .double 6.9002695417789761e-001 + .double 3.7102461812787624e-001 + .double -3.5739377400104385e-015 + .double 1.4570312500000000e+000 + .double 6.8632707774798929e-001 + .double 3.7640097516418791e-001 + .double 6.5153983564591272e-014 + .double 1.4648437500000000e+000 + .double 6.8266666666666664e-001 + .double 3.8174858149091051e-001 + .double -6.2170323645733908e-014 + .double 1.4726562500000000e+000 + .double 6.7904509283819625e-001 + .double 3.8706774296838375e-001 + .double 6.4533411753084866e-014 + .double 1.4804687500000000e+000 + .double 6.7546174142480209e-001 + .double 3.9235876060297414e-001 + .double -1.1027121477530621e-013 + .double 1.4882812500000000e+000 + .double 6.7191601049868765e-001 + .double 3.9762193064711937e-001 + .double 1.9118699266850969e-014 + .double 1.4960937500000000e+000 + .double 6.6840731070496084e-001 + .double 4.0285754470119173e-001 + .double -1.0821299887954718e-013 + .double 1.5039062500000000e+000 + .double 6.6493506493506493e-001 + .double 4.0806588980831293e-001 + .double -9.1183133506522949e-014 + .double 1.5117187500000000e+000 + .double 6.6149870801033595e-001 + .double 4.1324724855030581e-001 + .double -8.6481461319862886e-014 + .double 1.5195312500000000e+000 + .double 6.5809768637532129e-001 + .double 4.1840189913887116e-001 + .double 1.2659153984938316e-014 + .double 1.5273437500000000e+000 + .double 6.5473145780051156e-001 + .double 4.2353011550585506e-001 + .double -5.1769120694201545e-014 + .double 1.5351562500000000e+000 + .double 6.5139949109414763e-001 + .double 4.2863216738965093e-001 + .double 4.7829207034065312e-014 + .double 1.5429687500000000e+000 + .double 6.4810126582278482e-001 + .double 4.3370832042160146e-001 + .double -4.2063037733589860e-014 + .double 1.5507812500000000e+000 + .double 6.4483627204030225e-001 + .double 4.3875883620762579e-001 + .double 2.1468971783400094e-015 + .double 1.5585937500000000e+000 + .double 6.4160401002506262e-001 + .double 4.4378397241030143e-001 + .double -4.4932834403337654e-016 + .double 1.5664062500000000e+000 + .double 6.3840399002493764e-001 + .double 4.4878398282708076e-001 + .double -7.4052432293450566e-014 + .double 1.5742187500000000e+000 + .double 6.3523573200992556e-001 + .double 4.5375911746714337e-001 + .double -2.2862495308664916e-014 + .double 1.5820312500000000e+000 + .double 6.3209876543209875e-001 + .double 4.5870962262688408e-001 + .double 9.2581114645991212e-014 + .double 1.5898437500000000e+000 + .double 6.2899262899262898e-001 + .double 4.6363574096312732e-001 + .double -9.4805444680453647e-014 + .double 1.5976562500000000e+000 + .double 6.2591687041564792e-001 + .double 4.6853771156315815e-001 + .double 8.1115771640052352e-014 + .double 1.6054687500000000e+000 + .double 6.2287104622871048e-001 + .double 4.7341577001657242e-001 + .double 9.9707744046996850e-014 + .double 1.6132812500000000e+000 + .double 6.1985472154963683e-001 + .double 4.7827014848144245e-001 + .double 2.7832864616306362e-014 + .double 1.6210937500000000e+000 + .double 6.1686746987951813e-001 + .double 4.8310107575116490e-001 + .double -2.9076236446386640e-014 + .double 1.6289062500000000e+000 + .double 6.1390887290167862e-001 + .double 4.8790877731926230e-001 + .double -2.3325742005188250e-014 + .double 1.6367187500000000e+000 + .double 6.1097852028639621e-001 + .double 4.9269347544259290e-001 + .double -1.7642921490304046e-014 + .double 1.6445312500000000e+000 + .double 6.0807600950118768e-001 + .double 4.9745538920274157e-001 + .double 7.7370898042138569e-014 + .double 1.6523437500000000e+000 + .double 6.0520094562647753e-001 + .double 5.0219473456672858e-001 + .double -1.3090194780543625e-014 + .double 1.6601562500000000e+000 + .double 6.0235294117647054e-001 + .double 5.0691172444476251e-001 + .double 9.1841537361323107e-014 + .double 1.6679687500000000e+000 + .double 5.9953161592505855e-001 + .double 5.1160656874913002e-001 + .double -6.7941049953303914e-014 + .double 1.6757812500000000e+000 + .double 5.9673659673659674e-001 + .double 5.1627947444853817e-001 + .double -8.3670880082996502e-014 + .double 1.6835937500000000e+000 + .double 5.9396751740139209e-001 + .double 5.2093064562427571e-001 + .double -9.0399770141535103e-014 + .double 1.6914062500000000e+000 + .double 5.9122401847575057e-001 + .double 5.2556028352296380e-001 + .double -3.6428968707830412e-014 + .double 1.6992187500000000e+000 + .double 5.8850574712643677e-001 + .double 5.3016858660907928e-001 + .double 4.2333597202652293e-014 + .double 1.7070312500000000e+000 + .double 5.8581235697940504e-001 + .double 5.3475575061611380e-001 + .double -8.6125310374957207e-014 + .double 1.7148437500000000e+000 + .double 5.8314350797266512e-001 + .double 5.3932196859568649e-001 + .double -7.7610404204187166e-014 + .double 1.7226562500000000e+000 + .double 5.8049886621315194e-001 + .double 5.4386743096733881e-001 + .double -5.5287539987057404e-014 + .double 1.7304687500000000e+000 + .double 5.7787810383747173e-001 + .double 5.4839232556560091e-001 + .double -2.7750502668562431e-014 + .double 1.7382812500000000e+000 + .double 5.7528089887640455e-001 + .double 5.5289683768660325e-001 + .double 7.4488995702366880e-014 + .double 1.7460937500000000e+000 + .double 5.7270693512304249e-001 + .double 5.5738115013400602e-001 + .double 3.3666963248598655e-016 + .double 1.7539062500000000e+000 + .double 5.7015590200445432e-001 + .double 5.6184544326265495e-001 + .double 3.6864628681746405e-014 + .double 1.7617187500000000e+000 + .double 5.6762749445676275e-001 + .double 5.6628989502314653e-001 + .double -3.0655228485481327e-014 + .double 1.7695312500000000e+000 + .double 5.6512141280353201e-001 + .double 5.7071468100343736e-001 + .double 3.4181893084806535e-014 + .double 1.7773437500000000e+000 + .double 5.6263736263736264e-001 + .double 5.7511997447136309e-001 + .double 2.4846950587975989e-014 + .double 1.7851562500000000e+000 + .double 5.6017505470459517e-001 + .double 5.7950594641465614e-001 + .double -1.3912911733001039e-014 + .double 1.7929687500000000e+000 + .double 5.5773420479302838e-001 + .double 5.8387276558096346e-001 + .double 1.9219300209816174e-014 + .double 1.8007812500000000e+000 + .double 5.5531453362255967e-001 + .double 5.8822059851718222e-001 + .double -9.6181860936898864e-014 + .double 1.8085937500000000e+000 + .double 5.5291576673866094e-001 + .double 5.9254960960674907e-001 + .double -7.7473812531053051e-014 + .double 1.8164062500000000e+000 + .double 5.5053763440860215e-001 + .double 5.9685996110783890e-001 + .double -4.5062309859097483e-014 + .double 1.8242187500000000e+000 + .double 5.4817987152034264e-001 + .double 6.0115181318928990e-001 + .double 4.4939791960264390e-014 + .double 1.8320312500000000e+000 + .double 5.4584221748400852e-001 + .double 6.0542532396675597e-001 + .double -3.9078848156752539e-014 + .double 1.8398437500000000e+000 + .double 5.4352441613588109e-001 + .double 6.0968064953681278e-001 + .double 4.2493638957603774e-014 + .double 1.8476562500000000e+000 + .double 5.4122621564482032e-001 + .double 6.1391794401242805e-001 + .double -5.7559595156051101e-014 + .double 1.8554687500000000e+000 + .double 5.3894736842105262e-001 + .double 6.1813735955502125e-001 + .double 5.7485347680567445e-014 + .double 1.8632812500000000e+000 + .double 5.3668763102725370e-001 + .double 6.2233904640879700e-001 + .double -1.8261498866916553e-014 + .double 1.8710937500000000e+000 + .double 5.3444676409185798e-001 + .double 6.2652315293144056e-001 + .double -8.7803627974403551e-014 + .double 1.8789062500000000e+000 + .double 5.3222453222453225e-001 + .double 6.3068982562617748e-001 + .double 2.1224639414045291e-014 + .double 1.8867187500000000e+000 + .double 5.3002070393374745e-001 + .double 6.3483920917292380e-001 + .double 8.6410153425250818e-014 + .double 1.8945312500000000e+000 + .double 5.2783505154639176e-001 + .double 6.3897144645784465e-001 + .double 7.6071821668420202e-014 + .double 1.9023437500000000e+000 + .double 5.2566735112936347e-001 + .double 6.4308667860314017e-001 + .double -1.1285622521565641e-013 + .double 1.9101562500000000e+000 + .double 5.2351738241308798e-001 + .double 6.4718504499523988e-001 + .double 6.9672514647224776e-014 + .double 1.9179687500000000e+000 + .double 5.2138492871690423e-001 + .double 6.5126668331504334e-001 + .double -8.5234246813161544e-014 + .double 1.9257812500000000e+000 + .double 5.1926977687626774e-001 + .double 6.5533172956315866e-001 + .double -3.1028217233522746e-014 + .double 1.9335937500000000e+000 + .double 5.1717171717171717e-001 + .double 6.5938031808923370e-001 + .double -1.0587069463342906e-013 + .double 1.9414062500000000e+000 + .double 5.1509054325955739e-001 + .double 6.6341258161696715e-001 + .double 9.9105859809946792e-014 + .double 1.9492187500000000e+000 + .double 5.1302605210420837e-001 + .double 6.6742865127184814e-001 + .double 1.0805094338364667e-013 + .double 1.9570312500000000e+000 + .double 5.1097804391217561e-001 + .double 6.7142865660525786e-001 + .double 4.4466890378487691e-014 + .double 1.9648437500000000e+000 + .double 5.0894632206759438e-001 + .double 6.7541272562016275e-001 + .double 1.3985026783782165e-014 + .double 1.9726562500000000e+000 + .double 5.0693069306930694e-001 + .double 6.7938098479589826e-001 + .double -1.0090714198118343e-013 + .double 1.9804687500000000e+000 + .double 5.0493096646942803e-001 + .double 6.8333355911158833e-001 + .double 3.2359204011502443e-014 + .double 1.9882812500000000e+000 + .double 5.0294695481335949e-001 + .double 6.8727057207092912e-001 + .double 3.1147551503113092e-014 + .double 1.9960937500000000e+000 + .double 5.0097847358121328e-001 + .double 6.9119214572424426e-001 + .double -1.0229682936814195e-013 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/modfs.s b/private/fp32/tran/alpha/modfs.s new file mode 100644 index 000000000..3fb7eb972 --- /dev/null +++ b/private/fp32/tran/alpha/modfs.s @@ -0,0 +1,145 @@ +// TITLE("Alpha AXP modf") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// modf.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format modf. +// +// Author: +// +// Bill Gray +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 15-Apr-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + +// +// Define stack frame. +// + + .struct 0 +Temp: .space 8 // save argument + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("modf") + +//++ +// +// double +// modf ( +// IN double x, +// IN double *int_part +// ) +// +// Routine Description: +// +// This function returns the modf of the given double argument. +// Modf(x,*i) splits x into integral and fractional parts, +// each with the same sign as x. It stores the integral +// part at *i (i.e. *i = trunc(x)), and returns the fractional +// part (i.e. x - trunc(x)). +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// *i (a1) - Supplies the int_part pointer. +// +// Return Value: +// +// The double modf result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(modf, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + + PROLOGUE_END + + ldah t0, 0x7ff0(zero) // exp mask + ldt f0, two_to_52 // get big + ldah t1, 0x10(zero) // one in exp field + stt f16, Temp(sp) + ldl v0, Temp + HighPart(sp) + cpys f16, f0, f1 // fix sign of big + ldah t2, 0x4320(zero) // cutoff value + cpys f16, f16, f0 + and v0, t0, t0 + subl t0, t1, t0 + cmpult t0, t2, t0 + beq t0, quick_out + +// Add big, sub big to trunc to int + + addtc f16, f1, f10 + subt f10, f1, f1 + stt f1, 0(a1) + subt f0, f1, f0 + br zero, done + +quick_out: + ldah t1, 0x7ff0(zero) + and v0, t1, v0 + bne v0, exp_not_zero + stt f31, 0(a1) + br zero, ret_arg + +exp_not_zero: + stt f16, Temp(sp) + ldl t0, Temp + HighPart(sp) + stt f16, 0(a1) + ldl t2, Temp(sp) + ldah a1, 0x10(zero) + lda a1, -1(a1) + and t0, t1, v0 + and t0, a1, t0 + bis t0, t2, t0 + cmpult zero, t0, t0 + cmpeq v0, t1, v0 + beq v0, ret_zero + and v0, t0, v0 + +ret_zero: + bne v0, ret_arg + cpys f31, f31, f16 + +ret_arg: + cpys f16, f16, f0 + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) + + .end modf + + .align 3 + .rdata + +two_to_52: + .quad 0x4330000000000000 // 2^52 (4503599627370496.0) diff --git a/private/fp32/tran/alpha/pows.s b/private/fp32/tran/alpha/pows.s new file mode 100644 index 000000000..cddc7cf00 --- /dev/null +++ b/private/fp32/tran/alpha/pows.s @@ -0,0 +1,1743 @@ +// TITLE("Alpha AXP Power function") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// pow.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format power. +// +// Author: +// +// Martha Jaffe +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 17-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp0: .space 8 // +Temp1: .space 8 // +ExRec: .space DpmlExceptionLength // exception record + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Constants for masks and for fetching coefficients +// + +#define P0 0x40 +#define P1 0x48 +#define P2 0x50 +#define P3 0x58 +#define P4 0x60 +#define P5 0x68 +#define P6 0x70 +#define P7 0x78 +#define P8 0x80 +#define P9 0x88 +#define P10 0x90 +#define P11 0x98 +#define P12 0xa0 + +// +// Error codes +// + +#define POS_OVERFLOW 0x3d +#define NEG_OVERFLOW 0x3e +#define UNDERFLOW 0x3f +#define NEG_BASE 0x40 +#define ZERO_TO_NEG 0x41 + +#define ONE_TO_INF 0x43 +#define NEG_ZERO_TO_NEG 0x44 + +#define POS_INF_TO_POS 0x46 +#define NEG_INF_TO_POS 0x47 +#define NEG_INF_TO_POS_ODD 0x48 +#define FINITE_TO_INF 0x49 +#define INF_TO_NEG 0x4a +#define SMALL_TO_INF 0x4b + + + SBTTL("Power") + +//++ +// +// double +// pow ( +// IN double x +// IN double y +// ) +// +// Routine Description: +// +// This function returns the value of x to the power y. +// +// Arguments: +// +// x (f16) - Supplies the base argument value. +// +// y (f17) - Supplies the exponent argument value. +// +// Return Value: +// +// The double result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(pow, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, a1 // save return address + + PROLOGUE_END + + cpys f16, f16, f1 + ldah t0, 0x10(zero) + cpys f17, f17, f10 + + stt f16, Temp0(sp) + ldl v0, Temp0 + HighPart(sp) + ldah t1, 0x7fe0(zero) + mov zero, t2 + subl v0, t0, t0 + cmpult t0, t1, t0 + ldah t1, 0x7ff0(zero) + bne t0, x_is_ok + + stt f16, Temp0(sp) + ldl t0, Temp0 + HighPart(sp) + zapnot t0, 0xf, t3 + and t0, t1, t4 + srl t3, 31, t3 + and t3, 1, t3 + beq t4, LL00a0 + + cmpult t4, t1, t4 + beq t4, LL0068 + + addl t3, 4, t5 + br zero, examine_y + +// +// Look at low part of x and then look at y +// + +LL0068: ldah t6, 0x10(zero) + ldl t4, Temp0(sp) + lda t6, -1(t6) + and t0, t6, t6 + stl t6, Temp0 + HighPart(sp) + bis t6, t4, t4 + srl t6, 19, t6 + beq t4, LL0098 + + and t6, 1, t6 + mov t6, t5 + br zero, examine_y + +LL0098: addl t3, 2, t5 + br zero, examine_y + +LL00a0: ldl t7, Temp0(sp) + ldah t4, 0x10(zero) + lda t4, -1(t4) + and t0, t4, t0 + bis t0, t7, t7 + stl t0, Temp0 + HighPart(sp) + mov 6, t6 + cmoveq t7, 8, t6 + addl t3, t6, t5 + +// +// Examine y for exceptional cases +// + +examine_y: + stt f17, Temp0(sp) + ldl t0, Temp0 + HighPart(sp) + zapnot t0, 0xf, t4 + and t0, t1, t3 + srl t4, 31, t4 + and t4, 1, t4 + beq t3, LL0128 + + cmpult t3, t1, t3 + beq t3, LL00f0 + + addl t4, 4, t6 + br zero, LL014c + + +LL00f0: ldah t3, 0x10(zero) + ldl t7, Temp0(sp) + lda t3, -1(t3) + and t0, t3, t3 + stl t3, Temp0 + HighPart(sp) + bis t3, t7, t7 + srl t3, 19, t3 + beq t7, LL0120 + + and t3, 1, t3 + mov t3, t6 + br zero, LL014c + +LL0120: addl t4, 2, t6 + br zero, LL014c + +LL0128: ldl a0, Temp0(sp) + ldah t7, 0x10(zero) + lda t7, -1(t7) + and t0, t7, t0 + bis t0, a0, a0 + stl t0, Temp0 + HighPart(sp) + mov 6, t3 + cmoveq a0, 8, t3 + addl t4, t3, t6 + +// +// Remove easy exceptional cases - y = 0, x or y NaN, inf to inf +// + +LL014c: xor t6, 8, a0 + xor t6, 9, t0 + beq a0, return_1 + + beq t0, return_1 + + xor t5, 1, t7 + beq t5, return_x + + xor t6, 1, t3 + beq t7, return_x + + xor t5, 2, t4 + beq t6, inf_to_inf + + mov POS_INF_TO_POS, t0 + beq t3, inf_to_inf + + bne t4, LL01d0 + + xor t6, 2, a0 + bne a0, LL0190 + + br zero, except_disp + +// +// Continue removing exceptional cases +// + +LL0190: xor t6, 3, t7 + bne t7, LL01a0 + + mov INF_TO_NEG, t0 + br zero, except_disp + +LL01a0: xor t6, 4, t3 + beq t3, LL01c8 + + xor t6, 6, t4 + beq t4, LL01c8 + + xor t6, 5, a0 + beq a0, LL01c0 + + xor t6, 7, t7 + bne t7, LL01d0 + + +LL01c0: mov INF_TO_NEG, t0 + br zero, except_disp + + +LL01c8: mov POS_INF_TO_POS, t0 + br zero, except_disp + +LL01d0: xor t5, 3, t3 + bne t3, LL02a8 + + xor t6, 2, t4 + bne t4, LL01e8 + + mov NEG_INF_TO_POS, t0 + br zero, except_disp + + +LL01e8: xor t6, 3, a0 + bne a0, LL01f8 + + mov INF_TO_NEG, t0 + br zero, except_disp + +LL01f8: xor t6, 5, t7 + xor t6, 7, v0 + beq t7, LL02a0 + + mov NEG_INF_TO_POS, t0 + beq v0, LL02a0 + + xor t6, 6, t6 + bne t6, LL0218 + + br zero, except_disp + +// +// x is negative, check if y is integral +// + +LL0218: cpys f31, f17, f0 + mov zero, t2 + ldt f11, Two52 + cmptle f0, f11, f12 + fbeq f12, LL0240 + addt f0, f11, f13 + subt f13, f11, f11 + cmpteq f0, f11, f11 + fbeq f11, LL0244 + +LL0240: mov 1, t2 + +// +// Remove non-integral y, then look for y even +// + +LL0244: beq t2, LL0298 + + mov zero, t3 + ldt f12, Two53 + mov NEG_INF_TO_POS_ODD, t4 + cmoveq t3, NEG_INF_TO_POS, t4 + cmptle f12, f0, f13 + mov t4, t0 + fbeq f13, LL0270 + br zero, except_disp + +LL0270: addt f0, f12, f11 + mov zero, t3 + subt f11, f12, f11 + cmpteq f11, f0, f0 + fbne f0, LL0288 + mov 1, t3 + +// +// Remove minus infinity to non-integral powers, error cases +// + +LL0288: mov NEG_INF_TO_POS_ODD, t4 + cmoveq t3, NEG_INF_TO_POS, t4 + mov t4, t0 + br zero, except_disp + +LL0298: mov NEG_INF_TO_POS, t0 + br zero, except_disp + +LL02a0: mov INF_TO_NEG, t0 + br zero, except_disp + +LL02a8: xor t5, 8, a0 + bne a0, LL02f0 + + xor t6, 2, t7 + bne t7, LL02c0 + + cpys f31, f31, f0 + br zero, done + +// +// Remove zero to negative, error cases +// + +LL02c0: xor t6, 3, t3 + bne t3, LL02d0 + + mov ZERO_TO_NEG, t0 + br zero, except_disp + +LL02d0: xor t6, 4, t4 + xor t6, 6, t6 + beq t4, LL02e8 + + mov ZERO_TO_NEG, t0 + beq t6, LL02e8 + + br zero, except_disp + +// +// Zero to positive powers is ok +// + +LL02e8: cpys f31, f31, f0 + br zero, done + +LL02f0: xor t5, 9, a0 + bne a0, LL03e8 + + xor t6, 2, t7 + bne t7, LL0308 + + cpys f31, f31, f0 + br zero, done + +// +// Zero to negative powers is error +// + +LL0308: xor t6, 3, t3 + bne t3, LL0318 + + mov ZERO_TO_NEG, t0 + br zero, except_disp + +LL0318: xor t6, 6, t4 + bne t4, LL0328 + + cpys f31, f31, f0 + br zero, done + + +LL0328: xor t6, 7, t5 + bne t5, LL0338 + + mov ZERO_TO_NEG, t0 + br zero, except_disp + +// +// Check if y is integral +// + +LL0338: cpys f31, f17, f13 + mov zero, t5 + ldt f12, Two52 + cmptle f13, f12, f11 + fbeq f11, LL0360 + addt f13, f12, f0 + subt f0, f12, f0 + cmpteq f13, f0, f0 + fbeq f0, LL0364 + +LL0360: mov 1, t5 + +// +// Remove non-integral y cases and check for y even +// + +LL0364: cmpult zero, t5, a0 + beq a0, LL03a0 + + mov zero, t6 + ldt f11, Two53 + cmptle f11, f13, f12 + fbeq f12, LL0388 + br zero, LL03a0 + + +LL0388: addt f13, f11, f0 + mov zero, t6 + subt f0, f11, f0 + cmpteq f0, f13, f0 + fbne f0, LL03a0 + mov 1, t6 + +// +// x is (negative) zero, look at y (if y > 0, ok; if y < 0, error case) +// + +LL03a0: cmptlt f31, f17, f12 + fbeq f12, LL03c8 + beq a0, LL03c0 + + cpys f16, f16, f0 + bne t6, done + + cpys f31, f31, f0 + br zero, done + +LL03c0: cpys f31, f31, f0 + br zero, done + + +LL03c8: fbge f17, LL03e8 + beq a0, LL03e0 + + mov NEG_ZERO_TO_NEG, t7 + cmoveq t6, ZERO_TO_NEG, t7 + mov t7, t0 + br zero, except_disp + +LL03e0: mov ZERO_TO_NEG, t0 + br zero, except_disp + +// +// y is infinite +// + +LL03e8: xor t5, 6, t3 + bne t3, LL0410 + + xor t6, 2, t4 + bne t4, LL0400 + + mov SMALL_TO_INF, t0 + br zero, except_disp + + +LL0400: xor t6, 3, t6 + bne t6, LL04b0 + + mov FINITE_TO_INF, t0 + br zero, except_disp + + +LL0410: xor t5, 7, t5 + bne t5, LL04f8 + + xor t6, 2, a0 + bne a0, LL0428 + + mov SMALL_TO_INF, t0 + br zero, except_disp + + +LL0428: xor t6, 3, t7 + bne t7, LL0438 + + mov FINITE_TO_INF, t0 + br zero, except_disp + +// +// Check if y is integral +// + +LL0438: xor t6, 6, t3 + beq t3, LL04f0 + + xor t6, 7, t6 + beq t6, LL04f0 + + cpys f31, f17, f11 + mov zero, t4 + ldt f13, Two52 + cmptle f11, f13, f12 + fbeq f12, LL0470 + addt f11, f13, f0 + subt f0, f13, f0 + cmpteq f11, f0, f0 + fbeq f0, LL0474 + +LL0470: mov 1, t4 + +// +// Remove non-integral cases, look for y even +// + +LL0474: beq t4, LL04e8 + + mov zero, v0 + ldt f12, Two53 + cmptle f12, f11, f13 + fbeq f13, LL0490 + br zero, LL04a8 + +LL0490: addt f11, f12, f0 + mov zero, v0 + subt f0, f12, f0 + cmpteq f0, f11, f0 + fbne f0, LL04a8 + mov 1, v0 + +LL04a8: cpys f31, f16, f16 + cmpult zero, v0, t2 + +// +// Handle denorms +// + +LL04b0: + ldah t0, 0x3ff0(zero) + ldt f13, Two52 + ldah t3, 0x4320(zero) + cpyse f13, f16, f12 + subt f12, f13, f16 + stt f16, Temp0(sp) + ldl a0, Temp0 + HighPart(sp) + and a0, t1, t7 + subl t7, t0, t0 + subl a0, t0, a0 + subl t0, t3, t0 + mov t0, t4 + br zero, Compute + +// +// Negative x to finite non-integral power is error +// + +LL04e8: mov NEG_BASE, t0 + br zero, except_disp + +LL04f0: mov NEG_BASE, t0 + br zero, except_disp + +// +// Check for integral y +// + +LL04f8: xor t6, 2, t5 + xor t6, 3, t7 + beq t5, LL05b0 + + xor t6, 6, t3 + beq t7, LL05b0 + + beq t3, LL05a8 + + xor t6, 7, t6 + beq t6, LL05a8 + + cpys f31, f17, f11 + mov zero, t4 + ldt f0, Two52 + cmptle f11, f0, f12 + fbeq f12, LL0540 + addt f11, f0, f13 + subt f13, f0, f0 + cmpteq f11, f0, f0 + fbeq f0, LL0544 + +// +// Sort out non-integral y +// + +LL0540: mov 1, t4 + +// +// y is integral - is it even? +// + +LL0544: beq t4, non_int_y + + ornot zero, zero, t5 + ldt f12, Two53 + srl t5, 33, t5 + mov zero, t0 + cmpult zero, t0, t2 + cmptle f12, f11, f13 + fbeq f13, LL0570 + and v0, t5, v0 + br zero, x_is_ok + +LL0570: addt f11, f12, f0 + mov zero, t0 + subt f0, f12, f0 + cmpteq f0, f11, f0 + fbne f0, LL0588 + mov 1, t0 + +LL0588: ornot zero, zero, t5 + srl t5, 33, t5 + cmpult zero, t0, t2 + and v0, t5, v0 + br zero, x_is_ok + +// +// Non-integral y and negative x is error case +// + +non_int_y: + mov NEG_BASE, t0 + br zero, except_disp + +LL05a8: mov NEG_BASE, t0 + br zero, except_disp + +// +// We will be able to compute power +// + +LL05b0: ornot zero, zero, t7 + srl t7, 33, t7 + and v0, t7, v0 + +// +// Get x's exponent from the hi word of x +// + +x_is_ok: + and v0, t1, t3 + ldah t6, 0x3ff0(zero) + subl t3, t6, t3 + mov t3, t4 + subl v0, t4, a0 + +// +// Computation of log2(x), product y*log2(x), and 2^prod +// + +Compute: + ldah t0, 32(zero) + ldah t5, 0x10(zero) + lda t5, 0x1000(t5) + lda t0, -0x2000(t0) + addl a0, t5, t5 + and t5, t0, t0 + sra t0, 8, t0 + lda t6, __pow_t_table + addl t6, t0, t0 + stt f16, Temp0(sp) + stl a0, Temp0 + HighPart(sp) + ldt f13, P9(t0) + ldt f12, Temp0(sp) + sra t4, 0xf, t4 + ldt f16, One + ldah a0, 1(zero) + ldt f18, P2(t6) + lda a0, -P0(a0) + addt f12, f13, f11 + ldt f20, P1(t6) + subt f12, f13, f12 + lda t7, 0xf60(zero) + lda v0, -0x20(zero) + divt f16, f11, f11 + cvtts f12, f19 + mult f12, f11, f0 + addt f0, f0, f0 + cvtts f0, f14 + mult f0, f0, f15 + mult f13, f14, f13 + mult f18, f15, f18 + subt f12, f13, f13 + addt f18, f20, f18 + ldt f20, P0(t6) + subt f12, f19, f12 + mult f19, f14, f19 + stq t4, Temp0(sp) + ldt f21, P11(t0) + addt f13, f13, f13 + mult f18, f15, f18 + mult f12, f14, f12 + subt f13, f19, f13 + addt f18, f20, f18 + ldt f20, Temp0(sp) + ldt f19, 8(t6) + stt f17, Temp0(sp) + cvtqt f20, f20 + ldl t3, Temp0 + HighPart(sp) + mult f14, f19, f19 + subt f13, f12, f12 + mult f18, f15, f15 + ldt f18, 0(t6) + ldt f13, P12(t0) + and t3, t1, t3 + sra t3, 0xf, t3 + addt f20, f21, f20 + cmple t3, a0, a0 + mult f12, f11, f11 + mult f15, f0, f0 + ldt f15, 0x10(t6) + ldah t0, 1(zero) + lda t0, -0xd80(t0) + addt f20, f19, f21 + mult f14, f15, f14 + mult f11, f18, f11 + addt f13, f0, f0 + subt f21, f20, f20 + cvtts f21, f12 + addt f0, f11, f0 + subt f19, f20, f19 + subt f21, f12, f21 + addt f0, f14, f0 + addt f0, f19, f0 + addt f0, f21, f21 + beq a0, bad_y + + stt f12, Temp0(sp) + ldl t4, Temp0 + HighPart(sp) + and t4, t1, t4 + sra t4, 0xf, t4 + addl t4, t3, t3 + subl t3, t0, t5 + beq t4, x_was_1 + + cmpule t5, t7, t5 + beq t5, problem_w_product + + cvtts f17, f18 + mult f21, f17, f11 + mult f18, f12, f13 + subt f17, f18, f18 + cvttq f13, f15 + mult f18, f12, f18 + stt f15, Temp1(sp) + ldq a0, Temp1(sp) + addt f11, f18, f11 + addl zero, a0, a0 + stq a0, Temp1(sp) + and a0, 31, t4 + ldt f20, Temp1(sp) + addl t4, t4, t4 + ldt f19, P8(t6) + s8addl t4, zero, t4 + ldt f0, P7(t6) + addl t6, t4, t4 + cvtqt f20, f20 + ldt f15, P6(t6) + ldt f14, P5(t6) + and a0, v0, v0 + ldt f18, P4(t6) + subt f13, f20, f13 + ldt f20, P3(t6) + ldah t6, 1(zero) + lda t6, -P4(t6) + addt f13, f11, f11 + ldt f13, 0x10a8(t4) + mult f19, f11, f19 + addt f19, f0, f0 + ldt f19, 0x10b0(t4) + mult f0, f11, f0 + addt f0, f15, f0 + mult f0, f11, f0 + addt f0, f14, f0 + mult f0, f11, f0 + addt f0, f18, f0 + mult f0, f11, f0 + addt f0, f20, f0 + mult f0, f11, f0 + mult f0, f13, f0 + addt f0, f19, f0 + addt f13, f0, f0 + stt f0, Temp1(sp) + ldl t5, Temp1 + HighPart(sp) + and t5, t1, t5 + sra t5, 0xf, t5 + addl v0, t5, t5 + subl t5, 32, t5 + cmpule t5, t6, t5 + beq t5, problem_w_result + + stt f0, Temp1(sp) + ldl t7, Temp1 + HighPart(sp) + sll v0, 0xf, a0 + addl t7, a0, t7 + stl t7, Temp1 + HighPart(sp) + ldt f0, Temp1(sp) + beq t2, done + + cpysn f0, f0, f0 + br zero, done + +// +// Final result will underflow or overflow +// + +problem_w_result: + blt v0, underflows + + mov NEG_OVERFLOW, t4 + cmoveq t2, POS_OVERFLOW, t4 + mov t4, t0 + br zero, except_disp + +// +// Product y * log2(x) will underflow or overflow +// + +problem_w_product: + cmplt t3, t0, t0 + beq t0, might_overflow + + ldt f15, MinusOne + bne t2, correct_underflow + + cpys f16, f16, f15 + +// +// Product y * log2(x) underflows - return + or - 1 +// + +correct_underflow: + cpys f15, f15, f0 + br zero, done + +// +// Result might overflow or underflow +// + +might_overflow: + cmptlt f31, f12, f14 + fbeq f14, check_further + cmptlt f31, f17, f18 + fbne f18, overflows + +// +// Check further +// + +check_further: + fbge f12, underflows + fbge f17, underflows + +// +// Definite overflow +// + +overflows: + mov NEG_OVERFLOW, t4 + cmoveq t2, POS_OVERFLOW, t4 + mov t4, t0 + br zero, except_disp + +// +// Definite underflow +// + +underflows: + mov UNDERFLOW, t0 + br zero, except_disp + +// +// x was 1, return + or -1 +// + +x_was_1: + ldt f20, MinusOne + bne t2, correct_sgn_1 + + cpys f16, f16, f20 + +// +// returning -1 +// + +correct_sgn_1: + cpys f20, f20, f0 + br zero, done + +// +// y was NaN or inf +// + +bad_y: stt f17, Temp1(sp) + ldl t6, Temp1 + HighPart(sp) + zapnot t6, 0xf, t7 + and t6, t1, t5 + srl t7, 31, t7 + cmpult t5, t1, t1 + and t7, 1, t7 + beq t5, LL0908 + + beq t1, LL08d0 + + addl t7, 4, v0 + br zero, LL092c + +// +// Look at lo part of y to distinguish NaN from infinity +// + +LL08d0: ldah t4, 0x10(zero) + ldl t3, Temp1(sp) + lda t4, -1(t4) + and t6, t4, t4 + bis t4, t3, t3 + stl t4, Temp1 + HighPart(sp) + beq t3, LL0900 + + srl t4, 19, t4 + and t4, 1, t4 + mov t4, v0 + br zero, LL092c + +LL0900: addl t7, 2, v0 + br zero, LL092c + +LL0908: ldl t2, Temp1(sp) + ldah a0, 0x10(zero) + lda a0, -1(a0) + and t6, a0, t6 + bis t6, t2, t2 + stl t6, Temp1 + HighPart(sp) + mov 6, t5 + cmoveq t2, 8, t5 + addl t7, t5, v0 + +LL092c: xor v0, 1, t0 + beq v0, y_was_NaN + + beq t0, y_was_NaN + + addt f12, f21, f12 + mov ONE_TO_INF, t0 + fbne f12, y_is_inf + br zero, except_disp + +// +// y is infinite - look at sign of y and at log2(x) to distinguish cases +// + +y_is_inf: + xor v0, 2, v0 + bne v0, LL0970 + + mov FINITE_TO_INF, t3 + mov SMALL_TO_INF, t4 + fblt f12, y_inf_x_small + mov t3, t0 + br zero, except_disp + +// +// x was small +// + +y_inf_x_small: + mov t4, t0 + br zero, except_disp + +LL0970: cmptlt f31, f12, f12 + mov FINITE_TO_INF, a0 + mov SMALL_TO_INF, t2 + mov a0, t0 + fbne f12, LL0988 + br zero, except_disp + +LL0988: mov t2, t0 + +// +// The exception dispatch and return +// + +except_disp: + lda t1, powName + stl t1, ExRec + ErName(sp) + ldah v0, 0x800(zero) + stt f1, ExRec + ErArg0(sp) + bis t0, v0, v0 + stt f10, ExRec + ErArg1(sp) + + stl v0, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// Return y +// + +y_was_NaN: + cpys f17, f17, f0 + br zero, done + +// +// Return y +// + +inf_to_inf: + cpys f17, f17, f0 + br zero, done + +// +// Return x - was NaN +// + +return_x: + cpys f16, f16, f0 + br zero, done + +// +// Return 1 - x^0 is 1 +// + +return_1: + ldt f0, One + +// +// Return with result in f0. +// + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (a1) // return through saved ra in a1 + + .end pow + + .rdata + .align 3 + +// +// Define floating point constants. +// + +MinusOne: + .double -1.0 + +One: .double 1.0 + +Two: .double 2.0 + +Two52: .quad 0x4330000000000000 // 2^52 (4503599627370496) + +Two53: .quad 0x4340000000000000 // 2^53 (9007199254740992) + +// +// Function name for dpml_exception. +// + +powName: + .ascii "pow\0" + +// +// Power lookup table. +// + + .align 3 +__pow_t_table: + + .double 4.6166241308446828e+001 + .double 4.6166241645812988e+001 + .double -3.3736615924573241e-007 + .double 3.8471867757009264e+000 + .double 5.7707958264828652e-001 + .double 2.1660849395913177e-002 + .double 2.3459849132431777e-004 + .double 1.6938410381884394e-006 + .double 3.8471867757039022e+000 + .double 5.7707801635298150e-001 + .double 1.0305010261356369e-001 + .double 2.1660849392498290e-002 + .double 2.3459619820224674e-004 + .double 1.6938509724129059e-006 + .double 9.1725627021532984e-009 + .double 3.9737294056403906e-011 + .double 1.4345602999278721e-013 + .double 1.0000000000000000e+000 + .double 0.0000000000000000e+000 + .double 0.0000000000000000e+000 + .double 0.0000000000000000e+000 + .double 1.0078125000000000e+000 + .double 3.5927217354413182e-001 + .double 3.5927217453718185e-001 + .double -9.9305000343586817e-010 + .double 1.0156250000000000e+000 + .double 7.1577001691054432e-001 + .double 7.1577002108097076e-001 + .double -4.1704264996119296e-009 + .double 1.0234375000000000e+000 + .double 1.0695360491984089e+000 + .double 1.0695360600948334e+000 + .double -1.0896424445460407e-008 + .double 1.0312500000000000e+000 + .double 1.4206118194705100e+000 + .double 1.4206118285655975e+000 + .double -9.0950875292804230e-009 + .double 1.0390625000000000e+000 + .double 1.7690379360380672e+000 + .double 1.7690379321575165e+000 + .double 3.8805507600434525e-009 + .double 1.0468750000000000e+000 + .double 2.1148540946487180e+000 + .double 2.1148540973663330e+000 + .double -2.7176151540756356e-009 + .double 1.0546875000000000e+000 + .double 2.4580991056265886e+000 + .double 2.4580991268157959e+000 + .double -2.1189207347028338e-008 + .double 1.0625000000000000e+000 + .double 2.7988109200108608e+000 + .double 2.7988108992576599e+000 + .double 2.0753201152020737e-008 + .double 1.0703125000000000e+000 + .double 3.1370266547368550e+000 + .double 3.1370266675949097e+000 + .double -1.2858054762507897e-008 + .double 1.0781250000000000e+000 + .double 3.4727826169014095e+000 + .double 3.4727826118469238e+000 + .double 5.0544858918073901e-009 + .double 1.0859375000000000e+000 + .double 3.8061143271522377e+000 + .double 3.8061143159866333e+000 + .double 1.1165604490890214e-008 + .double 1.0937500000000000e+000 + .double 4.1370565422389269e+000 + .double 4.1370565891265869e+000 + .double -4.6887660344069263e-008 + .double 1.1015625000000000e+000 + .double 4.4656432767613934e+000 + .double 4.4656432867050171e+000 + .double -9.9436233738581598e-009 + .double 1.1093750000000000e+000 + .double 4.7919078241498259e+000 + .double 4.7919077873229980e+000 + .double 3.6826827918140897e-008 + .double 1.1171875000000000e+000 + .double 5.1158827769084612e+000 + .double 5.1158827543258667e+000 + .double 2.2582594631858872e-008 + .double 1.1250000000000000e+000 + .double 5.4376000461539959e+000 + .double 5.4376000165939331e+000 + .double 2.9560062507570542e-008 + .double 1.1328125000000000e+000 + .double 5.7570908804779029e+000 + .double 5.7570909261703491e+000 + .double -4.5692446126210781e-008 + .double 1.1406250000000000e+000 + .double 6.0743858841605514e+000 + .double 6.0743858814239502e+000 + .double 2.7366011603360492e-009 + .double 1.1484375000000000e+000 + .double 6.3895150347636607e+000 + .double 6.3895150423049927e+000 + .double -7.5413319929755668e-009 + .double 1.1562500000000000e+000 + .double 6.7025077001263931e+000 + .double 6.7025077342987061e+000 + .double -3.4172313035237769e-008 + .double 1.1640625000000000e+000 + .double 7.0133926547891701e+000 + .double 7.0133926868438721e+000 + .double -3.2054701778734244e-008 + .double 1.1718750000000000e+000 + .double 7.3221980958681883e+000 + .double 7.3221981525421143e+000 + .double -5.6673926187592411e-008 + .double 1.1796875000000000e+000 + .double 7.6289516584025252e+000 + .double 7.6289516687393188e+000 + .double -1.0336793838283408e-008 + .double 1.1875000000000000e+000 + .double 7.9336804301947357e+000 + .double 7.9336804151535034e+000 + .double 1.5041232383423873e-008 + .double 1.1953125000000000e+000 + .double 8.2364109661648559e+000 + .double 8.2364108562469482e+000 + .double 1.0991790843498190e-007 + .double 1.2031250000000000e+000 + .double 8.5371693022368440e+000 + .double 8.5371692180633545e+000 + .double 8.4173489144335138e-008 + .double 1.2109375000000000e+000 + .double 8.8359809687756012e+000 + .double 8.8359808921813965e+000 + .double 7.6594205329271174e-008 + .double 1.2187500000000000e+000 + .double 9.1328710035919478e+000 + .double 9.1328709125518799e+000 + .double 9.1040067056405146e-008 + .double 1.2265625000000000e+000 + .double 9.4278639645320634e+000 + .double 9.4278640747070312e+000 + .double -1.1017496704487870e-007 + .double 1.2343750000000000e+000 + .double 9.7209839416672938e+000 + .double 9.7209839820861816e+000 + .double -4.0418888058951836e-008 + .double 1.2421875000000000e+000 + .double 1.0012254569099371e+001 + .double 1.0012254476547241e+001 + .double 9.2552130332567041e-008 + .double 1.2500000000000000e+000 + .double 1.0301699036395595e+001 + .double 1.0301698923110962e+001 + .double 1.1328463321778773e-007 + .double 1.2578125000000000e+000 + .double 1.0589340099667742e+001 + .double 1.0589340209960937e+001 + .double -1.1029319414844424e-007 + .double 1.2656250000000000e+000 + .double 1.0875200092307992e+001 + .double 1.0875200033187866e+001 + .double 5.9120125015141084e-008 + .double 1.2734375000000000e+000 + .double 1.1159300935394482e+001 + .double 1.1159301042556763e+001 + .double -1.0716228101327151e-007 + .double 1.2812500000000000e+000 + .double 1.1441664147778678e+001 + .double 1.1441664218902588e+001 + .double -7.1123909709313923e-008 + .double 1.2890625000000000e+000 + .double 1.1722310855866105e+001 + .double 1.1722310781478882e+001 + .double 7.4387223300811990e-008 + .double 1.2968750000000000e+000 + .double 1.2001261803101592e+001 + .double 1.2001261711120605e+001 + .double 9.1980986734758079e-008 + .double 1.3046875000000000e+000 + .double 1.2278537359169672e+001 + .double 1.2278537273406982e+001 + .double 8.5762688761207246e-008 + .double 1.3125000000000000e+000 + .double 1.2554157528920330e+001 + .double 1.2554157495498657e+001 + .double 3.3421672018100162e-008 + .double 1.3203125000000000e+000 + .double 1.2828141961029898e+001 + .double 1.2828141927719116e+001 + .double 3.3310782054458511e-008 + .double 1.3281250000000000e+000 + .double 1.3100509956406457e+001 + .double 1.3100509881973267e+001 + .double 7.4433189594417838e-008 + .double 1.3359375000000000e+000 + .double 1.3371280476348732e+001 + .double 1.3371280431747437e+001 + .double 4.4601294890994412e-008 + .double 1.3437500000000000e+000 + .double 1.3640472150467135e+001 + .double 1.3640472173690796e+001 + .double -2.3223661854144968e-008 + .double 1.3515625000000000e+000 + .double 1.3908103284375189e+001 + .double 1.3908103227615356e+001 + .double 5.6759831801771949e-008 + .double 1.3593750000000000e+000 + .double 1.4174191867159305e+001 + .double 1.4174191951751709e+001 + .double -8.4592403314822612e-008 + .double 1.3671875000000000e+000 + .double 1.4438755578634522e+001 + .double 1.4438755512237549e+001 + .double 6.6396972873718455e-008 + .double 1.3750000000000000e+000 + .double 1.4701811796393512e+001 + .double 1.4701811790466309e+001 + .double 5.9272036046296177e-009 + .double 1.3828125000000000e+000 + .double 1.4963377602655918e+001 + .double 1.4963377714157104e+001 + .double -1.1150118640383083e-007 + .double 1.3906250000000000e+000 + .double 1.5223469790924728e+001 + .double 1.5223469734191895e+001 + .double 5.6732833627606963e-008 + .double 1.3984375000000000e+000 + .double 1.5482104872456205e+001 + .double 1.5482104778289795e+001 + .double 9.4166410649057593e-008 + .double 1.4062500000000000e+000 + .double 1.5739299082549591e+001 + .double 1.5739299058914185e+001 + .double 2.3635406174577014e-008 + .double 1.4140625000000000e+000 + .double 1.5995068386662572e+001 + .double 1.5995068311691284e+001 + .double 7.4971287357386678e-008 + .double 1.4218750000000000e+000 + .double 1.6249428486358280e+001 + .double 1.6249428272247314e+001 + .double 2.1411096611771602e-007 + .double 1.4296875000000000e+000 + .double 1.6502394825089358e+001 + .double 1.6502394676208496e+001 + .double 1.4888086255988012e-007 + .double 1.4375000000000000e+000 + .double 1.6753982593824411e+001 + .double 1.6753982543945313e+001 + .double 4.9879099413412742e-008 + .double 1.4453125000000000e+000 + .double 1.7004206736521986e+001 + .double 1.7004206657409668e+001 + .double 7.9112320182549957e-008 + .double 1.4531250000000000e+000 + .double 1.7253081955457006e+001 + .double 1.7253081798553467e+001 + .double 1.5690353769144059e-007 + .double 1.4609375000000000e+000 + .double 1.7500622716404372e+001 + .double 1.7500622749328613e+001 + .double -3.2924240018740268e-008 + .double 1.4687500000000000e+000 + .double 1.7746843253684396e+001 + .double 1.7746843338012695e+001 + .double -8.4328299403034051e-008 + .double 1.4765625000000000e+000 + .double 1.7991757575074324e+001 + .double 1.7991757392883301e+001 + .double 1.8219102407645195e-007 + .double 1.4843750000000000e+000 + .double 1.8235379466590331e+001 + .double 1.8235379695892334e+001 + .double -2.2930200305113216e-007 + .double 1.4921875000000000e+000 + .double 1.8477722497143958e+001 + .double 1.8477722644805908e+001 + .double -1.4766195014684411e-007 + .double 1.5000000000000000e+000 + .double 1.8718800023076998e+001 + .double 1.8718800067901611e+001 + .double -4.4824613521605356e-008 + .double 1.5078125000000000e+000 + .double 1.8958625192578573e+001 + .double 1.8958625316619873e+001 + .double -1.2404129961848124e-007 + .double 1.5156250000000000e+000 + .double 1.9197210949988087e+001 + .double 1.9197210788726807e+001 + .double 1.6126127911381400e-007 + .double 1.5234375000000000e+000 + .double 1.9434570039987541e+001 + .double 1.9434569835662842e+001 + .double 2.0432470027419287e-007 + .double 1.5312500000000000e+000 + .double 1.9670715011686664e+001 + .double 1.9670714855194092e+001 + .double 1.5649257107941104e-007 + .double 1.5390625000000000e+000 + .double 1.9905658222604039e+001 + .double 1.9905658245086670e+001 + .double -2.2482629384793354e-008 + .double 1.5468750000000000e+000 + .double 2.0139411842547506e+001 + .double 2.0139411926269531e+001 + .double -8.3722023438581091e-008 + .double 1.5546875000000000e+000 + .double 2.0371987857396764e+001 + .double 2.0371987819671631e+001 + .double 3.7725131974471861e-008 + .double 1.5625000000000000e+000 + .double 2.0603398072791190e+001 + .double 2.0603397846221924e+001 + .double 2.2656926643557545e-007 + .double 1.5703125000000000e+000 + .double 2.0833654117725715e+001 + .double 2.0833653926849365e+001 + .double 1.9087635042588152e-007 + .double 1.5781250000000000e+000 + .double 2.1062767448057432e+001 + .double 2.1062767505645752e+001 + .double -5.7588320363631908e-008 + .double 1.5859375000000000e+000 + .double 2.1290749349925640e+001 + .double 2.1290749549865723e+001 + .double -1.9994008335507424e-007 + .double 1.5937500000000000e+000 + .double 2.1517610943087860e+001 + .double 2.1517611026763916e+001 + .double -8.3676057144975244e-008 + .double 1.6015625000000000e+000 + .double 2.1743363184174274e+001 + .double 2.1743363380432129e+001 + .double -1.9625785559308870e-007 + .double 1.6093750000000000e+000 + .double 2.1968016869862989e+001 + .double 2.1968017101287842e+001 + .double -2.3142485360292707e-007 + .double 1.6171875000000000e+000 + .double 2.2191582639978407e+001 + .double 2.2191582679748535e+001 + .double -3.9770127629797963e-008 + .double 1.6250000000000000e+000 + .double 2.2414070980514950e+001 + .double 2.2414071083068848e+001 + .double -1.0255389852355200e-007 + .double 1.6328125000000000e+000 + .double 2.2635492226588248e+001 + .double 2.2635492324829102e+001 + .double -9.8240853562727763e-008 + .double 1.6406250000000000e+000 + .double 2.2855856565315925e+001 + .double 2.2855856418609619e+001 + .double 1.4670630523588789e-007 + .double 1.6484375000000000e+000 + .double 2.3075174038629925e+001 + .double 2.3075173854827881e+001 + .double 1.8380204343371019e-007 + .double 1.6562500000000000e+000 + .double 2.3293454546022375e+001 + .double 2.3293454647064209e+001 + .double -1.0104183524739010e-007 + .double 1.6640625000000000e+000 + .double 2.3510707847226822e+001 + .double 2.3510707855224609e+001 + .double -7.9977856034644568e-009 + .double 1.6718750000000000e+000 + .double 2.3726943564836702e+001 + .double 2.3726943492889404e+001 + .double 7.1947298034792141e-008 + .double 1.6796875000000000e+000 + .double 2.3942171186862730e+001 + .double 2.3942171096801758e+001 + .double 9.0060971363642748e-008 + .double 1.6875000000000000e+000 + .double 2.4156400069230994e+001 + .double 2.4156400203704834e+001 + .double -1.3447384056481607e-007 + .double 1.6953125000000000e+000 + .double 2.4369639438223338e+001 + .double 2.4369639396667480e+001 + .double 4.1555857651188973e-008 + .double 1.7031250000000000e+000 + .double 2.4581898392861643e+001 + .double 2.4581898212432861e+001 + .double 1.8042878214319617e-007 + .double 1.7109375000000000e+000 + .double 2.4793185907237550e+001 + .double 2.4793185710906982e+001 + .double 1.9633056674029319e-007 + .double 1.7187500000000000e+000 + .double 2.5003510832789107e+001 + .double 2.5003510951995850e+001 + .double -1.1920674227914515e-007 + .double 1.7265625000000000e+000 + .double 2.5212881900525812e+001 + .double 2.5212882041931152e+001 + .double -1.4140534214692187e-007 + .double 1.7343750000000000e+000 + .double 2.5421307723203391e+001 + .double 2.5421307563781738e+001 + .double 1.5942165254471939e-007 + .double 1.7421875000000000e+000 + .double 2.5628796797449752e+001 + .double 2.5628796577453613e+001 + .double 2.1999613817580130e-007 + .double 1.7500000000000000e+000 + .double 2.5835357505843330e+001 + .double 2.5835357666015625e+001 + .double -1.6017229356185698e-007 + .double 1.7578125000000000e+000 + .double 2.6040998118945186e+001 + .double 2.6040997982025146e+001 + .double 1.3692003939236474e-007 + .double 1.7656250000000000e+000 + .double 2.6245726797286007e+001 + .double 2.6245726585388184e+001 + .double 2.1189782307244345e-007 + .double 1.7734375000000000e+000 + .double 2.6449551593309280e+001 + .double 2.6449551582336426e+001 + .double 1.0972853699776823e-008 + .double 1.7812500000000000e+000 + .double 2.6652480453271732e+001 + .double 2.6652480602264404e+001 + .double -1.4899267068896274e-007 + .double 1.7890625000000000e+000 + .double 2.6854521219102207e+001 + .double 2.6854521274566650e+001 + .double -5.5464444352106986e-008 + .double 1.7968750000000000e+000 + .double 2.7055681630220008e+001 + .double 2.7055681705474854e+001 + .double -7.5254846470362033e-008 + .double 1.8046875000000000e+000 + .double 2.7255969325313842e+001 + .double 2.7255969524383545e+001 + .double -1.9906970347883273e-007 + .double 1.8125000000000000e+000 + .double 2.7455391844082307e+001 + .double 2.7455391883850098e+001 + .double -3.9767789793217256e-008 + .double 1.8203125000000000e+000 + .double 2.7653956628936967e+001 + .double 2.7653956413269043e+001 + .double 2.1566792461584058e-007 + .double 1.8281250000000000e+000 + .double 2.7851671026668946e+001 + .double 2.7851671218872070e+001 + .double -1.9220312556676271e-007 + .double 1.8359375000000000e+000 + .double 2.8048542290079990e+001 + .double 2.8048542499542236e+001 + .double -2.0946224528680884e-007 + .double 1.8437500000000000e+000 + .double 2.8244577579578920e+001 + .double 2.8244577407836914e+001 + .double 1.7174200621933701e-007 + .double 1.8515625000000000e+000 + .double 2.8439783964744290e+001 + .double 2.8439784049987793e+001 + .double -8.5243501580557185e-008 + .double 1.8593750000000000e+000 + .double 2.8634168425854192e+001 + .double 2.8634168624877930e+001 + .double -1.9902373718522686e-007 + .double 1.8671875000000000e+000 + .double 2.8827737855383955e+001 + .double 2.8827737808227539e+001 + .double 4.7156414804454224e-008 + .double 1.8750000000000000e+000 + .double 2.9020499059472591e+001 + .double 2.9020499229431152e+001 + .double -1.6995855940538013e-007 + .double 1.8828125000000000e+000 + .double 2.9212458759358771e+001 + .double 2.9212458610534668e+001 + .double 1.4882410383872944e-007 + .double 1.8906250000000000e+000 + .double 2.9403623592787024e+001 + .double 2.9403623580932617e+001 + .double 1.1854407209259235e-008 + .double 1.8984375000000000e+000 + .double 2.9594000115384990e+001 + .double 2.9594000339508057e+001 + .double -2.2412306760802678e-007 + .double 1.9062500000000000e+000 + .double 2.9783594802012360e+001 + .double 2.9783594608306885e+001 + .double 1.9370547608148548e-007 + .double 1.9140625000000000e+000 + .double 2.9972414048082257e+001 + .double 2.9972414016723633e+001 + .double 3.1358625195636261e-008 + .double 1.9218750000000000e+000 + .double 3.0160464170855676e+001 + .double 3.0160464286804199e+001 + .double -1.1594852323091928e-007 + .double 1.9296875000000000e+000 + .double 3.0347751410709684e+001 + .double 3.0347751617431641e+001 + .double -2.0672195569090938e-007 + .double 1.9375000000000000e+000 + .double 3.0534281932380008e+001 + .double 3.0534281730651855e+001 + .double 2.0172815121304595e-007 + .double 1.9453125000000000e+000 + .double 3.0720061826178590e+001 + .double 3.0720061779022217e+001 + .double 4.7156373213152730e-008 + .double 1.9531250000000000e+000 + .double 3.0905097109186784e+001 + .double 3.0905097007751465e+001 + .double 1.0143532055180066e-007 + .double 1.9609375000000000e+000 + .double 3.1089393726424703e+001 + .double 3.1089393615722656e+001 + .double 1.1070204746126331e-007 + .double 1.9687500000000000e+000 + .double 3.1272957551997326e+001 + .double 3.1272957324981689e+001 + .double 2.2701563759805731e-007 + .double 1.9765625000000000e+000 + .double 3.1455794390217925e+001 + .double 3.1455794334411621e+001 + .double 5.5806303018042362e-008 + .double 1.9843750000000000e+000 + .double 3.1637909976709306e+001 + .double 3.1637909889221191e+001 + .double 8.7488115898710126e-008 + .double 1.9921875000000000e+000 + .double 3.1819309979483453e+001 + .double 3.1819310188293457e+001 + .double -2.0881000302875003e-007 + .double 2.0000000000000000e+000 + .double 3.2000000000000000e+001 + .double 3.2000000000000000e+001 + .double 0.0000000000000000e+000 + .double 1.0000000000000000e+000 + .double 0.0000000000000000e+000 + .double 1.0218971486541166e+000 + .double 5.1092250289734439e-017 + .double 1.0442737824274138e+000 + .double 8.5518897055379649e-017 + .double 1.0671404006768237e+000 + .double -7.8998539668415821e-017 + .double 1.0905077326652577e+000 + .double -3.0467820798124711e-017 + .double 1.1143867425958924e+000 + .double 1.0410278456845571e-016 + .double 1.1387886347566916e+000 + .double 8.9128126760254078e-017 + .double 1.1637248587775775e+000 + .double 3.8292048369240935e-017 + .double 1.1892071150027210e+000 + .double 3.9820152314656461e-017 + .double 1.2152473599804690e+000 + .double -7.7126306926814881e-017 + .double 1.2418578120734840e+000 + .double 4.6580275918369368e-017 + .double 1.2690509571917332e+000 + .double 2.6679321313421861e-018 + .double 1.2968395546510096e+000 + .double 2.5382502794888315e-017 + .double 1.3252366431597413e+000 + .double -2.8587312100388614e-017 + .double 1.3542555469368927e+000 + .double 7.7009483798029895e-017 + .double 1.3839098819638320e+000 + .double -6.7705116587947863e-017 + .double 1.4142135623730951e+000 + .double -9.6672933134529135e-017 + .double 1.4451808069770467e+000 + .double -3.0237581349939873e-017 + .double 1.4768261459394993e+000 + .double -3.4839945568927958e-017 + .double 1.5091644275934228e+000 + .double -1.0164553277542950e-016 + .double 1.5422108254079407e+000 + .double 7.9498348096976209e-017 + .double 1.5759808451078865e+000 + .double -1.0136916471278304e-017 + .double 1.6104903319492543e+000 + .double 2.4707192569797888e-017 + .double 1.6457554781539649e+000 + .double -1.0125679913674773e-016 + .double 1.6817928305074290e+000 + .double 8.1990100205814965e-017 + .double 1.7186192981224779e+000 + .double -1.8513804182631110e-017 + .double 1.7562521603732995e+000 + .double 2.9601406954488733e-017 + .double 1.7947090750031072e+000 + .double 1.8227458427912087e-017 + .double 1.8340080864093424e+000 + .double 3.2831072242456272e-017 + .double 1.8741676341103000e+000 + .double -6.1227634130041426e-017 + .double 1.9152065613971474e+000 + .double -1.0619946056195963e-016 + .double 1.9571441241754002e+000 + .double 8.9607677910366678e-017 + .double 2.0000000000000000e+000 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/sins.s b/private/fp32/tran/alpha/sins.s new file mode 100644 index 000000000..c94cf1dd6 --- /dev/null +++ b/private/fp32/tran/alpha/sins.s @@ -0,0 +1,978 @@ +// TITLE("Alpha AXP Sine") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// sin.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format sine. +// +// Author: +// +// Bob Hanek 1-Oct-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 12-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +SaveS0: .space 8 // +SaveS1: .space 8 // +SaveRa: .space 8 // +SaveF2: .space 8 // +SaveF3: .space 8 // +SaveF4: .space 8 // +SaveF5: .space 8 // +SaveF6: .space 8 // +SaveF7: .space 8 // +SaveF8: .space 8 // +SaveF9: .space 8 // +Temp0: .space 8 // +Temp1: .space 8 // +Temp2: .space 8 // +Temp3: .space 8 // +ExRec: .space DpmlExceptionLength // exception record + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Define argument ranges. +// + +#define BIG_X_HI 0x4169 // upper bound of medium argument range +#define BIG_X_LO 0x21fb +#define SMALL_X_HI 0x3e40 // lower bound of medium argument range +#define SMALL_X_LO 0x0000 +#define EXP_WORD_OF_TWO_PI_HI 0x4019 +#define EXP_WORD_OF_TWO_PI_LO 0x21fb + +// +// Define offsets into table. +// + +#define D_2_POW_K_OVER_PI_OVER_4 0x0 +#define PI_OVER_4_OVER_2_POW_K_0 0x08 +#define PI_OVER_4_OVER_2_POW_K_1 0x10 +#define PI_OVER_4_OVER_2_POW_K_2 0x18 +#define PI_OVER_4_OVER_2_POW_K_3 0x20 +#define PI_OVER_4_OVER_2_POW_K_4 0x28 + +#define PI_OVER_2_HI 0x30 +#define PI_OVER_2_LO 0x38 +#define PI_HI 0x40 +#define PI_LO 0x48 +#define THREE_PI_OVER_2_HI 0x50 +#define THREE_PI_OVER_2_LO 0x58 +#define TWO_PI_HI 0x60 +#define TWO_PI_LO 0x68 +#define TWO_POW_K_OVER_PI_OVER_4 0x70 + +#define C_POLY0 0xb8 +#define C_POLY1 C_POLY0 + 8 +#define C_POLY2 C_POLY1 + 8 +#define C_POLY3 C_POLY2 + 8 +#define C_POLY4 C_POLY3 + 8 + +#define P_POLY0 0x78 +#define P_POLY1 P_POLY0 + 8 + +#define Q_POLY0 0x88 +#define Q_POLY1 Q_POLY0 + 8 + +#define S_POLY0 0x98 +#define S_POLY1 S_POLY0 + 8 +#define S_POLY2 S_POLY1 + 8 +#define S_POLY3 S_POLY2 + 8 + +#define SINCOS 0xe0 +#define SIN_A 0xe8 +#define COS_A 0xf0 + + SBTTL("Sine") + +//++ +// +// double +// sin ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the sine of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double sine result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(sin, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + stq s0, SaveS0(sp) + stq s1, SaveS1(sp) + stq ra, SaveRa(sp) + stt f2, SaveF2(sp) + stt f3, SaveF3(sp) + stt f4, SaveF4(sp) + stt f5, SaveF5(sp) + stt f6, SaveF6(sp) + stt f7, SaveF7(sp) + stt f8, SaveF8(sp) + stt f9, SaveF9(sp) + + PROLOGUE_END + + ornot zero, zero, t0 + srl t0, 33, t0 + ldah t1, SMALL_X_HI(zero) + ldah t2, BIG_X_HI - SMALL_X_HI(zero) + stt f16, Temp2(sp) + cpys f31, f16, f2 + ldl v0, Temp2 + HighPart(sp) + lda t2, BIG_X_LO - SMALL_X_LO(t2) + cpys f16, f16, f3 + and v0, t0, v0 // the exponent field of the argument + subl v0, t1, t0 // if v0 - small <= big - small + cmpult t0, t2, t0 // we've got an abnormal (but not nessarily bad) argument + beq t0, abnormal_argument + + ldah t2, EXP_WORD_OF_TWO_PI_HI(zero) // if (j >= EXP_WORD_OF_TWO_PI) + lda t2, EXP_WORD_OF_TWO_PI_LO(t2) // we have a medium argument + cmplt v0, t2, t2 + beq t2, medium_argument + +// +// small argument reduction +// +// reduce the argument X to ( 8 * N + I ) * pi / 4 + y +// and let the reduced argument be y' where +// y' = X - floor( ( 8 * N + I + 1 ) / 2 ) * pi / 4 +// the low 3 bits of I are the octant +// + + lda t0, __trig_cons + ldt f0, TWO_POW_K_OVER_PI_OVER_4(t0) + mult f2, f0, f0 + cvttqc f0, f1 + stt f1, Temp2(sp) + ldl t2, Temp2(sp) + sra t2, 7, t4 + cmpule t4, 7, t12 + beq t12, small_cos // if octant > 7; shouldn't happen + +// dispatch on octant + + lda t12, Switch2 + s4addl t4, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute sin(y') +// +Switch20: + and t2, 127, t5 + subl t5, 27, t5 + blt t5, small_sin + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f0, SINCOS(t5) + ldt f5, SIN_A(t5) + ldt f6, COS_A(t5) + subt f2, f0, f4 + br zero, pos_tab_eval + +small_sin: + mult f2, f2, f1 + ldt f10, S_POLY3(t0) + ldt f0, S_POLY1(t0) + ldt f5, S_POLY2(t0) + ldt f11, S_POLY0(t0) + mult f10, f1, f10 + mult f1, f1, f6 + mult f0, f1, f0 + mult f2, f1, f1 + addt f10, f5, f5 + addt f0, f11, f0 + mult f5, f6, f5 + addt f5, f0, f0 + mult f1, f0, f0 + subt f2, f0, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 2nd octant; compute cos(y') +// +Switch21: + ldt f10, PI_OVER_2_HI(t0) + ornot zero, t2, t4 + and t4, 127, t4 + ldt f11, PI_OVER_2_LO(t0) + subl t4, 27, t4 + subt f2, f10, f10 + blt t4, pos_cos_1 + + s4subl t4, t4, t4 + s8addl t4, t0, t4 + ldt f6, SINCOS(t4) + ldt f5, COS_A(t4) + addt f10, f6, f6 + subt f6, f11, f4 + ldt f6, SIN_A(t4) + br zero, pos_tab_eval + +// +// 3rd octant; compute cos(y') +// +Switch22: + ldt f1, PI_OVER_2_HI(t0) + and t2, 127, t3 + ldt f11, PI_OVER_2_LO(t0) + subl t3, 27, t3 + subt f2, f1, f10 + blt t3, pos_cos_1 + + s4subl t3, t3, t3 + s8addl t3, t0, t3 + ldt f0, SINCOS(t3) + ldt f6, SIN_A(t3) + ldt f5, COS_A(t3) + subt f10, f0, f0 + subt f11, f0, f4 + br zero, pos_tab_eval + +pos_cos_1: + subt f10, f11, f10 + ldt f1, C_POLY1(t0) + ldt f2, C_POLY4(t0) + ldt f0, C_POLY2(t0) + ldt f4, C_POLY0(t0) + ldt f6, C_POLY3(t0) + ldt f11, One + mult f10, f10, f10 + mult f10, f10, f7 + mult f10, f1, f1 + mult f2, f10, f2 + mult f0, f7, f0 + addt f1, f4, f1 + mult f7, f10, f7 + addt f2, f6, f2 + addt f1, f0, f0 + mult f7, f2, f2 + addt f0, f2, f0 + mult f10, f0, f0 + subt f11, f0, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 4th octant; compute -sin(y') +// +Switch23: + ldt f6, PI_HI(t0) + ornot zero, t2, t5 + and t5, 127, t5 + ldt f10, PI_LO(t0) + subl t5, 27, t5 + subt f2, f6, f1 + blt t5, neg_sin_1 + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f0, SINCOS(t5) + ldt f5, SIN_A(t5) + ldt f6, COS_A(t5) + addt f1, f0, f0 + subt f10, f0, f4 + br zero, pos_tab_eval + +// +// 5th octant; compute -sin(y') +// +Switch24: + ldt f11, PI_HI(t0) + and t2, 127, t4 + ldt f10, PI_LO(t0) + subl t4, 27, t4 + subt f2, f11, f1 + blt t4, neg_sin_1 + + s4subl t4, t4, t4 + s8addl t4, t0, t4 + ldt f0, SINCOS(t4) + ldt f5, SIN_A(t4) + ldt f6, COS_A(t4) + subt f1, f0, f0 + subt f0, f10, f4 + br zero, neg_tab_eval + +neg_sin_1: + subt f1, f10, f2 + ldt f7, S_POLY3(t0) + ldt f0, S_POLY1(t0) + ldt f4, S_POLY2(t0) + ldt f12, S_POLY0(t0) + mult f2, f2, f11 + mult f7, f11, f7 + mult f11, f11, f6 + mult f0, f11, f0 + mult f2, f11, f2 + addt f7, f4, f4 + addt f0, f12, f0 + mult f4, f6, f4 + addt f4, f0, f0 + mult f2, f0, f0 + addt f10, f0, f0 + subt f0, f1, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 6th octant; compute -cos(y') +// +Switch25: + ldt f12, THREE_PI_OVER_2_HI(t0) + ornot zero, t2, t3 + and t3, 127, t3 + ldt f11, THREE_PI_OVER_2_LO(t0) + subl t3, 27, t3 + subt f2, f12, f12 + blt t3, neg_cos_1 + + s4subl t3, t3, t3 + s8addl t3, t0, t3 + ldt f6, SINCOS(t3) + ldt f5, COS_A(t3) + addt f12, f6, f6 + subt f6, f11, f4 + ldt f6, SIN_A(t3) + br zero, neg_tab_eval + +// +// 7th octant; compute -cos(y') +// +Switch26: + ldt f10, THREE_PI_OVER_2_HI(t0) + and t2, 127, t5 + ldt f11, THREE_PI_OVER_2_LO(t0) + subl t5, 27, t5 + subt f2, f10, f12 + blt t5, neg_cos_1 + + s4subl t5, t5, t5 + s8addl t5, t0, t5 + ldt f0, SINCOS(t5) + ldt f6, SIN_A(t5) + ldt f5, COS_A(t5) + subt f12, f0, f0 + subt f11, f0, f4 + br zero, neg_tab_eval + +neg_cos_1: + subt f12, f11, f11 + ldt f1, C_POLY1(t0) + ldt f2, C_POLY4(t0) + ldt f7, C_POLY2(t0) + ldt f0, C_POLY0(t0) + ldt f4, C_POLY3(t0) + ldt f6, One + mult f11, f11, f11 + mult f11, f11, f10 + mult f11, f1, f1 + mult f2, f11, f2 + mult f7, f10, f7 + addt f1, f0, f0 + mult f10, f11, f10 + addt f2, f4, f2 + addt f0, f7, f0 + mult f10, f2, f2 + addt f0, f2, f0 + mult f11, f0, f0 + subt f0, f6, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 8th octant; compute sin(y') +// +Switch27: + ldt f12, TWO_PI_HI(t0) + ornot zero, t2, t2 + and t2, 127, t2 + ldt f1, TWO_PI_LO(t0) + subl t2, 27, t2 + subt f2, f12, f2 + blt t2, pos_sin_1 + + s4subl t2, t2, t2 + s8addl t2, t0, t2 + ldt f10, SINCOS(t2) + ldt f5, SIN_A(t2) + ldt f6, COS_A(t2) + addt f2, f10, f10 + subt f1, f10, f4 + br zero, neg_tab_eval + +pos_sin_1: + subt f2, f1, f11 + ldt f12, S_POLY3(t0) + ldt f7, S_POLY1(t0) + ldt f10, S_POLY2(t0) + ldt f6, S_POLY0(t0) + mult f11, f11, f0 + mult f12, f0, f12 + mult f0, f0, f4 + mult f7, f0, f7 + mult f11, f0, f0 + addt f12, f10, f10 + addt f7, f6, f6 + mult f10, f4, f4 + addt f4, f6, f4 + mult f0, f4, f0 + addt f1, f0, f0 + subt f2, f0, f7 + fbge f3, adjust_sign + br zero, change_sign + +small_cos: + mult f2, f2, f12 + ldt f10, C_POLY1(t0) + ldt f6, C_POLY4(t0) + ldt f1, C_POLY2(t0) + ldt f0, C_POLY0(t0) + ldt f5, C_POLY3(t0) + mult f12, f12, f11 + mult f12, f10, f10 + mult f6, f12, f6 + mult f1, f11, f1 + addt f10, f0, f0 + mult f11, f12, f11 + ldt f10, One + addt f6, f5, f5 + addt f0, f1, f0 + mult f11, f5, f5 + addt f0, f5, f0 + mult f12, f0, f0 + subt f10, f0, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// a medium argument +// +medium_argument: + lda t5, __trig_cons // reduce the argument with extra precision + ldt f6, D_2_POW_K_OVER_PI_OVER_4(t5) + mult f2, f6, f6 + cvttqc f6, f1 + stt f1, Temp2(sp) + ldl s0, Temp2(sp) + addl s0, 0x80, t2 + bic t2, 0xff, t2 + stq t2, Temp3(sp) + ldt f5, Temp3(sp) + ldt f0, PI_OVER_4_OVER_2_POW_K_0(t5) + ldt f10, PI_OVER_4_OVER_2_POW_K_1(t5) + cvtqt f5, f5 + ldt f6, PI_OVER_4_OVER_2_POW_K_2(t5) + mult f5, f0, f0 + mult f5, f10, f10 + mult f5, f6, f6 + subt f2, f0, f0 + subt f0, f10, f1 + subt f1, f0, f0 + addt f10, f0, f0 + addt f6, f0, f8 + subt f1, f8, f9 + cmpteq f9, f1, f11 + fbne f11, evaluate + + subt f9, f1, f1 + ldt f12, PI_OVER_4_OVER_2_POW_K_3(t5) + mult f5, f12, f12 + addt f8, f1, f1 + addt f12, f1, f8 + subt f9, f8, f10 + cmpteq f10, f9, f0 + fbne f0, evaluate + + subt f10, f9, f9 + ldt f6, PI_OVER_4_OVER_2_POW_K_4(t5) + mult f5, f6, f5 + addt f8, f9, f8 + addt f5, f8, f8 + subt f10, f8, f9 + cmpteq f9, f10, f11 + fbne f11, evaluate + + subt f9, f10, f10 + addt f8, f10, f8 + br zero, evaluate + +// +// process an abnormal argument +// it's either very small, very big, a NaN or an Inf +// +abnormal_argument: + cmple v0, t1, t1 + beq t1, big_NaN_or_Inf + + cpys f3, f2, f0 // very small argument; simply return it. + br zero, done + +// +// Process big arguments or NaNs or Infs +// +big_NaN_or_Inf: + ldah s1, 0x7ff0(zero) // mask is 0x7ff00000 + and v0, s1, v0 + xor v0, s1, v0 + beq v0, NaN_or_Inf // NaN or an infinity + + cpys f2, f2, f16 // reduce the very big argument + mov zero, a1 // very carefully + lda a2, Temp1(sp) + lda a3, Temp0(sp) + bsr ra, __trig_reduce + mov v0, s0 + ldt f8, Temp0(sp) + ldt f9, Temp1(sp) + +// +// evaluate the function +// +evaluate: + sra s0, 7, t2 + and t2, 7, t2 + cmpule t2, 7, t12 + beq t12, pos_sin_2 + + lda t12, Switch1 // dispatch on the octant + s4addl t2, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute sin(y') +// +Switch10: + and s0, 127, t4 + subl t4, 27, t4 + blt t4, pos_sin_2 + + s4subl t4, t4, t4 + lda t5, __trig_cons + s8addl t4, t5, t4 + ldt f6, SINCOS(t4) + ldt f5, SIN_A(t4) + subt f9, f6, f6 + subt f6, f8, f4 + ldt f6, COS_A(t4) + br zero, pos_tab_eval + +// +// 2nd octant; compute cos(y') +// +Switch11: + ornot zero, s0, t7 + and t7, 127, t7 + subl t7, 27, t7 + blt t7, pos_cos_2 + + s4subl t7, t7, t7 + lda a0, __trig_cons + s8addl t7, a0, t7 + ldt f0, SINCOS(t7) + ldt f6, SIN_A(t7) + ldt f5, COS_A(t7) + addt f9, f0, f0 + subt f0, f8, f4 + br zero, pos_tab_eval + +// +// 3rd octant; compute cos(y') +// +Switch12: + and s0, 127, a2 + subl a2, 27, a2 + blt a2, pos_cos_2 + + s4subl a2, a2, a2 + lda a3, __trig_cons + s8addl a2, a3, a2 + ldt f1, SINCOS(a2) + ldt f6, SIN_A(a2) + ldt f5, COS_A(a2) + subt f9, f1, f1 + subt f8, f1, f4 + br zero, pos_tab_eval + +pos_cos_2: + mult f9, f9, f10 + lda a5, __trig_cons + ldt f11, C_POLY1(a5) + ldt f12, C_POLY4(a5) + ldt f14, C_POLY2(a5) + mult f10, f10, f13 + mult f10, f11, f11 + ldt f15, C_POLY0(a5) + mult f12, f10, f12 + ldt f16, C_POLY3(a5) + ldt f17, One + mult f14, f13, f14 + addt f11, f15, f11 + mult f13, f10, f13 + addt f12, f16, f12 + addt f11, f14, f11 + mult f13, f12, f12 + addt f11, f12, f11 + mult f10, f11, f10 + subt f17, f10, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 4th octant; compute -sin(y') +// +Switch13: + ornot zero, s0, t8 + and t8, 127, t8 + subl t8, 27, t8 + blt t8, neg_sin_2 + + s4subl t8, t8, t8 + lda t9, __trig_cons + s8addl t8, t9, t8 + ldt f18, SINCOS(t8) + ldt f5, SIN_A(t8) + ldt f6, COS_A(t8) + addt f9, f18, f18 + subt f8, f18, f4 + br zero, pos_tab_eval + +// +// 5th octant; compute -sin(y') +// +Switch14: + and s0, 127, t11 + subl t11, 27, t11 + blt t11, neg_sin_2 + + s4subl t11, t11, t11 + lda ra, __trig_cons + s8addl t11, ra, t11 + ldt f19, SINCOS(t11) + ldt f5, SIN_A(t11) + ldt f6, COS_A(t11) + subt f9, f19, f19 + subt f19, f8, f4 + br zero, neg_tab_eval + +neg_sin_2: + mult f9, f9, f20 + lda v0, __trig_cons + ldt f21, S_POLY3(v0) + ldt f22, S_POLY1(v0) + ldt f23, S_POLY2(v0) + mult f21, f20, f21 + ldt f25, S_POLY0(v0) + mult f20, f20, f24 + mult f22, f20, f22 + mult f9, f20, f20 + addt f21, f23, f21 + addt f22, f25, f22 + mult f21, f24, f21 + addt f21, f22, f21 + mult f20, f21, f20 + addt f8, f20, f8 + subt f8, f9, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 6th octant; compute -cos(y') +// +Switch15: + ornot zero, s0, t0 + and t0, 127, t0 + subl t0, 27, t0 + blt t0, neg_cos_2 + + s4subl t0, t0, t0 + lda t1, __trig_cons + s8addl t0, t1, t0 + ldt f26, SINCOS(t0) + ldt f6, SIN_A(t0) + ldt f5, COS_A(t0) + addt f9, f26, f26 + subt f26, f8, f4 + br zero, neg_tab_eval + +// +// 7th octant; compute -cos(y') +// +Switch16: + and s0, 127, t3 + subl t3, 27, t3 + blt t3, neg_cos_2 + + s4subl t3, t3, t3 + lda t5, __trig_cons + s8addl t3, t5, t3 + ldt f27, SINCOS(t3) + ldt f6, SIN_A(t3) + ldt f5, COS_A(t3) + subt f9, f27, f27 + subt f8, f27, f4 + br zero, neg_tab_eval + +neg_cos_2: + mult f9, f9, f28 + lda t6, __trig_cons + ldt f29, C_POLY1(t6) + ldt f30, C_POLY4(t6) + ldt f1, C_POLY2(t6) + mult f28, f28, f0 + mult f28, f29, f29 + ldt f15, C_POLY0(t6) + mult f30, f28, f30 + ldt f16, C_POLY3(t6) + ldt f14, One + mult f1, f0, f1 + addt f29, f15, f15 + mult f0, f28, f0 + addt f30, f16, f16 + addt f15, f1, f1 + mult f0, f16, f0 + addt f1, f0, f0 + mult f28, f0, f0 + subt f0, f14, f7 + fbge f3, adjust_sign + br zero, change_sign + +// +// 8th octant; compute sin(y') +// +Switch17: + ornot zero, s0, s0 + and s0, 127, s0 + subl s0, 27, s0 + blt s0, pos_sin_2 + + s4subl s0, s0, s0 + lda a0, __trig_cons + s8addl s0, a0, s0 + ldt f13, SINCOS(s0) + ldt f5, SIN_A(s0) + ldt f6, COS_A(s0) + addt f9, f13, f13 + subt f8, f13, f4 + +neg_tab_eval: + cpysn f3, f3, f3 + +pos_tab_eval: + mult f4, f4, f12 // evalutate p_poly & q_poly + lda a1, __trig_cons + ldt f11, P_POLY1(a1) + ldt f17, P_POLY0(a1) + ldt f10, Q_POLY1(a1) + mult f11, f12, f11 + ldt f19, Q_POLY0(a1) + mult f4, f12, f18 + mult f10, f12, f10 + addt f11, f17, f11 + addt f10, f19, f10 + mult f18, f11, f11 + mult f12, f10, f10 + subt f4, f11, f4 + mult f5, f10, f10 + mult f6, f4, f4 + subt f10, f4, f4 + subt f5, f4, f7 + fbge f3, adjust_sign + br zero, change_sign + +pos_sin_2: + mult f9, f9, f23 + lda a3, __trig_cons + ldt f25, S_POLY3(a3) + ldt f24, S_POLY1(a3) + ldt f22, S_POLY2(a3) + mult f25, f23, f25 + ldt f20, S_POLY0(a3) + mult f23, f23, f21 + mult f24, f23, f24 + mult f9, f23, f23 + addt f25, f22, f22 + addt f24, f20, f20 + mult f22, f21, f21 + addt f21, f20, f20 + mult f23, f20, f20 + addt f8, f20, f8 + subt f9, f8, f7 + fbge f3, adjust_sign + + br zero, change_sign + +// +// Determine if we have a NaN or an Inf +// +NaN_or_Inf: + stt f2, Temp2(sp) + ldl a4, Temp2 + HighPart(sp) + and a4, s1, a5 + cmpeq a5, s1, s1 + beq s1, NaN_or_Inf1 + + ldl t9, Temp2(sp) + ldah t8, 0x10(zero) // mask = 0x000fffff + lda t8, -1(t8) + and a4, t8, a4 + bis a4, t9, a4 + cmpult zero, a4, a4 + and s1, a4, s1 + bne s1, NaN_or_Inf2 + +// +// report an exception +// +NaN_or_Inf1: + lda t10, sinName + stl t10, ExRec + ErName(sp) + ldah t12, 0x800(zero) + stt f2, ExRec + ErArg0(sp) + lda t12, 0x55(t12) + stl t12, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f2, 0(v0) + +// +// return the argument +// +NaN_or_Inf2: + cpys f2, f2, f0 + br zero, done + +change_sign: + cpysn f7, f7, f7 +adjust_sign: + cpys f7, f7, f0 + +// +// Restore registers and return with result in f0. +// + +done: ldq s0, SaveS0(sp) + ldq s1, SaveS1(sp) + ldq ra, SaveRa(sp) + ldt f2, SaveF2(sp) + ldt f3, SaveF3(sp) + ldt f4, SaveF4(sp) + ldt f5, SaveF5(sp) + ldt f6, SaveF6(sp) + ldt f7, SaveF7(sp) + ldt f8, SaveF8(sp) + ldt f9, SaveF9(sp) + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end sin + + .rdata + .align 3 + +// +// Define floating point constants. +// + +One: .double 1.0 + +// +// dispatch on octant +// + +Switch1: + .long Switch10 + .long Switch11 + .long Switch12 + .long Switch13 + .long Switch14 + .long Switch15 + .long Switch16 + .long Switch17 + +// +// dispatch on octant +// + +Switch2: + .long Switch20 + .long Switch21 + .long Switch22 + .long Switch23 + .long Switch24 + .long Switch25 + .long Switch26 + .long Switch27 + +// +// Function name for dpml_exception. +// + +sinName: + .ascii "sin\0" diff --git a/private/fp32/tran/alpha/sqrt.c b/private/fp32/tran/alpha/sqrt.c new file mode 100644 index 000000000..8cff2f7bf --- /dev/null +++ b/private/fp32/tran/alpha/sqrt.c @@ -0,0 +1,192 @@ +/*** +*sqrt.c - square root +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 1-29-91 GDP Kahan's algorithm for final rounding +* 3-11-92 GDP new interval and initial approximation +* 2-22-93 TVB Adapted for Alpha AXP +* +*******************************************************************************/ +#ifdef _ALPHA_ + +// +// This Alpha-specific version of sqrt is identical to portable version +// except for instances where chopped floating point operations are performed +// implicitly by setting the rounding mode to chopped. For Alpha, these +// instances are replaced with calls to assembler routines that explicitly +// perform the chopped operations. This is necessary because there is no +// compiler support yet for generating floating point instructions using the +// Alpha dynamic rounding mode. +// + +#include <math.h> +#include <trans.h> + +extern double _addtc(double x, double y); +extern double _divtc(double x, double y); +extern double _multc(double x, double y); +extern double _subtc(double x, double y); + +// +// Coefficients for initial approximation (Hart & al) +// + +static double p00 = .2592768763e+0; +static double p01 = .1052021187e+1; +static double p02 = -.3163221431e+0; + + +/*** +*double sqrt(double x) - square root +* +*Purpose: +* Compute the square root of a number. +* This function should be provided by the underlying +* hardware (IEEE spec). +*Entry: +* +*Exit: +* +*Exceptions: +* I P +*******************************************************************************/ +double sqrt(double x) +{ + unsigned int savedcw, sw; + double result,t; + unsigned int stat,rc; + + savedcw = _ctrlfp(ICW, IMCW); + + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw, x); + case T_QNAN: + return _handle_qnan1(OP_SQRT, x, savedcw); + case T_SNAN: + return _except1(FP_I,OP_SQRT,x,QNAN_SQRT,savedcw); + } + /* -INF will be handled in the x<0 case */ + } + if (x < 0.0) { + return _except1(FP_I, OP_SQRT, x, QNAN_SQRT,savedcw); + } + + if (x == 0.0) { + RETURN (savedcw, x); + } + + + result = _fsqrt(x); + + + // + // Kahan's algorithm + // + + t = _divtc(x, result); + + // + // Multiply back to see if division was exact. + // Compare using subtraction to avoid invalid exceptions. + // + + if (_subtc(x, _multc(t, result)) == 0.0) { + // exact + if (t == result) { + RETURN(savedcw, result); + } + else { + // t = t-1 + if (*D_LO(t) == 0) { + (*D_HI(t)) --; + } + (*D_LO(t)) --; + } + + } + + rc = savedcw & IMCW_RC; + if (rc == IRC_UP || rc == IRC_NEAR) { + // t = t+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + if (rc == IRC_UP) { + // y = y+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + } + } + + result = _multc(0.5, _addtc(t, result)); + + _set_statfp(ISW_INEXACT); // update status word + RETURN_INEXACT1(OP_SQRT, x, result, savedcw); +} + + + +/*** +* _fsqrt - non IEEE conforming square root +* +*Purpose: +* compute a square root of a normal number without performing +* IEEE rounding. The argument is a finite number (no NaN or INF) +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +double _fsqrt(double x) +{ + double f,y,result; + int n; + + f = _decomp(x,&n); + + if (n & 0x1) { + // n is odd + n++; + f *= 0.5; + } + + // + // approximation for sqrt in the interval [.25, 1] + // (Computer Approximationsn, Hart & al.) + // gives more than 7 bits of accuracy + // + + y = p00 + f * (p01 + f * p02); + + y += f / y; + y *= 0.5; + + y += f / y; + y *= 0.5; + + y += f / y; + y *= 0.5; + + n >>= 1; + result = _add_exp(y,n); + + return result; +} + + + +#endif diff --git a/private/fp32/tran/alpha/sqrts.s b/private/fp32/tran/alpha/sqrts.s new file mode 100644 index 000000000..92f465664 --- /dev/null +++ b/private/fp32/tran/alpha/sqrts.s @@ -0,0 +1,1155 @@ +// TITLE("Alpha AXP Square Root") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// sqrt.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format square root. +// +// Author: +// +// Bill Gray (rtl::gray) 30-Jun-1993 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 4-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + SBTTL("Square Root") + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +Temp0: .space 8 // save argument +Temp1: .space 8 // +ExRec: .space DpmlExceptionLength // exception record + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +//++ +// +// double +// sqrt ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the square root of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double square root result is returned in f0. +// +//-- + + NESTED_ENTRY(sqrt, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + mov ra, a0 // save return address + + PROLOGUE_END + +// +// Get the high 32 bits of x in an integer register +// while isolating the fraction field, f. +// + stt f16, Temp0(sp) + ldl v0, Temp0 + HighPart(sp) + ldt f1, One + lda t1, __sqrt_t_table + sra v0, 13 t0 + cpyse f1, f16, f10 +// +// Isolate exponent bits and compute index into +// polynomial table +// + ldah t4, 0x3fe0(zero) + and t0, 0xff, t0 + addl t0, t0, t0 + s8addl t0, zero, t0 + addl t1, t0, t0 +// +// Evaluate a + b*f + c*(f*f). +// + lds f0, 4(t0) + lds f11, 0(t0) + ldt f12, 8(t0) + mult f0, f10, f0 + ldah t0, -0x7fe0(zero) + mult f10, f10, f10 + lda t0, -1(t0) + and v0, t0, t2 + xor v0, t2, t3 + addl t3, t4, t3 + zapnot t3, 0xf, t3 + addt f12, f0, f0 + bis t2, t4, t2 + mult f10, f11, f10 + sll t3, 31, t3 + stq t3, Temp1(sp) + ldah t3, 0x10(zero) + stl t2, Temp0 + HighPart(sp) +// +// Compute final scale factor and branch if original +// argument was abnormal +// + subl v0, t3, v0 + ldt f11, Temp0(sp) + ldah t2, 0x7fe0(zero) + ldt f12, Temp1(sp) + cmpult v0, t2, v0 + addt f10, f0, f0 + beq v0, 10f +// +// Incorporate scale factor and perform Newton's iteration +// + mult f0, f11, f11 + mult f11, f0, f0 + mult f11, f12, f11 + ldt f12, Lsb + subt f1, f0, f0 + addt f11, f11, f10 + mult f11, f0, f0 + mult f11, f12, f11 +// +// Do Tuckerman's rounding +// + addt f10, f0, f0 + subt f0, f11, f12 + addt f0, f11, f11 + multc f0, f12, f10 + multc f0, f11, f13 + cmptle f16, f10, f10 + cmptlt f13, f16, f13 + fcmoveq f10, f0, f12 + fcmoveq f13, f12, f11 + cpys f11, f11, f0 + br zero, done + +// +// The following code classifies the argument. I.e. t3 <-- F_CLASSIFY(x). +// Start with check on 0 or denormal +// + +10: stt f16, Temp1(sp) + ldl t3, Temp1 + HighPart(sp) + ldah v0, 0x7ff0(zero) + zapnot t3, 0xf, t2 + and t3, v0, t5 + srl t2, 31, t2 + cmpult t5, v0, v0 + and t2, 1, t2 + beq t5, 40f + +// +// Branch if NaN's or Infinity +// + + addl t2, 4, t6 + beq v0, 20f + + br zero, 50f + +// +// Distinguish between NaN's and Infinities +// + +20: ldah v0, 0x10(zero) + ldl t5, Temp1(sp) + lda v0, -1(v0) + and t3, v0, v0 + bis v0, t5, t5 + stl v0, Temp1 + HighPart(sp) + beq t5, 30f + + srl v0, 19 v0 + and v0, 1, t6 + br zero, 50f + +// +// Was Infinity +// + +30: addl t2, 2, t6 + br zero, 50f + + +// +// Distinguish between 0 and denorm +// + +40: ldl t7, Temp1(sp) + ldah t5, 0x10(zero) + lda t5, -1(t5) + and t3, t5, t3 + bis t3, t7, t7 + stl t3, Temp1 + HighPart(sp) + mov 6, v0 + cmoveq t7, 8, v0 + addl t2, v0, t6 + +// +// Switch on class +// + +50: cmpule t6, 9, t12 + beq t12, denorm + + lda t12, Switch + s4addl t6, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// Nan, +Inf or zero. Just return argument +// + +retarg: + cpys f16, f16, f0 + br zero, done + + +// +// Argument was negative. Dispatch error +// + +error: + lda t3, FunctionName + ldah t2, 0x800(zero) + stl t3, ExRec + ErName(sp) + stt f16, ExRec + ErArg0(sp) + lda t2, 0x5e(t2) + stl t2, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f0, 0(v0) + br zero, done + +// +// Denormalized argument. Scale up and take sqrt +// + +denorm: + ldah t7, 0x6b0(zero) + stl t7, Temp1 + HighPart(sp) + stl zero, Temp1(sp) + ldt f10, Temp1(sp) + cpyse f10, f16, f11 + subt f11, f10, f10 + stt f10, Temp0(sp) + ldl ra, Temp0 + HighPart(sp) + cpyse f1, f10, f13 + sra ra, 13, v0 + and ra, t0, t0 + and v0, 0xff, v0 + sll v0, 4, v0 + mult f13, f13, f16 + bis t0, t4, t3 + addl t1, v0, v0 + stl t3, Temp0 + HighPart(sp) + xor ra, t0, t0 + lds f12, 4(v0) + lds f0, 0(v0) + addl t0, t4, t0 + ldt f11, 8(v0) + zapnot t0, 0xf, t0 + mult f12, f13, f12 + ldt f13, Temp0(sp) + mult f16, f0, f0 + sll t0, 31, t0 + stq t0, Temp0(sp) + ldah t3, 0x350(zero) + ldt f16, Temp0(sp) + addt f11, f12, f11 + addt f0, f11, f0 + ldt f11, Lsb + mult f0, f13, f13 + mult f13, f0, f0 + mult f13, f16, f13 + subt f1, f0, f0 + addt f13, f13, f12 + mult f13, f11, f11 + mult f13, f0, f0 + addt f12, f0, f0 + subt f0, f11, f16 + addt f0, f11, f11 + multc f0, f16, f1 + multc f0, f11, f13 + cmptle f10, f1, f1 + cmptlt f13, f10, f10 + fcmoveq f1, f0, f16 + fcmoveq f10, f16, f11 + stt f11, Temp0(sp) + ldl v0, Temp0 + HighPart(sp) + subl v0, t3, v0 + stl v0, Temp0 + HighPart(sp) + ldt f0, Temp0(sp) + +// +// Return with result in f0. +// + +done: + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (a0) // return through saved ra in a0 + + .end sqrt + + .rdata + .align 3 + +Lsb: .quad 0x3cb4000000000000 // lsb factor: 5*2^-54 + +One: .double 1.0 + +// +// Jump table indexed by F_CLASS(x) +// + +Switch: + .long retarg + .long retarg + .long retarg + .long error + .long denorm + .long error + .long denorm + .long error + .long retarg + .long retarg + +// +// Function name for __dpml_exception. +// + +FunctionName: + .ascii "sqrt\0" + +// +// 256 entry square root table. +// + + .align 3 + .globl __sqrt_t_table +__sqrt_t_table: + + .float 5.25192082e-001 + .float -1.75747597e+000 + .double 2.6464974462692763e+000 + .float 5.15111804e-001 + .float -1.73715818e+000 + .double 2.6362593054537453e+000 + .float 5.05300283e-001 + .float -1.71722889e+000 + .double 2.6261391341915705e+000 + .float 4.95748460e-001 + .float -1.69767773e+000 + .double 2.6161345458522471e+000 + .float 4.86447543e-001 + .float -1.67849493e+000 + .double 2.6062435780891402e+000 + .float 4.77389067e-001 + .float -1.65967047e+000 + .double 2.5964637691802634e+000 + .float 4.68565017e-001 + .float -1.64119542e+000 + .double 2.5867933778763708e+000 + .float 4.59967613e-001 + .float -1.62306046e+000 + .double 2.5772301172720726e+000 + .float 4.51589465e-001 + .float -1.60525715e+000 + .double 2.5677722316823841e+000 + .float 4.43423420e-001 + .float -1.58777702e+000 + .double 2.5584177809840178e+000 + .float 4.35462624e-001 + .float -1.57061172e+000 + .double 2.5491646878882950e+000 + .float 4.27700490e-001 + .float -1.55375361e+000 + .double 2.5400114102036593e+000 + .float 4.20130670e-001 + .float -1.53719485e+000 + .double 2.5309559425202091e+000 + .float 4.12747115e-001 + .float -1.52092814e+000 + .double 2.5219966432800311e+000 + .float 4.05543953e-001 + .float -1.50494635e+000 + .double 2.5131318561302742e+000 + .float 3.98515552e-001 + .float -1.48924243e+000 + .double 2.5043598307294976e+000 + .float 3.91656518e-001 + .float -1.47380984e+000 + .double 2.4956791166576457e+000 + .float 3.84961635e-001 + .float -1.45864189e+000 + .double 2.4870879666367363e+000 + .float 3.78425866e-001 + .float -1.44373238e+000 + .double 2.4785850040822726e+000 + .float 3.72044414e-001 + .float -1.42907512e+000 + .double 2.4701686028111789e+000 + .float 3.65812600e-001 + .float -1.41466427e+000 + .double 2.4618374464189174e+000 + .float 3.59725922e-001 + .float -1.40049386e+000 + .double 2.4535898984567197e+000 + .float 3.53780121e-001 + .float -1.38655853e+000 + .double 2.4454247451516906e+000 + .float 3.47970992e-001 + .float -1.37285280e+000 + .double 2.4373406056097546e+000 + .float 3.42294544e-001 + .float -1.35937130e+000 + .double 2.4293359988624310e+000 + .float 3.36746901e-001 + .float -1.34610915e+000 + .double 2.4214098911341502e+000 + .float 3.31324309e-001 + .float -1.33306122e+000 + .double 2.4135608443300902e+000 + .float 3.26023191e-001 + .float -1.32022274e+000 + .double 2.4057876358548973e+000 + .float 3.20840031e-001 + .float -1.30758882e+000 + .double 2.3980888548975567e+000 + .float 3.15771550e-001 + .float -1.29515541e+000 + .double 2.3904637987369988e+000 + .float 3.10814440e-001 + .float -1.28291762e+000 + .double 2.3829108308987554e+000 + .float 3.05965573e-001 + .float -1.27087140e+000 + .double 2.3754291107716869e+000 + .float 3.01221997e-001 + .float -1.25901258e+000 + .double 2.3680174165800736e+000 + .float 2.96580702e-001 + .float -1.24733686e+000 + .double 2.3606745061532881e+000 + .float 2.92038947e-001 + .float -1.23584068e+000 + .double 2.3533996524278131e+000 + .float 2.87593961e-001 + .float -1.22451997e+000 + .double 2.3461916115767520e+000 + .float 2.83243120e-001 + .float -1.21337104e+000 + .double 2.3390493874647480e+000 + .float 2.78983861e-001 + .float -1.20239019e+000 + .double 2.3319719207388188e+000 + .float 2.74813741e-001 + .float -1.19157410e+000 + .double 2.3249584500223111e+000 + .float 2.70730376e-001 + .float -1.18091917e+000 + .double 2.3180078386470857e+000 + .float 2.66731441e-001 + .float -1.17042208e+000 + .double 2.3111191896320209e+000 + .float 2.62814730e-001 + .float -1.16007960e+000 + .double 2.3042915890976596e+000 + .float 2.58978039e-001 + .float -1.14988852e+000 + .double 2.2975241538422528e+000 + .float 2.55219340e-001 + .float -1.13984573e+000 + .double 2.2908158724885475e+000 + .float 2.51536548e-001 + .float -1.12994838e+000 + .double 2.2841661874688031e+000 + .float 2.47927740e-001 + .float -1.12019336e+000 + .double 2.2775739558529393e+000 + .float 2.44390994e-001 + .float -1.11057794e+000 + .double 2.2710385421514721e+000 + .float 2.40924492e-001 + .float -1.10109925e+000 + .double 2.2645589845228762e+000 + .float 2.37526432e-001 + .float -1.09175467e+000 + .double 2.2581346441659949e+000 + .float 2.34195098e-001 + .float -1.08254158e+000 + .double 2.2517647411316082e+000 + .float 2.30928808e-001 + .float -1.07345724e+000 + .double 2.2454482962721607e+000 + .float 2.27725953e-001 + .float -1.06449938e+000 + .double 2.2391848731660877e+000 + .float 2.24584922e-001 + .float -1.05566525e+000 + .double 2.2329733937440555e+000 + .float 2.21504226e-001 + .float -1.04695272e+000 + .double 2.2268134030638063e+000 + .float 2.18482375e-001 + .float -1.03835940e+000 + .double 2.2207041336993361e+000 + .float 2.15517908e-001 + .float -1.02988291e+000 + .double 2.2146447786117487e+000 + .float 2.12609455e-001 + .float -1.02152121e+000 + .double 2.2086348810952106e+000 + .float 2.09755674e-001 + .float -1.01327205e+000 + .double 2.2026736016458566e+000 + .float 2.06955209e-001 + .float -1.00513327e+000 + .double 2.1967603227513171e+000 + .float 2.04206824e-001 + .float -9.97102916e-001 + .double 2.1908944562665140e+000 + .float 2.01509267e-001 + .float -9.89178896e-001 + .double 2.1850752901811767e+000 + .float 1.98861346e-001 + .float -9.81359303e-001 + .double 2.1793022649539600e+000 + .float 1.96261868e-001 + .float -9.73642170e-001 + .double 2.1735747521556466e+000 + .float 1.93709716e-001 + .float -9.66025651e-001 + .double 2.1678921542098140e+000 + .float 1.91203788e-001 + .float -9.58507895e-001 + .double 2.1622538571652758e+000 + .float 1.88743010e-001 + .float -9.51087177e-001 + .double 2.1566593763278674e+000 + .float 1.86326340e-001 + .float -9.43761706e-001 + .double 2.1511080868901873e+000 + .float 1.83952779e-001 + .float -9.36529815e-001 + .double 2.1455994575881343e+000 + .float 1.81621328e-001 + .float -9.29389775e-001 + .double 2.1401328820050880e+000 + .float 1.79331034e-001 + .float -9.22339976e-001 + .double 2.1347078468186740e+000 + .float 1.77080989e-001 + .float -9.15378988e-001 + .double 2.1293240245907636e+000 + .float 1.74870253e-001 + .float -9.08505023e-001 + .double 2.1239806212691255e+000 + .float 1.72697961e-001 + .float -9.01716650e-001 + .double 2.1186772313024411e+000 + .float 1.70563266e-001 + .float -8.95012379e-001 + .double 2.1134133344032175e+000 + .float 1.68465331e-001 + .float -8.88390839e-001 + .double 2.1081885752221190e+000 + .float 1.66403353e-001 + .float -8.81850541e-001 + .double 2.1030023503810171e+000 + .float 1.64376527e-001 + .float -8.75390053e-001 + .double 2.0978541627791989e+000 + .float 1.62384093e-001 + .float -8.69008124e-001 + .double 2.0927437009699346e+000 + .float 1.60425320e-001 + .float -8.62703323e-001 + .double 2.0876703032418864e+000 + .float 1.58499449e-001 + .float -8.56474400e-001 + .double 2.0826336840914288e+000 + .float 1.56605810e-001 + .float -8.50320101e-001 + .double 2.0776333382202288e+000 + .float 1.54743686e-001 + .float -8.44239116e-001 + .double 2.0726687916769500e+000 + .float 1.52912408e-001 + .float -8.38230312e-001 + .double 2.0677397562803219e+000 + .float 1.51111335e-001 + .float -8.32292378e-001 + .double 2.0628455813976707e+000 + .float 1.49339810e-001 + .float -8.26424241e-001 + .double 2.0579860597217148e+000 + .float 1.47597238e-001 + .float -8.20624828e-001 + .double 2.0531608342665746e+000 + .float 1.45882994e-001 + .float -8.14892828e-001 + .double 2.0483692428461486e+000 + .float 1.44196495e-001 + .float -8.09227288e-001 + .double 2.0436111056397239e+000 + .float 1.42537162e-001 + .float -8.03627074e-001 + .double 2.0388859545094555e+000 + .float 1.40904456e-001 + .float -7.98091173e-001 + .double 2.0341934039869969e+000 + .float 1.39297798e-001 + .float -7.92618573e-001 + .double 2.0295332076569954e+000 + .float 1.37716666e-001 + .float -7.87208140e-001 + .double 2.0249047535575562e+000 + .float 1.36160567e-001 + .float -7.81859100e-001 + .double 2.0203079620307052e+000 + .float 1.34628952e-001 + .float -7.76570261e-001 + .double 2.0157422231049780e+000 + .float 1.33121356e-001 + .float -7.71340787e-001 + .double 2.0112072854805856e+000 + .float 1.31637290e-001 + .float -7.66169786e-001 + .double 2.0067028938663114e+000 + .float 1.30176291e-001 + .float -7.61056304e-001 + .double 2.0022286078757636e+000 + .float 1.28737882e-001 + .float -7.55999446e-001 + .double 1.9977841449065679e+000 + .float 1.27321631e-001 + .float -7.50998318e-001 + .double 1.9933690929232475e+000 + .float 1.25927106e-001 + .float -7.46052146e-001 + .double 1.9889832578829993e+000 + .float 1.24553859e-001 + .float -7.41159976e-001 + .double 1.9846261843352397e+000 + .float 1.23201489e-001 + .float -7.36321032e-001 + .double 1.9802976005972370e+000 + .float 1.21869594e-001 + .float -7.31534541e-001 + .double 1.9759972429186805e+000 + .float 1.20557763e-001 + .float -7.26799726e-001 + .double 1.9717248802941552e+000 + .float 1.19265616e-001 + .float -7.22115695e-001 + .double 1.9674799765186872e+000 + .float 1.17992781e-001 + .float -7.17481792e-001 + .double 1.9632624085188497e+000 + .float 1.16738878e-001 + .float -7.12897241e-001 + .double 1.9590718698474670e+000 + .float 1.15503550e-001 + .float -7.08361268e-001 + .double 1.9549079859501843e+000 + .float 1.14286453e-001 + .float -7.03873277e-001 + .double 1.9507706671873914e+000 + .float 1.13087229e-001 + .float -6.99432433e-001 + .double 1.9466594424976678e+000 + .float 1.11905552e-001 + .float -6.95038080e-001 + .double 1.9425740748447038e+000 + .float 1.10741086e-001 + .float -6.90689504e-001 + .double 1.9385142485689399e+000 + .float 1.09593518e-001 + .float -6.86386168e-001 + .double 1.9344799112660707e+000 + .float 1.08462527e-001 + .float -6.82127297e-001 + .double 1.9304705967413658e+000 + .float 1.07347809e-001 + .float -6.77912295e-001 + .double 1.9264861273746881e+000 + .float 1.06249064e-001 + .float -6.73740506e-001 + .double 1.9225261926578792e+000 + .float 1.05166003e-001 + .float -6.69611335e-001 + .double 1.9185905738015345e+000 + .float 1.04098327e-001 + .float -6.65524185e-001 + .double 1.9146790857728240e+000 + .float 1.03045776e-001 + .float -6.61478460e-001 + .double 1.9107914129450609e+000 + .float 1.02008060e-001 + .float -6.57473564e-001 + .double 1.9069273545162257e+000 + .float 1.00984909e-001 + .float -6.53508842e-001 + .double 1.9030865170164049e+000 + .float 9.99760777e-002 + .float -6.49583876e-001 + .double 1.8992688898169741e+000 + .float 9.89812911e-002 + .float -6.45698011e-001 + .double 1.8954741178920014e+000 + .float 9.80003178e-002 + .float -6.41850770e-001 + .double 1.8917020294982954e+000 + .float 9.70328897e-002 + .float -6.38041556e-001 + .double 1.8879523676159233e+000 + .float 9.60787907e-002 + .float -6.34269893e-001 + .double 1.8842249127996440e+000 + .float 9.51377675e-002 + .float -6.30535185e-001 + .double 1.8805193594542446e+000 + .float 9.42095965e-002 + .float -6.26837015e-001 + .double 1.8768356449191570e+000 + .float 3.71366888e-001 + .float -1.24272311e+000 + .double 1.8713562142426390e+000 + .float 3.64239037e-001 + .float -1.22835636e+000 + .double 1.8641168780063997e+000 + .float 3.57301265e-001 + .float -1.21426415e+000 + .double 1.8569607428562085e+000 + .float 3.50547105e-001 + .float -1.20043945e+000 + .double 1.8498864878045040e+000 + .float 3.43970358e-001 + .float -1.18687510e+000 + .double 1.8428924612812110e+000 + .float 3.37565035e-001 + .float -1.17356420e+000 + .double 1.8359770996729186e+000 + .float 3.31325501e-001 + .float -1.16050041e+000 + .double 1.8291391376738104e+000 + .float 3.25246215e-001 + .float -1.14767706e+000 + .double 1.8223768997372263e+000 + .float 3.19321990e-001 + .float -1.13508832e+000 + .double 1.8156892511203222e+000 + .float 3.13547701e-001 + .float -1.12272787e+000 + .double 1.8090745419785592e+000 + .float 3.07918578e-001 + .float -1.11059022e+000 + .double 1.8025316593797485e+000 + .float 3.02429914e-001 + .float -1.09866965e+000 + .double 1.7960592199296197e+000 + .float 2.97077239e-001 + .float -1.08696091e+000 + .double 1.7896561302788023e+000 + .float 2.91856289e-001 + .float -1.07545865e+000 + .double 1.7833209679422681e+000 + .float 2.86762863e-001 + .float -1.06415772e+000 + .double 1.7770525463183728e+000 + .float 2.81793058e-001 + .float -1.05305350e+000 + .double 1.7708498911299231e+000 + .float 2.76942968e-001 + .float -1.04214084e+000 + .double 1.7647115407015652e+000 + .float 2.72208989e-001 + .float -1.03141558e+000 + .double 1.7586367726362873e+000 + .float 2.67587513e-001 + .float -1.02087295e+000 + .double 1.7526242355192094e+000 + .float 2.63075113e-001 + .float -1.01050866e+000 + .double 1.7466729284878335e+000 + .float 2.58668572e-001 + .float -1.00031865e+000 + .double 1.7407818909752450e+000 + .float 2.54364640e-001 + .float -9.90298688e-001 + .double 1.7349500347577642e+000 + .float 2.50160336e-001 + .float -9.80444968e-001 + .double 1.7291764320552221e+000 + .float 2.46052653e-001 + .float -9.70753491e-001 + .double 1.7234600227237515e+000 + .float 2.42038801e-001 + .float -9.61220741e-001 + .double 1.7178000348867495e+000 + .float 2.38116011e-001 + .float -9.51842904e-001 + .double 1.7121953568056012e+000 + .float 2.34281659e-001 + .float -9.42616582e-001 + .double 1.7066451948521644e+000 + .float 2.30533198e-001 + .float -9.33538377e-001 + .double 1.7011486818575288e+000 + .float 2.26868168e-001 + .float -9.24604952e-001 + .double 1.6957049235088713e+000 + .float 2.23284200e-001 + .float -9.15813148e-001 + .double 1.6903131416040469e+000 + .float 2.19778985e-001 + .float -9.07159746e-001 + .double 1.6849724255940581e+000 + .float 2.16350347e-001 + .float -8.98641765e-001 + .double 1.6796819827589482e+000 + .float 2.12996110e-001 + .float -8.90256286e-001 + .double 1.6744411236170984e+000 + .float 2.09714234e-001 + .float -8.82000387e-001 + .double 1.6692489779491364e+000 + .float 2.06502721e-001 + .float -8.73871326e-001 + .double 1.6641048550491992e+000 + .float 2.03359634e-001 + .float -8.65866363e-001 + .double 1.6590079957912536e+000 + .float 2.00283125e-001 + .float -8.57982874e-001 + .double 1.6539576728451819e+000 + .float 1.97271377e-001 + .float -8.50218296e-001 + .double 1.6489532110447931e+000 + .float 1.94322661e-001 + .float -8.42570126e-001 + .double 1.6439938878792240e+000 + .float 1.91435277e-001 + .float -8.35035920e-001 + .double 1.6390790307825804e+000 + .float 1.88607618e-001 + .float -8.27613413e-001 + .double 1.6342080719480374e+000 + .float 1.85838073e-001 + .float -8.20300102e-001 + .double 1.6293801522379776e+000 + .float 1.83125138e-001 + .float -8.13093960e-001 + .double 1.6245948813109343e+000 + .float 1.80467322e-001 + .float -8.05992663e-001 + .double 1.6198514694492983e+000 + .float 1.77863196e-001 + .float -7.98994124e-001 + .double 1.6151493577598059e+000 + .float 1.75311387e-001 + .float -7.92096317e-001 + .double 1.6104879804605579e+000 + .float 1.72810540e-001 + .float -7.85297215e-001 + .double 1.6058667642813294e+000 + .float 1.70359343e-001 + .float -7.78594792e-001 + .double 1.6012850723561343e+000 + .float 1.67956561e-001 + .float -7.71987200e-001 + .double 1.5967423908195668e+000 + .float 1.65600941e-001 + .float -7.65472472e-001 + .double 1.5922380878437010e+000 + .float 1.63291335e-001 + .float -7.59048939e-001 + .double 1.5877717619763705e+000 + .float 1.61026567e-001 + .float -7.52714694e-001 + .double 1.5833427552316039e+000 + .float 1.58805519e-001 + .float -7.46468067e-001 + .double 1.5789506429365341e+000 + .float 1.56627148e-001 + .float -7.40307391e-001 + .double 1.5745948697300647e+000 + .float 1.54490367e-001 + .float -7.34230995e-001 + .double 1.5702749848537503e+000 + .float 1.52394176e-001 + .float -7.28237212e-001 + .double 1.5659903726677535e+000 + .float 1.50337592e-001 + .float -7.22324610e-001 + .double 1.5617407422690024e+000 + .float 1.48319662e-001 + .float -7.16491580e-001 + .double 1.5575255002630859e+000 + .float 1.46339431e-001 + .float -7.10736573e-001 + .double 1.5533441521316012e+000 + .float 1.44396037e-001 + .float -7.05058217e-001 + .double 1.5491962883876773e+000 + .float 1.42488569e-001 + .float -6.99455082e-001 + .double 1.5450815220948069e+000 + .float 1.40616208e-001 + .float -6.93925798e-001 + .double 1.5409993751875097e+000 + .float 1.38778090e-001 + .float -6.88468933e-001 + .double 1.5369493910723628e+000 + .float 1.36973456e-001 + .float -6.83083296e-001 + .double 1.5329312507492618e+000 + .float 1.35201499e-001 + .float -6.77767456e-001 + .double 1.5289443907643872e+000 + .float 1.33461460e-001 + .float -6.72520161e-001 + .double 1.5249884264125595e+000 + .float 1.31752625e-001 + .float -6.67340279e-001 + .double 1.5210630637476021e+000 + .float 1.30074263e-001 + .float -6.62226558e-001 + .double 1.5171678746536008e+000 + .float 1.28425673e-001 + .float -6.57177806e-001 + .double 1.5133024639111228e+000 + .float 1.26806200e-001 + .float -6.52192891e-001 + .double 1.5094664332725669e+000 + .float 1.25215158e-001 + .float -6.47270620e-001 + .double 1.5056593734736323e+000 + .float 1.23651937e-001 + .float -6.42410040e-001 + .double 1.5018810773552325e+000 + .float 1.22115903e-001 + .float -6.37609959e-001 + .double 1.4981310315722083e+000 + .float 1.20606445e-001 + .float -6.32869363e-001 + .double 1.4944089567247609e+000 + .float 1.19122982e-001 + .float -6.28187180e-001 + .double 1.4907144164537049e+000 + .float 1.17664941e-001 + .float -6.23562515e-001 + .double 1.4870472476381746e+000 + .float 1.16231754e-001 + .float -6.18994236e-001 + .double 1.4834069004982244e+000 + .float 1.14822894e-001 + .float -6.14481509e-001 + .double 1.4797932134058187e+000 + .float 1.13437831e-001 + .float -6.10023379e-001 + .double 1.4762058443113220e+000 + .float 1.12076037e-001 + .float -6.05618834e-001 + .double 1.4726443632928796e+000 + .float 1.10737026e-001 + .float -6.01267099e-001 + .double 1.4691086176496402e+000 + .float 1.09420307e-001 + .float -5.96967220e-001 + .double 1.4655981929176258e+000 + .float 1.08125404e-001 + .float -5.92718303e-001 + .double 1.4621127390083379e+000 + .float 1.06851846e-001 + .float -5.88519573e-001 + .double 1.4586520899895670e+000 + .float 1.05599195e-001 + .float -5.84370196e-001 + .double 1.4552159098404298e+000 + .float 1.04367010e-001 + .float -5.80269396e-001 + .double 1.4518039685175916e+000 + .float 1.03154853e-001 + .float -5.76216221e-001 + .double 1.4484157452291972e+000 + .float 1.01962321e-001 + .float -5.72210073e-001 + .double 1.4450512194914906e+000 + .float 1.00788996e-001 + .float -5.68250120e-001 + .double 1.4417100217216250e+000 + .float 9.96344909e-002 + .float -5.64335704e-001 + .double 1.4383920062345164e+000 + .float 9.84984189e-002 + .float -5.60465932e-001 + .double 1.4350966293370357e+000 + .float 9.73803923e-002 + .float -5.56640208e-001 + .double 1.4318238603544484e+000 + .float 9.62800533e-002 + .float -5.52857816e-001 + .double 1.4285733865110009e+000 + .float 9.51970443e-002 + .float -5.49118102e-001 + .double 1.4253450033570922e+000 + .float 9.41310152e-002 + .float -5.45420289e-001 + .double 1.4221382841486783e+000 + .float 9.30816233e-002 + .float -5.41763842e-001 + .double 1.4189531990222775e+000 + .float 9.20485407e-002 + .float -5.38148105e-001 + .double 1.4157894735102996e+000 + .float 9.10314322e-002 + .float -5.34572303e-001 + .double 1.4126466515879097e+000 + .float 9.00299922e-002 + .float -5.31036019e-001 + .double 1.4095248195351726e+000 + .float 8.90439078e-002 + .float -5.27538538e-001 + .double 1.4064235685111435e+000 + .float 8.80728811e-002 + .float -5.24079263e-001 + .double 1.4033426578794330e+000 + .float 8.71166140e-002 + .float -5.20657599e-001 + .double 1.4002818522920506e+000 + .float 8.61748159e-002 + .float -5.17273068e-001 + .double 1.3972411125262980e+000 + .float 8.52472112e-002 + .float -5.13925016e-001 + .double 1.3942200360457768e+000 + .float 8.43335241e-002 + .float -5.10612905e-001 + .double 1.3912184387210305e+000 + .float 8.34334940e-002 + .float -5.07336259e-001 + .double 1.3882362014696363e+000 + .float 8.25468525e-002 + .float -5.04094481e-001 + .double 1.3852730181874626e+000 + .float 8.16733465e-002 + .float -5.00887096e-001 + .double 1.3823287554388843e+000 + .float 8.08127224e-002 + .float -4.97713536e-001 + .double 1.3794031206822264e+000 + .float 7.99647421e-002 + .float -4.94573385e-001 + .double 1.3764960501331538e+000 + .float 7.91291744e-002 + .float -4.91466135e-001 + .double 1.3736072943265429e+000 + .float 7.83057734e-002 + .float -4.88391250e-001 + .double 1.3707366035783282e+000 + .float 7.74943158e-002 + .float -4.85348314e-001 + .double 1.3678838777230611e+000 + .float 7.66945854e-002 + .float -4.82336819e-001 + .double 1.3650488271986914e+000 + .float 7.59063661e-002 + .float -4.79356378e-001 + .double 1.3622313906713814e+000 + .float 7.51294345e-002 + .float -4.76406485e-001 + .double 1.3594313133382971e+000 + .float 7.43635893e-002 + .float -4.73486722e-001 + .double 1.3566484333684583e+000 + .float 7.36086369e-002 + .float -4.70596671e-001 + .double 1.3538825656360580e+000 + .float 7.28643686e-002 + .float -4.67735916e-001 + .double 1.3511335837170295e+000 + .float 7.21305907e-002 + .float -4.64903980e-001 + .double 1.3484011956721456e+000 + .float 7.14071169e-002 + .float -4.62100536e-001 + .double 1.3456853727847522e+000 + .float 7.06937611e-002 + .float -4.59325165e-001 + .double 1.3429859180407269e+000 + .float 6.99903443e-002 + .float -4.56577450e-001 + .double 1.3403026092774943e+000 + .float 6.92966878e-002 + .float -4.53857034e-001 + .double 1.3376353441254671e+000 + .float 6.86126128e-002 + .float -4.51163501e-001 + .double 1.3349839073740417e+000 + .float 6.79379627e-002 + .float -4.48496521e-001 + .double 1.3323481762839757e+000 + .float 6.72725588e-002 + .float -4.45855707e-001 + .double 1.3297280012598427e+000 + .float 6.66162446e-002 + .float -4.43240732e-001 + .double 1.3271232670080648e+000 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/tans.s b/private/fp32/tran/alpha/tans.s new file mode 100644 index 000000000..5f901e634 --- /dev/null +++ b/private/fp32/tran/alpha/tans.s @@ -0,0 +1,934 @@ +// TITLE("Alpha AXP Tangent") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// tan.s +// +// Abstract: +// +// This module implements a high-performance Alpha AXP specific routine +// for IEEE double format tangent. +// +// Author: +// +// Bob Hanek 1-Oct-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 13-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + +// +// Define DPML exception record for NT. +// + + .struct 0 +ErErr: .space 4 // error code +ErCxt: .space 4 // context +ErPlat: .space 4 // platform +ErEnv: .space 4 // environment +ErRet: .space 4 // return value pointer +ErName: .space 4 // function name +ErType: .space 8 // flags and fill +ErVal: .space 8 // return value +ErArg0: .space 8 // arg 0 +ErArg1: .space 8 // arg 1 +ErArg2: .space 8 // arg 2 +ErArg3: .space 8 // arg 3 +DpmlExceptionLength: + +// +// Define stack frame. +// + + .struct 0 +SaveS0: .space 8 // +SaveS1: .space 8 // +SaveS2: .space 8 // +SaveRa: .space 8 // +SaveF2: .space 8 // +SaveF3: .space 8 // +SaveF4: .space 8 // +SaveF5: .space 8 // +SaveF6: .space 8 // +SaveF7: .space 8 // +SaveF8: .space 8 // +SaveF9: .space 8 // +Temp0: .space 8 // +Temp1: .space 8 // +Temp2: .space 8 // +Temp3: .space 8 // +ExRec: .space DpmlExceptionLength // exception record + .space 8 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + +// +// Define argument range values. +// + +#define BIG_X_HI 0x4169 // upper bound of medium argument range +#define BIG_X_LO 0x21fb +#define SMALL_X_HI 0x3e40 // lower bound of medium argument range +#define SMALL_X_LO 0x0000 +#define EXP_WORD_OF_TWO_PI_HI 0x4019 +#define EXP_WORD_OF_TWO_PI_LO 0x21fb + +// +// Define table offsets. +// + +#define D_2_POW_K_OVER_PI_OVER_4 0x0 +#define PI_OVER_4_OVER_2_POW_K_0 0x08 +#define PI_OVER_4_OVER_2_POW_K_1 0x10 +#define PI_OVER_4_OVER_2_POW_K_2 0x18 +#define PI_OVER_4_OVER_2_POW_K_3 0x20 +#define PI_OVER_4_OVER_2_POW_K_4 0x28 + +#define PI_OVER_2_HI 0x30 +#define PI_OVER_2_LO 0x38 +#define PI_HI 0x40 +#define PI_LO 0x48 +#define THREE_PI_OVER_2_HI 0x50 +#define THREE_PI_OVER_2_LO 0x58 +#define TWO_PI_HI 0x60 +#define TWO_PI_LO 0x68 +#define TWO_POW_K_OVER_PI_OVER_4 0x70 + +#define E_POLY0 0xa58 +#define E_POLY1 E_POLY0 + 8 +#define E_POLY2 E_POLY1 + 8 +#define E_POLY3 E_POLY2 + 8 +#define E_POLY4 E_POLY3 + 8 +#define E_POLY5 E_POLY4 + 8 +#define E_POLY6 E_POLY5 + 8 + +#define F_POLY0 0xa90 +#define F_POLY1 F_POLY0 + 8 +#define F_POLY2 F_POLY1 + 8 +#define F_POLY3 F_POLY2 + 8 +#define F_POLY4 F_POLY3 + 8 +#define F_POLY5 F_POLY4 + 8 + +#define G_POLY0 0xac0 +#define G_POLY1 G_POLY0 + 8 + +#define TANCOT_A 0xad0 +#define TAN_A 0xad8 +#define COT_A 0xae0 +#define TAN_COT_A 0xae8 + + SBTTL("Tangent") + +//++ +// +// double +// tan ( +// IN double x +// ) +// +// Routine Description: +// +// This function returns the tangent of the given double argument. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// Return Value: +// +// The double tangent result is returned as the function value in f0. +// +//-- + + NESTED_ENTRY(tan, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + stq s0, SaveS0(sp) + stq s1, SaveS1(sp) + stq s2, SaveS2(sp) + stq ra, SaveRa(sp) + stt f2, SaveF2(sp) + stt f3, SaveF3(sp) + stt f4, SaveF4(sp) + stt f5, SaveF5(sp) + stt f6, SaveF6(sp) + stt f7, SaveF7(sp) + stt f8, SaveF8(sp) + stt f9, SaveF9(sp) + + PROLOGUE_END + + ornot zero, zero, t0 + srl t0, 33, t0 + ldah t1, SMALL_X_HI(zero) + ldah t2, BIG_X_HI - SMALL_X_HI(zero) + lda t2, BIG_X_LO - SMALL_X_LO(t2) + stt f16, Temp2(sp) + cpys f31, f16, f2 + ldl v0, Temp2 + HighPart(sp) + cpys f16, f16, f3 + and v0, t0, v0 // the exponent field of the argument + subl v0, t1, t0 // if v0 - small <= big - small + cmpult t0, t2, t0 // we've got an abnormal argument + beq t0, abnormal_argument + + ldah t2, EXP_WORD_OF_TWO_PI_HI(zero) // if (j >= EXP_WORD_OF_TWO_PI) + lda t2, EXP_WORD_OF_TWO_PI_LO(t2) // we have a medium argument + cmplt v0, t2, t2 + beq t2, medium_argument + +// +// small argument reduction +// +// reduce the argument X to ( 8 * N + I ) * pi / 2 + y +// and let the reduced argument be y' where +// y' = X - floor( ( 8 * N + I + 1 ) / 2 ) * pi / 2 +// the low 3 bits of I are the octant +// + + lda t0, __trig_cons + ldt f0, TWO_POW_K_OVER_PI_OVER_4(t0) + mult f2, f0, f0 + cvttqc f0, f1 + stt f1, Temp2(sp) + ldl t2, Temp2(sp) + sra t2, 7, t4 + cmpule t4, 7, t12 + beq t12, small_tan // if octant > 7; shouldn't happen + +// dispatch on octant + + lda t12, Switch2 + s4addl t4, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute tan(y') +// +Switch20: + and t2, 127, t5 + subl t5, 35, t5 + blt t5, small_tan + + s4addl t5, zero, t5 + s8addl t5, t0, s0 + ldt f0, TANCOT_A(s0) + ldt f5, TAN_A(s0) + ldt f6, COT_A(s0) + subt f2, f0, f4 + br zero, pos_tab_eval + +// +// 2nd octant; compute -cot(y') +// +Switch21: + ldt f1, PI_OVER_2_HI(t0) + ornot zero, t2, t4 + and t4, 127, t4 + ldt f10, PI_OVER_2_LO(t0) + subl t4, 35, t4 + subt f2, f1, f1 + blt t4, neg_cot_1 + + s4addl t4, zero, t4 + s8addl t4, t0, s0 + ldt f0, TANCOT_A(s0) + ldt f6, TAN_A(s0) + ldt f5, COT_A(s0) + addt f1, f0, f0 + subt f0, f10, f4 + br zero, pos_tab_eval + +// +// 3rd octant; compute -cot(y') +// +Switch22: + ldt f0, PI_OVER_2_HI(t0) + and t2, 127, t3 + ldt f10, PI_OVER_2_LO(t0) + subl t3, 35, t3 + subt f2, f0, f1 + blt t3, neg_cot_1 + + s4addl t3, zero, t3 + s8addl t3, t0, s0 + ldt f6, TANCOT_A(s0) + ldt f5, COT_A(s0) + subt f1, f6, f6 + subt f10, f6, f4 + ldt f6, TAN_A(s0) + br zero, neg_tab_eval + +// +// 4th octant; compute tan(y') +// +Switch23: + ldt f0, PI_HI(t0) + ornot zero, t2, t5 + and t5, 127, t5 + ldt f11, PI_LO(t0) + subl t5, 35, t5 + subt f2, f0, f0 + blt t5, pos_tan_1 + + s4addl t5, zero, t5 + s8addl t5, t0, s0 + ldt f6, TANCOT_A(s0) + addt f0, f6, f6 + subt f11, f6, f4 + br zero, neg_tab_eval1 + +// +// 5th octant; compute tan(y') +// +Switch24: + ldt f11, PI_HI(t0) + and t2, 127, t4 + subl t4, 35, t4 + subt f2, f11, f0 + ldt f11, PI_LO(t0) + blt t4, pos_tan_1 + + s4addl t4, zero, t4 + s8addl t4, t0, s0 + ldt f6, TANCOT_A(s0) + ldt f5, TAN_A(s0) + subt f0, f6, f6 + subt f6, f11, f4 + ldt f6, COT_A(s0) + br zero, pos_tab_eval + +// +// 6th octant; compute -cot(y') +// +Switch25: + ldt f11, THREE_PI_OVER_2_HI(t0) + ornot zero, t2, t3 + and t3, 127, t3 + ldt f10, THREE_PI_OVER_2_LO(t0) + subl t3, 35, t3 + subt f2, f11, f1 + blt t3, neg_cot_1 + + s4addl t3, zero, t3 + s8addl t3, t0, s0 + ldt f6, TANCOT_A(s0) + ldt f5, COT_A(s0) + addt f1, f6, f6 + subt f6, f10, f4 + ldt f6, TAN_A(s0) + br zero, pos_tab_eval + +// +// 7th octant; compute -cot(y') +// +Switch26: + ldt f10, THREE_PI_OVER_2_HI(t0) + and t2, 127, t5 + subl t5, 35, t5 + subt f2, f10, f1 + ldt f10, THREE_PI_OVER_2_LO(t0) + blt t5, neg_cot_1 + + s4addl t5, zero, t5 + s8addl t5, t0, s0 + ldt f6, TANCOT_A(s0) + ldt f5, COT_A(s0) + subt f1, f6, f6 + subt f10, f6, f4 + ldt f6, TAN_A(s0) + br zero, neg_tab_eval + +neg_cot_1: + subt f1, f10, f2 + ldt f14, F_POLY5(t0) + ldt f4, One + ldt f19, F_POLY3(t0) + ldt f15, F_POLY4(t0) + divt f4, f2, f6 + mult f2, f2, f12 + cvtts f2, f13 + mult f12, f14, f14 + mult f12, f12, f18 + mult f12, f19, f19 + subt f1, f13, f1 + addt f14, f15, f14 + ldt f15, F_POLY2(t0) + subt f1, f10, f1 + addt f19, f15, f15 + ldt f19, F_POLY1(t0) + ldt f10, F_POLY0(t0) + mult f14, f18, f14 + mult f12, f19, f12 + mult f15, f18, f15 + mult f14, f18, f14 + addt f12, f10, f10 + addt f15, f14, f14 + addt f10, f14, f10 + mult f2, f10, f2 + cvtts f6, f17 + mult f17, f13, f13 + mult f17, f1, f1 + subt f6, f17, f17 + subt f4, f13, f4 + subt f4, f1, f1 + mult f1, f6, f1 + subt f1, f17, f1 + subt f2, f1, f1 + subt f1, f6, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + cpys f7, f7, f0 + br zero, done + +// +// 8th octant; compute tan(y') +// +Switch27: + ldt f18, TWO_PI_HI(t0) + ornot zero, t2, t2 + and t2, 127, t2 + ldt f11, TWO_PI_LO(t0) + subl t2, 35, t2 + subt f2, f18, f0 + blt t2, pos_tan_1 + + s4addl t2, zero, t2 + s8addl t2, t0, s0 + ldt f19, TANCOT_A(s0) + addt f0, f19, f19 + subt f11, f19, f4 + br zero, neg_tab_eval1 + +pos_tan_1: + subt f0, f11, f13 + ldt f14, E_POLY1(t0) + ldt f10, E_POLY3(t0) + ldt f17, E_POLY4(t0) + ldt f18, E_POLY0(t0) + ldt f7, E_POLY2(t0) + ldt f1, E_POLY5(t0) + mult f13, f13, f15 + ldt f2, E_POLY6(t0) + mult f15, f15, f12 + mult f14, f15, f14 + mult f10, f15, f10 + mult f1, f15, f1 + mult f13, f15, f13 + mult f17, f12, f17 + mult f2, f12, f2 + addt f14, f18, f14 + mult f7, f12, f7 + mult f12, f12, f19 + addt f10, f17, f10 + addt f1, f2, f1 + addt f14, f7, f7 + mult f10, f12, f10 + mult f1, f19, f1 + addt f7, f10, f7 + addt f1, f7, f1 + mult f13, f1, f1 + addt f11, f1, f1 + subt f0, f1, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + cpys f7, f7, f0 + br zero, done + +small_tan: + mult f2, f2, f6 + ldt f17, E_POLY1(t0) + ldt f14, E_POLY3(t0) + ldt f12, E_POLY4(t0) + ldt f15, E_POLY0(t0) + ldt f13, E_POLY2(t0) + ldt f19, E_POLY5(t0) + mult f6, f6, f18 + mult f17, f6, f17 + ldt f10, E_POLY6(t0) + mult f14, f6, f14 + mult f19, f6, f19 + mult f2, f6, f6 + mult f12, f18, f12 + mult f10, f18, f10 + addt f17, f15, f15 + mult f13, f18, f13 + mult f18, f18, f11 + addt f14, f12, f12 + addt f19, f10, f10 + addt f15, f13, f13 + mult f12, f18, f12 + mult f10, f11, f10 + addt f13, f12, f12 + addt f10, f12, f10 + mult f6, f10, f6 + subt f2, f6, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + cpys f7, f7, f0 + br zero, done + +// +// a medium argument +// +medium_argument: + lda t5, __trig_cons // reduce the argument with extra precision + ldt f0, D_2_POW_K_OVER_PI_OVER_4(t5) + mult f2, f0, f0 + cvttqc f0, f1 + stt f1, Temp2(sp) + ldl s1, Temp2(sp) + addl s1, 0x80, t2 + bic t2, 0xff, t2 + stq t2, Temp3(sp) + ldt f17, Temp3(sp) + ldt f19, PI_OVER_4_OVER_2_POW_K_0(t5) + ldt f15, PI_OVER_4_OVER_2_POW_K_1(t5) + cvtqt f17, f17 + ldt f11, PI_OVER_4_OVER_2_POW_K_2(t5) + mult f17, f19, f19 + mult f17, f15, f15 + mult f17, f11, f11 + subt f2, f19, f19 + subt f19, f15, f18 + subt f18, f19, f19 + addt f15, f19, f15 + addt f11, f15, f8 + subt f18, f8, f9 + cmpteq f9, f18, f13 + fbne f13, evaluate + + subt f9, f18, f18 + ldt f12, PI_OVER_4_OVER_2_POW_K_3(t5) + mult f17, f12, f12 + addt f8, f18, f8 + addt f12, f8, f8 + subt f9, f8, f10 + cmpteq f10, f9, f6 + fbne f6, evaluate + + subt f10, f9, f9 + ldt f0, PI_OVER_4_OVER_2_POW_K_4(t5) + mult f17, f0, f0 + addt f8, f9, f8 + addt f0, f8, f8 + subt f10, f8, f9 + cmpteq f9, f10, f1 + fbne f1, evaluate + + subt f9, f10, f10 + addt f8, f10, f8 + br zero, evaluate + +// +// process an abnormal argument +// it's either very small, very big, a NaN or an Inf +// +abnormal_argument: + cmple v0, t1, t1 + beq t1, big_NaN_or_Inf + + cpys f3, f2, f0 // very small argument; simply return it. + br zero, done + +// +// Process big arguments or NaNs or Infs +// +big_NaN_or_Inf: + ldah s2, 0x7ff0(zero) // mask is 0x7ff00000 + and v0, s2, v0 + xor v0, s2, v0 + beq v0, NaN_or_Inf // NaN or an infinity + + cpys f2, f2, f16 // reduce the very big argument carefully + mov zero, a1 + lda a2, Temp0(sp) + lda a3, Temp1(sp) + bsr ra, __trig_reduce + mov v0, s1 + ldt f9, Temp0(sp) + ldt f8, Temp1(sp) + +// +// evaluate the function +// +evaluate: + sra s1, 7, t1 + and t1, 7, t1 + cmpule t1, 7, t12 + beq t12, pos_tan_2 + + lda t12, Switch1 + s4addl t1, t12, t12 + ldl t12, 0(t12) + jmp zero, (t12) + +// +// 1st octant; compute tan(y') +// +Switch10: + and s1, 127, t3 + subl t3, 35, t3 + blt t3, pos_tan_2 + + s4addl t3, zero, t3 + lda t4, __trig_cons + s8addl t3, t4, s0 + ldt f5, TANCOT_A(s0) + ldt f6, COT_A(s0) + subt f9, f5, f5 + subt f5, f8, f4 + ldt f5, TAN_A(s0) + br zero, pos_tab_eval + +// +// 2nd octant; compute -cot(y') +// +Switch11: + ornot zero, s1, t6 + and t6, 127, t6 + subl t6, 35, t6 + blt t6, neg_cot_2 + + s4addl t6, zero, t6 + lda t7, __trig_cons + s8addl t6, t7, s0 + ldt f0, TANCOT_A(s0) + ldt f6, TAN_A(s0) + ldt f5, COT_A(s0) + addt f9, f0, f0 + subt f0, f8, f4 + br zero, pos_tab_eval + +// +// 3rd octant; compute -cot(y') +// +Switch12: + and s1, 127, a1 + subl a1, 35, a1 + blt a1, neg_cot_2 + + s4addl a1, zero, a1 + lda a2, __trig_cons + s8addl a1, a2, s0 + ldt f1, TANCOT_A(s0) + ldt f6, TAN_A(s0) + ldt f5, COT_A(s0) + subt f9, f1, f1 + subt f8, f1, f4 + br zero, neg_tab_eval + +// +// 4th octant; compute tan(y') +// +Switch13: + ornot zero, s1, a4 + and a4, 127, a4 + subl a4, 35, a4 + blt a4, pos_tan_2 + + s4addl a4, zero, a4 + lda a5, __trig_cons + s8addl a4, a5, s0 + ldt f10, TANCOT_A(s0) + addt f9, f10, f10 + subt f8, f10, f4 + br zero, neg_tab_eval1 + +// +// 5th octant; compute tan(y') +// +Switch14: + and s1, 127, t9 + subl t9, 35, t9 + blt t9, pos_tan_2 + + s4addl t9, zero, t9 + lda t10, __trig_cons + s8addl t9, t10, s0 + ldt f11, TANCOT_A(s0) + ldt f5, TAN_A(s0) + ldt f6, COT_A(s0) + subt f9, f11, f11 + subt f11, f8, f4 + br zero, pos_tab_eval + +// +// 6th octant; compute -cot(y') +// +Switch15: + ornot zero, s1, ra + and ra, 127, ra + subl ra, 35, ra + blt ra, neg_cot_2 + + s4addl ra, zero, ra + lda t12, __trig_cons + s8addl ra, t12, s0 + ldt f12, TANCOT_A(s0) + ldt f6, TAN_A(s0) + ldt f5, COT_A(s0) + addt f9, f12, f12 + subt f12, f8, f4 + br zero, pos_tab_eval + +// +// 7th octant; compute -cot(y') +// +Switch16: + and s1, 127, t0 + subl t0, 35, t0 + blt t0, neg_cot_2 + + s4addl t0, zero, t0 + lda t1, __trig_cons + s8addl t0, t1, s0 + ldt f13, TANCOT_A(s0) + ldt f6, TAN_A(s0) + ldt f5, COT_A(s0) + subt f9, f13, f13 + subt f8, f13, f4 + br zero, neg_tab_eval + +neg_cot_2: + mult f9, f9, f16 + cvtts f9, f17 + ldt f14, One + lda t3, __trig_cons + ldt f18, F_POLY5(t3) + divt f14, f9, f15 + ldt f21, F_POLY3(t3) + mult f16, f16, f22 + mult f16, f18, f18 + ldt f19, F_POLY4(t3) + subt f9, f17, f23 + ldt f24, F_POLY2(t3) + mult f16, f21, f21 + ldt f25, F_POLY1(t3) + ldt f26, F_POLY0(t3) + mult f16, f25, f16 + addt f18, f19, f18 + subt f23, f8, f8 + addt f21, f24, f21 + addt f16, f26, f16 + mult f18, f22, f18 + mult f21, f22, f21 + mult f18, f22, f18 + addt f21, f18, f18 + addt f16, f18, f16 + mult f9, f16, f16 + cvtts f15, f20 + mult f20, f17, f17 + mult f20, f8, f8 + subt f15, f20, f20 + subt f14, f17, f14 + subt f14, f8, f8 + mult f8, f15, f8 + subt f8, f20, f8 + subt f16, f8, f8 + subt f8, f15, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + cpys f7, f7, f0 + br zero, done + +// +// 8th octant; compute tan(y') +// +Switch17: + ornot zero, s1, s1 + and s1, 127, s1 + subl s1, 35, s1 + blt s1, pos_tan_2 + + s4addl s1, zero, s1 + lda t4, __trig_cons + s8addl s1, t4, s0 + ldt f27, TANCOT_A(s0) + addt f9, f27, f27 + subt f8, f27, f4 + +neg_tab_eval1: + ldt f5, TAN_A(s0) + ldt f6, COT_A(s0) + +neg_tab_eval: + cpysn f3, f3, f3 + +pos_tab_eval: + mult f4, f4, f28 + lda t6, __trig_cons + ldt f0, TAN_COT_A(s0) + ldt f29, G_POLY1(t6) + ldt f30, G_POLY0(t6) + mult f29, f28, f29 + mult f4, f28, f28 + addt f29, f30, f29 + mult f28, f29, f28 + subt f4, f28, f4 + mult f0, f4, f0 + subt f4, f6, f4 + divt f0, f4, f0 + subt f5, f0, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + cpys f7, f7, f0 + br zero, done + +pos_tan_2: + mult f9, f9, f1 + lda t7, __trig_cons + ldt f11, E_POLY1(t7) + ldt f12, E_POLY3(t7) + ldt f13, E_POLY4(t7) + mult f1, f1, f10 + ldt f23, E_POLY0(t7) + mult f11, f1, f11 + ldt f22, E_POLY2(t7) + mult f12, f1, f12 + ldt f19, E_POLY5(t7) + ldt f24, E_POLY6(t7) + mult f19, f1, f19 + mult f13, f10, f13 + mult f24, f10, f24 + addt f11, f23, f11 + mult f22, f10, f22 + mult f10, f10, f25 + mult f9, f1, f1 + addt f12, f13, f12 + addt f19, f24, f19 + addt f11, f22, f11 + mult f12, f10, f10 + mult f19, f25, f19 + addt f11, f10, f10 + addt f19, f10, f10 + mult f1, f10, f1 + addt f8, f1, f1 + subt f9, f1, f7 + fbge f3, adjust_sign + cpysn f7, f7, f7 + +adjust_sign: + cpys f7, f7, f0 + br zero, done + +// +// Determine if we have a NaN or an Inf +// +NaN_or_Inf: + stt f2, Temp2(sp) + ldl a1, Temp2 + HighPart(sp) + and a1, s2, a2 + cmpeq a2, s2, s2 + beq s2, NaN_or_Inf1 + + ldl a3, Temp2(sp) + ldah a4, 0x10(zero) + lda a4, -1(a4) + and a1, a4, a1 + bis a1, a3, a1 + cmpult zero, a1, a1 + and s2, a1, s2 + bne s2, NaN_or_Inf2 + +// +// report an exception +// +NaN_or_Inf1: + lda a5, tanName + stl a5, ExRec + ErName(sp) + ldah t10, 0x800(zero) + stt f2, ExRec + ErArg0(sp) + lda t10, 0x5f(t10) + stl t10, ExRec + ErErr(sp) + lda v0, ExRec(sp) + bsr ra, __dpml_exception + ldt f2, 0(v0) + +// +// return the argument +// +NaN_or_Inf2: + cpys f2, f2, f0 + +// +// Restore registers and return with result in f0. +// + +done: + ldq s0, SaveS0(sp) + ldq s1, SaveS1(sp) + ldq s2, SaveS2(sp) + ldq ra, SaveRa(sp) + ldt f2, SaveF2(sp) + ldt f3, SaveF3(sp) + ldt f4, SaveF4(sp) + ldt f5, SaveF5(sp) + ldt f6, SaveF6(sp) + ldt f7, SaveF7(sp) + ldt f8, SaveF8(sp) + ldt f9, SaveF9(sp) + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end tan + + .rdata + .align 3 + +// +// Define floating point constants. +// + +One: .double 1.0 + +// +// dispatch on octant +// + +Switch1: + .long Switch10 + .long Switch11 + .long Switch12 + .long Switch13 + .long Switch14 + .long Switch15 + .long Switch16 + .long Switch17 + +// +// dispatch on octant +// + +Switch2: + .long Switch20 + .long Switch21 + .long Switch22 + .long Switch23 + .long Switch24 + .long Switch25 + .long Switch26 + .long Switch27 + +tanName: + .ascii "tan\0" diff --git a/private/fp32/tran/alpha/trig_rdx.s b/private/fp32/tran/alpha/trig_rdx.s new file mode 100644 index 000000000..f38adf3d9 --- /dev/null +++ b/private/fp32/tran/alpha/trig_rdx.s @@ -0,0 +1,554 @@ +// TITLE("Alpha AXP Trigonometric Argument Reduction") +//++ +// +// Copyright (c) 1991, 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// trig_rdx.s +// +// Abstract: +// This module implements a routine for the large argument reduction +// and varying octant large argument reduction for sin, cos, tan and +// cot routines. +// +// Author: +// +// Bob Hanek 1-Oct-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 10-Feb-1994 +// +//-- + +#include "ksalpha.h" + +// +// Define stack frame. +// + + .struct 0 +SaveS0: .space 8 // save register s0 +SaveRa: .space 8 // save return address +Temp0: .space 8 // +Temp1: .space 8 // + .space 0 // for 16-byte stack alignment +FrameLength: + +// +// Define lower and upper 32-bit parts of 64-bit double. +// + +#define LowPart 0x0 +#define HighPart 0x4 + + SBTTL("Trigonometric Argument Reduction") + +//++ +// +// int __trig_reduce(double x, int n, double *hi, double *lo) +// +// Routine Description: +// +// The goal of the large argument redution algorithm is to produce a floating +// point value y and an integer o, defined by +// I = nearest_int(x'/(pi/2)) +// y = x' - I*(p/2) +// o = 2*I if y >= 0 +// = 2*I + 1 if y < 0 +// +// Return y in hi and low parts, and return (o+n) * 2^7, with the low 7 bits +// providing a table index. +// +// Arguments: +// +// x (f16) - Supplies the argument value. +// +// (a1) - Supplies the number of bits. +// +// (a2) - Supplies a pointer to the high part of y. +// +// (a3) - Supplies a pointer to the low part of y. +// +// Return Value: +// +// The octant is returned as the function value in v0. +// +//-- + + NESTED_ENTRY(__trig_reduce, FrameLength, ra) + + lda sp, -FrameLength(sp) // allocate stack frame + + stq s0, SaveS0(sp) // save register s0 + stq ra, SaveRa(sp) // save return address + + PROLOGUE_END + + ldah v0, -16(zero) + lda t2, 0x3c2(zero) // Bias adjustment + ldah t3, 0x10(zero) + stt f16, Temp0(sp) // Get the fraction bits as an integer + ldl t0, Temp0 + HighPart(sp)// and the biased exponent field + mov 32, a5 + and t0, v0, v0 + sra v0, 20, t1 // shift tmp_digit + xor t0, v0, v0 + subl t1, t2, t1 // subtract the bias, giving the offset + ldl t2, Temp0(sp) + bis v0, t3, v0 + sra t1, 5, t0 // divide by L=32 + lda t3, __four_over_pi + s4addl t0, t3, t3 // p = &four_over_pi[j]; + lda t4, 4(t3) // Get 'g' digits from the table + and t1, 31, t1 // offset mod 32 + ldl t3, 0(t3) // Get more digits... + ldl t5, 0(t4) + lda t4, 4(t4) + lda t6, 4(t4) + ldl t4, 0(t4) + lda t7, 4(t6) + ldl t6, 0(t6) + cmpult zero, t1, a4 // Ensure 32-bit alignment ... + ldl a0, 0(t7) + lda t7, 4(t7) + beq a4, already_aligned // Branch if already aligned + + zapnot t5, 0xf, t12 + subl a5, t1, t0 // ... by adjusting j + sll t5, t1, t8 // ... and shifting the digits left + srl t12, t0, t5 + // + // Precondition the initial multiply + // + zapnot t4, 0xf, t12 + sll t3, t1, t3 + srl t12, t0, t9 + addl zero, t3, t3 + addl zero, t5, t5 + zapnot t6, 0xf, t12 + addl zero, t8, t8 + addl zero, t9, t9 + bis t3, t5, t3 + bis t8, t9, t5 + srl t12, t0, t8 + zapnot a0, 0xf, t12 + sll t4, t1, t4 + sll t6, t1, t6 + srl t12, t0, t9 + addl zero, t4, t4 + addl zero, t8, t8 + addl zero, t6, t6 + addl zero, t9, t9 + bis t4, t8, t4 + bis t6, t9, t6 +already_aligned: + // + // Multiply the 'f' and 'g' digits. + // + mull t3, t2, t3 + zapnot t6, 0xf, t8 + zapnot t2, 0xf, t9 + zapnot v0, 0xf, t11 + sll a1, 29 a1 + mulq t8, t9, t10 + zapnot t6, 0xf, t8 + zapnot t4, 0xf, t6 + zapnot t2, 0xf, t9 + mulq t8, t11, t8 + zapnot v0, 0xf, t11 + mulq t6, t9, t12 + zapnot t4, 0xf, t6 + zapnot t5, 0xf, t4 + zapnot t2, 0xf, t9 + mulq t6, t11, t6 + ornot zero, zero, t11 + zapnot t11, 0xf, t11 + mulq t4, t9, t4 + srl t10, 32, t9 + zapnot t10, 0xf, t10 + addl zero, t9, t9 + zapnot t9, 0xf, t9 + addl zero, t10, t10 + addq t8, t9, t8 + srl t8, 32, t9 + zapnot t8, 0xf, t8 + addl zero, t9, t9 + zapnot t9, 0xf, t9 + addl zero, t8, t8 + zapnot t8, 0xf, t8 + sll t9, 32, t9 + addq t9, t8, t8 + addq t8, t12, t8 + cmpult t8, t12, t12 + srl t8, 32, t11 + zapnot t12, 0xf, t12 + addl zero, t11, t11 + mull t5, v0, t5 + sll t12, 32, t12 + zapnot t11, 0xf, t11 + zapnot t8, 0xf, t8 + addq t12, t11, t11 + addq t6, t11, t6 + srl t6, 32, t9 + zapnot t6, 0xf, t6 + addl zero, t9, t9 + zapnot t9, 0xf, t9 + addl zero, t6, t6 + zapnot t6, 0xf, t6 + sll t9, 32, t9 + ldah t11, 1(zero) // does w have bit loss? + addq t9, t6, t6 + addq t4, t6, t4 + srl t4, 32, t12 + lda t11, -0x8000(t11) + addl zero, t12, t12 + zapnot t4, 0xf, t4 + addl zero, t8, t8 +// +// Add in n and check to see if there are enough significant bits to obtain +// the final result. If not, generate some more. +// + addl t5, t12, t5 + addl t3, t5, t3 + addl t3, a1, t3 + addl t3, t11, t9 + ldah t12, 0x3fff(zero) + and t9, t12, t9 + ldah t5, 0x4000(zero) + addl zero, t4, t4 + mov zero, t6 // t6 = scale + bne t9, done // break if no loss of significance + + lda t5, -0x2(t5) + cpys f31, f31, f31 + +do_loop: +// +// Get more bits from the table and align them correctly +// + mov a0, a1 // move next_g_digit to a1 + ldl a0, 0(t7) // then load next next_g_digit + lda t7, 4(t7) + beq a4, 30f // aligned offset? + + sll a1, t1, a1 // tmp_digit = + zapnot a0, 0xf, t12 // (tmp_digit << offset) + addl zero, a1, a1 // (next_g_digit >> j) + srl t12, t0, t9 + addl zero, t9, t9 + bis a1, t9, a1 + +// +// Get the next product +// +30: zapnot a1, 0xf, t9 + zapnot t2, 0xf, ra + zapnot t8, 0xf, t8 + mulq t9, ra, ra + zapnot a1, 0xf, t9 + zapnot v0, 0xf, a1 + sll t8, 32, t8 + mulq t9, a1, a1 + insll t10, 4, t9 + addq t9, ra, t9 + cmpult t9, ra, ra + srl t9, 32, s0 + zapnot ra, 0xf, ra + addl zero, s0, s0 + zapnot s0, 0xf, s0 + sll ra, 32, ra + addq t8, s0, s0 + addq s0, ra, s0 + cmpult s0, ra, ra + zapnot t9, 0xf, t9 + addl zero, t9, t9 + addq s0, a1, s0 + cmpult s0, a1, a1 + srl s0, 32, t8 + zapnot s0, 0xf, s0 + addq ra, a1, a1 + addl zero, a1, a1 + addl zero, s0, s0 + addl zero, t8, t8 + beq a1, end_of_get_next_product + mov t4, ra + addl t4, 1, t4 + bne ra, end_of_get_next_product + + addl t3, 1, t3 +end_of_get_next_product: + // + // Check for L bits worth of 0's or 1's. If there are fewer we're done + // + addl t3, 1, a1 + and a1, t5, a1 + ldah ra, 0x2000(zero) + bne a1, done + + addl t4, ra, ra + ldah a1, -0x4000(zero) + and ra, a1, a1 + bne a1, done + // + // Compress the current value of w and increment counter + // + ldah a1, 0x2000(zero) + ldah ra, -0x2000(zero) + lda a1, -1(a1) + and t3, ra, t3 + and t4, a1, t4 + bis t3, t4, t3 + addl t3, t11, ra + mov t8, t4 + ldah t12, 0x3fff(zero) + and ra, t12, ra + addl t6, 32, t6 // adjust scale factor to reflect compression + mov s0, t8 + mov t9, t10 + beq ra, do_loop // while 1 +done: +// +// We want to return the reduced argument between (-pi/4, +pi/4). This +// means that if we are in an even octant, we return pi/4*f, if we are in +// an odd octant we return pi/4*(f - 1). +// +// NOTE: f or f - 1 can be obtained from w by propagating +// the low octant bit to all three octant bits. +// + s4addl t3, zero, a1 + sra a1, 2, s0 + zapnot a1, 0xf, a1 + zapnot t3, 0xf, t3 + srl a1, 31 a1 + srl t3, 22, t3 + addl a1, s0, a1 // tmp_digit + msd of w + zapnot t4, 0xf, t9 + bne a1, 60f +// +// Msd of w is all zeroes or all ones +// Left shift the significant w digits and and bump t6 +// + zapnot t8, 0xf, ra + sll s0, 29, s0 + sll t4, 29, t4 + srl t9, 3, t9 + srl ra, 3, ra + addl zero, s0, s0 + addl zero, t4, t4 + bis s0, t9, s0 + bis t4, ra, t4 + addl t6, 29, t6 + +// +// Now take care of remain zeros or ones. +// +60: stq s0, Temp0(sp) + lda t2, 0x7ff(zero) + ldt f16, Temp0(sp) + lda t5, 0x3fd(zero) + cvtqt f16, f16 + stt f16, Temp1(sp) + ldl t1, Temp1 + HighPart(sp) + sra t1, 20, t1 + and t1, t2, t1 + subl t1, t5, t1 + subl a5, t1, a5 + beq a5, 70f // skip if already aligned + zapnot t4, 0xf, t12 // else left shift significant w digits + sll s0, a5, s0 + srl t12, t1, t7 + zapnot t8, 0xf, t12 + sll t4, a5, t4 + srl t12, t1, t1 + addl zero, s0, s0 + addl zero, t7, t7 + addl zero, t4, t4 + addl zero, t1, t1 + bis s0, t7, s0 + bis t4, t1, t4 + addl t6, a5, t6 +70: // + // Time to convert to floating point and then to radians + // + zapnot t4, 0xf, t4 + stq t4, Temp0(sp) + ldt f0, Temp0(sp) + lda t12, __trig_reduce_t_table + lda a0, 0x3ff(zero) + ldt f16, 0x10(t12) + cvtqt f0, f0 + subl a0, t6, t6 + sll t6, 20 t6 + ldt f11, 0(t12) + stt f31, Temp1(sp) + and s0, 0x3f, t9 + stl t6, Temp1 + HighPart(sp) + zapnot t9, 0xf, ra + ldt f1, Temp1(sp) + xor s0, t9, s0 + ldt f10, 8(t12) + mult f0, f16, f0 + stq ra, Temp0(sp) + mov t3, v0 // Move octant to result + ldt f16, Temp0(sp) + mult f1, f10, f10 + mult f1, f11, f1 + cvtqt f16, f16 + stq s0, Temp1(sp) + ldt f11, Temp1(sp) + cvtqt f11, f11 + addt f16, f0, f0 + addt f1, f10, f12 + mult f11, f10, f10 + mult f11, f1, f1 + mult f0, f12, f0 + addt f0, f10, f0 + addt f1, f0, f16 + // + // Return the high and low parts of the reduced argument + // + stt f16, 0(a2) + subt f16, f1, f1 + subt f1, f0, f0 + stt f0, 0(a3) + ldq s0, SaveS0(sp) // restore register s0 + ldq ra, SaveRa(sp) // restore return address + lda sp, FrameLength(sp) // deallocate stack frame + ret zero, (ra) // return + + .end __trig_reduce + + .rdata + .align 3 + +__trig_reduce_t_table: + .double 1.4629180922209883e-009 // 2^s2*(pi/4) in hi and lo + .double -1.2953828660926890e-017 // pieces + .double 2.3283064365386963e-010 // 2^-BITS_PER_DIGIT + +// +// Define high precision version of 4 over pi, for use by the trig_reduce +// functions to perform accurate range reduction of very large arguments +// for the trigonometric functions. +// + + .rdata + .align 2 + +__four_over_pi: + + .long 0x00000000, 0x00000000, 0x00000000, 0x0000145f, 0x306dc9c8 + .long 0x82a53f84, 0xeafa3ea6, 0x9bb81b6c, 0x52b32788, 0x72083fca + .long 0x2c757bd7, 0x78ac36e4, 0x8dc74849, 0xba5c00c9, 0x25dd413a + .long 0x32439fc3, 0xbd639625, 0x34e7dd10, 0x46bea5d7, 0x68909d33 + .long 0x8e04d68b, 0xefc82732, 0x3ac7306a, 0x673e9390, 0x8bf177bf + .long 0x250763ff, 0x12fffbc0, 0xb301fde5, 0xe2316b41, 0x4da3eda6 + .long 0xcfd9e4f9, 0x6136e9e8, 0xc7ecd3cb, 0xfd45aea4, 0xf758fd7c + .long 0xbe2f67a0, 0xe73ef14a, 0x525d4d7f, 0x6bf623f1, 0xaba10ac0 + .long 0x6608df8f, 0x6d757e19, 0xf784135e, 0x86c3b53c, 0x722c2bdc + .long 0xc3610cb3, 0x30abe294, 0x0d0811bf, 0xfb1009ae, 0x64e620c0 + .long 0xc2aad94e, 0x75192c1c, 0x4f78118d, 0x68f88338, 0x6cf9bb9d + .long 0x0125506b, 0x388ed172, 0xc394dbb5, 0xe89a2ae3, 0x20a7d4bf + .long 0xe0e0a7ef, 0xc67d0658, 0x5bc9f306, 0x4fb77867, 0xa4dded63 + .long 0xcbdf13e7, 0x43e6b95e, 0x4fe3b0fe, 0x24320f8f, 0x848d5f4d + .long 0xdaaee5a6, 0x086762b8, 0xc296b3a3, 0x38785895, 0xa829a58b + .long 0xa00188cf, 0xb0c5ae3c, 0x7358d360, 0x0c466f9a, 0x5692f4f6 + .long 0x9aaaa6fe, 0xc7dae302, 0x147f8ec9, 0xa553ac95, 0x7aee1f0f + .long 0x8c6af60f, 0x5ce2a2ea, 0xc9381b3a, 0xc7671094, 0xf964648e + .long 0xf15ac46a, 0x8b5723e0, 0x03615e3b, 0xf9c33fe6, 0x33ed43cc + .long 0xcc2af328, 0xff759b0f, 0xefd6eca4, 0x513d064c, 0x17fcd9b8 + .long 0x9de126cd, 0x9a87ebba, 0xfbc2dbc7, 0x6b12537b, 0xc5045a5d + .long 0x10c509ab, 0x1c465958, 0xc2dc6119, 0x6fbc0a18, 0x02f4e3be + .long 0x6b7c0306, 0x8265cc42, 0x50602910, 0x6b71deaf, 0xf615be5d + .long 0x23c86949, 0x1a6ce21b, 0x1bb5484b, 0xf5d9cc2d, 0x54850156 + .long 0x933a7e54, 0xc0cfeeeb, 0x90785471, 0x078c2f0e, 0x714b5195 + .long 0xf7baedec, 0x74c5b977, 0xfe9df031, 0xacf824c8, 0xb94aa6db + .long 0x395a5505, 0x11ac384e, 0xf9224284, 0xc09368c2, 0x588b3888 + .long 0x98b91236, 0x49be62e0, 0x15a87a9c, 0xa925221a, 0xbfbf97c0 + .long 0x199283dd, 0xd9ce1ea7, 0xc2701e3d, 0x987cf665, 0x1f18f280 + .long 0xb267ce38, 0x366125de, 0x68a17382, 0x510f6415, 0x73f6a5d8 + .long 0x5248e5e6, 0x4f6daaa1, 0x9214ee43, 0xfced72d9, 0x662942cf + .long 0x3c4f2831, 0x3bfe92f2, 0x9d109cdc, 0x52e6332d, 0x7db106cb + .long 0xebe1dfb7, 0x7693490d, 0x948ce84e, 0x4e264bb1, 0xb702b3e1 + .long 0x3cb784a6, 0x31a72e9e, 0xe380a600, 0x2181ad01, 0x096b1dc5 + .long 0x921548e0, 0x5cee849a, 0xd7b4cfbe, 0xee490ddd, 0xe2d3f4d2 + .long 0x91ded236, 0x8a2a7a3e, 0x4159e673, 0x040fc97e, 0xad0c764b + .long 0xe7dba06b, 0xa80ff130, 0xa52a4ab8, 0x0c86e21b, 0x0da64906 + .long 0x4ea98b7a, 0x8e29cdca, 0x88b82121, 0x6d3ea55a, 0xacc293a0 + .long 0xe4ea008b, 0xbb677698, 0xaedd42ff, 0x30efad69, 0x3744e3a5 + .long 0x2d32d599, 0x98ca8295, 0xad5c5211, 0x3b310a0e, 0x4597d480 + .long 0x9280eeee, 0x061e64ff, 0x80150e3d, 0x49384cc7, 0xbc0c907b + .long 0xb2f2e7f4, 0x7fb28871, 0x90c1bbc8, 0x2633a732, 0x518e1bbc + .long 0xf6e2e77b, 0xe10566e2, 0xb4100b92, 0x700b5242, 0x221b1d01 + .long 0xf5f00d89, 0x7ffb61f2, 0x070ec30b, 0x22b4ac57, 0x796c3731 + .long 0x38f7a802, 0x009e5a44, 0xeea93ed6, 0xdd77645b, 0x75428145 + .long 0xe4d12ed0, 0x6c866761, 0x235281d5, 0x474a3854, 0x63b5ddb5 + .long 0xe244cb89, 0xb84db38f, 0x45b2ead8, 0x1067e07e, 0xde013188 + .long 0x0573262d, 0xa0f68722, 0xa4018b78, 0x7b18925e, 0xa975b8d4 + .long 0xb949d9a6, 0xf4e6d53c, 0xd292556d, 0x085bbbcc, 0x633df18e + .long 0xca516d06, 0xfb7f9574, 0x35c622bb, 0xf435c01b, 0x5f618cc9 + .long 0xac96e0bd, 0xa60ca537, 0xeacae75f, 0xe8f73f2d, 0x5e77cebb + .long 0xf2650610, 0x157ed18c, 0xc2b96080, 0xc45f43bc, 0x9b349667 + .long 0xb1e36ae1, 0x39a6dd28, 0x49d497c2, 0x76a46663, 0x555e150c + .long 0xa9f4b83a, 0x41e7e179, 0xaf0b6edf, 0x2460916f, 0x6e42f12a + .long 0x74d8dc4d, 0xcde01d7d, 0xeb095376, 0xfb58974c, 0xd559f9ee + .long 0xc3a05a25, 0xbe363833, 0x318ef5b8, 0x7b4910d4, 0x0bbefe90 + .long 0x18c5fe15, 0x935d9bb7, 0x8b87edbb, 0xda03f8f2, 0x16db6547 + .long 0x44b47355, 0xe0126a75, 0xa08af6d6, 0x85a52fd0, 0x0974e0fb + .long 0x41d54ed4, 0x2b2f6542, 0x42c5b6fb, 0x9fbcbf5f, 0xdb713fb7 + .long 0xd12d8edc, 0x9f9520ce, 0x1007c2ad, 0xd0bff0ff, 0xa0e7c506 + .long 0x6cec30c3, 0x055d57a9, 0xb5fcf66d, 0xcdb1e72c, 0xf2ab77e6 + .long 0x291af082, 0xdbe60865, 0xb8e6ac24, 0xb9ce1937, 0x19661fad + .long 0x97f44014, 0x9c8d80b4, 0x1bab48ed, 0xe43a424c, 0x508b9729 + .long 0x2c2e1c0a, 0xcd602a53, 0x26eaaa16, 0xfaa3d89e, 0x266bedc2 + .long 0x7c860bb5, 0x25d0b876, 0x43a6c654, 0x3496e11a, 0x963d443e + .long 0xe2dc8d31, 0xeeffe4f0, 0x006185a8, 0x11b419a9, 0xf334a41a + .long 0x7456614b, 0xa5e85f36, 0x997b423a, 0x17cfb83b, 0x7377a2f5 + .long 0x7034594b, 0x8d4102ea, 0xa5caa004, 0xfe028ff0, 0xc0fc2c81 + .long 0x6291a832, 0xdbd7d0e5, 0x5fbb56c4, 0xad66912f, 0x7fde60b3 + .long 0xd7f729ed, 0x4d150549, 0x4b5889f7, 0x9f05b30b, 0x5af2b8fe + .long 0x91a9a1b4, 0xc7440bea, 0xf49627e2, 0x92a71000, 0x241990db + .long 0xae36dbd9, 0x3eac17e2, 0x2ca9ad60, 0xe0359611, 0x9a181649 + .long 0x0aaa21df, 0x63d86e52, 0xa760d466, 0xa8180f7b, 0x80d988bc + .long 0x1f4529d9, 0x195ac83e, 0x7d1bcc8f, 0x9b0c9366, 0x37db3872 + .long 0xf49a8b0e, 0xf8bc6d22, 0x7b5e0787, 0x5748c308, 0xcbeeaabe + .long 0xb7ba58d2, 0x4dcba5d5, 0x9da881c8, 0x47c390f8, 0x8c3d3fa5 + .long 0x3e7adcf9, 0x4f8446b2, 0x2df8bc01, 0x11bafffc, 0x4d4dd8df + .long 0xb6182112, 0x6e8baf96, 0x55ad73ad, 0xd9af6e47, 0xcd4238d5 + .long 0x39fefbee, 0x65375936, 0xaa2016e1, 0xb65c4497, 0x4e8c0fbc + .long 0xb15b0e85, 0x82a1a183, 0x10328ccf, 0xc2c5202e, 0xcf53f7df + .long 0xbfbde8aa, 0xc6cfdb22, 0x7b3d9737, 0x517f92f8, 0x84f50638 + .long 0x6dde26d8, 0xb28ad51b, 0x16b51681, 0xd999e5b1, 0x22468aed + .long 0xf12ac59c, 0x79d33724, 0x1ad54bcd, 0x738547d9, 0x8be22941 + .long 0x7fbf7e9c, 0x2da771c5, 0x90dc509a, 0x9d35369f, 0x9a3dddf9 + .long 0x26a5cc27, 0x25e88427, 0x191b2361, 0x5f902d49, 0x5f7b0385 + .long 0xf0968a71, 0x9329d984, 0x4a9b8aa5, 0x5ad8d812, 0xc321770e + .long 0x034c92ad, 0x2c0b44dd, 0xca47e1e2, 0x2fe236be, 0x9eb97f85 + .long 0xb7869dd7, 0x86998bbd, 0x0c0bdbb3, 0x71ccfde6, 0x725702f9 + .long 0x336b0c37, 0x8afc38d0, 0x6a2207db, 0x090e3bbb, 0xa385b423 + .long 0x15e8c584, 0x3afe6b33, 0x0f5b380a, 0x93df50c9, 0xff80cad5 + .long 0xcf3ca6c4, 0x512455a7, 0x1b926cf5, 0x5d0aa704, 0xd0537cf9 + .long 0x5481aa36, 0x267321da, 0xf52900ad, 0x3e164cb4, 0xf10ff2e9 + .long 0x9106da3f, 0x36724429, 0x504f6439, 0xf31b93e8, 0x0aa8fb87 + .long 0x4e9c285d, 0x6cfbf3bf, 0xcbfa8bd4, 0x8cef6f55, 0x97545eca + .long 0xa471056a, 0xb748210d, 0xcb30c544, 0x3068e73c, 0xdc713a93 + .long 0xdca81f69, 0x3d2adff9, 0x41e3914b, 0x38a57f52, 0x98b83a79 + .long 0xf8a1f5cb, 0x5b70d8a8, 0xec4870a7, 0x70c4328f, 0x2590ec22 + .long 0x0f698543, 0x45900257, 0xe87204d1, 0x11278f1c, 0x98950f7b + .long 0x7cb84758, 0x9d5e84d1, 0x4cfef7f2, 0x41a5746c, 0xb63267a1 + .long 0x6f97bb8a, 0x348c7ba4, 0xfbbc2d23, 0x329352a5, 0x350519cd + .long 0x169da124, 0x13e89953, 0x09cc704e, 0x046f8fc6, 0x5721f1de + .long 0xb4fceac2, 0x811e2425, 0x53b6a9af, 0xcdea2334, 0xb57f36ba + .long 0xdbf04c3b, 0xb2c046c2, 0xd3e75894, 0x34506dbd, 0xae4f51a7 + .long 0x3537104b, 0x864d6b64, 0xe8dda680, 0x0ee01a4a, 0xbe9f89ab + .long 0x20300e3c, 0x1c27f136, 0x52be6c95, 0x1e35d4e9 + +// +// End of table. +// diff --git a/private/fp32/tran/alpha/trig_tab.s b/private/fp32/tran/alpha/trig_tab.s new file mode 100644 index 000000000..c6b14560b --- /dev/null +++ b/private/fp32/tran/alpha/trig_tab.s @@ -0,0 +1,779 @@ +// TITLE("Alpha AXP Trignometric Tables") +//++ +// +// Copyright (c) 1993, 1994 Digital Equipment Corporation +// +// Module Name: +// +// trig_tab.s +// +// Abstract: +// +// This module implements lookup tables for high-performance Alpha AXP +// specific routines for IEEE double format sine, cosine and tangent. +// +// Author: +// +// Bob Hanek 1-Oct-1991 +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Thomas Van Baak (tvb) 11-Feb-1994 +// +// Adapted for NT. +// +//-- + +#include "ksalpha.h" + + .rdata + .align 3 + .globl __trig_cons + +__trig_cons: + +/* argument reduction constants */ + .double 1.6297466172610083e+002 /* 2^K / pi / 4 */ + .double 6.1359231476671994e-003 /* pi / 4 / 2^K in 5 parts */ + .double 3.8753655503837720e-012 + .double -4.7566608519053990e-021 + .double 1.1343187409110769e-030 + .double -1.0753892652515905e-039 + .double 1.5707963267948966e+000 /* pi / 2 high and low */ + .double 6.1232339957367660e-017 + .double 3.1415926535897931e+000 /* pi high and low */ + .double 1.2246467991473532e-016 + .double 4.7123889803846897e+000 /* 3 * pi / 2 high and low */ + .double 1.8369701987210297e-016 + .double 6.2831853071795862e+000 /* 2 * pi high and low */ + .double 2.4492935982947064e-016 + .double 1.6297466172610083e+002 /* 2^K / pi / 4 */ + +/* p_coefs_t */ + .double 1.6666666666665642e-001 + .double -8.3333304811689379e-003 + +/* q_coefs_t */ + .double 4.9999999999992822e-001 + .double -4.1666646701511267e-002 + +/* s_coefs_t */ + .double 1.6666666666666480e-001 + .double -8.3333333328297430e-003 + .double 1.9841265065944337e-004 + .double -2.7538833799313716e-006 + +/* c_coefs_t */ + .double 5.0000000000000000e-001 + .double -4.1666666666666012e-002 + .double 1.3888888887947587e-003 + .double -2.4801581156849588e-005 + .double 2.7538826818007804e-007 + +/* Table of a, sin(a) and cos(a) */ + .double 1.6873788666743875e-001 /* a */ + .double 1.6793829497474913e-001 /* sin(a) */ + .double 9.8579750916756437e-001 /* cos(a) */ + .double 1.7487380981893508e-001 + .double 1.7398387338743623e-001 + .double 9.8474850180190909e-001 + .double 1.8100973297048850e-001 + .double 1.8002290140568264e-001 + .double 9.8366241921173336e-001 + .double 1.8714565612219033e-001 + .double 1.8605515166358627e-001 + .double 9.8253930228741482e-001 + .double 1.9328157927362843e-001 + .double 1.9208039704992938e-001 + .double 9.8137919331374734e-001 + .double 1.9941750242513653e-001 + .double 1.9809841071795670e-001 + .double 9.8018213596811676e-001 + .double 2.0555342557666775e-001 + .double 2.0410896609280887e-001 + .double 9.7894817531906386e-001 + .double 2.1168934872823247e-001 + .double 2.1011183688048329e-001 + .double 9.7767735782450704e-001 + .double 2.1782527187976777e-001 + .double 2.1610679707622607e-001 + .double 9.7636973133001970e-001 + .double 2.2396119503130274e-001 + .double 2.2209362097320268e-001 + .double 9.7502534506699434e-001 + .double 2.3009711818289025e-001 + .double 2.2807208317092864e-001 + .double 9.7364424965080187e-001 + .double 2.3623304133440029e-001 + .double 2.3404195858355464e-001 + .double 9.7222649707893360e-001 + .double 2.4236896448593759e-001 + .double 2.4000302244874758e-001 + .double 9.7077214072894880e-001 + .double 2.4850488763740328e-001 + .double 2.4595505033572618e-001 + .double 9.6928123535656585e-001 + .double 2.5464081078901979e-001 + .double 2.5189781815422019e-001 + .double 9.6775383709347462e-001 + .double 2.6077673394054457e-001 + .double 2.5783110216214505e-001 + .double 9.6619000344541628e-001 + .double 2.6691265709209555e-001 + .double 2.6375467897482557e-001 + .double 9.6458979328981431e-001 + .double 2.7304858024368822e-001 + .double 2.6966832557295756e-001 + .double 9.6295326687367200e-001 + .double 2.7918450339517825e-001 + .double 2.7557181931095004e-001 + .double 9.6128048581132297e-001 + .double 2.8532042654675416e-001 + .double 2.8146493792578187e-001 + .double 9.5957151308197752e-001 + .double 2.9145634969861178e-001 + .double 2.8734745954505514e-001 + .double 9.5782641302743521e-001 + .double 2.9759227284983081e-001 + .double 2.9321916269427434e-001 + .double 9.5604525134999163e-001 + .double 3.0372819600136575e-001 + .double 2.9907982630804886e-001 + .double 9.5422809510910300e-001 + .double 3.0986411915294315e-001 + .double 3.0492922973544395e-001 + .double 9.5237501271975256e-001 + .double 3.1600004230440715e-001 + .double 3.1076715274957828e-001 + .double 9.5048607394949258e-001 + .double 3.2213596545600998e-001 + .double 3.1659337555618988e-001 + .double 9.4856134991572227e-001 + .double 3.2827188860755074e-001 + .double 3.2240767880109211e-001 + .double 9.4660091308327599e-001 + .double 3.3440781175905460e-001 + .double 3.2820984357907818e-001 + .double 9.4460483726148525e-001 + .double 3.4054373491063300e-001 + .double 3.3399965144202887e-001 + .double 9.4257319760143998e-001 + .double 3.4667965806221807e-001 + .double 3.3977688440688625e-001 + .double 9.4050607059324687e-001 + .double 3.5281558121368639e-001 + .double 3.4554132496397866e-001 + .double 9.3840353406311194e-001 + .double 3.5895150436583700e-001 + .double 3.5129275608612603e-001 + .double 9.3626566717006854e-001 + .double 3.6508742751687479e-001 + .double 3.5703096123351613e-001 + .double 9.3409255040422601e-001 + .double 3.7122335066826967e-001 + .double 3.6275572436734549e-001 + .double 9.3188426558168824e-001 + .double 3.7735927381996309e-001 + .double 3.6846682995346097e-001 + .double 9.2964089584314613e-001 + .double 3.8349519697141676e-001 + .double 3.7416406297146398e-001 + .double 9.2736252565039867e-001 + .double 3.8963112012302625e-001 + .double 3.7984720892411905e-001 + .double 9.2504924078264972e-001 + .double 3.9576704327591361e-001 + .double 3.8551605384522741e-001 + .double 9.2270112833333184e-001 + .double 4.0190296642611428e-001 + .double 3.9117038430232409e-001 + .double 9.2031827670908073e-001 + .double 4.0803888957752116e-001 + .double 3.9680998741665580e-001 + .double 9.1790077562141403e-001 + .double 4.1417481272918655e-001 + .double 4.0243465085947649e-001 + .double 9.1544871608824230e-001 + .double 4.2031073588067647e-001 + .double 4.0804416286498851e-001 + .double 9.1296219042839377e-001 + .double 4.2644665903220358e-001 + .double 4.1363831223843028e-001 + .double 9.1044129225806913e-001 + .double 4.3258258218370754e-001 + .double 4.1921688836318466e-001 + .double 9.0788611648768436e-001 + .double 4.3871850533523266e-001 + .double 4.2477968120905385e-001 + .double 9.0529675931814457e-001 + .double 4.4485442848684958e-001 + .double 4.3032648134009494e-001 + .double 9.0267331823725294e-001 + .double 4.5099035163837764e-001 + .double 4.3585707992225470e-001 + .double 9.0001589201616061e-001 + .double 4.5712627478987899e-001 + .double 4.4137126873167892e-001 + .double 8.9732458070543686e-001 + .double 4.6326219794198137e-001 + .double 4.4686884016283734e-001 + .double 8.9459948563115133e-001 + .double 4.6939812109297580e-001 + .double 4.5234958723374374e-001 + .double 8.9184070939235649e-001 + .double 4.7553404424453299e-001 + .double 4.5781330359886319e-001 + .double 8.8904835585467179e-001 + .double 4.8166996739612944e-001 + .double 4.6325978355189396e-001 + .double 8.8622253014886299e-001 + .double 4.8780589054764745e-001 + .double 4.6868882203583989e-001 + .double 8.8336333866572525e-001 + .double 4.9394181369918916e-001 + .double 4.7410021465056118e-001 + .double 8.8047088905215476e-001 + .double 5.0007773685103474e-001 + .double 4.7949375766043006e-001 + .double 8.7754529020710992e-001 + .double 5.0621366000216750e-001 + .double 4.8486924800070880e-001 + .double 8.7458665227822174e-001 + .double 5.1234958315385615e-001 + .double 4.9022648328833646e-001 + .double 8.7159508665592555e-001 + .double 5.1848550630527146e-001 + .double 4.9556526182570715e-001 + .double 8.6857070597137820e-001 + .double 5.2462142945689461e-001 + .double 5.0088538261124538e-001 + .double 8.6551362409056642e-001 + .double 5.3075735260824397e-001 + .double 5.0618664534499325e-001 + .double 8.6242395611113565e-001 + .double 5.3689327575983103e-001 + .double 5.1146885043784718e-001 + .double 8.5930181835708175e-001 + .double 5.4302919891168933e-001 + .double 5.1673179901779742e-001 + .double 8.5614732837510543e-001 + .double 5.4916512206327295e-001 + .double 5.2197529293733635e-001 + .double 8.5296060493025228e-001 + .double 5.5530104521459211e-001 + .double 5.2719913478189284e-001 + .double 8.4974176800085777e-001 + .double 5.6143696836616441e-001 + .double 5.3240312787721467e-001 + .double 8.4649093877404158e-001 + .double 5.6757289151762158e-001 + .double 5.3758707629559010e-001 + .double 8.4320823964188074e-001 + .double 5.7370881466948875e-001 + .double 5.4275078486473338e-001 + .double 8.3989379419585897e-001 + .double 5.7984473782067447e-001 + .double 5.4789405917301826e-001 + .double 8.3654772722356563e-001 + .double 5.8598066097196122e-001 + .double 5.5301670557973281e-001 + .double 8.3317016470210881e-001 + .double 5.9211658412379087e-001 + .double 5.5811853122050081e-001 + .double 8.2976123379456024e-001 + .double 5.9825250727594070e-001 + .double 5.6319934401428084e-001 + .double 8.2632106284535900e-001 + .double 6.0438843042663359e-001 + .double 5.6825895266987725e-001 + .double 8.2284978137600195e-001 + .double 6.1052435357861623e-001 + .double 5.7329716669814956e-001 + .double 8.1934752007672185e-001 + .double 6.1666027673006196e-001 + .double 5.7831379641168346e-001 + .double 8.1581441080671402e-001 + .double 6.2279619988188006e-001 + .double 5.8330865293794987e-001 + .double 8.1225058658502325e-001 + .double 6.2893212303280266e-001 + .double 5.8828154822239442e-001 + .double 8.0865618158835750e-001 + .double 6.3506804618462842e-001 + .double 5.9323229503977803e-001 + .double 8.0503133114297964e-001 + .double 6.4120396933591495e-001 + .double 5.9816070699611545e-001 + .double 8.0137617172330955e-001 + .double 6.4733989248666080e-001 + .double 6.0306659853948685e-001 + .double 7.9769084094404230e-001 + .double 6.5347581563949608e-001 + .double 6.0794978496794272e-001 + .double 7.9397547755420772e-001 + .double 6.5961173879086255e-001 + .double 6.1281008242943880e-001 + .double 7.9023022143728749e-001 + .double 6.6574766194233970e-001 + .double 6.1764730793778144e-001 + .double 7.8645521359910342e-001 + .double 6.7188358509391355e-001 + .double 6.2246127937415208e-001 + .double 7.8265059616657406e-001 + .double 6.7801950824549073e-001 + .double 6.2725181549517317e-001 + .double 7.7881651238145255e-001 + .double 6.8415543139719159e-001 + .double 6.3201873593996061e-001 + .double 7.7495310659475025e-001 + .double 6.9029135454861223e-001 + .double 6.3676186123634104e-001 + .double 7.7106052426176686e-001 + .double 6.9642727770028012e-001 + .double 6.4148101280873582e-001 + .double 7.6713891193569272e-001 + .double 7.0256320085180513e-001 + .double 6.4617601298345484e-001 + .double 7.6318841726326403e-001 + .double 7.0869912400311297e-001 + .double 6.5084668499634046e-001 + .double 7.5920918897842271e-001 + .double 7.1483504715460733e-001 + .double 6.5549285299953874e-001 + .double 7.5520137689660305e-001 + .double 7.2097097030632040e-001 + .double 6.6011434206747233e-001 + .double 7.5116513190964085e-001 + .double 7.2710689345788315e-001 + .double 6.6471097820341152e-001 + .double 7.4710060598012085e-001 + .double 7.3324281660919000e-001 + .double 6.6928258834652721e-001 + .double 7.4300795213521975e-001 + .double 7.3937873976117685e-001 + .double 6.7382900037897608e-001 + .double 7.3888732446041450e-001 + .double 7.4551466291281177e-001 + .double 6.7835004313014813e-001 + .double 7.3473887809569882e-001 + .double 7.5165058606413959e-001 + .double 6.8284554638537620e-001 + .double 7.3056276922770780e-001 + .double 7.5778650921542079e-001 + .double 6.8731534089169666e-001 + .double 7.2635915508440507e-001 + .double 7.6392243236702362e-001 + .double 6.9175925836413921e-001 + .double 7.2212819392923311e-001 + .double 7.7005835551857726e-001 + .double 6.9617713149145244e-001 + .double 7.1787004505574192e-001 + .double 7.7619427867013535e-001 + .double 7.0056879394324900e-001 + .double 7.1358486878079297e-001 + .double 7.8233020182165414e-001 + .double 7.0493408037588867e-001 + .double 7.0927282643888179e-001 + +/* e_coefs_t */ + .double -3.3333333333333498e-001 + .double -1.3333333333262590e-001 + .double -5.3968254084035876e-002 + .double -2.1869479239926222e-002 + .double -8.8636401864079888e-003 + .double -3.5824012035897486e-003 + .double -1.5766189968704179e-003 + +/* f_coefs_t */ + .double 3.3333333333333331e-001 + .double 2.2222222222234297e-002 + .double 2.1164021149254063e-003 + .double 2.1164029762895965e-004 + .double 2.1375230108053778e-005 + .double 2.2024029026219318e-006 + +/* g_coefs_t */ + .double -3.3333333329030146e-001 + .double -1.3333638120728455e-001 + +/* Table of a, tan(a), cot(a) and -cot(a)/tan(a) */ + .double 2.1782527187985032e-001 /* a */ + .double 2.2133705105946697e-001 /* tan(a) */ + .double 4.5179964005724846e+000 /* cot(a) */ + .double 4.7393334516319516e+000 /* -cot(a) / tan(a) */ + .double 2.2396119503140927e-001 + .double 2.2778240801325200e-001 + .double 4.3901546599762948e+000 + .double 4.6179370679895468e+000 + .double 2.3009711818284290e-001 + .double 2.3424580718540494e-001 + .double 4.2690198472090586e+000 + .double 4.5032656543944638e+000 + .double 2.3623304133439546e-001 + .double 2.4072781320683154e-001 + .double 4.1540692231553962e+000 + .double 4.3947970363622275e+000 + .double 2.4236896448590334e-001 + .double 2.4722899677415033e-001 + .double 4.0448329809529753e+000 + .double 4.2920619777271254e+000 + .double 2.4850488763747661e-001 + .double 2.5374993486314967e-001 + .double 3.9408877111212335e+000 + .double 4.1946376459843835e+000 + .double 2.5464081078901402e-001 + .double 2.6029121094549956e-001 + .double 3.8418508115104300e+000 + .double 4.1021420224559293e+000 + .double 2.6077673394051260e-001 + .double 2.6685341521097389e-001 + .double 3.7473756864209573e+000 + .double 4.0142291016319316e+000 + .double 2.6691265709215639e-001 + .double 2.7343714479429770e-001 + .double 3.6571476079165604e+000 + .double 3.9305847527108582e+000 + .double 2.7304858024366946e-001 + .double 2.8004300400625293e-001 + .double 3.5708801351726378e+000 + .double 3.8509231391788905e+000 + .double 2.7918450339522155e-001 + .double 2.8667160457169982e-001 + .double 3.4883120059764714e+000 + .double 3.7749836105481713e+000 + .double 2.8532042654664169e-001 + .double 2.9332356587123770e-001 + .double 3.4092044293467270e+000 + .double 3.7025279952179648e+000 + .double 2.9145634969825540e-001 + .double 2.9999951519028795e-001 + .double 3.3333387201166169e+000 + .double 3.6333382353069048e+000 + .double 2.9759227284976519e-001 + .double 3.0670008797194814e-001 + .double 3.2605142261695845e+000 + .double 3.5672143141415327e+000 + .double 3.0372819600135798e-001 + .double 3.1342592807838571e-001 + .double 3.1905465068923933e+000 + .double 3.5039724349707790e+000 + .double 3.0986411915290263e-001 + .double 3.2017768805651181e-001 + .double 3.1232657280712783e+000 + .double 3.4434434161277903e+000 + .double 3.1600004230430800e-001 + .double 3.2695602941152369e-001 + .double 3.0585152437771641e+000 + .double 3.3854712731886880e+000 + .double 3.2213596545599910e-001 + .double 3.3376162288744643e-001 + .double 2.9961503403200656e+000 + .double 3.3299119632075120e+000 + .double 3.2827188860753653e-001 + .double 3.4059514875273916e-001 + .double 2.9360371210864398e+000 + .double 3.2766322698391788e+000 + .double 3.3440781175906148e-001 + .double 3.4745729709642648e-001 + .double 2.8780515141188117e+000 + .double 3.2255088112152381e+000 + .double 3.4054373491065998e-001 + .double 3.5434876812960975e-001 + .double 2.8220783870038209e+000 + .double 3.1764271551334304e+000 + .double 3.4667965806236861e-001 + .double 3.6127027249566168e-001 + .double 2.7680107557479934e+000 + .double 3.1292810282436552e+000 + .double 3.5281558121374068e-001 + .double 3.6822253158820401e-001 + .double 2.7157490762089882e+000 + .double 3.0839716077971921e+000 + .double 3.5895150436536694e-001 + .double 3.7520627787990135e-001 + .double 2.6652006081840853e+000 + .double 3.0404068860639866e+000 + .double 3.6508742751675222e-001 + .double 3.8222225525605652e-001 + .double 2.6162788436536348e+000 + .double 2.9985010989096912e+000 + .double 3.7122335066826212e-001 + .double 3.8927121936209846e-001 + .double 2.5689029916948578e+000 + .double 2.9581742110569564e+000 + .double 3.7735927381994766e-001 + .double 3.9635393795715074e-001 + .double 2.5229975136720064e+000 + .double 2.9193514516291570e+000 + .double 3.8349519697148382e-001 + .double 4.0347119127885739e-001 + .double 2.4784917030392246e+000 + .double 2.8819628943180819e+000 + .double 3.8963112012272882e-001 + .double 4.1062377241931786e-001 + .double 2.4353193048424560e+000 + .double 2.8459430772617740e+000 + .double 3.9576704327446055e-001 + .double 4.1781248771203672e-001 + .double 2.3934181706153708e+000 + .double 2.8112306583274074e+000 + .double 4.0190296642601137e-001 + .double 4.2503815712644383e-001 + .double 2.3527299449082446e+000 + .double 2.7777681020346883e+000 + .double 4.0803888957749385e-001 + .double 4.3230161467941647e-001 + .double 2.3131997800692319e+000 + .double 2.7455013947486484e+000 + .double 4.1417481273072976e-001 + .double 4.3960370885742833e-001 + .double 2.2747760763872868e+000 + .double 2.7143797852447151e+000 + .double 4.2031073588058343e-001 + .double 4.4694530303978747e-001 + .double 2.2374102450540332e+000 + .double 2.6843555480938206e+000 + .double 4.2644665903229395e-001 + .double 4.5432727596595179e-001 + .double 2.2010564914331536e+000 + .double 2.6553837673991056e+000 + .double 4.3258258218368612e-001 + .double 4.6175052217449325e-001 + .double 2.1656716169821784e+000 + .double 2.6274221391566717e+000 + .double 4.3871850533549811e-001 + .double 4.6921595248974990e-001 + .double 2.1312148376324549e+000 + .double 2.6004307901222048e+000 + .double 4.4485442848660378e-001 + .double 4.7672449450501908e-001 + .double 2.0976476172853160e+000 + .double 2.5743721117903351e+000 + .double 4.5099035163826329e-001 + .double 4.8427709309193112e-001 + .double 2.0649335148506980e+000 + .double 2.5492106079426291e+000 + .double 4.5712627478992174e-001 + .double 4.9187471091535240e-001 + .double 2.0330380436494768e+000 + .double 2.5249127545648293e+000 + .double 4.6326219794154727e-001 + .double 4.9951832897274745e-001 + .double 2.0019285419545789e+000 + .double 2.5014468709273263e+000 + .double 4.6939812109297535e-001 + .double 5.0720894714701403e-001 + .double 1.9715740537008921e+000 + .double 2.4787830008479061e+000 + .double 4.7553404424473472e-001 + .double 5.1494758477898417e-001 + .double 1.9419452184229598e+000 + .double 2.4568928032019439e+000 + .double 4.8166996739605061e-001 + .double 5.2273528125491131e-001 + .double 1.9130141696181993e+000 + .double 2.4357494508731108e+000 + .double 4.8780589054762180e-001 + .double 5.3057309661927010e-001 + .double 1.8847544407582022e+000 + .double 2.4153275373774723e+000 + .double 4.9394181369915963e-001 + .double 5.3846211220101370e-001 + .double 1.8571408783292245e+000 + .double 2.3956029905302381e+000 + .double 5.0007773685083068e-001 + .double 5.4640343126567514e-001 + .double 1.8301495612566436e+000 + .double 2.3765529925223188e+000 + .double 5.0621366000230694e-001 + .double 5.5439817968617089e-001 + .double 1.8037577261997355e+000 + .double 2.3581559058859063e+000 + .double 5.1234958315368029e-001 + .double 5.6244750663866294e-001 + .double 1.7779436982061989e+000 + .double 2.3403912048448618e+000 + .double 5.1848550630524104e-001 + .double 5.7055258532055808e-001 + .double 1.7526868262951820e+000 + .double 2.3232394116157402e+000 + .double 5.2462142945731782e-001 + .double 5.7871461369315458e-001 + .double 1.7279674235602056e+000 + .double 2.3066820372533603e+000 + .double 5.3075735260843893e-001 + .double 5.8693481524762969e-001 + .double 1.7037667114330179e+000 + .double 2.2907015266806474e+000 + .double 5.3689327575978241e-001 + .double 5.9521443980611988e-001 + .double 1.6800667677446326e+000 + .double 2.2752812075507522e+000 + .double 5.4302919891141332e-001 + .double 6.0355476433967026e-001 + .double 1.6568504783390579e+000 + .double 2.2604052426787282e+000 + .double 5.4916512206305668e-001 + .double 6.1195709382119168e-001 + .double 1.6341014919130767e+000 + .double 2.2460585857342683e+000 + .double 5.5530104521453705e-001 + .double 6.2042276210823599e-001 + .double 1.6118041778511421e+000 + .double 2.2322269399593782e+000 + .double 5.6143696836616308e-001 + .double 6.2895313285725596e-001 + .double 1.5899435868251808e+000 + .double 2.2188967196824367e+000 + .double 5.6757289151765378e-001 + .double 6.3754960046873721e-001 + .double 1.5685054139549035e+000 + .double 2.2060550144236406e+000 + .double 5.7370881466930135e-001 + .double 6.4621359106975795e-001 + .double 1.5474759643240794e+000 + .double 2.1936895553938371e+000 + .double 5.7984473782075241e-001 + .double 6.5494656352905001e-001 + .double 1.5268421206940881e+000 + .double 2.1817886842231382e+000 + .double 5.8598066097280455e-001 + .double 6.6375001051372295e-001 + .double 1.5065913132355802e+000 + .double 2.1703413237493030e+000 + .double 5.9211658412388102e-001 + .double 6.7262545957743969e-001 + .double 1.4867114911591739e+000 + .double 2.1593369507366136e+000 + .double 5.9825250727542312e-001 + .double 6.8157447430219242e-001 + .double 1.4671910960630048e+000 + .double 2.1487655703651973e+000 + .double 6.0438843042675905e-001 + .double 6.9059865546754429e-001 + .double 1.4480190369368544e+000 + .double 2.1386176924043987e+000 + .double 6.1052435357803003e-001 + .double 6.9969964227602943e-001 + .double 1.4291846666480115e+000 + .double 2.1288843089240408e+000 + .double 6.1666027673074542e-001 + .double 7.0887911362175926e-001 + .double 1.4106777598381546e+000 + .double 2.1195568734599139e+000 + .double 6.2279619988164980e-001 + .double 7.1813878939760900e-001 + .double 1.3924884921462362e+000 + .double 2.1106272815438452e+000 + .double 6.2893212303307089e-001 + .double 7.2748043187801659e-001 + .double 1.3746074205988805e+000 + .double 2.1020878524768971e+000 + .double 6.3506804618466861e-001 + .double 7.3690584712716656e-001 + .double 1.3570254651913920e+000 + .double 2.0939313123185586e+000 + .double 6.4120396933628243e-001 + .double 7.4641688647950022e-001 + .double 1.3397338914939785e+000 + .double 2.0861507779734789e+000 + .double 6.4733989248741663e-001 + .double 7.5601544807349652e-001 + .double 1.3227242942564639e+000 + .double 2.0787397423299603e+000 + .double 6.5347581563917811e-001 + .double 7.6570347845035958e-001 + .double 1.3059885819296690e+000 + .double 2.0716920603800286e+000 + .double 6.5961173879104307e-001 + .double 7.7548297420854806e-001 + .double 1.2895189620643732e+000 + .double 2.0650019362729211e+000 + .double 6.6574766194234480e-001 + .double 7.8535598373264071e-001 + .double 1.2733079275046699e+000 + .double 2.0586639112373106e+000 + .double 6.7188358509360024e-001 + .double 7.9532460899227542e-001 + .double 1.2573482433381016e+000 + .double 2.0526728523303768e+000 + .double 6.7801950824543344e-001 + .double 8.0539100741084590e-001 + .double 1.2416329345602939e+000 + .double 2.0470239419711396e+000 + .double 6.8415543139756574e-001 + .double 8.1555739381137526e-001 + .double 1.2261552744027764e+000 + .double 2.0417126682141515e+000 + .double 6.9029135454847523e-001 + .double 8.2582604244435354e-001 + .double 1.2109087732812480e+000 + .double 2.0367348157256018e+000 + .double 6.9642727770014579e-001 + .double 8.3619928910910779e-001 + .double 1.1958871683153505e+000 + .double 2.0320864574244584e+000 + .double 7.0256320085154866e-001 + .double 8.4667953334545765e-001 + .double 1.1810844134246781e+000 + .double 2.0277639467701358e+000 + .double 7.0869912400325685e-001 + .double 8.5726924073758481e-001 + .double 1.1664946699121168e+000 + .double 2.0237639106497016e+000 + .double 7.1483504715466040e-001 + .double 8.6797094530371144e-001 + .double 1.1521122975494189e+000 + .double 2.0200832428531306e+000 + .double 7.2097097030618651e-001 + .double 8.7878725199760810e-001 + .double 1.1379318461059353e+000 + .double 2.0167190981035437e+000 + .double 7.2710689345788626e-001 + .double 8.8972083931237311e-001 + .double 1.1239480473143204e+000 + .double 2.0136688866266934e+000 + .double 7.3324281660914026e-001 + .double 9.0077446200018296e-001 + .double 1.1101558072366808e+000 + .double 2.0109302692368636e+000 + .double 7.3937873976090618e-001 + .double 9.1195095391666825e-001 + .double 1.0965501990048661e+000 + .double 2.0085011529215344e+000 + .double 7.4551466291241586e-001 + .double 9.2325323098154477e-001 + .double 1.0831264559311240e+000 + .double 2.0063796869126689e+000 + .double 7.5165058606392221e-001 + .double 9.3468429428306077e-001 + .double 1.0698799649426429e+000 + .double 2.0045642592257038e+000 + .double 7.5778650921651458e-001 + .double 9.4624723331715210e-001 + .double 1.0568062603410875e+000 + .double 2.0030534936582396e+000 + .double 7.6392243236715107e-001 + .double 9.5794522936478299e-001 + .double 1.0439010178724974e+000 + .double 2.0018462472372804e+000 + .double 7.7005835551860857e-001 + .double 9.6978155905284857e-001 + .double 1.0311600490492567e+000 + .double 2.0009416081021052e+000 + .double 7.7619427867019419e-001 + .double 9.8175959804234592e-001 + .double 1.0185792957807858e+000 + .double 2.0003388938231317e+000 + .double 7.8233020182167123e-001 + .double 9.9388282491414059e-001 + .double 1.0061548252294106e+000 + .double 2.0000376501435513e+000 + +// +// End of table. +// diff --git a/private/fp32/tran/asincos.c b/private/fp32/tran/asincos.c new file mode 100644 index 000000000..35455a407 --- /dev/null +++ b/private/fp32/tran/asincos.c @@ -0,0 +1,176 @@ +/*** +*asincos.c - inverse sin, cos +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-26-91 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _asincos(double x, int flag); +static double const a[2] = { + 0.0, + 0.78539816339744830962 +}; + +static double const b[2] = { + 1.57079632679489661923, + 0.78539816339744830962 +}; + +static double const EPS = 1.05367121277235079465e-8; /* 2^(-53/2) */ + +/* constants for the rational approximation */ +static double const p1 = -0.27368494524164255994e+2; +static double const p2 = 0.57208227877891731407e+2; +static double const p3 = -0.39688862997504877339e+2; +static double const p4 = 0.10152522233806463645e+2; +static double const p5 = -0.69674573447350646411e+0; +static double const q0 = -0.16421096714498560795e+3; +static double const q1 = 0.41714430248260412556e+3; +static double const q2 = -0.38186303361750149284e+3; +static double const q3 = 0.15095270841030604719e+3; +static double const q4 = -0.23823859153670238830e+2; +/* q5 = 1 is not needed (avoid myltiplying by 1) */ + +#define Q(g) (((((g + q4) * g + q3) * g + q2) * g + q1) * g + q0) +#define R(g) (((((p5 * g + p4) * g + p3) * g + p2) * g + p1) * g) / Q(g) + +/*** +*double asin(double x) - inverse sin +*double acos(double x) - inverse cos +* +*Purpose: +* Compute arc sin, arc cos. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* P, I +* (denormals are accepted) +*******************************************************************************/ +double asin(double x) +{ + return _asincos(x,0); +} + +double acos(double x) +{ + return _asincos(x,1); +} + +static double _asincos(double x, int flag) +{ + unsigned int savedcw; + double qnan; + int who; + double y,result; + double g; + int i; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (flag) { + who = OP_ACOS; + qnan = QNAN_ACOS; + } + else { + who = OP_ASIN; + qnan = QNAN_ASIN; + } + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I,who,x,qnan,savedcw); + case T_QNAN: + return _handle_qnan1(who,x,savedcw); + default: //T_SNAN + return _except1(FP_I,who,x,_s2qnan(x),savedcw); + } + } + + + // do test for zero after making sure that x is not special + // because the compiler does not handle NaNs for the time + if (x == 0.0 && !flag) { + RETURN(savedcw, x); + } + + y = ABS(x); + if (y < EPS) { + i = flag; + result = y; + } + else { + if (y > .5) { + i = 1-flag; + if (y > 1.0) { + return _except1(FP_I,who,x,qnan,savedcw); + } + else if (y == 1.0) { + /* separate case to avoid domain error in sqrt */ + if (flag && x >= 0.0) { + // + // acos(1.0) is exactly computed as 0.0 + // + RETURN(savedcw, 0.0); + } + y = 0.0; + g = 0.0; + + } + else { + /* now even if y is as close to 1 as possible, + * 1-y is still not a denormal. + * e.g. for y=3fefffffffffffff, 1-y is about 10^(-16) + * So we can speed up division + */ + g = _add_exp(1.0 - y,-1); + /* g and sqrt(g) are not denomrals either, + * even in the worst case + * So we can speed up multiplication + */ + y = _add_exp(-_fsqrt(g),1); + } + } + else { + /* y <= .5 */ + i = flag; + g = y*y; + } + result = y + y * R(g); + } + + if (flag == 0) { + /* compute asin */ + if (i) { + /* a[i] is non zero if i is nonzero */ + result = (a[i] + result) + a[i]; + } + if (x < 0) + result = -result; + } + else { + /* compute acos */ + if (x < 0) + result = (b[i] + result) + b[i]; + else + result = (a[i] - result) + a[i]; + } + + RETURN_INEXACT1 (who,x,result,savedcw); +} diff --git a/private/fp32/tran/atan.c b/private/fp32/tran/atan.c new file mode 100644 index 000000000..01228b868 --- /dev/null +++ b/private/fp32/tran/atan.c @@ -0,0 +1,285 @@ +/*** +*atan.c - arctangent of x and x/y +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-30-91 GDP support IEEE exceptions +* 3-27-92 GDP support UNDERFLOW +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _atanhlp(double x); + +static double const a[4] = { + 0.0, + 0.52359877559829887308, /* pi/6 */ + 1.57079632679489661923, /* pi/2 */ + 1.04719755119659774615 /* pi/3 */ +}; + +/* constants */ +static double const EPS = 1.05367121277235079465e-8; /* 2^(-53/2) */ +static double const PI_OVER_TWO = 1.57079632679489661923; +static double const PI = 3.14159265358979323846; +static double const TWO_M_SQRT3 = 0.26794919243112270647; +static double const SQRT3_M_ONE = 0.73205080756887729353; +static double const SQRT3 = 1.73205080756887729353; + +/* chose MAX_ARG s.t. 1/MAX_ARG does not underflow */ +static double const MAX_ARG = 4.494232837155790e+307; + +/* constants for rational approximation */ +static double const p0 = -0.13688768894191926929e+2; +static double const p1 = -0.20505855195861651981e+2; +static double const p2 = -0.84946240351320683534e+1; +static double const p3 = -0.83758299368150059274e+0; +static double const q0 = 0.41066306682575781263e+2; +static double const q1 = 0.86157349597130242515e+2; +static double const q2 = 0.59578436142597344465e+2; +static double const q3 = 0.15024001160028576121e+2; +static double const q4 = 0.10000000000000000000e+1; + + +#define Q(g) (((((g) + q3) * (g) + q2) * (g) + q1) * (g) + q0) +#define R(g) ((((p3 * (g) + p2) * (g) + p1) * (g) + p0) * (g)) / Q(g) + + +/*** +*double atan(double x) - arctangent +* +*Purpose: +* +*Entry: +* +*Exit: +* +*Exceptions: +* P, I +\*******************************************************************************/ +double atan(double x) +{ + unsigned int savedcw; + double result; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + result = PI_OVER_TWO; + break; + case T_NINF: + result = -PI_OVER_TWO; + break; + case T_QNAN: + return _handle_qnan1(OP_ATAN,x,savedcw); + default: //T_SNAN + return _except1(FP_I,OP_ATAN,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) + RETURN(savedcw,x); + + result = _atanhlp(x); + RETURN_INEXACT1(OP_ATAN,x,result,savedcw); +} + +/*** +*double atan2(double x, double y) - arctangent (x/y) +* +*Purpose: +* +*Entry: +* +*Exit: +* +*Exceptions: +* NAN or both args 0: DOMAIN error +*******************************************************************************/ +double atan2(double v, double u) +{ + unsigned int savedcw; + double result; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(v) || IS_D_SPECIAL(u)){ + if (IS_D_SNAN(v) || IS_D_SNAN(u)){ + return _except2(FP_I,OP_ATAN2,v,u,_d_snan2(v,u),savedcw); + } + if (IS_D_QNAN(v) || IS_D_QNAN(u)){ + return _handle_qnan2(OP_ATAN2,v,u,savedcw); + } + if ((IS_D_INF(v) || IS_D_MINF(v)) && + (IS_D_INF(u) || IS_D_MINF(u))){ + return _except2(FP_I,OP_ATAN2,v,u,QNAN_ATAN2,savedcw); + } + /* the other combinations of infinities will be handled + * later by the division v/u + */ + } + + + if (u == 0) { + if (v == 0) { + return _except2(FP_I,OP_ATAN2,v,u,QNAN_ATAN2,savedcw); + } + else { + result = PI_OVER_TWO; + } + } + else if (INTEXP(v) - INTEXP(u) > MAXEXP - 3) { + /* v/u overflow */ + result = PI_OVER_TWO; + } + else { + double arg = v/u; + + + if (ABS(arg) < D_MIN) { + + if (v == 0.0 || IS_D_INF(u) || IS_D_MINF(u)) { + result = (u < 0) ? PI : 0; + if (v < 0) { + result = -result; + } + if (result == 0) { + RETURN(savedcw, result); + } + else { + RETURN_INEXACT2(OP_ATAN2,v,u,result,savedcw); + } + } + else { + + double v1, u1; + int vexp, uexp; + int exc_flags; + + // + // in this case an underflow has occurred + // re-compute the result in order to raise + // an IEEE underflow exception + // + + if (u < 0) { + result = v < 0 ? -PI: PI; + RETURN_INEXACT2(OP_ATAN2,v,u,result,savedcw); + } + + v1 = _decomp(v, &vexp); + u1 = _decomp(u, &uexp); + result = _add_exp(v1/u1, vexp-uexp+IEEE_ADJUST); + result = ABS(result); + + if (v < 0) { + result = -result; + } + + // this is not a perfect solution. In the future + // we may want to have a way to let the division + // generate an exception and propagate the IEEE result + // to the user's handler + + exc_flags = FP_U; + if (_statfp() & ISW_INEXACT) { + exc_flags |= FP_P; + } + return _except2(exc_flags,OP_ATAN2,v,u,result,savedcw); + + } + } + + else { + result = _atanhlp( ABS(arg) ); + } + + } + + /* set sign of the result */ + if (u < 0) { + result = PI - result; + } + if (v < 0) { + result = -result; + } + + + RETURN_INEXACT2(OP_ATAN2,v,u,result,savedcw); +} + + + + + +/*** +*double _atanhlp(double x) - arctangent helper +* +*Purpose: +* Compute arctangent of x, assuming x is a valid, non infinite +* number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +static double _atanhlp(double x) +{ + double f,g,result; + int n; + + + f = ABS(x); + if (f > MAX_ARG) { + // if this step is ommited, 1.0/f might underflow in the + // following block + return x > 0.0 ? PI_OVER_TWO : -PI_OVER_TWO; + } + if (f > 1.0) { + f = 1.0/f; + n = 2; + } + else { + n = 0; + } + + if (f > TWO_M_SQRT3) { + f = (((SQRT3_M_ONE * f - .5) - .5) + f) / (SQRT3 + f); + n++; + } + + if (ABS(f) < EPS) { + result = f; + } + else { + g = f*f; + result = f + f * R(g); + } + + if (n > 1) + result = -result; + + result += a[n]; + + if (x < 0.0) + result = -result; + + + return result; +} diff --git a/private/fp32/tran/bessel.c b/private/fp32/tran/bessel.c new file mode 100644 index 000000000..d63d73325 --- /dev/null +++ b/private/fp32/tran/bessel.c @@ -0,0 +1,755 @@ +/*** +*bessel.c - defines the bessel functions for C. +* +* Copyright (c) 1983-1989, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +* This is a collection of routines for computing the bessel functions j0, j1, +* y0, y1, jn and yn. The approximations used for j0, j1, y0, and y1 are +* from the approximations listed in Hart, Computer Approximations, 1978. +* For these functions, a rational approximation with 18 places of accuracy +* after the decimal point has been selected. jn and yn are computed using +* the recursive formula that the bessel functions satisfy. Using these +* formulas their values can be computed from the values of the bessel +* functions of order 0 and 1. In the case of jn, the recursive formula +* +* jn(n-1,x) = (2.0*n/x)*jn(n,x) - jn(n+1,x) +* +* is used to stabily compute in the downward direction, normalizing in the +* the end by j0(x) in the usual manner. In the case of yn, the recursive +* formula +* +* yn(n+1,x) = (2.0*n/x)*yn(n,x) - yn(n-1,x) +* +* is used to stably compute the functions in the forward direction. +* +* +* Note: upon testing and experimentation the low range approximations were +* found to have an error on the order of 1.0e-14 in the neighborhood of +* 8.0. Moving the boundary point between the low range and high +* range approximations down to 7.5 reduced this error to less than +* 1.0e-14. This is not suprising. The high range asymptotoic is +* likely to have greater precision in the neighborhood of 8.0. +* +*Revision History: +* +* 06/05/89 WAJ Added this header. Made changes for C6 and -W3 +* 06/06/89 WAJ Moved some of the routines into _RTEXT if MTHREAD. +* 08/17/90 WAJ Now uses _stdcall. +* 01/13/92 GDP changed domain_err. No full IEEE support yet +* +*******************************************************************************/ + + +/* + * The functions sqrt, sin, cos, and log from the math library are used in + * the computations of the bessel functions. + */ + +#include <math.h> +#include <trans.h> + +#ifdef _X86SEG_ +#include <os2supp.h> +#define _CALLTYPE1 _PASCAL +#else +#include <cruntime.h> +#endif + + +#ifdef LD_VER +#define D_TYPE long double + +#else +#define D_TYPE double +#endif + + + +static D_TYPE domain_err( int who, D_TYPE arg1, D_TYPE arg2 ); /* error routine for y0, y1, yn */ +static D_TYPE evaluate( D_TYPE x, D_TYPE p[], int n1, D_TYPE q[], int n2 ); + + +#ifdef FAR_CODE + #ifdef LD_VER + #pragma alloc_text( _RTEXT, _y0l, _y1l, _ynl, _j0l, _j1l, _jnl ) + #else + #pragma alloc_text( _RTEXT, _y0, _y1, _yn, _j0, _j1, _jn ) + #endif +#endif + + + +/* + * Following are the constants needed for the computations of the bessel + * functions as in Hart. + */ + +#define PI 3.14159265358979323846264338327950288 + + +/* coefficients for Hart JZERO 5848, the low range approximation for _j0 */ + +static D_TYPE J0p[12] = { + 0.1208181340866561224763662419e+12 , + -0.2956513002312076810191727211e+11 , + 0.1729413174598080383355729444e+10 , + -0.4281611621547871420502838045e+08 , + 0.5645169313685735094277826749e+06 , + -0.4471963251278787165486324342e+04 , + 0.2281027164345610253338043760e+02 , + -0.7777570245675629906097285039e-01 , + 0.1792464784997734953753734861e-03 , + -0.2735011670747987792661294323e-06 , + 0.2553996162031530552738418047e-09 , + -0.1135416951138795305302383379e-12 + }; + +static D_TYPE J0q[5] = { + 0.1208181340866561225104607422e+12 , + 0.6394034985432622416780183619e+09 , + 0.1480704129894421521840387092e+07 , + 0.1806405145147135549477896097e+04 , + 0.1e+01 + }; + + +/* coefficients for Hart 6548, P0 of the high range approximation for j0 + and _y0 */ + +static D_TYPE P0p[6] = { + 0.2277909019730468430227002627e+05 , + 0.4134538663958076579678016384e+05 , + 0.2117052338086494432193395727e+05 , + 0.3480648644324927034744531110e+04 , + 0.1537620190900835429577172500e+03 , + 0.8896154842421045523607480000e+00 + }; + +static D_TYPE P0q[6] = { + 0.2277909019730468431768423768e+05 , + 0.4137041249551041663989198384e+05 , + 0.2121535056188011573042256764e+05 , + 0.3502873513823560820735614230e+04 , + 0.1571115985808089364906848200e+03 , + 0.1e+01 + }; + + +/* coefficients for Hart 6948, Q0 of the high range approximation for _j0 + and _y0 */ + +static D_TYPE Q0p[6] = { + -0.8922660020080009409846916000e+02 , + -0.1859195364434299380025216900e+03 , + -0.1118342992048273761126212300e+03 , + -0.2230026166621419847169915000e+02 , + -0.1244102674583563845913790000e+01 , + -0.8803330304868075181663000000e-02 + }; + +static D_TYPE Q0q[6] = { + 0.5710502412851206190524764590e+04 , + 0.1195113154343461364695265329e+05 , + 0.7264278016921101883691345060e+04 , + 0.1488723123228375658161346980e+04 , + 0.9059376959499312585881878000e+02 , + 0.1e+01 + }; + + + +/* coefficients for Hart JONE 6047, the low range approximation for _j1 */ + +static D_TYPE J1p[11] = { + 0.4276440148317146125749678272e+11 , + -0.5101551390663600782363700742e+10 , + 0.1928444249391651825203957853e+09 , + -0.3445216851469225845312168656e+07 , + 0.3461845033978656620861683039e+05 , + -0.2147334276854853222870548439e+03 , + 0.8645934990693258061130801001e+00 , + -0.2302415336775925186376173217e-02 , + 0.3991878933072250766608485041e-05 , + -0.4179409142757237977587032616e-08 , + 0.2060434024597835939153003596e-11 + }; + + + static D_TYPE J1q[5] = { + 0.8552880296634292263013618479e+11 , + 0.4879975894656629161544052051e+09 , + 0.1226033111836540909388789681e+07 , + 0.1635396109098603257687643236e+04 , + 0.1e+01 + }; + + +/* coefficients for Hart PONE 6749, P1 of the high range approximation for + _j1 and y1 */ + +static D_TYPE P1p[6] = { + 0.3522466491336797983417243730e+05 , + 0.6275884524716128126900567500e+05 , + 0.3135396311091595742386698880e+05 , + 0.4985483206059433843450045500e+04 , + 0.2111529182853962382105718000e+03 , + 0.1257171692914534155849500000e+01 + }; + +static D_TYPE P1q[6] = { + 0.3522466491336797980683904310e+05 , + 0.6269434695935605118888337310e+05 , + 0.3124040638190410399230157030e+05 , + 0.4930396490181088978386097000e+04 , + 0.2030775189134759322293574000e+03 , + 0.1e+01 + }; + + +/* coefficients for Hart QONE 7149, Q1 of the high range approximation for _j1 + and y1 */ + +static D_TYPE Q1p[6] = { + 0.3511751914303552822533318000e+03 , + 0.7210391804904475039280863000e+03 , + 0.4259873011654442389886993000e+03 , + 0.8318989576738508273252260000e+02 , + 0.4568171629551226706440500000e+01 , + 0.3532840052740123642735000000e-01 + }; + +static D_TYPE Q1q[6] = { + 0.7491737417180912771451950500e+04 , + 0.1541417733926509704998480510e+05 , + 0.9152231701516992270590472700e+04 , + 0.1811186700552351350672415800e+04 , + 0.1038187587462133728776636000e+03 , + 0.1e+01 + }; + + +/* coeffiecients for Hart YZERO 6245, the low range approximation for y0 */ + +static D_TYPE Y0p[9] = { + -0.2750286678629109583701933175e+20 , + 0.6587473275719554925999402049e+20 , + -0.5247065581112764941297350814e+19 , + 0.1375624316399344078571335453e+18 , + -0.1648605817185729473122082537e+16 , + 0.1025520859686394284509167421e+14 , + -0.3436371222979040378171030138e+11 , + 0.5915213465686889654273830069e+08 , + -0.4137035497933148554125235152e+05 + }; + +static D_TYPE Y0q[9] = { + 0.3726458838986165881989980739e+21 , + 0.4192417043410839973904769661e+19 , + 0.2392883043499781857439356652e+17 , + 0.9162038034075185262489147968e+14 , + 0.2613065755041081249568482092e+12 , + 0.5795122640700729537480087915e+09 , + 0.1001702641288906265666651753e+07 , + 0.1282452772478993804176329391e+04 , + 0.1e+01 + }; + + +/* coefficients for Hart YONE 6444, the low range approximation for y1 */ + +static D_TYPE Y1p[8] = { + -0.2923821961532962543101048748e+20 , + 0.7748520682186839645088094202e+19 , + -0.3441048063084114446185461344e+18 , + 0.5915160760490070618496315281e+16 , + -0.4863316942567175074828129117e+14 , + 0.2049696673745662182619800495e+12 , + -0.4289471968855248801821819588e+09 , + 0.3556924009830526056691325215e+06 + }; + + +static D_TYPE Y1q[9] = { + 0.1491311511302920350174081355e+21 , + 0.1818662841706134986885065935e+19 , + 0.1131639382698884526905082830e+17 , + 0.4755173588888137713092774006e+14 , + 0.1500221699156708987166369115e+12 , + 0.3716660798621930285596927703e+09 , + 0.7269147307198884569801913150e+06 , + 0.1072696143778925523322126700e+04 , + 0.1e+01 + }; + + + +/* + * Function name: evaluate + * + * Arguments: x - double + * p, q - double arrays of coefficients + * n1, n2 - the order of the numerator and denominator + * polynomials + * + * Description: evaluate is meant strictly as a helper routine for the + * bessel function routines to evaluate the rational polynomial + * aproximations appearing in _j0, _j1, y0, and y1. Given the + * coefficient arrays in p and q, it evaluates the numerator + * and denominator polynomials through orders n1 and n2 + * respectively, returning p(x)/q(x). This routine is not + * available to the user of the bessel function routines. + * + * Side Effects: evaluate uses the global data stored in the coefficients + * above. No other global data is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + */ + +static D_TYPE evaluate( D_TYPE x, D_TYPE p[], int n1, D_TYPE q[], int n2 ) +{ +D_TYPE numerator, denominator; +int i; + + numerator = x*p[n1]; + for ( i = n1-1 ; i > 0 ; i-- ) + numerator = x*(p[i] + numerator); + numerator += p[0]; + + denominator = x*q[n2]; + for ( i = n2-1 ; i > 0 ; i-- ) + denominator = x*(q[i] + denominator); + denominator += q[0]; + + return( numerator/denominator ); +} + + +/* + * Function name: _j0 + * + * Arguments: x - double + * + * Description: _j0 computes the bessel function of the first kind of zero + * order for real values of its argument x, where x can range + * from - infinity to + infinity. The algorithm is taken + * from Hart, Computer Approximations, 1978, and yields full + * double precision accuracy. + * + * Side Effects: no global data other than the static coefficients above + * is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + */ + +#ifdef LD_VER + D_TYPE _ +cdecl _j0l( D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _j0( D_TYPE x ) +#endif +{ +D_TYPE z, P0, Q0; + + /* if the argument is negative, take the absolute value */ + + if ( x < 0.0 ) + x = - x; + + /* if x <= 7.5 use Hart JZERO 5847 */ + + if ( x <= 7.5 ) + return( evaluate( x*x, J0p, 11, J0q, 4) ); + + /* else if x >= 7.5 use Hart PZERO 6548 and QZERO 6948, the high range + approximation */ + + else { + z = 8.0/x; + P0 = evaluate( z*z, P0p, 5, P0q, 5); + Q0 = z*evaluate( z*z, Q0p, 5, Q0q, 5); + return( sqrt(2.0/(PI*x))*(P0*cos(x-PI/4) - Q0*sin(x-PI/4)) ); + } +} + + +/* + * Function name: _j1 + * + * Arguments: x - double + * + * Description: _j1 computes the bessel function of the first kind of the + * first order for real values of its argument x, where x can + * range from - infinity to + infinity. The algorithm is taken + * from Hart, Computer Approximations, 1978, and yields full + * D_TYPE precision accuracy. + * + * Side Effects: no global data other than the static coefficients above + * is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + */ + +#ifdef LD_VER + D_TYPE _cdecl _j1l( D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _j1( D_TYPE x ) +#endif +{ +D_TYPE z, P1, Q1; +int sign; + + /* if the argument is negative, take the absolute value and set sign */ + + sign = 1; + if( x < 0.0 ){ + x = -x; + sign = -1; + } + + /* if x <= 7.5 use Hart JONE 6047 */ + + if ( x <= 7.5 ) + return( sign*x*evaluate( x*x, J1p, 10, J1q, 4) ); + + + /* else if x > 7.5 use Hart PONE 6749 and QONE 7149, the high range + approximation */ + + else { + z = 8.0/x; + P1 = evaluate( z*z, P1p, 5, P1q, 5); + Q1 = z*evaluate( z*z, Q1p, 5, Q1q, 5); + return( sign*sqrt(2.0/(PI*x))* + ( P1*cos(x-3.0*PI/4.0) - Q1*sin(x-3.0*PI/4.0) ) ); + } +} + + + +/* + * Function name: _y0 + * + * Arguments: x - double + * + * Description: y0 computes the bessel function of the second kind of zero + * order for real values of its argument x, where x can range + * from 0 to + infinity. The algorithm is taken from Hart, + * Computer Approximations, 1978, and yields full double + * precision accuracy. + * + * Side Effects: no global data other than the static coefficients above + * is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + */ + +#ifdef LD_VER + D_TYPE _cdecl _y0l( D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _y0( D_TYPE x ) +#endif +{ +D_TYPE z, P0, Q0; + + + /* if the argument is negative, set EDOM error, print an error message, + * and return -HUGE + */ + + if (x < 0.0) + return( domain_err(OP_Y0 , x, D_IND) ); + + + /* if x <= 7.5 use Hart YZERO 6245, the low range approximation */ + + if ( x <= 7.5 ) + return( evaluate( x*x, Y0p, 8, Y0q, 8) + (2.0/PI)*_j0(x)*log(x) ); + + + /* else if x > 7.5 use Hart PZERO 6548 and QZERO 6948, the high range + approximation */ + + else { + z = 8.0/x; + P0 = evaluate( z*z, P0p, 5, P0q, 5); + Q0 = z*evaluate( z*z, Q0p, 5, Q0q, 5); + return( sqrt(2.0/(PI*x))*(P0*sin(x-PI/4) + Q0*cos(x-PI/4)) ); + } +} + + +/* + * Function name: _y1 + * + * Arguments: x - double + * + * Description: y1 computes the bessel function of the second kind of first + * order for real values of its argument x, where x can range + * from 0 to + infinity. The algorithm is taken from Hart, + * Computer Approximations, 1978, and yields full double + * precision accuracy. + * + * Side Effects: no global data other than the static coefficients above + * is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + */ + +#ifdef LD_VER + D_TYPE _cdecl _y1l( D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _y1( D_TYPE x ) +#endif +{ +D_TYPE z, P1, Q1; + + + /* if the argument is negative, set EDOM error, print an error message, + * and return -HUGE + */ + + if (x < 0.0) + return( domain_err(OP_Y1, x, D_IND) ); + + /* if x <= 7.5 use Hart YONE 6444, the low range approximation */ + + if ( x <= 7.5 ) + return( x*evaluate( x*x, Y1p, 7, Y1q, 8) + + (2.0/PI)*(_j1(x)*log(x) - 1.0/x) ); + + + /* else if x > 7.5 use Hart PONE 6749 and QONE 7149, the high range + approximation */ + + else { + z = 8.0/x; + P1 = evaluate( z*z, P1p, 5, P1q, 5); + Q1 = z*evaluate( z*z, Q1p, 5, Q1q, 5); + return( sqrt(2.0/(PI*x))* + ( P1*sin(x-3.0*PI/4.0) + Q1*cos(x-3.0*PI/4.0) ) ); + } +} + + +/* + * Function name: _jn + * + * Arguments: n - integer + * x - double + * + * Description: _jn computes the bessel function of the first kind of order + * n for real values of its argument, where x can range from + * - infinity to + infinity, and n can range over the integers + * from - infinity to + infinity. The function is computed + * by recursion, using the formula + * + * _jn(n-1,x) = (2.0*n/x)*_jn(n,x) - _jn(n+1,x) + * + * stabilly in the downward direction, normalizing by _j0(x) + * in the end in the usual manner. + * + * Side Effects: the routines _j0, y0, and yn are called during the + * execution of this routine. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + * 07/29/85 Greg Whitten + * rewrote _jn to use Hart suggested algorithm + */ + +#ifdef LD_VER + D_TYPE _cdecl _jnl( int n, D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _jn( int n, D_TYPE x ) +#endif +{ +int i; +D_TYPE x2, jm1, j, jnratio, hold; + + /* use symmetry relationships: _j(-n,x) = _j(n,-x) */ + + if( n < 0 ){ + n = -n; + x = -x; + } + + /* if n = 0 use _j0(x) and if n = 1 use _j1(x) functions */ + + if (n == 0) + return (_j0(x)); + + if (n == 1) + return (_j1(x)); + + /* if x = 0.0 then _j(n,0.0) = 0.0 for n > 0 (_j(0,x) = 1.0) */ + + if (x == 0.0) + return (0.0); + + /* otherwise - must use the recurrence relation + * + * _jn(n+1,x) = (2.0*n/x)*_jn(n,x) - _jn(n-1,x) forward + * _jn(n-1,x) = (2.0*n/x)*_jn(n,x) - _jn(n+1,x) backward + */ + + if( (double)n < fabs(x) ) { + + /* stably compute _jn using forward recurrence above */ + + n <<= 1; /* n *= 2 (n is positive) */ + jm1 = _j0(x); + j = _j1(x); + i = 2; + for(;;) { + hold = j; + j = ((double)(i))*j/x - jm1; + i += 2; + if (i == n) + return (j); + jm1 = hold; + } + } + else { + /* stably compute _jn using backward recurrence above */ + + /* use Hart continued fraction formula for j(n,x)/j(n-1,x) + * so that we can compute a normalization factor + */ + + n <<= 1; /* n *= 2 (n is positive) */ + x2 = x*x; + hold = 0.0; /* initial continued fraction tail value */ + for (i=n+36; i>n; i-=2) + hold = x2/((double)(i) - hold); + jnratio = j = x/((double)(n) - hold); + jm1 = 1.0; + + /* have jn/jn-1 ratio - now use backward recurrence */ + + i = n-2; + for (;;) { + hold = jm1; + jm1 = ((double)(i))*jm1/x - j; + i -= 2; + if (i == 0) + break; + j = hold; + } + + /* jm1 is relative j0(x) so normalize it for final result + * + * jnratio = K*j(n,x) and jm1 = K*_j0(x) + */ + + return(_j0(x)*jnratio/jm1); + } +} + + +/* + * Function name: _yn + * + * Arguments: n - integer + * x - double + * + * Description: yn computes the bessel function of the second kind of order + * n for real values of its argument x, where x can range from + * 0 to + infinity, and n can range over the integers from + * - infinity to + infinity. The function is computed by + * recursion from y0 and y1, using the recursive formula + * + * yn(n+1,x) = (2.0*n/x)*yn(n,x) - yn(n-1,x) + * + * in the forward direction. + * + * Side Effects: the routines y0 and y1 are called during the execution + * of this routine. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984 + * + * History: + * 08/09/85 Greg Whitten + * added check for n==0 and n==1 + * 04/20/87 Barry McCord + * eliminated use of "const" as an identifier for ANSI conformance + */ + +#ifdef LD_VER + D_TYPE _cdecl _ynl( int n, D_TYPE x ) +#else + D_TYPE _CALLTYPE1 _yn( int n, D_TYPE x ) +#endif +{ +int i; +int sign; +D_TYPE constant, yn2, yn1, yn0; + + + /* if the argument is negative, set EDOM error, print an error message, + * and return -HUGE + */ + + if (x < 0.0) + return(domain_err(OP_YN, x, D_IND)); + + + /* take the absolute value of n, and set sign accordingly */ + + sign = 1; + if( n < 0 ){ + n = -n; + if( n&1 ) + sign = -1; + } + + if( n == 0 ) + return( sign*_y0(x) ); + + if (n == 1) + return( sign*_y1(x) ); + + /* otherwise go ahead and compute the function by iteration */ + + yn0 = _y0(x); + yn1 = _y1(x); + + constant = 2.0/x; + for( i = 1 ; i < n ; i++ ){ + yn2 = constant*i*yn1 - yn0; + yn0 = yn1; + yn1 = yn2; + } + return( sign*yn2 ); +} + + +static D_TYPE domain_err( int who, D_TYPE arg1, D_TYPE arg2 ) +{ +#ifdef LD_VER +#error long double version not supported +#endif + + unsigned int savedcw; + savedcw = _maskfp(); + return _except1(FP_I, who, arg1, arg2, savedcw); +} diff --git a/private/fp32/tran/ceil.c b/private/fp32/tran/ceil.c new file mode 100644 index 000000000..e75ac83cf --- /dev/null +++ b/private/fp32/tran/ceil.c @@ -0,0 +1,72 @@ +/*** +*ceil.c - ceiling +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-09-92 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +extern double _frnd(double); + + +/*** +*double ceil(double x) - ceiling +* +*Purpose: +* Return a double representing the smallest integer that is +* greater than or equal to x +* +*Entry: +* +*Exit: +* +*Exceptions: +* P, I +*******************************************************************************/ +static unsigned int newcw = (ICW & ~IMCW_RC) | (IRC_UP & IMCW_RC); + + + +double ceil(double x) +{ + unsigned int savedcw; + double result; + + /* save user fp control word */ + savedcw = _ctrlfp(newcw,IMCW); /* round up */ + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I, OP_CEIL, x, QNAN_CEIL, savedcw); + case T_QNAN: + return _handle_qnan1(OP_CEIL, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_CEIL, x, _s2qnan(x), savedcw); + } + } + + result = _frnd(x); /* round according to the current rounding mode */ + + // In general, the Precision Exception should be raised if + // _frnd reports a precision loss. In order to detect this with + // masked exceptions, the status word needs to be cleared. + // However, we want to avoid this, since the 387 instruction + // set does not provide an fast way to restore the status word + + if (result == x) { + RETURN(savedcw,result); + } + else { + RETURN_INEXACT1(OP_CEIL, x, result, savedcw); + } +} diff --git a/private/fp32/tran/exp.c b/private/fp32/tran/exp.c new file mode 100644 index 000000000..d6c0b42a2 --- /dev/null +++ b/private/fp32/tran/exp.c @@ -0,0 +1,181 @@ +/*** +*exp.c - exponential +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Compute exp(x) +* +*Revision History: +* 8-15-91 GDP written +* 12-21-91 GDP support IEEE exceptions +* 02-03-92 GDP added _exphlp for use by exp, sinh, and cosh +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +double _exphlp(double, int *); + +/* + * Thresholds for over/underflow that results in an adjusted value + * too big/small to be represented as a double. + * OVFX: ln(XMAX * 2^IEEE_ADJ) + * UFLX: ln(XIN * 2^(-IEEE_ADJ) + */ + +static _dbl const ovfx = {SET_DBL(0x409bb9d3, 0xbeb8c86b)}; +static _dbl const uflx = {SET_DBL(0xc09bb448, 0x2df909dd)}; + +#define OVFX ovfx.dbl +#define UFLX uflx.dbl + + +static double const EPS = 5.16987882845642297e-26; /* 2^(-53) / 2 */ +static double const LN2INV = 1.442695040889634074; /* 1/ln(2) */ +static double const C1 = 0.693359375000000000; +static double const C2 = -2.1219444005469058277e-4; + +/* constants for the rational approximation */ +static double const p0 = 0.249999999999999993e+0; +static double const p1 = 0.694360001511792852e-2; +static double const p2 = 0.165203300268279130e-4; +static double const q0 = 0.500000000000000000e+0; +static double const q1 = 0.555538666969001188e-1; +static double const q2 = 0.495862884905441294e-3; + +#define P(z) ( (p2 * (z) + p1) * (z) + p0 ) +#define Q(z) ( (q2 * (z) + q1) * (z) + q0 ) + +/*** +*double exp(double x) - exponential +* +*Purpose: +* Compute the exponential of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: O, U, P, I +* +*******************************************************************************/ + +double exp (double x) +{ + unsigned int savedcw; + int n, newexp; + double result; + double xn; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw,x); + case T_NINF: + RETURN(savedcw,0.0); + case T_QNAN: + return _handle_qnan1(OP_EXP, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_EXP, x, _s2qnan(x), savedcw); + } + } + + if (x == 0.0) { + RETURN(savedcw, 1.0); + } + + if (x > OVFX) { + + // even after scaling the exponent of the result, + // it is still too large. + // Deliver infinity to the trap handler + + return _except1(FP_O | FP_P, OP_EXP, x, D_INF, savedcw); + } + + if (x < UFLX) { + + // even after scaling the exponent of the result, + // it is still too small. + // Deliver 0 to the trap handler + + return _except1(FP_U | FP_P, OP_EXP, x, 0.0, savedcw); + } + + if (ABS(x) < EPS) { + result = 1.0; + } + + else { + result = _exphlp(x, &newexp); + if (newexp > MAXEXP) { + result = _set_exp(result, newexp-IEEE_ADJUST); + return _except1(FP_O | FP_P, OP_EXP, x, result, savedcw); + } + else if (newexp < MINEXP) { + result = _set_exp(result, newexp+IEEE_ADJUST); + return _except1(FP_U | FP_P, OP_EXP, x, result, savedcw); + } + else + result = _set_exp(result, newexp); + } + + RETURN_INEXACT1(OP_EXP, x, result, savedcw); +} + + + + +/*** +*double _exphlp(double x, int * pnewexp) - exp helper routine +* +*Purpose: +* Provide the mantissa and the exponent of e^x +* +*Entry: +* x : a (non special) double precision number +* +*Exit: +* *newexp: the exponent of e^x +* return value: the mantissa m of e^x scaled by a factor +* (the value of this factor has no significance. +* The mantissa can be obtained with _set_exp(m, 0). +* +* _set_exp(m, *pnewexp) may be used for constructing the final +* result, if it is within the representable range. +* +*Exceptions: +* No exceptions are raised by this function +* +*******************************************************************************/ + + + +double _exphlp(double x, int * pnewexp) +{ + + double xn; + double g,z,gpz,qz,rg; + int n; + + xn = _frnd(x * LN2INV); + n = (int) xn; + + /* assume guard digit is present */ + g = (x - xn * C1) - xn * C2; + z = g*g; + gpz = g * P(z); + qz = Q(z); + rg = 0.5 + gpz/(qz-gpz); + + n++; + + *pnewexp = _get_exp(rg) + n; + return rg; +} diff --git a/private/fp32/tran/fabs.c b/private/fp32/tran/fabs.c new file mode 100644 index 000000000..c590f5863 --- /dev/null +++ b/private/fp32/tran/fabs.c @@ -0,0 +1,60 @@ +/*** +*fabs.c - absolute value of a floating point number +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 12-10-91 GDP Domain error for NAN, use fp negation +* 1-13-91 GDP support IEEE exceptions +* 07-16-93 SRW ALPHA Merge +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +#ifdef _M_IX86 +#pragma function (fabs) +#endif + + +/*** +*double fabs(double x) +* +*Purpose: +* Compute |x| +* +*Entry: +* +*Exit: +* +*Exceptions: +* I +* +*******************************************************************************/ +double fabs(double x) +{ + unsigned int savedcw; + double result; + + if (IS_D_SPECIAL(x)){ + /* save user fp control word */ + savedcw = _maskfp(); + + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw,x); + case T_NINF: + RETURN(savedcw,-x); + case T_QNAN: + return _handle_qnan1(OP_ABS, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_ABS, x, _s2qnan(x), savedcw); + } + } + + result = x>=0 ? x : -x; + return result; +} diff --git a/private/fp32/tran/floor.c b/private/fp32/tran/floor.c new file mode 100644 index 000000000..48d17c2b0 --- /dev/null +++ b/private/fp32/tran/floor.c @@ -0,0 +1,71 @@ +/*** +*floor.c - floor +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-09-92 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +extern double _frnd(double); + +/*** +*double floor(double x) - floor +* +*Purpose: +* Return a double representing the largest integer that is +* less than or equal to x +* +*Entry: +* +*Exit: +* +*Exceptions: +* I, P +*******************************************************************************/ +static unsigned int newcw = (ICW & ~IMCW_RC) | (IRC_DOWN & IMCW_RC); + + +double floor(double x) +{ + unsigned int savedcw; + double result; + + /* save user fp control word */ + savedcw = _ctrlfp(newcw,IMCW); /* round down */ + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I, OP_FLOOR, x, QNAN_FLOOR, savedcw); + case T_QNAN: + return _handle_qnan1(OP_FLOOR, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_FLOOR, x, _s2qnan(x), savedcw); + } + } + + + result = _frnd(x); /* round according to the current rounding mode */ + + // In general, the Precision Exception should be raised if + // _frnd reports a precision loss. In order to detect this with + // masked exceptions, the status word needs to be cleared. + // However, we want to avoid this, since the 387 instruction + // set does not provide an fast way to restore the status word + + if (result == x) { + RETURN(savedcw,result); + } + else { + RETURN_INEXACT1(OP_FLOOR, x, result, savedcw); + } +} diff --git a/private/fp32/tran/fmod.c b/private/fp32/tran/fmod.c new file mode 100644 index 000000000..8153c317c --- /dev/null +++ b/private/fp32/tran/fmod.c @@ -0,0 +1,120 @@ +/*** +*fmod.c - floating point remainder +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-13-92 GDP support IEEE exceptions +* 3-04-92 GDP complete rewrite for improved accuracy +* 3-16-92 GDP restore cw properly, do not raise Inexact exception +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +/*** +*double fmod(double x, double y) +* +*Purpose: +* Return f, s.t. x = i*y + f, where i is an integer, f has the same +* sign as x, and |f| < |y| +* +*Entry: +* +*Exit: +* +*Exceptions: +* I,P +*******************************************************************************/ +#define SCALE 53 + +double fmod(double x, double y) +{ + unsigned int savedcw; + int neg=0; + int denorm=0; + double d,tx,ty,fx,fy; + int nx, ny, nexp; + + + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(y) || IS_D_SPECIAL(x)){ + if (IS_D_SNAN(y) || IS_D_SNAN(x)){ + return _except2(FP_I,OP_FMOD,x,y,_d_snan2(x,y),savedcw); + } + if (IS_D_QNAN(y) || IS_D_QNAN(x)){ + return _handle_qnan2(OP_FMOD,x,y,savedcw); + } + + /* at least one argument is INF */ + return _except2(FP_I,OP_FMOD,x,y,QNAN_FMOD,savedcw); + } + + + if (y == 0) { + return _except2(FP_I,OP_FMOD,x,y,QNAN_FMOD,savedcw); + } + + + if (x < 0) { + tx = -x; + neg = 1; + } + else { + tx = x; + } + + ty = ABS(y); + + + while (tx >= ty) { + fx = _decomp(tx, &nx); + fy = _decomp(ty, &ny); + + if (nx < MINEXP) { + // tx is a denormalized number + denorm = 1; + nx += SCALE; + ny += SCALE; + tx = _set_exp(fx, nx); + ty = _set_exp(fy, ny); + } + + + if (fx >= fy) { + nexp = nx ; + } + else { + nexp = nx - 1; + } + d = _set_exp(fy, nexp); + tx -= d; + } + + if (denorm) { + + // + // raise only FP_U exception + // + + return _except2(FP_U, + OP_FMOD, + x, + y, + _add_exp(tx, IEEE_ADJUST-SCALE), + savedcw); + } + + if (neg) { + tx = -tx; + } + + RETURN(savedcw,tx); +} diff --git a/private/fp32/tran/fpexcept.c b/private/fp32/tran/fpexcept.c new file mode 100644 index 000000000..d0a558cf9 --- /dev/null +++ b/private/fp32/tran/fpexcept.c @@ -0,0 +1,918 @@ +/*** +*fpexcept.c - floating point exception handling +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 9-26-91 GDP changed DOMAIN error handling +* 10-10-91 GDP use fp addition for propagating NaNs +* 1-14-92 GDP IEEE exception support +* 3-20-92 GDP major changes, reorganized code +* 3-31-92 GDP new interface, use internal fp control functions +* 07-16-93 SRW ALPHA Merge +* 10-02-94 BWT PPC merge +* +*******************************************************************************/ + +#if _NTSUBSET_ +#define _NTSYSTEM_ +#include <nt.h> +#include <ntrtl.h> +#include <nturtl.h> +#endif // _NTSUBSET_ + + +#include <trans.h> +#include <errno.h> +#include <math.h> +#include <windows.h> + +#if _NTSUBSET_ +VOID +WINAPI +MyRaiseException( + DWORD dwExceptionCode, + DWORD dwExceptionFlags, + DWORD nNumberOfArguments, + CONST DWORD *lpArguments + ) +{ + EXCEPTION_RECORD ExceptionRecord; + ULONG n; + PULONG s,d; + ExceptionRecord.ExceptionCode = (DWORD)dwExceptionCode; + ExceptionRecord.ExceptionFlags = dwExceptionFlags & EXCEPTION_NONCONTINUABLE; + ExceptionRecord.ExceptionRecord = NULL; + ExceptionRecord.ExceptionAddress = (PVOID)MyRaiseException; + if ( ARGUMENT_PRESENT(lpArguments) ) { + n = nNumberOfArguments; + if ( n > EXCEPTION_MAXIMUM_PARAMETERS ) { + n = EXCEPTION_MAXIMUM_PARAMETERS; + } + ExceptionRecord.NumberParameters = n; + s = (PULONG)lpArguments; + d = ExceptionRecord.ExceptionInformation; + while(n--){ + *d++ = *s++; + } + } + else { + ExceptionRecord.NumberParameters = 0; + } + RtlRaiseException(&ExceptionRecord); + +} +#endif // _NTSUBSET_ + + +// +// copy a double without generating floating point instructions +// (avoid invalid operation on x87) +// + +#define COPY_DOUBLE(pdest, psrc) \ + ( *(unsigned int *)pdest = *(unsigned int *)psrc, \ + *((unsigned int *)pdest+1) = *((unsigned int *)psrc+1) ) + + + +// +// _matherr_flag is a communal variable. It is equal to zero +// if the user has redefined matherr(). Otherwise it has a +// non zero value. The default matherr routine does nothing +// and returns 0. +// + +int _matherr_flag; + +// +// a routine for artificially setting the fp status bits in order +// to signal a software generated masked fp exception. +// + +extern void _set_statfp(unsigned int); + + +void _raise_exc(_FPIEEE_RECORD *prec,unsigned int *pcw, + int flags, int opcode, double *parg1, double *presult); + +double _umatherr(int type, unsigned int opcode, + double arg1, double arg2, double presult, + unsigned int cw); + +static char *_get_fname(unsigned int opcode); + +/*** +* _handle_qnan1, _handle_qnan2 - handle quiet NaNs as function arguments +* +*Purpose: +* Do all necessary work for handling the case where the argument +* or one of the arguments of a floating point function is a quiet NaN +* +*Entry: +* unsigned int opcode: The operation code of the fp function +* double x: the fp function argument +* double y: the fp function second argument (_handle_qnan2 only) +* unsigned int savedcw: the user's control word +* +*Exit: +* restore the user's control word, and +* return the suggested return value for the fp function +* +*Exceptions: +* +*******************************************************************************/ + +double _handle_qnan1(unsigned int opcode, + double x, + unsigned int savedcw) +{ + if (! _matherr_flag) { + + // + // QNaN arguments are treated as domain errors + // invoke the user's matherr routine + // _umatherr will take care of restoring the + // user's control word + // + + return _umatherr(_DOMAIN,opcode,x,0.0,x,savedcw); + } + else { + errno = EDOM; + _rstorfp(savedcw); + return x; + } +} + + +double _handle_qnan2(unsigned int opcode, + double x, + double y, + unsigned int savedcw) +{ + double result; + + // + // NaN propagation should be handled by the underlying fp h/w + // + + result = x+y; + + if (! _matherr_flag) { + return _umatherr(_DOMAIN,opcode,x,y,result,savedcw); + } + else { + errno = EDOM; + _rstorfp(savedcw); + return result; + } +} + + + +/*** +* _except1 - exception handling shell for fp functions with one argument +* +*Purpose: +* +*Entry: +* int flags: the exception flags +* int opcode: the operation code of the fp function that faulted +* double arg: the argument of the fp function +* double result: default result +* unsigned int cw: user's fp control word +* +*Exit: +* restore user's fp control word +* and return the (possibly modified) result of the fp function +* +*Exceptions: +* +*******************************************************************************/ + +double _except1(int flags, + int opcode, + double arg, + double result, + unsigned int cw) +{ + int type; + + if (_handle_exc(flags, &result, cw) == 0) { + + // + // At this point _handle_exception has failed to deal + // with the error + // An IEEE exception should be raised + // + + _FPIEEE_RECORD rec; + + // The rec structure will be filled in by _raise_exc, + // except for the Operand2 information + + rec.Operand2.OperandValid = 0; + _raise_exc(&rec, &cw, flags, opcode, &arg, &result); + } + + + // + // At this point we have either the masked response of the + // exception, or a value supplied by the user's IEEE exception + // handler. The _matherr mechanism is supported for backward + // compatibility. + // + + type = _errcode(flags); + + // Inexact result fp exception does not have a matherr counterpart; + // in that case type is 0. + + if (! _matherr_flag && type) { + return _umatherr(type, opcode, arg, 0.0, result, cw); + } + else { + _set_errno(type); + } + + RETURN(cw,result); +} + + + +/*** +* _except2 - exception handling shell for fp functions with two arguments +* +*Purpose: +* +*Entry: +* int flags: the exception flags +* int opcode: the operation code of the fp function that faulted +* double arg1: the first argument of the fp function +* double arg2: the second argument of the fp function +* double result: default result +* unsigned int cw: user's fp control word +* +*Exit: +* restore user's fp control word +* and return the (possibly modified) result of the fp function +* +*Exceptions: +* +*******************************************************************************/ + +double _except2(int flags, + int opcode, + double arg1, + double arg2, + double result, + unsigned int cw) +{ + int type; + + if (_handle_exc(flags, &result, cw) == 0) { + + // + // trap should be taken + // + + _FPIEEE_RECORD rec; + + // + // fill in operand2 info. The rest of rec will be + // filled in by _raise_exc + // + + rec.Operand2.OperandValid = 1; + rec.Operand2.Format = _FpFormatFp64; + rec.Operand2.Value.Fp64Value = arg2; + + _raise_exc(&rec, &cw, flags, opcode, &arg1, &result); + + } + + type = _errcode(flags); + + if (! _matherr_flag && type) { + return _umatherr(type, opcode, arg1, arg2, result, cw); + } + else { + _set_errno(type); + } + + RETURN(cw,result); +} + + + +/*** +* _raise_exc - raise fp IEEE exception +* +*Purpose: +* fill in an fp IEEE record struct and raise a fp exception +* +* +*Entry / Exit: +* IN _FPIEEE_RECORD prec pointer to an IEEE record +* IN OUT unsigned int *pcw pointer to user's fp control word +* IN int flags, exception flags +* IN int opcode, fp operation code +* IN double *parg1, pointer to first argument +* IN double *presult) pointer to result +* +*Exceptions: +* +*******************************************************************************/ + +void _raise_exc( _FPIEEE_RECORD *prec, + unsigned int *pcw, + int flags, + int opcode, + double *parg1, + double *presult) +{ + DWORD exc_code; + unsigned int sw; + + // + // reset all control bits + // + + *(int *)&(prec->Cause) = 0; + *(int *)&(prec->Enable) = 0; + *(int *)&(prec->Status) = 0; + + // + // Precision exception may only coincide with overflow + // or underflow. If this is the case, overflow (or + // underflow) take priority over precision exception. + // The order of checks is from the least important + // to the most important exception + // + + if (flags & FP_P) { + exc_code = STATUS_FLOAT_INEXACT_RESULT; + prec->Cause.Inexact = 1; + } + if (flags & FP_U) { + exc_code = STATUS_FLOAT_UNDERFLOW; + prec->Cause.Underflow = 1; + } + if (flags & FP_O) { + exc_code = STATUS_FLOAT_OVERFLOW; + prec->Cause.Overflow = 1; + } + if (flags & FP_Z) { + exc_code = STATUS_FLOAT_DIVIDE_BY_ZERO; + prec->Cause.ZeroDivide = 1; + } + if (flags & FP_I) { + exc_code = STATUS_FLOAT_INVALID_OPERATION; + prec->Cause.InvalidOperation = 1; + } + + + // + // Set exception enable bits + // + + prec->Enable.InvalidOperation = (*pcw & IEM_INVALID) ? 0 : 1; + prec->Enable.ZeroDivide = (*pcw & IEM_ZERODIVIDE) ? 0 : 1; + prec->Enable.Overflow = (*pcw & IEM_OVERFLOW) ? 0 : 1; + prec->Enable.Underflow = (*pcw & IEM_UNDERFLOW) ? 0 : 1; + prec->Enable.Inexact = (*pcw & IEM_INEXACT) ? 0 : 1; + + + // + // Set status bits + // + + sw = _statfp(); + + + if (sw & ISW_INVALID) { + prec->Status.InvalidOperation = 1; + } + if (sw & ISW_ZERODIVIDE) { + prec->Status.ZeroDivide = 1; + } + if (sw & ISW_OVERFLOW) { + prec->Status.Overflow = 1; + } + if (sw & ISW_UNDERFLOW) { + prec->Status.Underflow = 1; + } + if (sw & ISW_INEXACT) { + prec->Status.Inexact = 1; + } + + + switch (*pcw & IMCW_RC) { + case IRC_CHOP: + prec->RoundingMode = _FpRoundChopped; + break; + case IRC_UP: + prec->RoundingMode = _FpRoundPlusInfinity; + break; + case IRC_DOWN: + prec->RoundingMode = _FpRoundMinusInfinity; + break; + case IRC_NEAR: + prec->RoundingMode = _FpRoundNearest; + break; + } + +#ifdef i386 + + switch (*pcw & IMCW_PC) { + case IPC_64: + prec->Precision = _FpPrecisionFull; + break; + case IPC_53: + prec->Precision = _FpPrecision53; + break; + case IPC_24: + prec->Precision = _FpPrecision24; + break; + } + +#endif + + +#if defined(_M_MRX000) || defined(_M_ALPHA) || defined(_M_PPC) + prec->Precision = _FpPrecision53; +#endif + + prec->Operation = opcode; + + prec->Operand1.OperandValid = 1; + prec->Operand1.Format = _FpFormatFp64; + prec->Operand1.Value.Fp64Value = *parg1; + + prec->Result.OperandValid = 1; + prec->Result.Format = _FpFormatFp64; + prec->Result.Value.Fp64Value = *presult; + + // + // By convention software exceptions use the first exception + // parameter in order to pass a pointer to the _FPIEEE_RECORD + // structure. + // + + _clrfp(); + +#if _NTSUBSET_ + MyRaiseException(exc_code,0,1,(LPDWORD)&prec); +#else + RaiseException(exc_code,0,1,(LPDWORD)&prec); +#endif + + // + // user's trap handler may have changed either the fp environment + // or the result + // + + // + // Update exception mask + // + + if (prec->Enable.InvalidOperation) + (*pcw) &= ~IEM_INVALID; + if (prec->Enable.ZeroDivide) + (*pcw) &= ~IEM_ZERODIVIDE; + if (prec->Enable.Overflow) + (*pcw) &= ~IEM_OVERFLOW; + if (prec->Enable.Underflow) + (*pcw) &= ~IEM_UNDERFLOW; + if (prec->Enable.Inexact) + (*pcw) &= ~IEM_INEXACT; + + // + // Update Rounding mode + // + + switch (prec->RoundingMode) { + case _FpRoundChopped: + *pcw = *pcw & ~IMCW_RC | IRC_CHOP; + break; + case _FpRoundPlusInfinity: + *pcw = *pcw & ~IMCW_RC | IRC_UP; + break; + case _FpRoundMinusInfinity: + *pcw = *pcw & ~IMCW_RC | IRC_DOWN; + break; + case _FpRoundNearest: + *pcw = *pcw & ~IMCW_RC | IRC_NEAR; + break; + } + + +#ifdef i386 + + // + // Update Precision Control + // + + switch (prec->Precision) { + case _FpPrecisionFull: + *pcw = *pcw & ~IMCW_RC | IPC_64; + break; + case _FpPrecision53: + *pcw = *pcw & ~IMCW_RC | IPC_53; + break; + case _FpPrecision24: + *pcw = *pcw & ~IMCW_RC | IPC_24; + break; + } + +#endif + + // + // Update result + // + + *presult = prec->Result.Value.Fp64Value; +} + + + +/*** +* _handle_exc - produce masked response for IEEE fp exception +* +*Purpose: +* +*Entry: +* unsigned int flags the exception flags +* double *presult the default result +* unsigned int cw user's fp control word +* +*Exit: +* returns 1 on successful handling, 0 on failure +* On success, *presult becomes the masked response +* +*Exceptions: +* +*******************************************************************************/ + +int _handle_exc(unsigned int flags, double * presult, unsigned int cw) +{ + // + // flags_p is useful for deciding whether there are still unhandled + // exceptions in case multiple exceptions have occurred + // + + int flags_p = flags & (FP_I | FP_Z | FP_O | FP_U | FP_P); + + if (flags & FP_I && cw & IEM_INVALID) { + + // + // Masked response for invalid operation + // + + _set_statfp(ISW_INVALID); + flags_p &= ~FP_I; + } + + else if (flags & FP_Z && cw & IEM_ZERODIVIDE) { + + // + // Masked response for Division by zero + // result should already have the proper value + // + + _set_statfp( ISW_ZERODIVIDE); + flags_p &= ~FP_Z; + } + + else if (flags & FP_O && cw & IEM_OVERFLOW) { + + // + // Masked response for Overflow + // + + _set_statfp(ISW_OVERFLOW); + switch (cw & IMCW_RC) { + case IRC_NEAR: + *presult = *presult > 0.0 ? D_INF : -D_INF; + break; + case IRC_UP: + *presult = *presult > 0.0 ? D_INF : -D_MAX; + break; + case IRC_DOWN: + *presult = *presult > 0.0 ? D_MAX : -D_INF; + break; + case IRC_CHOP: + *presult = *presult > 0.0 ? D_MAX : -D_MAX; + break; + } + + flags_p &= ~FP_O; + } + + else if (flags & FP_U && cw & IEM_UNDERFLOW) { + + // + // Masked response for Underflow: + // According to the IEEE standard, when the underflow trap is not + // enabled, underflow shall be signaled only when both tininess + // and loss of accuracy have been detected + // + + int aloss=0; // loss of accuracy flag + + if (flags & FP_P) { + aloss = 1; + } + + // + // a zero value in the result denotes + // that even after ieee scaling, the exponent + // was too small. + // in this case the masked response is also + // zero (sign is preserved) + // + + if (*presult != 0.0) { + double result; + int expn, newexp; + + result = _decomp(*presult, &expn); + newexp = expn - IEEE_ADJUST; + + if (newexp < MINEXP - 53) { + result *= 0.0; // produce a signed zero + aloss = 1; + } + else { + int neg = result < 0; // save sign + + // + // denormalize result + // + + (*D_EXP(result)) &= 0x000f; /* clear exponent field */ + (*D_EXP(result)) |= 0x0010; /* set hidden bit */ + + for (;newexp<MINEXP;newexp++) { + if (*D_LO(result) & 0x1 && !aloss) { + aloss = 1; + } + + /* shift mantissa to the right */ + (*D_LO(result)) >>= 1; + if (*D_HI(result) & 0x1) { + (*D_LO(result)) |= 0x80000000; + } + (*D_HI(result)) >>= 1; + } + if (neg) { + result = -result; // restore sign + } + } + + *presult = result; + } + else { + aloss = 1; + } + + if (aloss) { + _set_statfp(ISW_UNDERFLOW); + } + + flags_p &= ~FP_U; + } + + + // + // Separate check for precision exception + // (may coexist with overflow or underflow) + // + + if (flags & FP_P && cw & IEM_INEXACT) { + + // + // Masked response for inexact result + // + + _set_statfp(ISW_INEXACT); + flags_p &= ~FP_P; + } + + return flags_p ? 0: 1; +} + + + +/*** +* _umatherr - call user's matherr routine +* +*Purpose: +* call user's matherr routine and set errno if appropriate +* +* +*Entry: +* int type type of excpetion +* unsigned int opcode fp function that caused the exception +* double arg1 first argument of the fp function +* double arg2 second argument of the fp function +* double retval return value of the fp function +* unsigned int cw user's fp control word +* +*Exit: +* fp control word becomes the user's fp cw +* errno modified if user's matherr returns 0 +* return value the retval entered by the user in +* the _exception matherr struct +* +*Exceptions: +* +*******************************************************************************/ + +double _umatherr( + int type, + unsigned int opcode, + double arg1, + double arg2, + double retval, + unsigned int cw + ) +{ + struct _exception exc; + + // + // call matherr only if the name of the function + // is registered in the table, i.e., only if exc.name is valid + // + + if (exc.name = _get_fname(opcode)) { + exc.type = type; + + COPY_DOUBLE(&exc.arg1,&arg1); + COPY_DOUBLE(&exc.arg2,&arg2); + COPY_DOUBLE(&exc.retval,&retval); + + _rstorfp(cw); + + if (_matherr(&exc) == 0) { + _set_errno(type); + } + return exc.retval; + } + else { + + // + // treat this case as if matherr returned 0 + // + + _rstorfp(cw); + _set_errno(type); + return retval; + } + +} + + + +/*** +* _set_errno - set errno +* +*Purpose: +* set correct error value for errno +* +*Entry: +* int matherrtype: the type of math error +* +*Exit: +* modifies errno +* +*Exceptions: +* +*******************************************************************************/ + +void _set_errno(int matherrtype) +{ + switch(matherrtype) { + case _DOMAIN: + errno = EDOM; + break; + case _OVERFLOW: + case _SING: + errno = ERANGE; + break; + } +} + + + +/*** +* _get_fname - get function name +* +*Purpose: +* returns the _matherr function name that corresponds to a +* floating point opcode +* +*Entry: +* _FP_OPERATION_CODE opcode +* +*Exit: +* returns a pointer to a string +* +*Exceptions: +* +*******************************************************************************/ +#define OP_NUM 27 /* number of fp operations */ + +static char *_get_fname(unsigned int opcode) +{ + + static struct { + unsigned int opcode; + char *name; + } _names[OP_NUM] = { + { OP_EXP, "exp" }, + { OP_POW, "pow" }, + { OP_LOG, "log" }, + { OP_LOG10, "log10"}, + { OP_SINH, "sinh"}, + { OP_COSH, "cosh"}, + { OP_TANH, "tanh"}, + { OP_ASIN, "asin"}, + { OP_ACOS, "acos"}, + { OP_ATAN, "atan"}, + { OP_ATAN2, "atan2"}, + { OP_SQRT, "sqrt"}, + { OP_SIN, "sin"}, + { OP_COS, "cos"}, + { OP_TAN, "tan"}, + { OP_CEIL, "ceil"}, + { OP_FLOOR, "floor"}, + { OP_ABS, "fabs"}, + { OP_MODF, "modf"}, + { OP_LDEXP, "ldexp"}, + { OP_CABS, "_cabs"}, + { OP_HYPOT, "_hypot"}, + { OP_FMOD, "fmod"}, + { OP_FREXP, "frexp"}, + { OP_Y0, "_y0"}, + { OP_Y1, "_y1"}, + { OP_YN, "_yn"} + }; + + int i; + for (i=0;i<OP_NUM;i++) { + if (_names[i].opcode == opcode) + return _names[i].name; + } + return (char *)0; +} + + + +/*** +* _errcode - get _matherr error code +* +*Purpose: +* returns matherr type that corresponds to exception flags +* +*Entry: +* flags: exception flags +* +*Exit: +* returns matherr type +* +*Exceptions: +* +*******************************************************************************/ + +int _errcode(unsigned int flags) +{ + unsigned int errcode; + + if (flags & FP_TLOSS) { + errcode = _TLOSS; + } + else if (flags & FP_I) { + errcode = _DOMAIN; + } + else if (flags & FP_Z) { + errcode = _SING; + } + else if (flags & FP_O) { + errcode = _OVERFLOW; + } + else if (flags & FP_U) { + errcode = _UNDERFLOW; + } + else { + + // FP_P + + errcode = 0; + } + return errcode; +} diff --git a/private/fp32/tran/frexp.c b/private/fp32/tran/frexp.c new file mode 100644 index 000000000..723816e8a --- /dev/null +++ b/private/fp32/tran/frexp.c @@ -0,0 +1,56 @@ +/*** +*frexp.c - get mantissa and exponent of a floating point number +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-13-92 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +/*** +*double frexp(double x, double *expptr) +* +*Purpose: +* The nomalized fraction f is returned: .5<=f<1 +* The exponent is stored in the object pointed by expptr +* +*Entry: +* +*Exit: +* +*Exceptions: +* NAN: domain error +* +*******************************************************************************/ +double frexp(double x, int *expptr) +{ + unsigned int savedcw; + double man; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + *expptr = INT_NAN; + switch (_sptype(x)) { + case T_PINF: + return _except1(FP_I, OP_FREXP, x, D_INF, savedcw); + case T_NINF: + return _except1(FP_I, OP_FREXP, x, -D_INF, savedcw); + case T_QNAN: + return _handle_qnan1(OP_FREXP, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_FREXP, x, _s2qnan(x), savedcw); + } + } + + man = _decomp(x, expptr); + RETURN(savedcw,man); +} diff --git a/private/fp32/tran/hypot.c b/private/fp32/tran/hypot.c new file mode 100644 index 000000000..f14ba05e7 --- /dev/null +++ b/private/fp32/tran/hypot.c @@ -0,0 +1,124 @@ +/*** +*hypot.c - hypotenuse and complex absolute value +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 08-15-91 GDP written +* 10-20-91 GDP removed inline assembly for calling sqrt +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _hypothlp(double x, double y, int who); + +/* + * Function name: hypot + * + * Arguments: x, y - double + * + * Description: hypot returns sqrt(x*x + y*y), taking precautions against + * unwarrented over and underflows. + * + * Side Effects: no global data is used or affected. + * + * Copyright: written R.K. Wyss, Microsoft, Sept. 9, 1983 + * copyright (c) Microsoft Corp. 1984-89 + * + * History: + * 03/13/89 WAJ Minor changes to source. + * 04/13/89 WAJ Now uses _cdecl for _CDECL + * 06/07/91 JCR ANSI naming (_hypot) + * 08/26/91 GDP NaN support, error handling + * 01/13/91 GDP IEEE exceptions support + */ + +double _hypot(double x, double y) +{ + return _hypothlp(x,y,OP_HYPOT); +} + +/*** +*double _cabs(struct _complex z) - absolute value of a complex number +* +*Purpose: +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ +double _cabs(struct _complex z) +{ + return( _hypothlp(z.x, z.y, OP_CABS ) ); +} + + + +static double _hypothlp(double x, double y, int who) +{ + double max; + double result, sum; + unsigned int savedcw; + int exp1, exp2, newexp; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){ + if (IS_D_SNAN(x) || IS_D_SNAN(y)){ + return _except2(FP_I,who,x,y,_d_snan2(x,y),savedcw); + } + if (IS_D_QNAN(x) || IS_D_QNAN(y)){ + return _handle_qnan2(who,x,y,savedcw); + } + /* there is at least one infinite argument ... */ + RETURN(savedcw,D_INF); + } + + + /* take the absolute values of x and y, compute the max, and then scale by + max to prevent over or underflowing */ + + if ( x < 0.0 ) + x = - x; + + if ( y < 0.0 ) + y = - y; + + max = ( ( y > x ) ? y : x ); + + if ( max == 0.0 ) + RETURN(savedcw, 0.0 ); + + x /= max; //this may pollute the fp status word (underflow flag) + y /= max; + + sum = x*x + y*y; + + result = _decomp(sqrt(sum),&exp1) * _decomp(max,&exp2); + newexp = exp1 + exp2 + _get_exp(result); + + // in case of overflow or underflow + // adjusting exp by IEEE_ADJUST will certainly + // bring the result in the representable range + + if (newexp > MAXEXP) { + result = _set_exp(result, newexp - IEEE_ADJUST); + return _except2(FP_O | FP_P, who, x, y, result, savedcw); + } + if (newexp < MINEXP) { + result = _set_exp(result, newexp + IEEE_ADJUST); + return _except2(FP_U | FP_P, who, x, y, result, savedcw); + } + + result = _set_exp(result, newexp); + // fix needed: P exception is raised even if the result is exact + + RETURN_INEXACT2(who, x, y, result, savedcw); +} diff --git a/private/fp32/tran/i386/87cdisp.asm b/private/fp32/tran/i386/87cdisp.asm new file mode 100644 index 000000000..31661ebaa --- /dev/null +++ b/private/fp32/tran/i386/87cdisp.asm @@ -0,0 +1,519 @@ + page ,132 + title 87cdisp - C transcendental function dispatcher +;*** +;87cdisp.asm - C transcendental function dispatcher (80x87/emulator version) +; +; Copyright (c) 1987-1992, Microsoft Corporation +; +;Purpose: +; Common dispatch code and error handling for C transcendental functions +; +;Revision History: +; 07-04-84 GFW initial version +; 12-21-84 GFW correctly point to name in matherr struct +; 05-12-85 GFW return HUGE correctly signed on ERANGE errors +; fill 2nd operand for matherr structure correctly +; 07-05-85 GFW check for possible overflow on PLOSS errors +; in this case OVERFLOW overrides PLOSS +; 07-08-85 GFW added FWAIT in useHUGE +; 11-20-85 GFW faster RANGE checking +; 09-30-86 JMB internationalized error message handling +; 03-09-87 BCM changed writestr to _wrt2err (extern) +; 04-14-87 BCM log(0.0) and log10(0.0) sets errno to ERANGE +; for MS C 5.0 (ANSI compatible); errno is still +; set to EDOM for IBM C 2.0 (System V compatible). +; 04-28-87 BCM Added _cintrindisp1 and _cintrindisp2 +; for C "intrinsic" versions of pow, log, log10, exp, +; cos, sin, tan, acos, asin, atan, atan2, +; cosh, sinh, tanh, sqrt, ... for MS C 5.0 +; 08-04-87 BCM Removed "externP write" declaration. +; 08-17-87 BCM Changed _wrt2err from near to model-dependent +; except for IMBC20; this is because of QC core lib +; 10-12-87 BCM OS/2 support C library changes +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt, nomt +; 02-10-88 WAJ MTHREAD libraries now lock stderr when printing errors +; 04-25-88 WAJ _cpower is on the stack for MTHREAD so must be set to 1 +; 07-11-88 WAJ address of matherr structure was incorrect in MTHREAD case +; 08-24-88 WAJ 386 version. +; 11-20-89 WAJ 386 MTHREAD is no longer _pascal. +; 08-17-90 WAJ Now uses _stdcall. +; 10-15-90 WAJ Fixed intrinsic/2 argument problems. +; 05-17-91 WAJ Added _STDCALL ifdefs. +; 08-27-91 JCR ANSI naming +; 09-15-91 GDP Added _cwrt2err. _NMSG_WRITE is no longer _pascal +; 11-15-91 GDP Removed error message display stuff +; moved exception structure to stack frame, even for +; single thread code (allow recursive calls of +; transcendentals through matherr) +; call _87except after fsave +; put Localfac on the stack for multi thread +; 02-10-92 GDP changed error handling avoid polluting the fp status word +; 03-15-92 GDP extensive changes in error detection scheme +; 10-27-92 SKS Re-arranged some code to make this work with MASM 6.10 +; 11-06-92 GDP merged changes from the fp tree on \\vangogh: removed +; saveflag, added __fastflag, new range error checking +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + +EDOM = 33 ; math error numbers +ERANGE = 34 + +EDOMAIN = 120 ; internal error number for DOMAIN +ESING = 121 ; internal error number for SING +ETLOSS = 122 ; internal error number for TLOSS + + + + .data + +comm _matherr_flag:dword +extrn __fastflag:dword + + +staticQ DblMax, 07fefffffffffffffR +staticQ DblMin, 00010000000000000R +staticQ IeeeAdjO, 0c098000000000000R +staticQ IeeeAdjU, 04098000000000000R +staticQ _infinity, 07ff0000000000000R +staticQ _zero, 00000000000000000R + +ifndef MTHREAD + +staticQ LocalFac, ? +intrinflag db ? + +else ;MTHREAD + + +MTStackFrame struc + MTS_LocalFac dq ? + MTS_cdispflags db ? +MTStackFrame ends + +MTSFISize equ ((size MTStackFrame) + ISIZE - 1) and (not (ISIZE-1)) + + +LocalFac equ <MTSF.MTS_LocalFac> +cdispflags equ <MTSF.MTS_cdispflags> + +INTRINFLAG = 01h +TWOARGFLAG = 02h + +endif ;MTHREAD + +; error value action table + +;labelW retvaltab +; DNCPTR codeOFFSET useretval +page + + + CODESEG + +extrn _trandisp1:near +extrn _trandisp2:near +extrn _87except:proc + + + + +;---------------------------------------------------------- +; +; intrinsic versions: TRANSCENDENTAL DISPATCH ROUTINES +; +;---------------------------------------------------------- +; +; _cintrindisp1 - Intrinsic Dispatch for 1 arg DP transcendental +; _cintrindisp2 - Intrinsic Dispatch for 2 arg DP transcendental +; +; rdx - function dispatch table address +; +;---------------------------------------------------------- + + +_cintrindisp2 proc uses RBXONLY + local DLSF[DSFISize]:IWORD +ifmt <local MTSF[MTSFISize]:IWORD> + + + fstcw [DSF.savCntrl] + fwait + + ; store the args in case they are needed by matherr. + ; Generally avoid storing since this may generate + ; various exceptions (overflow, underflow, inexact, invalid) + ; Args will not be available to an exception handler and + ; users should not use /Oi if interested in IEEE conformance + + cmp [_matherr_flag], 0 + JSE save2arg + +lab resume2 + +;ifmt <mov [_cpower], 1> ; set _cpower to C semantics + ; DISABLED this feature since pow(0,0) + ; will return 1 in C (NCEG spec) which + ; is the same as in FORTRAN --GDP + + + call _trandisp2 + +ifmt <or [cdispflags], (INTRINFLAG OR TWOARGFLAG)> +nomt <mov [intrinflag], 1> + + call cintrinexit + ret + + +lab save2arg + fxch + fst [DSF.arg1] + fxch + fst [DSF.arg2] + jmp resume2 + +_cintrindisp2 endp + + +_cintrindisp1 proc uses RBXONLY + local DLSF[DSFISize]:IWORD +ifmt <local MTSF[MTSFISize]:IWORD> + + + fstcw [DSF.savCntrl] + cmp [_matherr_flag], 0 + JSE save1arg + +lab resume1 + + call _trandisp1 + +ifmt <or [cdispflags],INTRINFLAG> +ifmt <and [cdispflags],(NOT TWOARGFLAG)> +nomt <mov [intrinflag], 1> + + call cintrinexit + ret + + +lab save1arg + fst [DSF.arg1] + jmp resume1 + + +_cintrindisp1 endp + + + + +;******************************************************************************* +;* +;* TRANSCENDENTAL DISPATCH ROUTINES +;* +;******************************************************************************* +;* +;* _ctrandisp1 - Dispatch for 1 arg DP transcendental +;* _ctrandisp2 - Dispatch for 2 arg DP transcendental +;* +;* edx - function dispatch table address +;* +;******************************************************************************* + +;* +;* Two arg standard dispatch. +;* + +_ctrandisp2 proc uses ebx, parm1:qword, parm2:qword + + local DLSF[DSFISize]:IWORD +ifmt <local MTSF[MTSFISize]:IWORD> + + + push dword ptr [parm1+4] ; load arg1 + push dword ptr [parm1] + call _fload +ifndef _STDCALL + add esp, 8 +endif + push dword ptr [parm2+4] ; load arg2 + push dword ptr [parm2] + call _fload +ifndef _STDCALL + add esp, 8 +endif + + fstcw [DSF.savCntrl] + +ifmt <or [cdispflags], TWOARGFLAG> +ifmt <mov [_cpower], 1> ; set _cpower to C semantics + + call _trandisp2 + + call ctranexit + +ifdef _STDCALL + ret 16 +else + ret +endif + +;* +;* Check for overflow and errors. +;* + + + +ctranexit:: + +ifmt <and [cdispflags], (NOT INTRINFLAG)> +nomt <mov [intrinflag], 0> + +cintrinexit:: + cmp __fastflag, 0 + JSNZ restoreCW + + fst qword ptr [LocalFac] ; cast result to double precision + + ; + ; PROBLEM: Since the intrinsics may be given an argument anywhere + ; in the long double range, functions that are not normally + ; expected to overflow (like sqrt) may generate IEEE exceptions + ; at this point. We can cure this by making the checkrange test + ; standard. + ; + + + mov al, [DSF.ErrorType] ; check for errors + or al, al + JE checkinexact + cmp al, CHECKOVER + JE checkoverflow + cmp al, CHECKRANGE + JSE checkrng + or al, al + JSE restoreCW + CBI + mov [DSF.typ], rax ; set exception type + jmp haveerror + + +lab checkinexact + + ; This will be the most common path because of + ; the nature of transcendentals. If inexact is + ; unmasked in user's cw and active, raise it + + mov ax, [DSF.savCntrl] + and ax, 20h + JSNZ restoreCW ; inexact exception masked + fstsw ax + and ax, 20h + JSZ restoreCW + mov [DSF.typ], INEXACT + jmp haveerror + + +lab restoreCW +lab restoreCW2 + fldcw [DSF.savCntrl] ; load old control word + fwait + + retn + + + +lab checkrng +lab checkoverflow + mov ax, word ptr [LocalFac+6] ; get exponent part + and ax, 07ff0h + or ax, ax + JSE haveunderflow + cmp ax, 07ff0h + JSE haveoverflow + jmp checkinexact ; assume possibly inexact result + + +lab haveunderflow + mov [DSF.typ], UNDERFLOW + fld IeeeAdjU + fxch + fscale + fstp st(1) + fld st(0) + fabs + fcomp [DblMin] + fstsw ax + sahf + JSAE haveerror + fmul [_zero] + jmp short haveerror + +lab haveoverflow + mov [DSF.typ], OVERFLOW + fld IeeeAdjO + fxch + fscale + fstp st(1) + fld st(0) + fabs + fcomp [DblMax] + fstsw ax + sahf + JSBE haveerror + fmul [_infinity] + +lab haveerror +; fill error structure and call matherr + + push rsi ; save si + push rdi + + mov rbx, [DSF.Function] ; get function jmp table address + inc rbx + + mov [DSF.nam], rbx ; save name address + + +ifmt <test cdispflags, INTRINFLAG> +nomt <cmp [intrinflag], 0> + + JSNE aftercopy +; +; copy function args (for matherr structure) +; + cld + lea esi, [parm1] + lea edi, [DSF.arg1] + movsd + movsd + cmp [rbx-1].fnumarg, 1 ; check for 2nd parameter + JSE aftercopy + lea esi, [parm2] + lea edi, [DSF.arg2] + movsd + movsd + +lab aftercopy +lab useretval + fstp [DSF.retval] ; store return value + + + ; + ; If intrinsic calling convention, an 'fsave' is required + ; before matherr starts doing any fp operations. + ; (This needs to be documented.) + + lea rax, [DSF.typ] + lea rbx, [DSF.savCntrl] + push rbx + push rax + mov rbx, [DSF.function] + mov al, [rbx].fnumber + CBI + push rax + call _87except ; _fpexcept(&exception, &savedcw) +ifndef _STDCALL + add esp, 12 ; clear arguments if _cdecl. +endif + +lab movretval + pop rdi ; restore di + pop rsi ; restore si + fld [DSF.retval] ; this assumes that the user + ; does not want to return a + ; signaling NaN + + jmp restoreCW ; restore CW and return + +_ctrandisp2 endp + + + + +;* +;* One arg standard dispatch. +;* + +_ctrandisp1 proc uses ebx, parm1:qword + + local DLSF[DSFISize]:IWORD +ifmt <local MTSF[MTSFISize]:IWORD> + + push dword ptr [parm1+4] ; load arg1 + push dword ptr [parm1] + call _fload +ifndef _STDCALL + add esp, 8 +endif + + fstcw [DSF.savCntrl] + +ifmt <and [cdispflags],(NOT TWOARGFLAG)> + + call _trandisp1 + + call ctranexit + +ifdef _STDCALL + ret 8 +else + ret +endif + + +_ctrandisp1 endp + + + +; +; Load arg in the fp stack without raising an exception if the argument +; is a signaling NaN +; + + +_fload proc uses ebx, parm:qword + local tmp:tbyte + + mov ax, word ptr [parm+6] ; get exponent field + mov bx, ax ; save it + and ax, 07ff0h + cmp ax, 07ff0h ; check for special exponent + JSNE fpload + ; have special argument (NaN or INF) + or bx, 07fffh ; preserve sign, set max long double exp + mov word ptr [tmp+8], bx + ; convert to long double + mov eax, dword ptr [parm+4] + mov ebx, dword ptr [parm] + shld eax, ebx, 11 + ; the MSB of the significand is + ; already 1 because of the exponent value + mov dword ptr [tmp+4], eax + mov dword ptr [tmp], ebx + fld tmp + jmp short return + +lab fpload + fld parm +lab return +ifdef _STDCALL + ret 8 +else + ret +endif + +_fload endp + + + + + +end diff --git a/private/fp32/tran/i386/87csqrt.asm b/private/fp32/tran/i386/87csqrt.asm new file mode 100644 index 000000000..c8f050a0f --- /dev/null +++ b/private/fp32/tran/i386/87csqrt.asm @@ -0,0 +1,52 @@ + page ,132 + title 87csqrt - C interfaces - sqrt +;*** +;87csqrt.asm - sqrt functions (8087/emulator version) +; +; Copyright (c) 1984-89, Microsoft Corporation +; +;Purpose: +; C interfaces for the sqrt function (8087/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 05-08-87 BCM added C intrinsic interface (_CIsqrt) +; 10-12-87 BCM changes for OS/2 Support Library +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt,nomt +; 08-26-88 WAJ 386 version. +; 11-20-89 WAJ Don't need pascal for 386 MTHREAD. +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + + .data +extrn _OP_SQRTjmptab:word + +page + + CODESEG + +extrn _ctrandisp1:near + +labelP sqrt, PUBLIC + + mov rdx, dataOFFSET _OP_SQRTjmptab + jmp _ctrandisp1 + + +extrn _cintrindisp1:near + +labelP _CIsqrt, PUBLIC + + mov rdx, dataOFFSET _OP_SQRTjmptab + jmp _cintrindisp1 + +end diff --git a/private/fp32/tran/i386/87ctran.asm b/private/fp32/tran/i386/87ctran.asm new file mode 100644 index 000000000..166d482d9 --- /dev/null +++ b/private/fp32/tran/i386/87ctran.asm @@ -0,0 +1,92 @@ + page ,132 + title 87ctran - C interfaces - exp, log, log10, pow +;*** +;87ctran.asm - exp, log, log10, pow functions (8087/emulator version) +; +; Copyright (c) 1984-89, Microsoft Corporation +; +;Purpose: +; C interfaces for exp, log, log10, pow functions (8087/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 05-08-87 BCM added C intrinsic interface (_CI...) +; 10-12-87 BCM changes for OS/2 Support Library +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt,nomt +; 08-26-88 WAJ 386 version +; 11-20-89 WAJ Don't need pascal for MTHREAD 386. +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_POWjmptab:word +extrn _OP_LOG10jmptab:word +extrn _OP_LOGjmptab:word +extrn _OP_EXPjmptab:word + +page + + CODESEG + +extrn _ctrandisp1:near +extrn _ctrandisp2:near + + +labelP pow, PUBLIC + mov rdx, dataOFFSET _OP_POWjmptab + jmp _ctrandisp2 + + +labelP log, PUBLIC + mov rdx, dataOFFSET _OP_LOGjmptab +lab disp1 + jmp _ctrandisp1 + + +labelP log10, PUBLIC + mov rdx, dataOFFSET _OP_LOG10jmptab + jmp disp1 + + +labelP exp, PUBLIC + mov rdx, dataOFFSET _OP_EXPjmptab + jmp disp1 + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _CIpow, PUBLIC + + mov rdx, dataOFFSET _OP_POWjmptab + jmp _cintrindisp2 + + +labelP _CIlog, PUBLIC + + mov rdx, dataOFFSET _OP_LOGjmptab +lab idisp1 + jmp _cintrindisp1 + + +labelP _CIlog10, PUBLIC + + mov rdx, dataOFFSET _OP_LOG10jmptab + jmp idisp1 + + +labelP _CIexp, PUBLIC + + mov rdx, dataOFFSET _OP_EXPjmptab + jmp idisp1 + +end diff --git a/private/fp32/tran/i386/87ctrig.asm b/private/fp32/tran/i386/87ctrig.asm new file mode 100644 index 000000000..bfd2d481d --- /dev/null +++ b/private/fp32/tran/i386/87ctrig.asm @@ -0,0 +1,83 @@ + page ,132 + title 87ctrig - C interfaces - sin, cos, tan +;*** +;87ctrig.asm - trig functions (8087/emulator version) +; +; Copyright (c) 1984-89, Microsoft Corporation +; +;Purpose: +; C interfaces for the sin, cos, and tan functions (8087/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 05-08-87 BCM added C intrinsic interface (_CI...) +; 10-12-87 BCM changes for OS/2 Support Library +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt,nomt +; 08-26-88 WAJ 386 version +; 11-20-89 WAJ Don't need pascal for MTHREAD 386. +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data +extrn _OP_SINjmptab:word +extrn _OP_COSjmptab:word +extrn _OP_TANjmptab:word + +page + + CODESEG + +extrn _ctrandisp1:near +extrn _ctrandisp2:near + + +labelP sin, PUBLIC + + mov rdx, dataoffset _OP_SINjmptab +lab trigdisp + jmp _ctrandisp1 + + +labelP cos, PUBLIC + + mov rdx, dataoffset _OP_COSjmptab + jmp trigdisp + + +labelP tan, PUBLIC + + mov rdx, dataoffset _OP_TANjmptab + jmp trigdisp + + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _CIsin, PUBLIC + + mov rdx, dataoffset _OP_SINjmptab +itrigdisp: + jmp _cintrindisp1 + + +labelP _CIcos, PUBLIC + + mov rdx, dataoffset _OP_COSjmptab + jmp itrigdisp + + +labelP _CItan, PUBLIC + + mov rdx, dataoffset _OP_TANjmptab + jmp itrigdisp + +end diff --git a/private/fp32/tran/i386/87ctriga.asm b/private/fp32/tran/i386/87ctriga.asm new file mode 100644 index 000000000..6a24810c2 --- /dev/null +++ b/private/fp32/tran/i386/87ctriga.asm @@ -0,0 +1,97 @@ + page ,132 + title 87ctriga - C interfaces - asin, acos, atan, atan2 +;*** +;87ctriga.asm - inverse trig functions (8087/emulator version) +; +; Copyright (c) 1984-89, Microsoft Corporation +; +;Purpose: +; C interfaces for asin, acos, atan, atan2 (8087/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 05-08-87 BCM added C intrinsic interface (_CI...) +; 10-12-87 BCM changes for OS/2 Support Library +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt,nomt +; 08-26-88 WAJ 386 version +; 11-20-89 WAJ Don't need pascal for MTHREAD 386. +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_ASINjmptab:word +extrn _OP_ACOSjmptab:word +extrn _OP_ATANjmptab:word +extrn _OP_ATAN2jmptab:word + +page + + CODESEG + +extrn _ctrandisp1:near +extrn _ctrandisp2:near + + +labelP asin, PUBLIC + + mov rdx, dataoffset _OP_ASINjmptab +disp1: + jmp _ctrandisp1 + + +labelP acos, PUBLIC + + mov rdx, dataoffset _OP_ACOSjmptab + jmp disp1 + + +labelP atan, PUBLIC + + mov rdx, dataoffset _OP_ATANjmptab + jmp disp1 + + +labelP atan2, PUBLIC + + mov rdx, dataoffset _OP_ATAN2jmptab + jmp _ctrandisp2 + + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _CIasin, PUBLIC + + mov rdx,dataoffset _OP_ASINjmptab +idisp1: + jmp _cintrindisp1 + + +labelP _CIacos, PUBLIC + + mov rdx, dataoffset _OP_ACOSjmptab + jmp idisp1 + + +labelP _CIatan, PUBLIC + + mov rdx, dataoffset _OP_ATANjmptab + jmp idisp1 + + +labelP _CIatan2, PUBLIC + + mov rdx, dataoffset _OP_ATAN2jmptab + jmp _cintrindisp2 + +end diff --git a/private/fp32/tran/i386/87ctrigh.asm b/private/fp32/tran/i386/87ctrigh.asm new file mode 100644 index 000000000..c59664078 --- /dev/null +++ b/private/fp32/tran/i386/87ctrigh.asm @@ -0,0 +1,84 @@ + page ,132 + title 87ctrigh - C interfaces - sinh, cosh, tanh +;*** +;87ctrigh.asm - hyperbolic trig functions (8087/emulator version) +; +; Copyright (c) 1984-89 Microsoft Corporation +; +;Purpose: +; C interfaces for sinh, cosh, tanh functions (8087/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 05-08-87 BCM added C intrinsic interface (_CI...) +; 10-12-87 BCM changes for OS/2 Support Library +; 11-24-87 BCM added _loadds under ifdef DLL +; 01-18-88 BCM eliminated IBMC20; ifos2,noos2 ==> ifmt,nomt +; 08-26-88 WAJ 386 version +; 11-20-89 WAJ Don't need pascal for MTHREAD 386. +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_SINHjmptab:word +extrn _OP_COSHjmptab:word +extrn _OP_TANHjmptab:word + +page + + CODESEG + +extrn _ctrandisp1:near +extrn _ctrandisp2:near + + +labelP sinh, PUBLIC + + mov rdx, dataoffset _OP_SINHjmptab +disp1: + jmp _ctrandisp1 + + +labelP cosh, PUBLIC + + mov rdx, dataoffset _OP_COSHjmptab + jmp disp1 + + +labelP tanh, PUBLIC + + mov rdx, dataoffset _OP_TANHjmptab + jmp disp1 + + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _CIsinh, PUBLIC + + mov rdx, dataoffset _OP_SINHjmptab +idisp1: + jmp _cintrindisp1 + + +labelP _CIcosh, PUBLIC + + mov rdx, dataoffset _OP_COSHjmptab + jmp idisp1 + + +labelP _CItanh, PUBLIC + + mov rdx, dataoffset _OP_TANHjmptab + jmp idisp1 + +end diff --git a/private/fp32/tran/i386/87disp.asm b/private/fp32/tran/i386/87disp.asm new file mode 100644 index 000000000..64ea68177 --- /dev/null +++ b/private/fp32/tran/i386/87disp.asm @@ -0,0 +1,297 @@ + page ,132 + title 87disp - common transcendental dispatch routine +;*** +;87disp.asm - common transcendental dispatch routine (80x87/emulator version) +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Common transcendental dispatch routine (80x87/emulator version) +; +;Revision History: +; 07-07-84 GFW initial version +; 11-20-85 GFW mask overflow/underflow/precision exceptions; +; fixed affine/projective infinity confusion +; 09-12-86 BCM added _Flanguage to distinguish languages +; 10-21-86 BCM use _cpower rather than _Flanguage to +; distinguish C and FORTRAN exponentiation semantics +; 06-11-87 GFW faster dispatch code - all in-line +; 10-26-87 BCM minor changes for new cmacros.inc +; 04-25-88 WAJ _cpower is now on stack for MTHREAD +; 08-24-88 WAJ 386 version +; 02-01-92 GDP ported to NT +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + .data + +globalT _indefinite, 0FFFFC000000000000000R +globalT _piby2, 03FFFC90FDAA22168C235R +staticQ One, 03FF0000000000000R + + +ifndef MTHREAD ; by default assume C pow() semantics +globalB _cpower, 1 ; if zero, assume FORTRAN (or other) exponentiation +endif ;MTHREAD ; semantics + + + +labelB XAMtoTagTab + ; C2 C1 C0 C3 Meaning Meaning Tag 0 + db 2 * ISIZE ; 0 0 0 0 +Unnormal => NAN 10 0 + db 1 * ISIZE ; 0 0 0 1 +Zero => Zero 01 0 + db 2 * ISIZE ; 0 0 1 0 +NAN => NAN 10 0 + db 2 * ISIZE ; 0 0 1 1 Empty => NAN 10 0 + db 2 * ISIZE ; 0 1 0 0 -Unnormal => NAN 10 0 + db 1 * ISIZE ; 0 1 0 1 -Zero => Zero 01 0 + db 2 * ISIZE ; 0 1 1 0 -NAN => NAN 10 0 + db 2 * ISIZE ; 0 1 1 1 Empty => NAN 10 0 + db 0 * ISIZE ; 1 0 0 0 +Normal => Valid 00 0 + db 1 * ISIZE ; 1 0 0 1 +Denormal => Zero 01 0 + db 3 * ISIZE ; 1 0 1 0 +Infinity => Infinity 11 0 + db 2 * ISIZE ; 1 0 1 1 Empty => NAN 10 0 + db 0 * ISIZE ; 1 1 0 0 -Normal => Valid 00 0 + db 1 * ISIZE ; 1 1 0 1 -Denormal => Zero 01 0 + db 3 * ISIZE ; 1 1 1 0 -Infinity => Infinity 11 0 + db 2 * ISIZE ; 1 1 1 1 Empty => NAN 10 0 + + + CODESEG + +xamTOS macro + cmp [rdx].fnumber, OP_SQRT ; check for sqrt + JSNE cwdefault + mov bx, word ptr (DSF.savCntrl) + or bh, 2 ; set precision control to 53 bits + and bh, 0feh + mov bl, 03fh ; mask exceptions + jmp setcw +lab cwdefault + mov bx, 133fh ; default cw +lab setcw + mov DSF.setCntrl, bx ; set new control word + fldcw DSF.setCntrl ; load new control word + mov rbx, dataoffset XAMtoTagTab ; Prepare for XLAT + fxam + mov DSF.Function, rdx ; save function jmp table address + fstsw DSF.StatusWord + mov DSF.ErrorType, 0 ; clear error code + endm + +comdisp macro + CBI + and rcx, 0404h ; clear all but signs from CX + mov rbx, rdx + add rbx, rax + add rbx, size funtab ; skip over name, error vals, etc. + jmp [rbx] ; jmp to function + endm + +; Will dispatch to the special case routines for the single argument +; transcendental functions. It assumes on entry that the 8087 stack +; has the argument on the top of its stack and that DX has been set +; to the address of the dispatch table (which should be in Tag order). +; This routine will FXAM the top of the 8087 stack, generate Tag info +; from the condition code, and jump to the corresponding dispatch table +; entry. In the process of creating the offset for the XLAT instruction +; bit 2 of CL will be loaded with the sign of the argument. DI may not +; be used in any computations. + +_trandisp1 proc near + + xamTOS ; setup control word and check TOS + + fwait + mov cl, CondCode + shl cl, 1 + sar cl, 1 + rol cl, 1 + mov al, cl + and al, 0fh + xlat + + comdisp + +_trandisp1 endp + + +; Will dispatch to the special case routines for the double argument +; transcendental functions. It assumes on entry that the 8087 has arg1 +; next to the top and arg2 on top of the 8087 stack and that DX has +; been set to the address of the dispatch table (which should be in +; Tag-arg1,Tag-arg2 order). This routine will FXAM the top two +; registers of the 8087 stack,generate Tag info from the condition +; codes, and jump to the corresponding dispatch table entry. In the +; process of creating the offsets for the XLAT statements bit 2 of +; CH and bit 2 of CL will be loaded with the signs of the arguments +; next to the top and on top, respectively, of the 8087 stack. DI may +; not be used in any computations. + +_trandisp2 proc near + + xamTOS ; setup control word and check TOS + + fxch + mov cl, CondCode + fxam + fstsw DSF.StatusWord + fxch + mov ch, CondCode + shl ch, 1 + sar ch, 1 + rol ch, 1 + mov al, ch + and al, 0fh + xlat + mov ah, al + shl cl, 1 + sar cl, 1 + rol cl, 1 + mov al, cl + and al, 0fh + xlat + shl ah, 1 + shl ah, 1 + or al, ah + + comdisp + +_trandisp2 endp + + + +page +;---------------------------------------------------------- +; +; SPECIAL CASE RETURN FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - The signs of the last, second to last +; arguments are in CH, CL respectively. +; +; OUTPUT - The result is the stack top. +; +;---------------------------------------------------------- + +labelNP _rttospopde, PUBLIC + call setDOMAIN + +labelNP _rttospop, PUBLIC + fxch ; remove ST(1) + +labelNP _rtnospop, PUBLIC + fstp st(0) ; remove ST(0) + +labelNP _rttosnpop, PUBLIC + ret ; return TOS + +labelNP _rtnospopde, PUBLIC + call setDOMAIN + jmp _rtnospop + + +;---------------------------------------------------------- + +labelNP _rtzeropop, PUBLIC + fstp st(0) ; remove ST(0) + +labelNP _rtzeronpop, PUBLIC + fstp st(0) ; remove ST(0) + fldz ; push 0.0 onto stack + ret + +;---------------------------------------------------------- + +labelNP _rtonepop, PUBLIC + fstp st(0) ; remove ST(0) + +labelNP _rtonenpop, PUBLIC + fstp st(0) ; remove ST(0) + fld1 ; push 1.0 onto stack + ret + +;---------------------------------------------------------- + +isQNAN macro + fstp DSF.Fac ; use ten byte storage + fld DSF.Fac + test byte ptr [DSF.Fac+7], 40h ; Test for QNaN or SNaN + endm + + +labelNP _tosnan1, PUBLIC ; ST(0) is a NaN + isQNAN + JSZ _tossnan1 + mov DSF.Errortype, DOMAIN_QNAN + ret +lab _tossnan1 + mov DSF.Errortype, DOMAIN + fadd [One] ; Convert SNaN to QNaN + ret + +labelNP _nosnan2, PUBLIC ; ST(1) is a NaN + fxch +labelNP _tosnan2, PUBLIC ; ST(0) is a NaN + isQNAN + JSZ _tossnan2 + mov DSF.Errortype, DOMAIN_QNAN + jmp _tosnan2ret +lab _tossnan2 + mov DSF.Errortype, DOMAIN +lab _tosnan2ret + fadd ; Propagate NaN and pop + ret + +labelNP _nan2, PUBLIC + isQNAN + JSZ _snan2 + fxch + isQNAN + JSZ _snan2 + mov DSF.Errortype, DOMAIN_QNAN + jmp _nan2ret +lab _snan2 + mov DSF.Errortype, DOMAIN +lab _nan2ret + fadd ; Propagate NaN and pop + ret + + + + +;---------------------------------------------------------- + +labelNP _rtindfpop, PUBLIC + fstp st(0) ; remove ST(0) + +labelNP _rtindfnpop, PUBLIC + fstp st(0) ; remove ST(0) + fld [_indefinite] ; push real indefinite onto stack + cmp DSF.ErrorType, 0 ; if error set + JSG retj ; must be SING, don't set DOMAIN + +labelNP _rttosnpopde, PUBLIC +lab setDOMAIN + mov DSF.ErrorType, DOMAIN +lab retj + or cl, cl ; test sign in cl + ret + + +;---------------------------------------------------------- + +labelNP _rtchsifneg, PUBLIC + or cl, cl ; if arg is negative + JSZ chsifnegret ; negate top of stack + fchs +lab chsifnegret + ret + +end diff --git a/private/fp32/tran/i386/87except.c b/private/fp32/tran/i386/87except.c new file mode 100644 index 000000000..f350d9297 --- /dev/null +++ b/private/fp32/tran/i386/87except.c @@ -0,0 +1,131 @@ +/*** +*87except.c - floating point exception handling +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 9-26-91 GDP changed DOMAIN error handling +* 1-29-91 GDP renamed to 87exept.c +* 3-15-92 GDP support raising exceptions +* +*******************************************************************************/ +#include <errno.h> +#include <math.h> +#include <trans.h> + + +#define _DOMAIN_QNAN 7 /* should be in sync with elem87.inc */ +#define _INEXACT 8 /* should be in sync with elem87.inc */ + +int _matherr_flag; +extern void _raise_exc(_FPIEEE_RECORD *prec,unsigned int *pcw, + int flags, int opcode, double *parg1, double *presult); +extern void _set_errno(int matherrtype); +extern int _handle_exc(unsigned int flags, double * presult, unsigned int cw); + + + + +/*** +*double _87except(struct _exception *except, unsigned int *cw) +* +*Purpose: +* Handle floating point exceptions. +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ + +void _87except(int opcode, struct _exception *exc, unsigned short *pcw16) +{ + int fixed; + unsigned int flags; + unsigned int cw, *pcw; + + // + // convert fp control word into an unsigned int + // + + cw = *pcw16; + pcw = &cw; + + switch (exc->type) { + case _DOMAIN: + case _TLOSS: + flags = FP_I; + break; + case _OVERFLOW: + flags = FP_O | FP_P; + break; + case _UNDERFLOW: + flags = FP_U | FP_P; + break; + case _SING: + flags = FP_Z; + break; + case _INEXACT: + flags = FP_P; + break; + case _DOMAIN_QNAN: + exc->type = _DOMAIN; + // no break + default: + flags = 0; + } + + + + if (flags && _handle_exc(flags, &exc->retval, *pcw) == 0) { + + // + // trap should be taken + // + + _FPIEEE_RECORD rec; + + // + // fill in operand2 info. The rest of rec will be + // filled in by _raise_exc + // + + switch (opcode) { + case OP_POW: + case OP_FMOD: + case OP_ATAN2: + rec.Operand2.OperandValid = 1; + rec.Operand2.Format = _FpFormatFp64; + rec.Operand2.Value.Fp64Value = exc->arg2; + break; + default: + rec.Operand2.OperandValid = 0; + } + + _raise_exc(&rec, + pcw, + flags, + opcode, + &exc->arg1, + &exc->retval); + } + + + /* restore cw */ + _rstorfp(*pcw); + + fixed = 0; + + if (exc->type != _INEXACT && + ! _matherr_flag) { + fixed = _matherr(exc); + } + if (!fixed) { + _set_errno(exc->type); + } + +} diff --git a/private/fp32/tran/i386/87fmod.asm b/private/fp32/tran/i386/87fmod.asm new file mode 100644 index 000000000..c8ffd1d98 --- /dev/null +++ b/private/fp32/tran/i386/87fmod.asm @@ -0,0 +1,123 @@ + page ,132 + title 87fmod - fmod function +;*** +;87fmod.asm - fmod function (8087/emulator version) +; +; Copyright (c) 1984-90, Microsoft Corporation +; +;Purpose: +; Implements fmod() library function for computing floating-point +; remainder. Uses FPREM 8087 instruction or its emulated equivalent. +; +;Revision History: +; 12-09-84 GFW Added copyright message +; 05-12-85 GFW Changed fmod(x,0) = 0 for System V.2 compatibility +; 10-15-86 GFW In-line instructions rather than call _fpmath +; 05-08-87 BCM Added intrinsic version (_CIfmod) +; 10-12-87 BCM OS/2 support C library changes +; Including pascal naming and calling conv. & no _fac +; 01-18-88 BCM Eliminated IBMC20 switches; ifos2,noos2 ==> ifmt,nomt +; OS2_SUPPORT ==> MTHREAD +; 08-26-88 WAJ 386 version. +; 08-17-90 WAJ Now uses _stdcall. +; 05-17-91 WAJ Added _STDCALL ifdefs. +; 03-04-92 GDP Changed behavior for INF args +; 03-22-92 GDP Fixed bug: removed fxch out of the remloop +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + .data + +jmptab OP_FMOD,4,<'fmod',0,0>,<0,0,0,0,0,0>,2 + DNCPTR codeoffset fFMOD ; 0000 NOS Valid non-0, TOS Valid non-0 + DNCPTR codeoffset _rtindfpop ; 0001 NOS Valid non-0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0010 NOS Valid non-0, TOS NAN + DNCPTR codeoffset _rtindfpop ; 0011 NOS Valid non-0, TOS Inf + DNCPTR codeoffset _rtzeropop ; 0100 NOS 0, TOS Valid non-0 + DNCPTR codeoffset _rtindfpop ; 0101 NOS 0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0110 NOS 0, TOS NAN + DNCPTR codeoffset _rtindfpop ; 0111 NOS 0, TOS Inf + DNCPTR codeoffset _nosnan2 ; 1000 NOS NAN, TOS Valid non-0 + DNCPTR codeoffset _nosnan2 ; 1001 NOS NAN, TOS 0 + DNCPTR codeoffset _nan2 ; 1010 NOS NAN, TOS NAN + DNCPTR codeoffset _nosnan2 ; 1011 NOS NAN, TOS Inf + DNCPTR codeoffset _rtindfpop ; 1100 NOS Inf, TOS Valid non-0 + DNCPTR codeoffset _rtindfpop ; 1101 NOS Inf, TOS 0 + DNCPTR codeoffset _tosnan2 ; 1110 NOS Inf, TOS NAN + DNCPTR codeoffset _rtindfpop ; 1111 NOS Inf, TOS Inf + + +page + + CODESEG + +extrn _ctrandisp2:near +extrn _cintrindisp2:near + + +extrn _rtindfpop:near +extrn _rtnospop:near +extrn _rtzeropop:near +extrn _tosnan2:near +extrn _nosnan2:near +extrn _nan2:near + + +;*** +;fFMOD - floating-point remainder (8087/emulator intrinsic version) +;Purpose: +; fmod(x,y) computes floating-point remainder of x/y, i.e. the +; floating-point number f such that x = iy + f where f and x have +; the same sign, and |f| < |y|, and i is an integer. +; +; Uses the FPREM instruction to compute the remainder. +; (Formerly used FDIV.) +; +;Entry: +; floating-point numerator in ST(1) +; floating-point denominator in ST(0) +; +;Exit: +; floating-point result in ST(0); +; (pops one of the arguments, replaces the other with the result) +; +;Uses: +; AX, Flags. +; +;Exceptions: +; fmod(x, 0.0) currently returns 0.0 -- see System V specification +;******************************************************************************* + + +labelP fmod, PUBLIC + mov rdx, dataOFFSET _OP_FMODjmptab + jmp _ctrandisp2 + + +labelP _CIfmod, PUBLIC +labelP _FIamod, PUBLIC + mov rdx, dataOFFSET _OP_FMODjmptab + jmp _cintrindisp2 + + +lab fFMOD + fxch + +lab remloop + fprem ; do fprem's until reduction is done + fstsw ax + fwait + sahf ; load fprem flags into flags + JSPE remloop ; not done with range reduction + + fstp st(1) ; get rid of divisor + ret + + +end diff --git a/private/fp32/tran/i386/87fsqrt.asm b/private/fp32/tran/i386/87fsqrt.asm new file mode 100644 index 000000000..e8cb80159 --- /dev/null +++ b/private/fp32/tran/i386/87fsqrt.asm @@ -0,0 +1,38 @@ + page ,132 + title 87fsqrt - FORTRAN interfaces - sqrt +;*** +;87fsqrt.asm - sqrt functions +; +; Copyright (c) 1992-92, Microsoft Corporation +; +;Purpose: +; FORTRAN interfaces for the sqrt function +; +;Revision History: +; 08-18-92 GDP module created (from 87csqrt.asm) +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + + .data +extrn _OP_SQRTjmptab:word + +page + + CODESEG + +extrn _cintrindisp1:near + +labelP _FIsqrt, PUBLIC + + mov rdx, dataOFFSET _OP_SQRTjmptab + jmp _cintrindisp1 + +end diff --git a/private/fp32/tran/i386/87ftran.asm b/private/fp32/tran/i386/87ftran.asm new file mode 100644 index 000000000..b1125ce62 --- /dev/null +++ b/private/fp32/tran/i386/87ftran.asm @@ -0,0 +1,62 @@ + page ,132 + title 87ctran - FORTRAN interfaces - exp, log, log10, pow +;*** +;87ftran.asm - exp, log, log10, pow functions +; +; Copyright (c) 1984-89, Microsoft Corporation +; +;Purpose: +; FORTRAN interfaces for exp, log, log10, pow functions +; +;Revision History: +; 08-18-92 GDP module created (from 87ctran.asm) +; 11-08-92 GDP enabled _FIfexp, renamed _FIlog10 to _FIalog10 +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_POWjmptab:word +extrn _OP_LOG10jmptab:word +extrn _OP_LOGjmptab:word +extrn _OP_EXPjmptab:word + +page + + CODESEG + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + +labelP _FIfexp, PUBLIC + + mov rdx, dataOFFSET _OP_POWjmptab + jmp _cintrindisp2 + + +labelP _FIlog, PUBLIC + + mov rdx, dataOFFSET _OP_LOGjmptab +lab idisp1 + jmp _cintrindisp1 + + +labelP _FIalog10, PUBLIC + + mov rdx, dataOFFSET _OP_LOG10jmptab + jmp idisp1 + + +labelP _FIexp, PUBLIC + + mov rdx, dataOFFSET _OP_EXPjmptab + jmp idisp1 + +end diff --git a/private/fp32/tran/i386/87ftrig.asm b/private/fp32/tran/i386/87ftrig.asm new file mode 100644 index 000000000..d9c77da8d --- /dev/null +++ b/private/fp32/tran/i386/87ftrig.asm @@ -0,0 +1,54 @@ + page ,132 + title 87ftrig - FORTRAN interfaces - sin, cos, tan +;*** +;87ftrig.asm - trig functions +; +; Copyright (c) 1992-92, Microsoft Corporation +; +;Purpose: +; FORTRAN interfaces for the sin, cos, and tan functions +; +;Revision History: +; 08-18-92 GDP module created (from 87ctrig.asm) +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data +extrn _OP_SINjmptab:word +extrn _OP_COSjmptab:word +extrn _OP_TANjmptab:word + +page + + CODESEG + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _FIsin, PUBLIC + + mov rdx, dataoffset _OP_SINjmptab +itrigdisp: + jmp _cintrindisp1 + + +labelP _FIcos, PUBLIC + + mov rdx, dataoffset _OP_COSjmptab + jmp itrigdisp + + +labelP _FItan, PUBLIC + + mov rdx, dataoffset _OP_TANjmptab + jmp itrigdisp + +end diff --git a/private/fp32/tran/i386/87ftriga.asm b/private/fp32/tran/i386/87ftriga.asm new file mode 100644 index 000000000..78d3c61de --- /dev/null +++ b/private/fp32/tran/i386/87ftriga.asm @@ -0,0 +1,62 @@ + page ,132 + title 87ftriga - FORTRAN interfaces - asin, acos, atan, atan2 +;*** +;87ftriga.asm - inverse trig functions +; +; Copyright (c) 1992-92, Microsoft Corporation +; +;Purpose: +; FORTRAN interfaces for asin, acos, atan, atan2 +; +;Revision History: +; 08-18-92 GDP module created (from 87ctriga.asm) +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_ASINjmptab:word +extrn _OP_ACOSjmptab:word +extrn _OP_ATANjmptab:word +extrn _OP_ATAN2jmptab:word + +page + + CODESEG + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _FIasin, PUBLIC + + mov rdx,dataoffset _OP_ASINjmptab +idisp1: + jmp _cintrindisp1 + + +labelP _FIacos, PUBLIC + + mov rdx, dataoffset _OP_ACOSjmptab + jmp idisp1 + + +labelP _FIatan, PUBLIC + + mov rdx, dataoffset _OP_ATANjmptab + jmp idisp1 + + +labelP _FIatan2, PUBLIC + + mov rdx, dataoffset _OP_ATAN2jmptab + jmp _cintrindisp2 + +end diff --git a/private/fp32/tran/i386/87ftrigh.asm b/private/fp32/tran/i386/87ftrigh.asm new file mode 100644 index 000000000..2c4fa0f66 --- /dev/null +++ b/private/fp32/tran/i386/87ftrigh.asm @@ -0,0 +1,55 @@ + page ,132 + title 87ftrigh - FORTRAN interfaces - sinh, cosh, tanh +;*** +;87ftrigh.asm - hyperbolic trig functions +; +; Copyright (c) 1992-92 Microsoft Corporation +; +;Purpose: +; FORTRAN interfaces for sinh, cosh, tanh functions +; +;Revision History: +; 08-18-92 GDP module created (from 87ctrigh.asm) +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include os2supp.inc + include elem87.inc +.list + + .data + +extrn _OP_SINHjmptab:word +extrn _OP_COSHjmptab:word +extrn _OP_TANHjmptab:word + +page + + CODESEG + +extrn _cintrindisp1:near +extrn _cintrindisp2:near + + +labelP _FIsinh, PUBLIC + + mov rdx, dataoffset _OP_SINHjmptab +idisp1: + jmp _cintrindisp1 + + +labelP _FIcosh, PUBLIC + + mov rdx, dataoffset _OP_COSHjmptab + jmp idisp1 + + +labelP _FItanh, PUBLIC + + mov rdx, dataoffset _OP_TANHjmptab + jmp idisp1 + +end diff --git a/private/fp32/tran/i386/87sqrt.asm b/private/fp32/tran/i386/87sqrt.asm new file mode 100644 index 000000000..fafaba156 --- /dev/null +++ b/private/fp32/tran/i386/87sqrt.asm @@ -0,0 +1,70 @@ + page ,132 + title 87sqrt - square root - SQRT +;*** +;87sqrt.asm - common square root support (80x87/emulator version) +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Common support for the sqrt function (80x87/emulator version) +; +;Revision History: +; 07-04-84 GFW initial version +; 10-26-87 BCM minor changes for new cmacros.inc +; 08-24-88 WAJ 386 version +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + + .data + +jmptab OP_SQRT,4,<'sqrt',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFSQRT + DNCPTR codeoffset _rtforsqrtzero + DNCPTR codeoffset _tosnan1 + DNCPTR codeoffset _rtforsqrtinf + +page + + CODESEG + +extrn _rtindfnpop:near +extrn _rttosnpopde:near +extrn _rtzeronpop:near +extrn _tosnan1:near + +;---------------------------------------------------------- +; +; SQUARE ROOT FUNCTIONS +; +;---------------------------------------------------------- + +lab fFSQRT + or cl,cl ; test sign + JSNZ sqrtindfnpop ; return indefinite if negative + fsqrt ; calculate the square root of TOS + ret + +lab _rtforsqrtinf + or cl,cl ; test sign + JSNZ sqrtindfnpop + ret ; return infinity + +lab _rtforsqrtzero ; zero or denormal + ftst + fstsw ax + fwait + sahf + JSNZ fFSQRT ; denormal operand + ret ; return +0 or -0 (IEEE std) + +lab sqrtindfnpop + jmp _rtindfnpop ; return indefinite + +end diff --git a/private/fp32/tran/i386/87tran.asm b/private/fp32/tran/i386/87tran.asm new file mode 100644 index 000000000..2398a544b --- /dev/null +++ b/private/fp32/tran/i386/87tran.asm @@ -0,0 +1,511 @@ + page ,132 + title 87tran - elementary functions - EXP, LOG, LN, X^Y +;*** +;87tran.asm - elementary functions - EXP, LOG, LN, X^Y +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Support for EXP, LOG, LN, X^Y (80x87/emulator version) +; +;Revision History: +; +; 07/04/84 Greg Whitten +; initial version +; +; 07/05/85 Greg Whitten +; support x ^ y where x < 0 and y is an integer +; +; 07/08/85 Greg Whitten +; corrected value of infinity (was a NaN) +; +; 07/26/85 Greg Whitten +; make XENIX version truely System V compatible +; +; 10/31/85 Jamie Bariteau +; made _fFEXP and _fFLN public labels +; +; 05/29/86 Jamie Bariteau +; make pow return values conform to System V and +; ANSI C standards +; +; 09/12/86 Barry McCord +; added FORTRAN specific code to deal +; with zero**nonpositive; +; it requires run-time switching on language +; for mixed-language support +; +; 10/09/86 Barry McCord +; cotan(0.0) ==> SING error (jmp _rtinfnpopse), +; return infinity +; +; 06/11/87 Greg Whitten +; faster transcendental functions +; +; 06/24/87 Barry McCord +; fixed FORTRAN 4.01 bug (bcp #1801) in which +; an expression of the form +; (small positive less than one) ** (large positive) +; was overflowing instead of underflowing to zero +; +; 10/30/87 Bill Johnston +; made changes for os/2 support. +; +; 04/25/88 Bill Johnston +; _cpower is now on stack for MTHREAD +; +; 05/01/88 Bill Johnston +; si was being trashed in MTHREAD +; +; 06/03/88 Bill Johnston +; fixed neg ^ int int MTHREAD case +; +; 08/24/88 Bill Johnston +; 386 version +; +; 11/15/91 Georgios Papagiannakopoulos +; NT port. call _powhlp to handle special cases for pow() +; +; 04/01/91 Georgios Papagiannakopoulos +; fixed special values: log(-INF), log(0), pow(0, neg) +; +; 10/27/92 Steve Salisbury +; Move declaration of _powhlp out of .data declarations +; This fix is required for use with MASM 6.10. +; +; 11/06/92 Georgios Papagiannakopoulos +; changed special return values for NCEG conformance +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc + include os2supp.inc +.list + + .data + +globalT _infinity, 07FFF8000000000000000R +globalT _minfinity, 0FFFF8000000000000000R +globalT _logemax, 0400DB1716685B9D7A7DCR + +staticT _log2max, 0400DFFFF000000000000R +staticT _smallarg, 03FFD95F619980C4336F7R +staticQ _half, 03fe0000000000000R + +SBUFSIZE EQU 108 + +ifndef MTHREAD +staticT _temp, 0 +extrn _cpower:byte +endif + + +jmptab OP_POW,3,<'pow',0,0,0>,<0,0,0,0,0,0>,2 + DNCPTR codeoffset fFYTOX ; 0000 NOS Valid non-0, TOS Valid non-0 + DNCPTR codeoffset _rtforyto0 ; 0001 NOS Valid non-0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0010 NOS Valid non-0, TOS NAN + DNCPTR codeoffset _usepowhlp ; 0011 NOS Valid non-0, TOS Inf + DNCPTR codeoffset _rtfor0tox ; 0100 NOS 0, TOS Valid non-0 + DNCPTR codeoffset _rtfor0to0 ; 0101 NOS 0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0110 NOS 0, TOS NAN + DNCPTR codeoffset _usepowhlp ; 0111 NOS 0, TOS Inf + DNCPTR codeoffset _nosnan2 ; 1000 NOS NAN, TOS Valid non-0 + DNCPTR codeoffset _nosnan2 ; 1001 NOS NAN, TOS 0 + DNCPTR codeoffset _nan2 ; 1010 NOS NAN, TOS NAN + DNCPTR codeoffset _nosnan2 ; 1011 NOS NAN, TOS Inf + DNCPTR codeoffset _usepowhlp ; 1100 NOS Inf, TOS Valid non-0 + DNCPTR codeoffset _usepowhlp ; 1101 NOS Inf, TOS 0 + DNCPTR codeoffset _tosnan2 ; 1110 NOS Inf, TOS NAN + DNCPTR codeoffset _usepowhlp ; 1111 NOS Inf, TOS Inf + +jmptab OP_LOG10,5,<'log10',0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFLOGm ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtforln0 ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforloginf ; 0011 TOS Inf + +jmptab OP_LOG,3,<'log',0,0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFLNm ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtforln0 ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforloginf ; 0011 TOS Inf + +jmptab OP_EXP,3,<'exp',0,0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFEXP ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtonenpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforexpinf ; 0011 TOS Inf + +page + + CODESEG + + +extrn _rtindfpop:near +extrn _rtindfnpop:near +extrn _rtnospop:near +extrn _rtonepop:near +extrn _rtonenpop:near +extrn _rttospop:near +extrn _rttosnpop:near +extrn _rttosnpopde:near +extrn _rtzeronpop:near +extrn _tosnan1:near +extrn _tosnan2:near +extrn _nosnan2:near +extrn _nan2:near +extrn _powhlp:proc + +;---------------------------------------------------------- +; +; LOG AND EXPONENTIAL FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - For single argument functions the argument +; is the stack top. For fFYTOX the base +; is next to stack top, the exponent is +; the stack top. +; For single argument functions the sign is +; in bit 2 of CL. For fFYTOX the base +; sign is bit 2 of CH, the exponent +; sign is bit 2 of CL. +; +; OUTPUT - The result is the stack top +; +;---------------------------------------------------------- + +lab fFYTOX + mov DSF.ErrorType, CHECKRANGE ; indicate possible over/under flow on exit + or ch,ch ; base < 0 + JSNZ negYTOX ; check for integer power + fxch ; TOS = base , NOS = exponent + + +lab fFXTOY + fyl2x ; compute y*log2(x) + jmp short fF2X ; compute 2^(y*log2(x)) + + + +;----------------------------------------------- +; +; Entry for exponential function (exp) +; +;----------------------------------------------- + +labelNP _fFEXP, PUBLIC +lab fFEXP + mov DSF.ErrorType, CHECKRANGE ; indicate possible over/under flow on exit + xor ch,ch ; result is always positive + fldl2e + fmul ; convert log base e to log base 2 + +lab fF2X + call _ffexpm1 ; get exponent and (2^fraction)-1 + fld1 + fadd + test CondCode,1 ; if fraction > 0 (TOS > 0) + JSZ ExpNoInvert ; bypass 2^x invert + fld1 + fdivrp st(1),st(0) + +lab ExpNoInvert + test dl,040h ; if integer part was zero + JSNZ ExpScaled ; bypass scaling to avoid bug + fscale ; now TOS = 2^x + +lab ExpScaled + or ch,ch ; check for negate flag + JSZ expret + fchs ; negate result (negreal ^ odd integer) +lab expret + jmp _rttospop + + + +lab negYTOX ; check for negreal ^ integer + call _isintTOS + or eax, eax + JSE negYTOXerror + xor ch,ch + cmp eax, 2 + JSE evenexp + not ch ; ch <> 0 means negative result +lab evenexp + fxch + fabs ; x is positive + jmp fFXTOY ; continue with ch <> 0 for neg result + + +lab _rtfor0to0 + ;cmp [_cpower], 1 ; DISABLED (conform to NCEG spec) + ;JSE c_0to0 ; C requires a DOMAIN error for System V compat. + jmp _rtonepop ; MS FORTRAN has 0.0**0.0 == 1.0 + + +c_0to0:: ; System V needs DOMAIN error with 0.0 return + +lab negYTOXerror +lab Yl2XArgNegative + jmp _rtindfpop ; DOMAIN error or SING error + ; top of stack now has a NAN + ; code in 87cdisp replaces this with + ; proper System V return value + ; (for C only) + ; FORTRAN keeps indefinite value but + ; currently aborts on DOMAIN + ; and SING errors + + +; FORTRAN SING error (return infinity) +; e.g. 0.0**negative +; and cotan(0.0) +; + +labelNP _rtinfpopse, PUBLIC + fstp st(0) + +labelNP _rtinfnpopse, PUBLIC + fstp st(0) + fld tbyte ptr [_infinity] + mov DSF.ErrorType, SING + ret + +labelNP _fFLN, PUBLIC +lab fFLN + fldln2 + fxch + ftst + fstsw DSF.StatusWord + fwait + test CondCode, 041H ; if arg is negative or zero + JSNZ Yl2XArgNegative ; return a NAN + + fyl2x ; compute y*log2(x) + ret + + +;------------------------------------------------------- +; +; Logarithmic functions (log and log 10) entry points +; +;------------------------------------------------------- + +lab _rtforln0 ; (we don't distinguish +0, -0) + mov DSF. ErrorType, SING ; set SING error + fstp st(0) + fld tbyte ptr [_minfinity] + ret + +lab _rtforloginf + or cl, cl ; check sign + JSNZ tranindfnpop ; if negetive return indefinite + ret ; else return +INF + ; no overflow in this case (IEEE) + + + + + +lab fFLOGm + fldlg2 ; main LOG10 entry point + jmp short fFYL2Xm + +lab fFLNm ; main LN entry point + fldln2 + +lab fFYL2Xm + fxch + or cl, cl ; if arg is negative + JSNZ Yl2XArgNegative ; return a NAN + fyl2x ; compute y*log2(x) + ret + +page + +lab _rtforyto0 + jmp _rtonepop ; return 1.0 + + +lab _rtfor0tox + call _isintTOS + fstp st(0) + fstp st(0) + or cl, cl ; if 0^(-valid) + JSNZ _rtfor0toneg ; do more checking + fldz + cmp eax, 1 ; eax has the return value of _isintTOS + JSNE zerotoxdone + or ch, ch + JSE zerotoxdone + fchs +lab zerotoxdone + ret + + +lab _rtfor0toneg + mov DSF.ErrorType, SING + fld tbyte ptr [_infinity] + cmp eax, 1 ; eax has the return value of _isintTOS + JSNE zerotoxdone + or ch, ch + JSE zerotoxdone + fchs + jmp zerotoxdone + + +lab tranzeropop + fstp st(0) ; toss 1 stack entry + +lab tranzeronpop + jmp _rtzeronpop + + +lab tranindfpop + fstp st(0) ; toss 1 stack entry + +lab tranindfnpop + jmp _rtindfnpop + + +lab ExpArgOutOfRange + pop rax ; remove return address from stack + ; We need to check the sign of the + ; exponent to distinguish underflow + ; from overflow. We cannot just check + ; CL directly since for the XtoY case, + ; the exponent is a product of Y*log2(x) + ; and not an original argument that + ; has already been thru FXAM. So, + ; the following instructions were + ; substituted to fix FORTRAN 4.01 + ; bcp #1801) + + ftst ; check if exponent was negative large + fstsw DSF.StatusWord + fwait + test CondCode, 01h ; if valid^(-large) + JSNZ zeronpopue ; underflow error/return zero + fstp st(0) ; else return infinity/overflow + fld [_infinity] + or ch, ch + JSZ _expbigret + fchs +lab _expbigret + ret + +lab zeronpopue + mov DSF.ErrorType, UNDERFLOW + jmp _rtzeronpop + + +labelNP _rtinfpop, PUBLIC + fstp st(0) ; remove ST(0) + +labelNP _rtinfnpop, PUBLIC + fstp st(0) ; remove ST(0) + fld [_infinity] ; push infinity onto stack +lab setOVERFLOW + mov DSF.ErrorType, OVERFLOW ; set OVERFLOW error + ret + + +lab _rtforexpinf + or cl, cl + JSNZ tranzeronpop ; if exp(-infinity) return +zero + fstp st(0) + fld [_infinity] ; return infinity, no overflow + ret + +labelNP _ffexpm1, PUBLIC + fld st(0) ; copy TOS + fabs ; make TOS +ve + fld [_log2max] ; get log2 of largest number + fcompp + fstsw DSF.StatusWord + fwait + test CondCode, 041H ; if abs(arg) >= 2^15-.5 + JSNZ ExpArgOutOfRange ; perform arg out of range routine + fld st(0) ; copy TOS + frndint ; near round to integer + ftst + fstsw DSF.StatusWord + fwait + mov dl, CondCode ; save sign of integer part + fxch ; NOS gets integer part + fsub st,st(1) ; TOS gets fraction + ftst + fstsw DSF.StatusWord ; store sign of fraction + fabs + f2xm1 + ret + +; +; returns 0, 1, 2 if TOS is non-int, odd int or even int respectively +; + +lab _isintTOS + fld st(0) + frndint + fcomp + fstsw ax + sahf + JSNE notanint + fld st(0) ; it is an integer + fmul [_half] + fld st(0) + frndint + fcompp + fstsw ax + sahf + JSE evenint + mov eax, 1 +lab _isintTOSret + ret +lab notanint + mov eax, 0 + jmp _isintTOSret +lab evenint + mov eax, 2 + jmp _isintTOSret + + + + + + +lab _usepowhlp + + push rsi ; save rsi + sub rsp, SBUFSIZE+8 ; get storage for _retval and savebuf + mov rsi, rsp + push rsi ; push address for result + + sub rsp, 8 + fstp qword ptr [rsp] + sub rsp, 8 + fstp qword ptr [rsp] + + fsave [rsi+8] + call _powhlp +ifndef _STDCALL + add esp, 16+ISIZE ; clear arguments if _cdecl. +endif + frstor [rsi+8] + fld qword ptr [rsi] ; load result on the NDP stack + add rsp, SBUFSIZE+8 ; get rid of storage + pop rsi ; restore rsi + + test rax, rax ; check return value for domain error + JSZ noerror + jmp _rttosnpopde + +lab noerror + ret + + + +end diff --git a/private/fp32/tran/i386/87trig.asm b/private/fp32/tran/i386/87trig.asm new file mode 100644 index 000000000..c4d05e2e0 --- /dev/null +++ b/private/fp32/tran/i386/87trig.asm @@ -0,0 +1,127 @@ + page ,132 + title 87trig - trigonometric functions - SIN, COS, TAN +;*** +;87trig.asm - trigonometric functions - SIN, COS, TAN +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Support for SIN, COS, TAN +; +;Revision History: +; +; 11-06-91 GDP rewritten for 386 +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + + .data + +extrn _piby2:tbyte + +staticT _piby4, 03FFEC90FDAA22168C235R ; pi/4 +staticD _plossval, 04D000000R ; 2^27 +staticD _tlossval, 04F000000R ; 2^31 + + +jmptab OP_SIN,3,<'sin',0,0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFSIN ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + +jmptab OP_COS,3,<'cos',0,0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFCOS ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtonenpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + +jmptab OP_TAN,3,<'tan',0,0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFTAN ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + +;jmptab cotan,5,<'cotan',0>,<0,0,0,0,0,0>,1 +; DNCPTR codeoffset fFCOTAN ; 0000 TOS Valid non-0 +; DNCPTR codeoffset _rtinfnpopse ; 0001 TOS 0 +; DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN +; DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + + page + + CODESEG + +extrn _rtindfnpop:near +extrn _rtonenpop:near +extrn _rttosnpop:near +extrn _rtinfnpopse:near +extrn _rttosnpop:near +extrn _rttosnpopde:near +extrn _tosnan1:near + +;---------------------------------------------------------- +; +; FORWARD TRIGONOMETRIC FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - The argument is the stack top. +; The sign of argument is the 04h bit of CL. +; +; OUTPUT - The result is the stack top. +; +;---------------------------------------------------------- + +jmponC2 macro tag + fstsw ax + fwait + sahf + JSP tag + endm + + + +labelNP _fFCOS, PUBLIC +lab fFCOS + fcos + jmponC2 ArgTooLarge + ret + + +labelNP _fFSIN, PUBLIC +lab fFSIN + fsin + jmponC2 ArgTooLarge + ret + + +lab fFTAN + fptan + fstsw ax + fstp st(0) ; pop TOS (fptan pushed an extra value) + sahf + JSP ArgTooLarge + ret + +;lab fFCOTAN +; fptan +; jmponC2 ArgTooLarge +; fld1 +; fdiv +; ret + + +lab ArgTooLarge + mov DSF.ErrorType, TLOSS ; set TLOSS error + jmp _rtindfnpop + + + +end diff --git a/private/fp32/tran/i386/87triga.asm b/private/fp32/tran/i386/87triga.asm new file mode 100644 index 000000000..dcd2af06e --- /dev/null +++ b/private/fp32/tran/i386/87triga.asm @@ -0,0 +1,254 @@ + page ,132 + title 87triga - inverse trigonometric functions - ASIN, ACOS, ATAN +;*** +;87triga.asm - inverse trigonometric functions - ASIN, ACOS, ATAN +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Routines for ASIN, ACOS, ATAN +; +;Revision History: +; +; 07/04/84 Greg Whitten +; initial version +; +; 10/01/84 Brad Verheiden +; Fixed bug in _rtforatnby0 which did not remove an +; element from the floating stack +; +; 10/28/85 Jamie Bariteau +; Added comment about inputs to fFATN2, made fFATN2 +; public +; made _fFATN2 and _rtpiby2 public labels +; +; 10/30/87 Bill Johnston +; Minor changes for new cmacros. +; +; 08/25/88 Bill Johnston +; 386 version. +; +; 02/10/92 Georgios Papagiannakopoulos +; NT port -- Bug fix for atan(-INF) +; +; 03/27/92 GDP support underflow +; +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + + + .data + +extrn _indefinite:tbyte +extrn _piby2:tbyte + +jmptab OP_ASIN,4,<'asin',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFASN ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + +jmptab OP_ACOS,4,<'acos',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFACS ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtpiby2 ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtindfnpop ; 0011 TOS Inf + +jmptab OP_ATAN,4,<'atan',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFATN ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforatninf ; 0011 TOS Inf + +jmptab OP_ATAN2,5,<'atan2',0>,<0,0,0,0,0,0>,2 + DNCPTR codeoffset fFATN2 ; 0000 NOS Valid non-0, TOS Valid non-0 + DNCPTR codeoffset _rtforatnby0 ; 0001 NOS Valid non-0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0010 NOS Valid non-0, TOS NAN + DNCPTR codeoffset _rtforatn20 ; 0011 NOS Valid non-0, TOS Inf + DNCPTR codeoffset _rtforatn20 ; 0100 NOS 0, TOS Valid non-0 + DNCPTR codeoffset _rtforatn200 ; 0101 NOS 0, TOS 0 + DNCPTR codeoffset _tosnan2 ; 0110 NOS 0, TOS NAN + DNCPTR codeoffset _rtforatn20 ; 0111 NOS 0, TOS Inf + DNCPTR codeoffset _nosnan2 ; 1000 NOS NAN, TOS Valid non-0 + DNCPTR codeoffset _nosnan2 ; 1001 NOS NAN, TOS 0 + DNCPTR codeoffset _nan2 ; 1010 NOS NAN, TOS NAN + DNCPTR codeoffset _nosnan2 ; 1011 NOS NAN, TOS Inf + DNCPTR codeoffset _rtforatnby0 ; 1100 NOS Inf, TOS Valid non-0 + DNCPTR codeoffset _rtforatnby0 ; 1101 NOS Inf, TOS 0 + DNCPTR codeoffset _tosnan2 ; 1110 NOS Inf, TOS NAN + DNCPTR codeoffset _rtindfpop ; 1111 NOS Inf, TOS Inf + +page + + + CODESEG + +extrn _rtchsifneg:near +extrn _rtindfpop:near +extrn _rtindfnpop:near +extrn _rtnospop:near +extrn _rtonenpop:near +extrn _rttospop:near +extrn _rttosnpop:near +extrn _rttosnpopde:near +extrn _rtzeronpop:near +extrn _tosnan1:near +extrn _tosnan2:near +extrn _nosnan2:near +extrn _nan2:near + +;---------------------------------------------------------- +; +; INVERSE TRIGONOMETRIC FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - For single argument functions the argument +; is the stack top. For fFATN2 the numerator +; is next to stack top, the denominator is +; the stack top. +; For single argument functions the sign is +; in bit 2 of CL. For fFATN2 the numerator +; sign is bit 2 of CH, the denominator +; sign is bit 2 of CL. +; +; Note: +; _clog calls fFATN2 with the signs of the arguments +; in bit 0 of CL and CH respectively. This should +; work since fFATN2 tests for sign of numerator and +; denominator by using "or CL,CL" and "or CH,CH" +; +; OUTPUT - The result is the stack top +; +;---------------------------------------------------------- + +lab fFASN + call AugmentSinCos ; num.=arg, den.=sqrt(1-arg^2) + xchg ch, cl ; sign(num.)=sign(arg) + jmp short fFPATAN + + +lab fFACS + call AugmentSinCos ; num.=arg, den.=sqrt(1-arg^2) + fxch ; num.=sqrt(1-arg^2), den.=arg + jmp short fFPATAN + + +lab fFATN + fabs + fld1 ; denominator is 1 + mov ch, cl + xor cl, cl ; sign of denominator is +ve + jmp short fFPATAN + + +labelNP _fFATN2, PUBLIC +lab fFATN2 + mov DSF.ErrorType, CHECKRANGE ; indicate possible over/under flow on exit + fabs + fxch + fabs + fxch + +lab fFPATAN + fpatan ; compute partial arctangent + or cl, cl ; if denominator was +ve + JSZ PatanNumeratorTest ; bypass -ve denominator adjust + fldpi + fsubrp st(1), st(0) ; change Patan to pi - Patan + +lab PatanNumeratorTest + or ch, ch ; if numerator was +ve + JSZ PatanDone ; bypass -ve numerator adjust + fchs ; change Patan to -Patan + +lab PatanDone + ret + +page + +lab AugmentSinCos + fabs ; NOS=x = |input| + fld st(0) ; NOS=x, TOS=x + fld st(0) ; NNOS=x, NOS=x, TOS=x + fld1 ; NNNOS=x, NNOS=x, NOS=x, TOS=1 + fsubrp st(1),st(0) ; NNOS=x, NOS=x, TOS=1-x + fxch ; NNOS=x, NOS=1-x, TOS=x + fld1 ; NNNOS=x, NNOS=1-x, NOS=x, TOS=1 + fadd ; NNOS=x, NOS=1-x, TOS=1+x + fmul ; NOS=x, TOS=1-x^2 + ftst + fstsw DSF.StatusWord + fwait + test CondCode, 1 ; if 1-x^2 < 0 + JSNZ DescriminantNeg ; return a NAN + xor ch, ch ; sign of TOS is +ve + fsqrt ; NOS=x, TOS=sqrt(1-x^2) + ret + +lab DescriminantNeg + pop rax ; remove return address from stack + jmp _rtindfpop ; replace top of stack with a NAN + +page +;---------------------------------------------------------- +; +; SPECIAL CASE RETURN FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - The signs of the last, second to last +; arguments are in CH, CL respectively. +; +; OUTPUT - The result is the stack top. +; +;---------------------------------------------------------- + +labelNP _rtpiby2, PUBLIC + fstp st(0) ; remove ST(0) + fld [_piby2] ; push pi/2 onto stack + ret + + +lab _rtforatn20 + fstp st(0) ; remove ST(0) + or cl ,cl ; if denominator is +ve + JSZ zeronpop ; return zero + fstp st(0) + fldpi ; push pi onto stack + or ch, ch ; if numerator was +ve + JSZ postv + fchs +lab postv + ret + +lab zeronpop + jmp _rtzeronpop ; return 0.0 + + +lab _rtforatn200 +lab indfpop + fstp st(0) ; remove ST(0) +lab indfnpop + jmp _rtindfnpop ; return real indefinite + + +lab _rtforatnby0 + fstp st(0) ; remove an argument before returning + mov cl, ch ; cl is sign(TOS) + jmp short _rtsignpiby2 + + +lab _rtforatninf +lab _rtsignpiby2 + call _rtpiby2 ; push pi/2 onto stack + jmp _rtchsifneg ; return with sign change if negative + +end diff --git a/private/fp32/tran/i386/87trigh.asm b/private/fp32/tran/i386/87trigh.asm new file mode 100644 index 000000000..8d8fe7009 --- /dev/null +++ b/private/fp32/tran/i386/87trigh.asm @@ -0,0 +1,216 @@ + page ,132 + title 87trigh - hyperbolic trigonometric functions - SINH, COSH, TANH +;*** +;87trigh.asm - hyperbolic trigonometric functions - SINH, COSH, TANH +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; Routines for SINH, COSH, TANH +; +;Revision History: +; +; 07/04/84 Greg Whitten +; initial version +; +; 10/31/85 Jamie Bariteau +; made _fFSINH and _fFCOSH public labels +; +; 10/30/87 Bill Johnston +; Minor changes for new cmacros. +; +; 08/25/88 Bill Johnston +; 386 version. +; +; 02/10/92 Georgios Papagiannakopoulos +; NT port --used CHECKOVER for detection of overflow +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc +.list + + .data + +extrn _logemax:tbyte +extrn _infinity:tbyte + +jmptab OP_SINH,4,<'sinh',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFSINH ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforsnhinf ; 0011 TOS Inf + +jmptab OP_COSH,4,<'cosh',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFCOSH ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rtonenpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtforcshinf ; 0011 TOS Inf + +jmptab OP_TANH,4,<'tanh',0,0>,<0,0,0,0,0,0>,1 + DNCPTR codeoffset fFTANH ; 0000 TOS Valid non-0 + DNCPTR codeoffset _rttosnpop ; 0001 TOS 0 + DNCPTR codeoffset _tosnan1 ; 0010 TOS NAN + DNCPTR codeoffset _rtfortnhinf ; 0011 TOS Inf + +page + + CODESEG + +extrn _ffexpm1:near +extrn _rtchsifneg:near +extrn _rtindfnpop:near +extrn _rtinfnpop:near +extrn _rtonenpop:near +extrn _rttospop:near +extrn _rttosnpop:near +extrn _rttosnpopde:near +extrn _tosnan1:near + +;---------------------------------------------------------- +; +; HYPERBOLIC FUNCTIONS +; +;---------------------------------------------------------- +; +; INPUTS - The argument is the stack top. +; The sign of the argument is bit 2 of CL. +; +; OUTPUT - The result is the stack top +; +;---------------------------------------------------------- + + +labelNP _fFSINH, PUBLIC +lab fFSINH + mov DSF.ErrorType, CHECKOVER ; indicate possible overflow on exit + call fFEXPH ; compute e^x for hyperbolics + or bl, bl ; if e^x is infinite + JSZ _rtforsnhlarge ; return as if x = affine infinity + call ExpHypCopyInv ; TOS = e^(-x), NOS = e^x + fsubp st(1), st(0) ; compute e^x - e^(-x) for hyperbolics + jmp short SinhCoshReturn + + +lab fFTANH + fld st(0) ; copy TOS + fabs ; make TOS +ve + fld [_logemax] ; get ln of largest number + fcompp + fstsw DSF.StatusWord + fwait + test CondCode, 041h ; if abs(arg) >= (2^15-.5)*ln2 + JSNZ _rtfortnhlarge ; return as if x = affine infinity + call fFEXPH ; compute e^x for hyperbolics + or bl, bl ; if e^x is infinite + JSZ _rtfortnhlarge ; return as if x = affine infinity + fld st(0) ; copy TOS + call ExpHypSum ; compute e^x + e^(-x) for hyperbolics + fxch ; get copy of e^x + call ExpHypCopyInv ; TOS = e^(-x), NOS = e^x + fsubp st(1), st(0) ; compute e^x - e^(-x) for hyperbolics + fdivrp st(1), st(0) ; now TOS = tanh(x) + ret + + +labelNP _fFCOSH, PUBLIC +lab fFCOSH + mov DSF.ErrorType, CHECKOVER ; indicate possible overflow on exit + call fFEXPH ; compute e^x for hyperbolics + or bl, bl ; if e^x is infinite + JSZ _rtforcnhlarge ; return as if x = affine infinity + call ExpHypSum ; compute e^x + e^(-x) for hyperbolics + +lab SinhCoshReturn + fld1 + fchs + fxch + fscale ; divide result by 2 + jmp _rttospop + +page + +lab _rtforsnhinf + fstp st(0) + fld [_infinity] + jmp _rtchsifneg ; change sign if argument -ve + +lab _rtforcshinf + fstp st(0) + fld [_infinity] + ret + +lab infpositive + ret + +lab _rtforsnhlarge + call _rtinfnpop ; TOS = infinity + +lab chsifneg + jmp _rtchsifneg ; change sign if argument -ve + + +lab _rtforcnhlarge + jmp _rtinfnpop ; TOS = infinity + + +lab _rtfortnhlarge + mov DSF.ErrorType, INEXACT +lab _rtfortnhinf + call _rtonenpop ; TOS = one + jmp chsifneg ; change sign if argument -ve + +page + +lab fFEXPH + fldl2e + fmul ; convert log base e to log base 2 + xor rbx, rbx ; clear e^x, finite result flags + call _ffexpm1 ; TOS = e^|x|-1 unscaled, NOS = scale + not bl ; set finite result flag + test CondCode, 1 ; if fraction > 0 (TOS > 0) + JSZ ExpHypNoInvert ; bypass e^x-1 invert + call ExpHypCopyInv ; TOS = e^(-x)-1, NOS = e^x-1 + fxch + fstp st(0) ; remove NOS + +lab ExpHypNoInvert + test dl, 040h ; if integer part was zero + JSNZ ExpHypScaled ; bypass scaling to avoid bug + not bh ; set e^x flag + fld1 + fadd ; TOS = e^x unscaled + fscale ; now TOS = e^x + +lab ExpHypScaled + jmp _rttospop ; TOS = e^x-1 or e^x scaled + +lab ExpHypSum + call ExpHypCopyInv ; TOS = e^(-x), NOS = e^x + fadd ; TOS = e^x + e^(-x) + or bh, bh ; if e^x flag set + JSNZ ExpHypSumReturn ; bypass e^x-1 adjust + fld1 + fadd st(1),st + fadd ; add 2 to result + +lab ExpHypSumReturn + ret + +lab ExpHypCopyInv + fld st(0) ; TOS = e^x (or e^x-1) + fld1 ; TOS = 1, NOS = e^x (or e^x-1) + or bh, bh ; if e^x flag set + JSNZ ExpHypCopyInvReturn ; bypass e^x-1 adjust + fadd st, st(1) ; TOS = e^x, NOS = e^x-1 + fchs ; TOS = -e^x, NOS = e^x-1 + fxch ; TOS = e^x-1, NOS = -e^x + +lab ExpHypCopyInvReturn + fdivrp st(1), st(0) ; TOS = e^(-x) (or e^(-x)-1) + ret + +end diff --git a/private/fp32/tran/i386/filter.c b/private/fp32/tran/i386/filter.c new file mode 100644 index 000000000..835e9c776 --- /dev/null +++ b/private/fp32/tran/i386/filter.c @@ -0,0 +1,1403 @@ +/*** +* filter.c - IEEE exception filter routine +* +* Copyright (c) 1992-1992, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 05-24-92 GDP written +* +*******************************************************************************/ + + +#include <trans.h> +#include <nt.h> +#include <assert.h> + + +void _FillOperand( + _FPIEEE_VALUE *pOperand, + PFLOATING_SAVE_AREA pFloatSave, + int location); + +void _UpdateFpCtxt( + PFLOATING_SAVE_AREA pFloatSave, + _FPIEEE_VALUE *pOperand, + int resultLocation, + int pop); + +void _UpdateResult( + PFLOATING_SAVE_AREA pFloatSave, + _FPIEEE_VALUE *pOperand, + int resultLocation); + +void _AdjustStack( + PFLOATING_SAVE_AREA pFloatSave, + int pop); + +int _AdjustLocation( + int location, + int pop); + +int _IsMemoryLocation(int location); + +_FP80 _GetFpRegVal( + PFLOATING_SAVE_AREA pFloatSave, + int location); + +void _SetFpRegVal( + PFLOATING_SAVE_AREA pFloatSave, + int location, + _FP80 *pval); + +void _SetTag( + ULONG *pTagWord, + int reg, + int value); + +static _FP80 _zero80 = { 0, 0, 0, 0, 0 }; + + + +// +// Define macros for IEEE scaling +// These should be called with all exceptions masked +// + + +#define SCALE(Operand, adj) \ + _asm{fild adj} \ + _asm{fld tbyte ptr Operand} \ + _asm{fscale} \ + _asm{fstp st(1)} \ + _asm{fstp tbyte ptr Operand} + + + +#define FP80_TO_FP64(p64, p80) \ + _asm{fld tbyte ptr p80} \ + _asm{fstp qword ptr p64} + + +#define FP80_TO_FP32(p32, p80) \ + _asm {fld tbyte ptr p80} \ + _asm{fstp dword ptr p32} + + +static int const _ieee_adj_single = 192; +static int const _ieee_adj_double = 1536; + + + +// +// Location codes +// +// +// By convention the first eight location codes contain the number of +// a floating point register, i.e., ST0 through ST7 have the values +// 0 to 7 respectively. The other codes have arbitrary values: +// +// CODE MEANING +// STi (0<=i<8) Floating point stack location ST(i) +// REG FP stack location is in the REG field of the instruction +// RS FP status register +// M16I Memory location (16bit int) +// M32I Memory location (32bit int) +// M64I Memory location (64bit int) +// M32R Memory location (32bit real) +// M64R Memory location (64bit real) +// M80R Memory location (80bit real) +// M80D Memory location (80bit packed decimal) +// Z80R Implied Zero Operand +// INV Invalid, unavailable, or unused +// + +#define ST0 0x00 +#define ST1 0x01 +#define ST2 0x02 +#define ST3 0x03 +#define ST4 0x04 +#define ST5 0x05 +#define ST6 0x06 +#define ST7 0x07 +#define REG 0x08 +#define RS 0x09 +#define M16I 0x0a +#define M32I 0x0b +#define M64I 0x0c +#define M32R 0x0d +#define M64R 0x0e +#define M80R 0x0f +#define M80D 0x10 +#define Z80R 0x11 +#define INV 0x1f + + + +// +// Define masks for instruction decoding +// x87 instruction form: +// ------------------------------------------------- +// | | | op | | | +// | MOD | OPCODE2 | or REG| 1 1 0 1 1 | OPCODE1| +// |or op | | or R/M| (ESC) | | +// ------------------------------------------------- +// |<-2-->|<---3---->|<--3-->|<---5------>|<--3--->| + +#define MASK_OPCODE2 0x3800 +#define MASK_REG 0x0700 +#define MASK_MOD 0xc000 + + +#define ESC_PREFIX 0xd8 +#define MASK_OPCODE1 0x07 + + +typedef struct { + ULONG Opcode1:3; + ULONG Escape:5; + ULONG Reg:3; + ULONG Opcode2:3; + ULONG Mod:2; + ULONG Pad:16; +} X87INSTR, *PX87INSTR; + + +// define masks for C3,C2,C0 in fp status word + +#define C3 (1 << 14) +#define C2 (1 << 10) +#define C0 (1 << 8) + +typedef struct { + ULONG Invalid:1; + ULONG Denormal:1; + ULONG ZeroDivide:1; + ULONG Overflow:1; + ULONG Underflow:1; + ULONG Inexact:1; + ULONG StackFault:1; + ULONG ErrorSummary:1; + ULONG CC0:1; + ULONG CC1:1; + ULONG CC2:1; + ULONG Top:3; + ULONG CC3:1; + ULONG B:1; + ULONG Pad:16; +} X87STATUS, *PX87STATUS; + + +// +// Define Tag word values +// + +#define TAG_VALID 0x0 +#define TAG_ZERO 0x1 +#define TAG_SPECIAL 0x2 +#define TAG_EMPTY 0x3 + + + +// Sanitize status word macro + +#define SANITIZE_STATUS_WORD(pFSave) (pFSave->StatusWord &= ~0xff) + + + + +// +// Instruction Information structure +// + +typedef struct { + unsigned long Operation:12; // Fp Operation code + unsigned long Op1Location:5; // Location of 1st operand + unsigned long Op2Location:5; // Location of 2nd operand + unsigned long ResultLocation:5; // Location of result + int PopStack:3; // # of pops done by the instruction + // (if <0 implies a push) + unsigned long NumArgs:2; // # of args to the instruction +} INSTR_INFO, *PINSTR_INFO; + + +// +// The following table contains instruction information for most +// of the x87 instructions. It is indexed with a 7-bit code (3 last +// bits of 1st byte of the instruction (OPCODE1), 1 bit that +// indicates the presence of a MOD field and 3 bits for OPCODE2. +// Reserved instructions, instructions that are not generated by the +// compiler, and some of the instructions that do not raise IEEE +// exceptions have OP_UNSPEC (unspecified) as Operation code +// + +// By convention FLD instructions and some others (FXTRACT, FSINCOS) +// have a negative pop value (i.e., they push the stack instead of +// popping it). In that case the location code specifies the register +// number after pushing the stack + + +INSTR_INFO instr_info_table[128] = { + + {OP_ADD, ST0, M32R, ST0, 0, 2 }, // FADD single real + {OP_MUL, ST0, M32R, ST0, 0, 2 }, // FMUL single real + {OP_COMP, ST0, M32R, RS, 0, 2 }, // FCOM single real + {OP_COMP, ST0, M32R, RS, 1, 2 }, // FCOMP single real + {OP_SUB, ST0, M32R, ST0, 0, 2 }, // FSUB single real + {OP_SUB, M32R, ST0, ST0, 0, 2 }, // FSUBR single real + {OP_DIV, ST0, M32R, ST0, 0, 2 }, // FDIV single real + {OP_DIV, M32R, ST0, ST0, 0, 2 }, // FDIVR single real + + {OP_ADD, ST0, REG, ST0, 0, 2 }, // FADD ST, ST(i) + {OP_MUL, ST0, REG, ST0, 0, 2 }, // FMUL ST, ST(i) + {OP_COMP, ST0, REG, RS, 0, 2 }, // FCOM ST, ST(i) + {OP_COMP, ST0, REG, RS, 1, 2 }, // FCOMP ST, ST(i) + {OP_SUB, ST0, REG, ST0, 0, 2 }, // FSUB ST, ST(i) + {OP_SUB, ST0, REG, ST0, 0, 2 }, // FSUBR ST, ST(i) + {OP_DIV, ST0, REG, ST0, 0, 2 }, // FDIV ST, ST(i) + {OP_DIV, ST0, REG, ST0, 0, 2 }, // FDIVR ST, ST(i) + + {OP_CVT, M32R, INV, ST0, -1, 1 }, // FLD single real + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, M32R, 0, 1 }, // FST single real + {OP_CVT, ST0, INV, M32R, 1, 1 }, // FSTP single real + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FLDENV + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FLDCW + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FSTENV + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FSTCW + + {OP_CVT, REG, INV, ST0, -1, 1 }, // FLD ST(i) + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FXCH + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FNOP or reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_COMP, ST0, Z80R, RS, 0, 2 }, // FTST (only this may raise IEEE exc) + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FLDxx (no IEEE exceptions) + {OP_UNSPEC,0, 0, 0, 0, 0 }, // instructions not generated by cl386 + {OP_UNSPEC,0, 0, 0, 0, 0 }, // instructions not generated by cl386 + + {OP_ADD, ST0, M32I, ST0, 0, 2 }, // FIADD short integer + {OP_MUL, ST0, M32I, ST0, 0, 2 }, // FIMUL short integer + {OP_COMP, ST0, M32I, RS, 0, 2 }, // FICOM short integer + {OP_COMP, ST0, M32I, RS, 1, 2 }, // FICOMP short integer + {OP_SUB, ST0, M32I, ST0, 0, 2 }, // FISUB short integer + {OP_SUB, M32I, ST0, ST0, 0, 2 }, // FISUBR short integer + {OP_DIV, ST0, M32I, ST0, 0, 2 }, // FIDIV short integer + {OP_DIV, M32I, ST0, ST0, 0, 2 }, // FIDIVR short integer + + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_COMP, ST0, ST1, RS, 2, 2 }, // FUCOMPP + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + + {OP_CVT, M32I, INV, ST0, -1, 1 }, // FILD short integer + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, M32I, 0, 1 }, // FIST short integer + {OP_CVT, ST0, INV, M32I, 1, 1 }, // FISTP short integer + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, M80R, INV, ST0, -1, 1 }, // FLD extended real + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, M80R, 1, 1 }, // FSTP extended real + + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FCLEX, FINIT, or reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + + {OP_ADD, ST0, M64R, ST0, 0, 2 }, // FADD double real + {OP_MUL, ST0, M64R, ST0, 0, 2 }, // FMUL double real + {OP_COMP, ST0, M64R, RS, 0, 2 }, // FCOM double real + {OP_COMP, ST0, M64R, RS, 1, 2 }, // FCOMP double real + {OP_SUB, ST0, M64R, ST0, 0, 2 }, // FSUB double real + {OP_SUB, M64R, ST0, ST0, 0, 2 }, // FSUBR double real + {OP_DIV, ST0, M64R, ST0, 0, 2 }, // FDIV double real + {OP_DIV, M64R, ST0, ST0, 0, 2 }, // FDIVR double real + + {OP_ADD, REG, ST0, REG, 0, 2 }, // FADD ST(i), ST + {OP_MUL, REG, ST0, REG, 0, 2 }, // FMUL ST(i), ST + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_SUB, REG, ST0, REG, 0, 2 }, // FSUBR ST(i), ST + {OP_SUB, ST0, REG, REG, 0, 2 }, // FSUB ST(i), ST + {OP_DIV, REG, ST0, REG, 0, 2 }, // FDIVR ST(i), ST + {OP_DIV, ST0, REG, REG, 0, 2 }, // FDIV ST(i), ST + + {OP_CVT, M64R, INV, ST0, -1, 1 }, // FLD double real + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, M64R, 0, 1 }, // FST double real + {OP_CVT, ST0, INV, M64R, 1, 1 }, // FSTP double real + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FRSTOR + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FSAVE + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FSTSW + + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FFREE ST(i) + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, REG, 0, 1 }, // FST ST(i) + {OP_CVT, ST0, INV, REG, 1, 1 }, // FSTP ST(i) + {OP_COMP, ST0, REG, RS, 0, 2 }, // FUCOM ST(i) + {OP_COMP, ST0, REG, RS, 1, 2 }, // FUCOMP ST(i) + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + + {OP_ADD, ST0, M16I, ST0, 0, 2 }, // FIADD word integer + {OP_MUL, ST0, M16I, ST0, 0, 2 }, // FIMUL word integer + {OP_COMP, ST0, M16I, RS, 0, 2 }, // FICOM word integer + {OP_COMP, ST0, M16I, RS, 1, 2 }, // FICOMP word integer + {OP_SUB, ST0, M16I, ST0, 0, 2 }, // FISUB word integer + {OP_SUB, M16I, ST0, ST0, 0, 2 }, // FISUBR word integer + {OP_DIV, ST0, M16I, ST0, 0, 2 }, // FIDIV word integer + {OP_DIV, M16I, ST0, ST0, 0, 2 }, // FIDIVR word integer + + {OP_ADD, REG, ST0, REG, 1, 2 }, // FADDP ST(i), ST + {OP_MUL, REG, ST0, REG, 1, 2 }, // FMULP ST(i), ST + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_COMP, ST0, ST1, RS, 2, 0 }, // FCOMPP (or reserved) + {OP_SUB, REG, ST0, REG, 1, 2 }, // FSUBRP ST(i), ST + {OP_SUB, ST0, REG, REG, 1, 2 }, // FSUBP ST(i), ST + {OP_DIV, REG, ST0, REG, 1, 2 }, // FDIVRP ST(i), ST + {OP_DIV, ST0, REG, REG, 1, 2 }, // FDIVP ST(i), ST + + {OP_CVT, M16I, INV, ST0, -1, 1 }, // FILD word integer + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_CVT, ST0, INV, M16I, 0, 1 }, // FIST word integer + {OP_CVT, ST0, INV, M16I, 1, 1 }, // FISTP word integer + {OP_CVT, M80D, INV, ST0, -1, 0 }, // FBLD packed decimal + {OP_CVT, M64I, INV, ST0, -1, 1 }, // FILD long integer + {OP_CVT, ST0, INV, M80D, 1, 1 }, // FBSTP packed decimal + {OP_CVT, ST0, INV, M64I, 1, 1 }, // FISTP long integer + + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // FSTSW AX or reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved + {OP_UNSPEC,0, 0, 0, 0, 0 }, // reserved +}; + + + + + + +/*** +* _fpieee_flt - IEEE fp filter routine +* +*Purpose: +* Invokes the user's trap handler on IEEE fp exceptions and provides +* it with all necessary information +* +*Entry: +* unsigned long exc_code: the NT exception code +* PEXCEPTION_POINTERS p: a pointer to the NT EXCEPTION_POINTERS struct +* int handler (_FPIEEE_RECORD *): a user supplied ieee trap handler +* +* Note: The IEEE filter routine does not handle some transcendental +* instructions. This can be done at the cost of additional decoding. +* Since the compiler does not generate these instructions, no portable +* program should be affected by this fact. +* +*Exit: +* returns the value returned by handler +* +*Exceptions: +* +*******************************************************************************/ +int _fpieee_flt(unsigned long exc_code, + PEXCEPTION_POINTERS p, + int handler (_FPIEEE_RECORD *)) +{ + PEXCEPTION_RECORD pexc; + PCONTEXT pctxt; + PFLOATING_SAVE_AREA pFloatSave; + _FPIEEE_RECORD ieee; + ULONG *pinfo; + X87INSTR instr; + PINSTR_INFO ptable; + int ret, index; + int mod; + ULONG cw, sw; + + ULONG op1Location, op2Location, resultLocation; + ULONG newOp1Location, newOp2Location, newResultLocation; + + + + // + // If the exception is not an IEEE exception, continue search + // for another handler + // + + + if (exc_code != STATUS_FLOAT_DIVIDE_BY_ZERO && + exc_code != STATUS_FLOAT_INEXACT_RESULT && + exc_code != STATUS_FLOAT_INVALID_OPERATION && + exc_code != STATUS_FLOAT_OVERFLOW && + exc_code != STATUS_FLOAT_UNDERFLOW) { + + return EXCEPTION_CONTINUE_SEARCH; + } + + + + pexc = p->ExceptionRecord; + pinfo = pexc->ExceptionInformation; + pctxt = p->ContextRecord; + pFloatSave = &pctxt->FloatSave; + + + _asm{fninit} + + + // + // Check for software generated exception + // + // By convention the first argument to the exception is + // 0 for h/w exception. For s/w exceptions it points + // to the _FPIEEE_RECORD + // + + if (pinfo[0]) { + + /* + * we have a software exception: + * the first parameter points to the IEEE structure + */ + + if ((ret = handler((_FPIEEE_RECORD *)(pinfo[0]))) == + EXCEPTION_CONTINUE_EXECUTION) { + + // + // Sanitize status word only if there is continuation + // + + SANITIZE_STATUS_WORD(pFloatSave); + } + + return ret; + } + + + // + // If control reaches here, then we have to deal with a + // hardware exception + // + + + // + // If the first byte of the instruction does not contain + // the ESCAPE bit pattern (1101) there may be an instruction + // prefix for segment override or address size. The filter + // routine does not handle this. + // + + if ((*(UCHAR *)(pFloatSave->ErrorOffset)&~MASK_OPCODE1) != ESC_PREFIX) { + + assert(0); + return EXCEPTION_CONTINUE_SEARCH; + } + + *(USHORT *)&instr = *(USHORT *)(pFloatSave->ErrorOffset); + + mod = instr.Mod == 0x3 ? 1 : 0; + index = instr.Opcode1 << 4 | mod << 3 | instr.Opcode2; + ptable = instr_info_table + index; + + ieee.Operation = ptable->Operation; + + + cw = pFloatSave->ControlWord; + sw = pFloatSave->StatusWord; + + + + // + // decode fp environment information + // + + + switch (cw & IMCW_RC) { + case IRC_NEAR: + ieee.RoundingMode = _FpRoundNearest; + break; + + case IRC_DOWN: + ieee.RoundingMode = _FpRoundMinusInfinity; + break; + + case IRC_UP: + ieee.RoundingMode = _FpRoundPlusInfinity; + break; + + case IRC_CHOP: + ieee.RoundingMode = _FpRoundChopped; + break; + } + + switch (cw & IMCW_PC) { + case IPC_64: + ieee.Precision = _FpPrecisionFull; + break; + case IPC_53: + ieee.Precision = _FpPrecision53; + break; + case IPC_24: + ieee.Precision = _FpPrecision24; + break; + } + + ieee.Enable.Inexact = cw & IEM_INEXACT ? 0 : 1; + ieee.Enable.Underflow = cw & IEM_UNDERFLOW ? 0 : 1; + ieee.Enable.Overflow = cw & IEM_OVERFLOW ? 0 : 1; + ieee.Enable.ZeroDivide = cw & IEM_ZERODIVIDE ? 0 : 1; + ieee.Enable.InvalidOperation = cw & IEM_INVALID ? 0 : 1; + + ieee.Status.Inexact = sw & ISW_INEXACT ? 1 : 0; + ieee.Status.Underflow = sw & ISW_UNDERFLOW ? 1 : 0; + ieee.Status.Overflow = sw & ISW_OVERFLOW ? 1 : 0; + ieee.Status.ZeroDivide = sw & ISW_ZERODIVIDE ? 1 : 0; + ieee.Status.InvalidOperation = sw & ISW_INVALID ? 1 : 0; + + ieee.Cause.Inexact = ieee.Enable.Inexact && ieee.Status.Inexact; + ieee.Cause.Underflow = ieee.Enable.Underflow && ieee.Status.Underflow; + ieee.Cause.Overflow = ieee.Enable.Overflow && ieee.Status.Overflow; + ieee.Cause.ZeroDivide = ieee.Enable.ZeroDivide && ieee.Status.ZeroDivide; + ieee.Cause.InvalidOperation = ieee.Enable.InvalidOperation && ieee.Status.InvalidOperation; + + // + // If location is REG, the register number is + // encoded in the instruction + // + + op1Location = ptable->Op1Location == REG ? + instr.Reg : + ptable->Op1Location; + + + op2Location = ptable->Op2Location == REG ? + instr.Reg : + ptable->Op2Location; + + resultLocation = ptable->ResultLocation == REG ? + instr.Reg : + ptable->ResultLocation; + + + switch (exc_code) { + case STATUS_FLOAT_INVALID_OPERATION: + case STATUS_FLOAT_DIVIDE_BY_ZERO: + + // + // Invalid Operation and Divide by zero are detected + // before the operation begins; therefore the NPX + // register stack and memory have not been updated + // + + _FillOperand(&ieee.Operand1, pFloatSave, op1Location); + _FillOperand(&ieee.Operand2, pFloatSave, op2Location); + + _FillOperand(&ieee.Result, pFloatSave, resultLocation); + + // + // The previous call was only good for setting the + // result Format. Since the + // operation has not begun yet, the result location + // may contain an incorrect value. + // For this reason, set OperandValid to 0 + // + + ieee.Result.OperandValid = 0; + + + if ((ret = handler (&ieee)) == EXCEPTION_CONTINUE_EXECUTION) { + + _UpdateFpCtxt(pFloatSave, + &ieee.Result, + resultLocation, + ptable->PopStack); + } + + break; + + + case STATUS_FLOAT_OVERFLOW: + case STATUS_FLOAT_UNDERFLOW: + + // + // Overflow and Underflow exception + // A result has already been computed and the stack has + // been adjusted, unless the destination is memory (FST instruction) + // + + if (_IsMemoryLocation(ptable->ResultLocation)) { + _FP80 tmp; + _FP32 ftmp; + _FP64 dtmp; + + int adj; + + // + // FST(P) instruction (takes only one argument) + // + + _FillOperand(&ieee.Operand1, pFloatSave, op1Location); + tmp = _GetFpRegVal(pFloatSave, 0); + + ieee.Result.OperandValid = 1; + + if (resultLocation == M32R) { + ieee.Result.Format = _FpFormatFp32; + adj = _ieee_adj_single; + } + else { + ieee.Result.Format = _FpFormatFp64; + adj = _ieee_adj_double; + } + + if (exc_code == STATUS_FLOAT_OVERFLOW) { + adj = -adj; + } + + SCALE(tmp, adj) + + if (resultLocation == M32R){ + FP80_TO_FP32(ftmp,tmp) + ieee.Result.Value.Fp32Value = ftmp; + } + else { + FP80_TO_FP64(dtmp,tmp) + ieee.Result.Value.Fp64Value = dtmp; + } + _asm{fnclex} + + + if ((ret = handler (&ieee)) == EXCEPTION_CONTINUE_EXECUTION) { + + _UpdateFpCtxt(pFloatSave, + &ieee.Result, + resultLocation, + ptable->PopStack); + } + + break; + } + + + // NO BREAK + + case STATUS_FLOAT_INEXACT_RESULT: + + // + // Stack has already been adjusted, so we should compute + // the new location of operands and result + // + + + newOp1Location = _AdjustLocation(op1Location, ptable->PopStack); + newOp2Location = _AdjustLocation(op2Location, ptable->PopStack); + newResultLocation = _AdjustLocation(resultLocation, ptable->PopStack); + + if (newOp1Location == newResultLocation) + newOp1Location = INV; + + if (newOp2Location == newResultLocation) + newOp2Location = INV; + + _FillOperand(&ieee.Result, pFloatSave, newResultLocation); + _FillOperand(&ieee.Operand1, pFloatSave, newOp1Location); + _FillOperand(&ieee.Operand2, pFloatSave, newOp2Location); + + + if ((ret = handler (&ieee)) == EXCEPTION_CONTINUE_EXECUTION) { + + _UpdateFpCtxt(pFloatSave, &ieee.Result, newResultLocation, 0); + + // + // no need to adjust the stack + // + } + + break; + } + + if (ret == EXCEPTION_CONTINUE_EXECUTION) { + + + SANITIZE_STATUS_WORD(pFloatSave); + + + // + // make fp control word changes take effect on continuation + // + + cw = pFloatSave->ControlWord; + + switch (ieee.RoundingMode) { + case _FpRoundNearest: + cw = cw & ~ IMCW_RC | IRC_NEAR & IMCW_RC; + break; + case _FpRoundMinusInfinity: + cw = cw & ~ IMCW_RC | IRC_DOWN & IMCW_RC; + break; + case _FpRoundPlusInfinity: + cw = cw & ~ IMCW_RC | IRC_UP & IMCW_RC; + break; + case _FpRoundChopped: + cw = cw & ~ IMCW_RC | IRC_CHOP & IMCW_RC; + break; + } + switch (ieee.Precision) { + case _FpPrecisionFull: + cw = cw & ~ IMCW_PC | IPC_64 & IMCW_PC; + break; + case _FpPrecision53: + cw = cw & ~ IMCW_PC | IPC_53 & IMCW_PC; + break; + case _FpPrecision24: + cw = cw & ~ IMCW_PC | IPC_24 & IMCW_PC; + break; + } + + ieee.Enable.Inexact ? (cw &= ~IEM_INEXACT) + : (cw |= IEM_INEXACT); + ieee.Enable.Underflow ? (cw &= ~IEM_UNDERFLOW) + : (cw |= IEM_UNDERFLOW); + ieee.Enable.Overflow ? (cw &= ~IEM_OVERFLOW) + : (cw |= IEM_OVERFLOW); + ieee.Enable.ZeroDivide ? (cw &= ~IEM_ZERODIVIDE) + : (cw |= IEM_ZERODIVIDE); + ieee.Enable.InvalidOperation ? (cw &= ~IEM_INVALID) + : (cw |= IEM_INVALID); + + pFloatSave->ControlWord = cw; + + + } + + + return ret; +} + + + + + +/*** +* _FillOperand - Fill in operand information +* +*Purpose: +* Fill in a _FPIEEE_VALUE record based on the information found in +* the floating point context and the location code +* +* +*Entry: +* _FPIEEE_VALUE *pOperand pointer to the operand to be filled in +* PFLOATING_SAVE_AREA pFloatSave pointer to the floating point context +* int location location code of the operand +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + + + +void _FillOperand( + _FPIEEE_VALUE *pOperand, + PFLOATING_SAVE_AREA pFloatSave, + int location) +{ + int c0,c2,c3; + + // + // Assume valid operand (this is almost always the case) + // + + pOperand->OperandValid = 1; + + + switch (location) { + case ST0: + case ST1: + case ST2: + case ST3: + case ST4: + case ST5: + case ST6: + case ST7: + + // + // By convention the location code contains the number of the + // floating point register + // + + pOperand->Format = _FpFormatFp80; + pOperand->Value.Fp80Value = _GetFpRegVal(pFloatSave, location); + break; + + case M80R: + pOperand->Format = _FpFormatFp80; + pOperand->Value.Fp80Value = *(_FP80 *)(pFloatSave->DataOffset); + break; + + case M16I: + pOperand->Format = _FpFormatI16; + pOperand->Value.I16Value = *(_I16 *)(pFloatSave->DataOffset); + break; + + case M32I: + pOperand->Format = _FpFormatI32; + pOperand->Value.I32Value = *(_I32 *)(pFloatSave->DataOffset); + break; + + case M64I: + pOperand->Format = _FpFormatI64; + pOperand->Value.I64Value = *(_I64 *)(pFloatSave->DataOffset); + break; + + case M64R: + pOperand->Format = _FpFormatFp64; + pOperand->Value.Fp64Value = *(_FP64 *)(pFloatSave->DataOffset); + break; + + case M32R: + pOperand->Format = _FpFormatFp32; + pOperand->Value.Fp32Value = *(_FP32 *)(pFloatSave->DataOffset); + break; + + case M80D: + pOperand->Format = _FpFormatBcd80; + pOperand->Value.Bcd80Value = *(_BCD80 *)(pFloatSave->DataOffset); + break; + + // + // Status register is used only for comparison instructions + // therefore the format should be _FpFormatCompare + // + + case RS: + pOperand->Format = _FpFormatCompare; + c0 = pFloatSave->StatusWord & C0 ? (1<<0) : 0; + c2 = pFloatSave->StatusWord & C2 ? (1<<2) : 0; + c3 = pFloatSave->StatusWord & C0 ? (1<<3) : 0; + + switch(c0 | c2 | c3) { + case 0x000: + + // ST > SRC + + pOperand->Value.CompareValue = _FpCompareGreater; + break; + + case 0x001: + + // ST < SRC + + pOperand->Value.CompareValue = _FpCompareLess; + break; + + case 0x100: + + // ST = SRC + + pOperand->Value.CompareValue = _FpCompareEqual; + break; + + default: + + pOperand->Value.CompareValue = _FpCompareUnordered; + break; + } + + break; + + + case Z80R: + pOperand->Format = _FpFormatFp80; + pOperand->Value.Fp80Value = _zero80; + break; + + case INV: + + pOperand->OperandValid = 0; + break; + + + case REG: + + // + // Control should never reach here. REG should have already + // been replaced with a code that corresponds to the register + // encoded in the instruction + + assert(0); + pOperand->OperandValid = 0; + break; + + } +} + + + + +/*** +* _UpdateFpCtxt - Update fp context +* +*Purpose: +* Copy the operand information to the snapshot of the floating point +* context or memory, as to make it available on continuation and +* adjust the fp stack accordingly +* +* +*Entry: +* +* PFLOATING_SAVE_AREA pFloatSave pointer to the floating point context +* _FPIEEE_VALUE *pOperand pointer to source operand +* int location location code for destination in the +* floating point context +* int pop # of times the stack should be popped +* (if negative, the stack is pushed) +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +void _UpdateFpCtxt( + PFLOATING_SAVE_AREA pFloatSave, + _FPIEEE_VALUE *pOperand, + int location, + int pop) +{ + if (pop < 0) { + _AdjustStack(pFloatSave, pop); + } + + _UpdateResult(pFloatSave, pOperand, location); + + if (pop > 0) { + _AdjustStack(pFloatSave, pop); + } +} + + + + +/*** +* _UpdateResult - Update result information in the fp context +* +*Purpose: +* Copy the operand information to the snapshot of the floating point +* context or memory, as to make it available on continuation +* +*Entry: +* +* PFLOATING_SAVE_AREA pFloatSave pointer to the floating point context +* _FPIEEE_VALUE *pOperand pointer to source operand +* int location) location code for destination in the +* floating point context +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +void _UpdateResult( + PFLOATING_SAVE_AREA pFloatSave, + _FPIEEE_VALUE *pOperand, + int location) +{ + + switch (location) { + case ST0: + case ST1: + case ST2: + case ST3: + case ST4: + case ST5: + case ST6: + case ST7: + + // + // By convention the location code contains the number of the + // floating point register + // + + _SetFpRegVal(pFloatSave,location,&pOperand->Value.Fp80Value); + break; + + case M80R: + *(_FP80 *)(pFloatSave->DataOffset) = pOperand->Value.Fp80Value; + break; + + case M16I: + *(_I16 *)(pFloatSave->DataOffset) = pOperand->Value.I16Value; + break; + + case M32I: + *(_I32 *)(pFloatSave->DataOffset) = pOperand->Value.I32Value; + break; + + case M64I: + *(_I64 *)(pFloatSave->DataOffset) = pOperand->Value.I64Value; + break; + + case M64R: + *(_FP64 *)(pFloatSave->DataOffset) = pOperand->Value.Fp64Value; + break; + + case M32R: + *(_FP32 *)(pFloatSave->DataOffset) = pOperand->Value.Fp32Value; + break; + + case M80D: + *(_BCD80 *)(pFloatSave->DataOffset) = pOperand->Value.Bcd80Value; + break; + + // + // Status register is used only for comparison instructions + // therefore the format should be _FpFormatCompare + // + + case RS: + switch (pOperand->Value.CompareValue) { + case _FpCompareEqual: + // C3,C2,C0 <- 100 + pFloatSave->StatusWord |= C3; + pFloatSave->StatusWord &= (~C2 & ~C0); + break; + case _FpCompareGreater: + // C3,C2,C0 <- 000 + pFloatSave->StatusWord &= (~C3 & ~C2 & ~C0); + break; + case _FpCompareLess: + // C3,C2,C0 <- 001 + pFloatSave->StatusWord |= C0; + pFloatSave->StatusWord &= (~C3 & ~C2); + break; + case _FpCompareUnordered: + // C3,C2,C0 <- 111 + pFloatSave->StatusWord |= (C3 | C2 | C0); + break; + } + + + case INV: + + break; + + case REG: + case Z80R: + + // + // Control should never reach here. REG should have already + // been replaced with a code that corresponds to the register + // encoded in the instruction + + assert(0); + break; + + } +} + + + + +/*** +* _AdjustStack - +* +*Purpose: +* Pop (or push) the image of the fp stack in the fp context +* +*Entry: +* PFLOATING_SAVE_AREA pFloatSaveArea: pointer to the fp context +* int pop: Number of times to pop the stack +* (if pop<0 stack should be pushed once) +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + + +void _AdjustStack( + PFLOATING_SAVE_AREA pFloatSave, + int pop) +{ + PX87STATUS pStatus; + int i; + + pStatus = (PX87STATUS) &pFloatSave->StatusWord; + + if (pop > 0) { + + // stack should be popped + + for (i=0; i<pop; i++) { + + // + // mark register as invalid + // + + _SetTag(&pFloatSave->TagWord, pStatus->Top, TAG_EMPTY); + + pStatus->Top++; + } + + } + + else if (pop < 0) { + + // stack should be pushed once (e.g., fsincos, fxtract) + + // + // mark register as valid + // + + pStatus->Top--; + + _SetTag(&pFloatSave->TagWord, pStatus->Top, TAG_VALID); + + } +} + + + +/*** +* _AdjustLocation - +* +*Purpose: +* Modify location code based on stack adjustment +* +*Entry: +* int location: old location code +* int pop: stack adjustment factor (>0 for pop, <0 for push) +* +*Exit: +* returns new location code +* +*Exceptions: +* +*******************************************************************************/ + + +int _AdjustLocation(int location, int pop) +{ + + int newlocation; + + switch (location) { + case ST0: + case ST1: + case ST2: + case ST3: + case ST4: + case ST5: + case ST6: + case ST7: + + newlocation = location - pop; + if (newlocation < 0 || newlocation > 7) { + newlocation = INV; + } + break; + + default: + newlocation = location; + } + + return newlocation; + +} + + +/*** +* _IsMemoryLocation - +* +*Purpose: +* Returns true if the location code specifies a memory location, +* otherwise it returns false. +* +* +*Entry: +* int location: location code +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +int _IsMemoryLocation(int location) +{ + switch (location) { + case M80R: + case M16I: + case M32I: + case M64I: + case M64R: + case M32R: + case M80D: + return 1; + } + + return 0; + +} + + + + + + + +/*** +* _GetFpRegVal - Get floating point register value +* +*Purpose: +* Return the value of the floating point register ST(stacklocation) +* found in the saved floating point context +* +*Entry: +* PFLOATING_SAVE_AREA pFloatSave floating point context +* int stackLocation location of register relative to stack top +* +*Exit: +* returns the register value in _FP80 format +* +*Exceptions: +* +*******************************************************************************/ + +_FP80 _GetFpRegVal( + PFLOATING_SAVE_AREA pFloatSave, + int stackLocation) +{ + PX87STATUS pStatus; + int n; + + pStatus = (PX87STATUS) &pFloatSave->StatusWord; + + n = pStatus->Top+stackLocation; + + if (n>=0 && n<8) + return *((_FP80 *)(pFloatSave->RegisterArea)+7-n); + else + return _zero80; +} + + + +/*** +* _SetFpRegVal - Set floating point register value +* +*Purpose: +* Set the value of the floating point register ST(stacklocation) +* found in the saved floating point context +* +*Entry: +* PFLOATING_SAVE_AREA pFloatSave floating point context +* int stackLocation location of register relative to stack top +* _FP80 *pval pointer to the new value +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +void _SetFpRegVal( + PFLOATING_SAVE_AREA pFloatSave, + int stackLocation, + _FP80 *pval) +{ + PX87STATUS pStatus; + int n; + int tag; + + pStatus = (PX87STATUS) &pFloatSave->StatusWord; + + n = pStatus->Top+stackLocation; + + if (n>=0 && n<8) { + *((_FP80 *)(pFloatSave->RegisterArea)+7-n) = *pval; + + // + // Update tag word + // + + switch (pval->W[4] & 0x7fff) { // check value of the exponent + + case 0: + if (*(ULONG *)pval == 0 && *((ULONG *)pval+1) == 0) { + // zero + tag = TAG_ZERO; + } + else { + // denormal or invalid + tag = TAG_SPECIAL; + } + break; + + + case 0x7fff: + // infinity or NaN + tag = TAG_SPECIAL; + break; + + default: + // valid + tag = TAG_VALID; + } + + _SetTag(&pFloatSave->TagWord, n, tag); + } +} + + + +/*** +* _SetTag - +* +*Purpose: +* Set tag of register 'reg' in tag word to 'value' +* +* +*Entry: +* ULONG *pTagWord pointer to the tagword to be modified +* int reg absolute register number (NOT relative to stack top) +* int value new tag value (empty, valid, zero, special) +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +void _SetTag( + ULONG *pTagWord, + int reg, + int value) +{ + ULONG mask; + int shift; + + shift = reg << 1; + mask = 0x3 << shift; + value <<= shift; + + *pTagWord = *pTagWord & ~mask | value & mask; +} diff --git a/private/fp32/tran/i386/fpctrl.c b/private/fp32/tran/i386/fpctrl.c new file mode 100644 index 000000000..cb467db07 --- /dev/null +++ b/private/fp32/tran/i386/fpctrl.c @@ -0,0 +1,166 @@ +/*** +*fpctrl.c - fp low level control and status routines +* +* Copyright (c) 1985-92, Microsoft Corporation +* +*Purpose: +* IEEE control and status routines for internal use. +* These routines use machine specific constants while _controlfp, +* _statusfp, and _clearfp use an abstracted control/status word +* +*Revision History: +* +* 03-31-92 GDP written +* 05-12-92 GJF Rewrote fdivr as fdivrp st(1),st to work around C8-32 +* assertions. +* +*/ + +#include <trans.h> + +/*** _statfp +*() - +* +*Purpose: +* return user status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statfp() +{ + short status; + + _asm { + fstsw status + } + return status; +} + +/*** _clrfp +*() - +* +*Purpose: +* return user status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clrfp() +{ + short status; + + _asm { + fnstsw status + fnclex + } + return status; +} + + +/*** _ctrlfp +*() - +* +*Purpose: +* return and set user control word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _ctrlfp(unsigned int newctrl, unsigned int _mask) +{ + short oldCw; + short newCw; + + _asm { + fstcw oldCw + } + newCw = (short) ((newctrl & _mask) | (oldCw & ~_mask)); + + _asm { + fldcw newCw + } + return oldCw; +} + + + +/*** _set_statfp +*() - +* +*Purpose: +* force selected exception flags to 1 +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +static unsigned long over[3] = { 0x0, 0x80000000, 0x4410 }; +static unsigned long under[3] = { 0x1, 0x80000000, 0x3000 }; + + +void _set_statfp(unsigned int sw) +{ + int itmp; + double tmp; + + if (sw & ISW_INVALID) { + _asm { + fld tbyte ptr over + fistp itmp + fwait + } + } + if (sw & ISW_OVERFLOW) { // will also trigger precision + _asm { + fstsw ax + fld tbyte ptr over + fstp tmp + fwait + fstsw ax + } + } + if (sw & ISW_UNDERFLOW) { // will also trigger precision + _asm { + fld tbyte ptr under + fstp tmp + fwait + } + } + if (sw & ISW_ZERODIVIDE) { + _asm { + fldz + fld1 + fdivrp st(1), st + fstp st(0) + fwait + } + } + if (sw & ISW_INEXACT) { + _asm { + fldpi + fstp tmp + fwait + } + } + +} diff --git a/private/fp32/tran/i386/frnd.c b/private/fp32/tran/i386/frnd.c new file mode 100644 index 000000000..c4a330e0f --- /dev/null +++ b/private/fp32/tran/i386/frnd.c @@ -0,0 +1,44 @@ +/*** +*frnd.c - +* +* Copyright (c) 1991-91, Microsoft Corporation +* +*Purpose: +* +* +*Revision History: +* +* 10-20-91 GDP written +*/ + +/*** +*double _frnd(double x) - round to integer +* +*Purpose: +* Round to integer according to the current rounding mode. +* NaN's or infinities are NOT handled +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ + + +double _frnd(double x) +{ + double result; + +#if defined i386 || defined _X86SEG_ + _asm { + fld x + frndint + fstp result + } +#else + #error Only 386 platform supported +#endif + + return result; +} diff --git a/private/fp32/tran/i386/fsqrt.c b/private/fp32/tran/i386/fsqrt.c new file mode 100644 index 000000000..7b4904d0b --- /dev/null +++ b/private/fp32/tran/i386/fsqrt.c @@ -0,0 +1,23 @@ +/*** +*fsqrt.c - square root helper +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Square root helper routine to be used with the i386 +* +*Revision History: +* 10-20-91 GDP written +* +*******************************************************************************/ + +double _fsqrt(double x) +{ + double result; + _asm{ + fld x + fsqrt + fstp result + } + return result; +} diff --git a/private/fp32/tran/i386/ftol.asm b/private/fp32/tran/i386/ftol.asm new file mode 100644 index 000000000..afb2b88d3 --- /dev/null +++ b/private/fp32/tran/i386/ftol.asm @@ -0,0 +1,60 @@ + page ,132 + title 87ftol - truncate TOS to 32-bit integer +;*** +;87ftol.asm - truncate TOS to 32-bit integer +; +; Copyright (c) 1985-88, Microsoft Corporation +; +;Purpose: +; +;Revision History: +; +; 07/16/85 Greg Whitten +; save BX and CX for sloppy code generator +; 10/15/86 Greg Whitten +; in-line instructions rather than call _fpmath +; 08/24/87 Barry McCord +; expand the functionality of _ftol to handle +; unsigned long by using "fistp qword ptr" +; 11/24/87 Barry McCord +; added _loadds under ifdef DLL +; +; 08/26/88 Bill Johnston +; 386 version +; +;******************************************************************************* + + +.xlist + include i386\cruntime.inc + include i386\mrt386.inc + include i386\os2supp.inc +.list + + CODESEG + + LabelP __ftol +_ftol proc + + local oldcw:word + local newcw:word + local intval:qword + + fstcw [oldcw] ; get control word + fwait ; synchronize + + mov ax, [oldcw] ; round mode saved + or ah, 0ch ; set chop rounding mode + mov [newcw], ax ; back to memory + + fldcw [newcw] ; reset rounding + fistp qword ptr [intval] ; store chopped integer + fldcw [oldcw] ; restore rounding + + mov rax, IWORD ptr [intval] + mov rdx, IWORD ptr [intval+ISIZE] + + ret +_ftol endp + +end diff --git a/private/fp32/tran/i386/huge.asm b/private/fp32/tran/i386/huge.asm new file mode 100644 index 000000000..d6009ed55 --- /dev/null +++ b/private/fp32/tran/i386/huge.asm @@ -0,0 +1,53 @@ + page ,132 + title HUGE - HUGE value +;*** +;huge.asm - defines HUGE +; +; Copyright (c) 1984-88, Microsoft Corporation +; +;Purpose: +; defines HUGE +; +;Revision History: +; +; 07/04/84 Greg Whitten +; initial version +; +; 12/21/84 Greg Whitten +; add assumes so that C can find variable +; +; 09/23/87 Barry C. McCord +; add _matherr_flag for the sake of the +; C floating-point intrinsic functions +; +; 08/29/88 Bill Johinston +; 386 version +; +; 08/27/91 JeffRob +; ANSI naming +; +; 09/06/91 GeorgioP +; define HUGE as positive infinity +; +; 09/06/91 GeorgioP +; define _HUGE_dll +; +;******************************************************************************* + + +.xlist + include i386\cruntime.inc + include i386\mrt386.inc +.list + + + .data + +ifdef CRTDLL +globalQ _HUGE_dll, 7ff0000000000000R +else +globalQ _HUGE, 7ff0000000000000R +endif + + +end diff --git a/private/fp32/tran/i386/ieee.c b/private/fp32/tran/i386/ieee.c new file mode 100644 index 000000000..ba61e5996 --- /dev/null +++ b/private/fp32/tran/i386/ieee.c @@ -0,0 +1,302 @@ +/*** +*ieee.c - ieee control and status routines +* +* Copyright (c) 1985-91, Microsoft Corporation +* +*Purpose: +* IEEE control and status routines. +* +*Revision History: +* +* 04-01-02 GDP Rewritten to use abstract control and status words +* +*/ + +#include <trans.h> +#include <float.h> + +static unsigned int _abstract_sw(unsigned short sw); +static unsigned int _abstract_cw(unsigned short cw); +static unsigned short _hw_cw(unsigned int abstr); + + + +/*** +* _statusfp() - +* +*Purpose: +* return abstract fp status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statusfp() +{ + short status; + + _asm { + fstsw status + } + return _abstract_sw(status); +} + + +/*** +*_clearfp() - +* +*Purpose: +* return abstract status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clearfp() +{ + short status; + + _asm { + fnstsw status + fnclex + } + + return _abstract_sw(status); +} + + + +/*** _controlfp +*() - +* +*Purpose: +* return and set abstract user fp control word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _controlfp(unsigned int newctrl, unsigned int mask) +{ + short oldCw; + short newCw; + unsigned int oldabs; + unsigned int newabs; + + _asm { + fstcw oldCw + } + + oldabs = _abstract_cw(oldCw); + + newabs = (newctrl & mask) | (oldabs & ~mask); + + newCw = _hw_cw(newabs); + + _asm { + fldcw newCw + } + return newabs; +} /* _controlfp() */ + + +/*** +* _abstract_cw() - abstract control word +* +*Purpose: +* produce a fp control word in abstracted (machine independent) form +* +*Entry: +* cw: machine control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_cw(unsigned short cw) +{ + unsigned int abstr = 0; + + + // + // Set exception mask bits + // + + if (cw & IEM_INVALID) + abstr |= _EM_INVALID; + if (cw & IEM_ZERODIVIDE) + abstr |= _EM_ZERODIVIDE; + if (cw & IEM_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (cw & IEM_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (cw & IEM_INEXACT) + abstr |= _EM_INEXACT; + + // + // Set rounding mode + // + + switch (cw & IMCW_RC) { + case IRC_NEAR: + abstr |= _RC_NEAR; + break; + case IRC_UP: + abstr |= _RC_UP; + break; + case IRC_DOWN: + abstr |= _RC_DOWN; + break; + case IRC_CHOP: + abstr |= _RC_CHOP; + break; + } + + // + // Set Precision mode + // + + switch (cw & IMCW_PC) { + case IPC_64: + abstr |= _PC_64; + break; + case IPC_53: + abstr |= _PC_53; + break; + case IPC_24: + abstr |= _PC_24; + break; + } + + return abstr; +} + + +/*** +* _hw_cw() - h/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned short _hw_cw(unsigned int abstr) +{ + // + // Set standard infinity and denormal control bits + // + + unsigned short cw = 0x1002; + + // + // Set exception mask bits + // + + if (abstr & _EM_INVALID) + cw |= IEM_INVALID; + if (abstr & _EM_ZERODIVIDE) + cw |= IEM_ZERODIVIDE; + if (abstr & _EM_OVERFLOW) + cw |= IEM_OVERFLOW; + if (abstr & _EM_UNDERFLOW) + cw |= IEM_UNDERFLOW; + if (abstr & _EM_INEXACT) + cw |= IEM_INEXACT; + + // + // Set rounding mode + // + + switch (abstr & _MCW_RC) { + case _RC_NEAR: + cw |= IRC_NEAR; + break; + case _RC_UP: + cw |= IRC_UP; + break; + case _RC_DOWN: + cw |= IRC_DOWN; + break; + case _RC_CHOP: + cw |= IRC_CHOP; + break; + } + + // + // Set Precision mode + // + + switch (abstr & _MCW_PC) { + case _PC_64: + cw |= IPC_64; + break; + case _PC_53: + cw |= IPC_53; + break; + case _PC_24: + cw |= IPC_24; + break; + } + + + return cw; +} + + + +/*** +* _abstract_sw() - abstract fp status word +* +*Purpose: +* produce an abstract (machine independent) fp status word +* +* +*Entry: +* sw: machine status word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_sw(unsigned short sw) +{ + unsigned int abstr = 0; + + + if (sw & ISW_INVALID) + abstr |= _EM_INVALID; + if (sw & ISW_ZERODIVIDE) + abstr |= _EM_ZERODIVIDE; + if (sw & ISW_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (sw & ISW_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (sw & ISW_INEXACT) + abstr |= _EM_INEXACT; + + return abstr; +} diff --git a/private/fp32/tran/i386/ieee87.c b/private/fp32/tran/i386/ieee87.c new file mode 100644 index 000000000..2df3f6102 --- /dev/null +++ b/private/fp32/tran/i386/ieee87.c @@ -0,0 +1,397 @@ +/*** +*ieee.c - ieee control and status routines +* +* Copyright (c) 1985-91, Microsoft Corporation +* +*Purpose: +* IEEE control and status routines. +* +*Revision History: +* +* 04-01-02 GDP Rewritten to use abstract control and status words +* 10-30-92 GDP _fpreset now resets saved fp context if called from a +* signal handler. +* +*/ + +#include <trans.h> +#include <float.h> +#include <nt.h> +#include <signal.h> + +static unsigned int _abstract_sw(unsigned short sw); +static unsigned int _abstract_cw(unsigned short cw); +static unsigned short _hw_cw(unsigned int abstr); + + + +/*** +* _statusfp() - +* +*Purpose: +* return abstract fp status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statusfp() +{ + short status; + + _asm { + fstsw status + } + return _abstract_sw(status); +} + + +/*** +*_clearfp() - +* +*Purpose: +* return abstract status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clearfp() +{ + short status; + + _asm { + fnstsw status + fnclex + } + + return _abstract_sw(status); +} + + + +/*** _control87 +*() - +* +*Purpose: +* return and set abstract user fp control word +* can modify EM_DENORMAL mask +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _control87(unsigned int newctrl, unsigned int mask) +{ + short oldCw; + short newCw; + unsigned int oldabs; + unsigned int newabs; + + _asm { + fstcw oldCw + } + + oldabs = _abstract_cw(oldCw); + + newabs = (newctrl & mask) | (oldabs & ~mask); + + newCw = _hw_cw(newabs); + + _asm { + fldcw newCw + } + return newabs; +} /* _controlfp() */ + + +/*** _controlfp +*() - +* +*Purpose: +* return and set abstract user fp control word +* cannot change denormal mask (ignores _EM_DENORMAL) +* This is done for portable IEEE behavior on all platforms +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _controlfp(unsigned int newctrl, unsigned int mask) +{ + return _control87(newctrl, mask & ~_EM_DENORMAL); +} + + + + +/*** +* _fpreset() - reset fp system +* +*Purpose: +* reset fp environment to the default state +* Also reset saved fp environment if invoked from a user's +* signal handler +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +#define TAG_ALL_EMPTY ((unsigned int) 0xffff); + +void _fpreset() +{ + PEXCEPTION_POINTERS excptrs = (PEXCEPTION_POINTERS) _pxcptinfoptrs; + + _asm { + fninit + } + if (excptrs && + excptrs->ContextRecord->ContextFlags & CONTEXT_FLOATING_POINT) { + // _fpreset has been invoked by a signal handler which in turn + // has been invoked by the CRT filter routine. In this case + // the saved fp context should be cleared, so that the change take + // effect on continuation. + + FLOATING_SAVE_AREA *pFloatSave = &excptrs->ContextRecord->FloatSave; + pFloatSave->StatusWord = 0; + pFloatSave->TagWord = TAG_ALL_EMPTY; + } +} + + + + + + + +/*** +* _abstract_cw() - abstract control word +* +*Purpose: +* produce a fp control word in abstracted (machine independent) form +* +*Entry: +* cw: machine control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_cw(unsigned short cw) +{ + unsigned int abstr = 0; + + + // + // Set exception mask bits + // + + if (cw & IEM_INVALID) + abstr |= _EM_INVALID; + if (cw & IEM_ZERODIVIDE) + abstr |= _EM_ZERODIVIDE; + if (cw & IEM_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (cw & IEM_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (cw & IEM_INEXACT) + abstr |= _EM_INEXACT; + if (cw & IEM_DENORMAL) + abstr |= _EM_DENORMAL; + + // + // Set rounding mode + // + + switch (cw & IMCW_RC) { + case IRC_NEAR: + abstr |= _RC_NEAR; + break; + case IRC_UP: + abstr |= _RC_UP; + break; + case IRC_DOWN: + abstr |= _RC_DOWN; + break; + case IRC_CHOP: + abstr |= _RC_CHOP; + break; + } + + // + // Set Precision mode + // + + switch (cw & IMCW_PC) { + case IPC_64: + abstr |= _PC_64; + break; + case IPC_53: + abstr |= _PC_53; + break; + case IPC_24: + abstr |= _PC_24; + break; + } + + + // + // Infinity control (bit can be programmed but has no effect) + // + + if (cw & IMCW_IC) { + abstr |= _IC_AFFINE; + } + + return abstr; +} + + +/*** +* _hw_cw() - h/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned short _hw_cw(unsigned int abstr) +{ + // + // Set standard infinity and denormal control bits + // + + unsigned short cw = 0; + + // + // Set exception mask bits + // + + if (abstr & _EM_INVALID) + cw |= IEM_INVALID; + if (abstr & _EM_ZERODIVIDE) + cw |= IEM_ZERODIVIDE; + if (abstr & _EM_OVERFLOW) + cw |= IEM_OVERFLOW; + if (abstr & _EM_UNDERFLOW) + cw |= IEM_UNDERFLOW; + if (abstr & _EM_INEXACT) + cw |= IEM_INEXACT; + if (abstr & _EM_DENORMAL) + cw |= IEM_DENORMAL; + + // + // Set rounding mode + // + + switch (abstr & _MCW_RC) { + case _RC_NEAR: + cw |= IRC_NEAR; + break; + case _RC_UP: + cw |= IRC_UP; + break; + case _RC_DOWN: + cw |= IRC_DOWN; + break; + case _RC_CHOP: + cw |= IRC_CHOP; + break; + } + + // + // Set Precision mode + // + + switch (abstr & _MCW_PC) { + case _PC_64: + cw |= IPC_64; + break; + case _PC_53: + cw |= IPC_53; + break; + case _PC_24: + cw |= IPC_24; + break; + } + + + // + // Set Infinity mode + // + + if (abstr & _MCW_IC) { + cw |= IIC_AFFINE; + } + + return cw; +} + + + +/*** +* _abstract_sw() - abstract fp status word +* +*Purpose: +* produce an abstract (machine independent) fp status word +* +* +*Entry: +* sw: machine status word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_sw(unsigned short sw) +{ + unsigned int abstr = 0; + + + if (sw & ISW_INVALID) + abstr |= _SW_INVALID; + if (sw & ISW_ZERODIVIDE) + abstr |= _SW_ZERODIVIDE; + if (sw & ISW_OVERFLOW) + abstr |= _SW_OVERFLOW; + if (sw & ISW_UNDERFLOW) + abstr |= _SW_UNDERFLOW; + if (sw & ISW_INEXACT) + abstr |= _SW_INEXACT; + if (sw & ISW_DENORMAL) + abstr |= _SW_DENORMAL; + + return abstr; +} diff --git a/private/fp32/tran/i386/ldsplit.asm b/private/fp32/tran/i386/ldsplit.asm new file mode 100644 index 000000000..11cc12735 --- /dev/null +++ b/private/fp32/tran/i386/ldsplit.asm @@ -0,0 +1,309 @@ + page ,132 + title ldsplit - split long double +;*** +;ldsplit.asm - split long double into two doubles +; +; Copyright (c) 1992-92, Microsoft Corporation +; +;Purpose: +; Helper for handling 10byte long double quantities if there is no +; compiler support. +; +;Revision History: +; +; 04/21/92 GDP written +; +;******************************************************************************* + +.xlist + include cruntime.inc + include mrt386.inc + include elem87.inc + include os2supp.inc +.list + +.data + +labelB TagTable + ; C2 C1 C0 C3 Meaning + db 2 * 4 ; 0 0 0 0 +Unnormal=> NAN + db 1 * 4 ; 0 0 0 1 +Zero => Zero + db 2 * 4 ; 0 0 1 0 +NAN => NAN + db 2 * 4 ; 0 0 1 1 Empty => NAN + db 2 * 4 ; 0 1 0 0 -Unnormal=> NAN + db 1 * 4 ; 0 1 0 1 -Zero => Zero + db 2 * 4 ; 0 1 1 0 -NAN => NAN + db 2 * 4 ; 0 1 1 1 Empty => NAN + db 0 * 4 ; 1 0 0 0 +Normal => Valid + db 4 * 4 ; 1 0 0 1 +Denormal=> Denormal + db 3 * 4 ; 1 0 1 0 +Infinity=> Infinity + db 2 * 4 ; 1 0 1 1 Empty => NAN + db 0 * 4 ; 1 1 0 0 -Normal => Valid + db 4 * 4 ; 1 1 0 1 -Denormal=> Zero + db 3 * 4 ; 1 1 1 0 -Infinity=> Infinity + db 2 * 4 ; 1 1 1 1 Empty => NAN + +; factor = 2^64 +staticQ factor, 043F0000000000000R + +LDBIAS equ 3fffh +DBIAS equ 3ffh +MAX_BIASED_DEXP equ 7feh + +CODESEG + + + +table: + dd valid + dd zero + dd nan + dd inf + dd denorm + + + +;*** +;int _ldsplit(pld, pd1, pd2) - split long double +; +;Purpose: +; partition a long double quantity ld into two double quantities +; d1, d2 and an integer scaling factror s. The mantissa of d1 has +; the high order word of the mantissa of ld. Respectively, the +; mantissa of d2 has the low order word of the mantissa of ld. +; The following relation should be satisfied: +; +; ld == ((long double)d1 + (long double)d2) * 2^s +; +; s is 0, unless d1 or d2 cannot be expressed as normalized +; doubles; in that case s != 0, and .5 <= d1 < 1 +; +; +;Entry: +; pld pointer to the long double argument +; pd1 pointer to d1 +; pd2 pointer to d2 +; +;Exit: +; *pd1, *pd2 are updated +; return value is equal to s +; +; +;Exceptions: +; This function should raise no IEEE exceptions. +; special cases: +; ld is QNAN or SNAN: d1 = QNAN, d2 = 0, s = 0 +; ls is INF: d1 = INF, d2 = 0, s = 0 +; +; +;******************************************************************************/ + + +_ldsplit proc uses ebx edx edi, pld:dword, pd1:dword, pd2:dword + local ld:tbyte + local exp_adj:dword + local retvalue:dword + local denorm_adj:dword + + mov [retvalue], 0 ; default return value + mov [denorm_adj], 0 + mov ebx, [pld] + fld tbyte ptr [ebx] + fxam + fstsw ax + fstp [ld] ; store to local area + shl ah, 1 + sar ah, 1 + rol ah, 1 + and ah, 0fh + mov al, ah + mov ebx, dataoffset TagTable ; Prepare for XLAT + xlat + movzx eax, al + mov ebx, OFFSET table + add ebx, eax + + mov edx, pd1 ; edx points to the high order double + mov edi, pd2 ; edi points to the low order double + + jmp [ebx] + +lab valid + ; have a valid normalized non-special long double + + mov eax, dword ptr [ld] + or eax, eax + jz d2zero + + ; compute mantissa an exponent for d2 + mov [exp_adj], 31 ; adjustment to be subtracted from exp of *pd2 + + ; + ; compute mantissa of d2 + ; shift left low order word of ld, until a '1' is hit + ; + + cmp eax, 0ffffh + ja shl16done + sal eax, 16 + add [exp_adj], 16 + +lab shl16done + cmp eax, 0ffffffh + ja shl8done + sal eax, 8 + add [exp_adj], 8 + +lab shl8done +lab shiftloop + inc [exp_adj] + sal eax, 1 + jnc shiftloop + + ; now eax contains the mantissa for d2 + ; exp_adj is the difference of the + ; exponents of d1 and d2 + ; exp_adj should be in the range + ; 32 <= exp_adj <= 63 + ; By convention, if exp_adj is 0 then + ; d2 is zero + +lab setd2man + mov dword ptr [edi+4], 0 + shld dword ptr [edi+4], eax, 20 + shl eax, 20 + mov [edi], eax + + ; + ; set mantissa of d1 + ; + +lab setd1man + mov eax, dword ptr [ld+4] + sal eax, 1 ; get rid of explicit bit + mov dword ptr [edx+4], 0 + shld dword ptr [edx+4], eax, 20 + shl eax, 20 + mov [edx], eax + + ; check if exponent is in range + mov ax, word ptr [ld+8] + + and ax, 07fffh ; clear sign bit + movzx eax, ax + + sub eax, LDBIAS - DBIAS + + cmp eax, MAX_BIASED_DEXP + ja expoutofrange + + + cmp eax, [exp_adj] + jb expoutofrange + + + ; + ; set exponent of d1 + ; + +lab setexp1 + mov ebx, eax ; save exp value + shl eax, 20 + or dword ptr [edx+4], eax + + + cmp [exp_adj], 0 + je exp2zero + sub ebx, [exp_adj] + je exp2zero +lab setexp2 + shl ebx, 20 + or dword ptr [edi+4], ebx + mov [retvalue], 0 + + +lab setsign ; set correct signs and return + ; at this point eax contains + ; the return value + mov bx, word ptr [ld+8] + and bx, 1 SHL 15 ; get sign + + or [edi+6], bx ; set sign bit + or [edx+6], bx ; set sign bit + + mov eax, [retvalue] + add eax, [denorm_adj] + ret + + +lab d2zero + mov [exp_adj], 0 + jmp setd2man + +lab exp2zero + mov ebx, 0 + jmp setexp2 + + + +lab expoutofrange + mov ebx, DBIAS + mov ecx, ebx + sub ecx, [exp_adj] + + shl ebx, 20 + or dword ptr [edx+4], ebx + + shl ecx, 20 + or dword ptr [edi+4], ecx + + sub eax, DBIAS ; unbias exp + mov [retvalue], eax ; this is the return value + jmp short setsign + + +lab zero + mov dword ptr [edx], 0 + mov dword ptr [edx+4], 0 + mov dword ptr [edi], 0 + mov dword ptr [edi+4], 0 + jmp setsign + +lab nan + mov dword ptr [edx], 0 + mov dword ptr [edx+4], 07ff80000h + mov dword ptr [edi], 0 + mov dword ptr [edi+4], 0 + jmp setsign + +lab inf + mov dword ptr [edx], 0 + mov dword ptr [edx+4], 07ff00000h + mov dword ptr [edi], 0 + mov dword ptr [edi+4], 0 + jmp setsign + +lab denorm + + ; + ; We have a long double denormal + ; so we know for sure that this is out of the double + ; precision range, and the return value of _ldsplit + ; should be non-zero. + ; Multiply the denormal by 2^64, then adjust the + ; return value by subtracting 64 + ; + + + ; this assumes denormal exception masked + fld [ld] + fmul [factor] + fstp [ld] + mov [denorm_adj], 64 + jmp valid + + + +_ldsplit endp + +end diff --git a/private/fp32/tran/ieeemisc.c b/private/fp32/tran/ieeemisc.c new file mode 100644 index 000000000..62aa2672a --- /dev/null +++ b/private/fp32/tran/ieeemisc.c @@ -0,0 +1,461 @@ +/*** +* ieeemisc.c - IEEE miscellaneous recommended functions +* +* Copyright (c) 1992-1992, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 5-04-92 GDP written +* +*******************************************************************************/ + +#include <trans.h> +#include <math.h> +#include <float.h> + + +/*** +* _copysign - copy sign +* +*Purpose: +* copysign(x,y) returns x with the sign of y. Hence, abs(x) := copysign +* even if x is NaN [IEEE std 854-1987 Appendix] +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* No exceptions, even if one of the arguments is NaN. +* +* (Currently the i386 compiler returns doubles on the fp stack +* so the fld instruction at the end will cause an invalid operation +* if x is NaN. However this compiler calling convention will change +* soon) +* +*******************************************************************************/ + +double _copysign (double x, double y) +{ + double retval; + *D_LO(retval) = *D_LO(x); + *D_HI(retval) = *D_HI(x) & ~(1<<31) | + *D_HI(y) & (1<<31) ; + + return retval; +} + + + +/*** +* _chgsign - change sign +* +*Purpose: +* x is copied with its sign reversed, not 0-x; the distinction is germane +* when x is +0, -0, or NaN +* +*Entry: +* +*Exit: +* +*Exceptions: +* No exceptions, even if x is NaN. +* +* (Currently the i386 compiler returns doubles on the fp stack +* so the fld instruction at the end will cause an invalid operation +* if x is NaN. However this compiler calling convention will change +* soon) +* +*******************************************************************************/ + +double _chgsign (double x) +{ + double retval; + + *D_LO(retval) = *D_LO(x); + *D_HI(retval) = *D_HI(x) & ~(1 << 31) | + ~*D_HI(x) & (1<<31); + + return retval; +} + + +/*** +* _scalb - scale by power of 2 +* +*Purpose: +* _scalb(x,n) returns x * 2^n for integral values of n without +* computing 2^n +* Special case: +* If x is infinity or zero, _scaleb returns x +* +* +*Entry: +* double x +* int n +* +*Exit: +* +*Exceptions: +* Invalid operation, Overflow, Underflow +* +*******************************************************************************/ + +double _scalb(double x, long n) +{ + // + // It turns out that our implementation of ldexp matces the IEEE + // description of _scalb. The only problem with calling ldexp + // is that if an exception occurs, the operation code reported + // to the handler will be the one that corresponds to ldexp + // (i.e., we do not define a new operation code for _scalb + // + + return ldexp(x,n); +} + + + + +/*** +* _logb - extract exponent +* +*Purpose: +* _logb(x) returns the unbiased exponent of x, a signed integer in the +* format of x, except that logb(NaN) is a NaN, logb(+INF) is +INF,and +* logb(0) is is -INF and signals the division by zero exception. +* For x positive and finite, 1<= abs(scalb(x, -logb(x))) < 2 +* +* +*Entry: +* double x +* int n +* +*Exit: +* +*Exceptions: +* Invalid operation, Division by zero +* +*******************************************************************************/ +double _logb(double x) +{ + unsigned int savedcw; + int exp; + double retval; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + case T_NINF: + RETURN(savedcw, x); + case T_QNAN: + return _handle_qnan1(OP_LOGB, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_LOGB, x, _s2qnan(x), savedcw); + } + } + + if (x == 0) { + return _except1(FP_Z, OP_LOGB, x, -D_INF, savedcw); + } + + (void) _decomp(x, &exp); + + // + // x == man * 2^exp, where .5 <= man < 1. According to the spec + // of this function, we should compute the exponent so that + // 1<=man<2, i.e., we should decrement the computed exp by one + // + + retval = (double) (exp - 1); + + RETURN(savedcw, retval); + +} + + + + + +/*** +* _nextafter - next representable neighbor +* +*Purpose: +* _nextafter(x,y) returns the next representable neighbor of x in +* the direction toward y. The following special cases arise: if +* x=y, then the result is x without any exception being signaled; +* otherwise, if either x or y is a quiet NaN, then the result is +* one or the other of the input NaNs. Overflow is sibnaled when x +* is finite but nextafter(x,y) is infinite; underflow is signaled +* when nextafter(x,y) lies strictly between -2^Emin, 2^Emin; in +* both cases, inexact is signaled. +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* O, U, I, P +* +*******************************************************************************/ + +double _nextafter(double x, double y) +{ + unsigned int savedcw; + double result; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){ + if (IS_D_SNAN(x) || IS_D_SNAN(y)){ + return _except2(FP_I,OP_NEXTAFTER,x,y,_d_snan2(x,y),savedcw); + } + if (IS_D_QNAN(x) || IS_D_QNAN(y)){ + return _handle_qnan2(OP_NEXTAFTER,x,y,savedcw); + } + + // + // infinite arguments are not treated as special cases + // + } + + if (y == x) { + + // + // no exceptions are raised in this case + // + + RETURN(savedcw, x); + } + + if (x == 0) { + + *D_LO(result) = 1; + + if (y > x) { + *D_HI(result) = 0; + } + + else { + + // + // result should be negative + // + + *D_HI(result) = (unsigned long)(1<<31); + } + + } + + + // + // At this point x!=y, and x!=0. x can be treated as a 64bit + // integer in sign/magnitude representation. To get the next + // representable neighbor we add or subtract one from this + // integer. (Note that for boundary cases like x==INF, need to + // add one will never occur --this would mean that y should + // be greater than INF, which is impossible) + // + + if (x > 0 && y < x || + x < 0 && y > x) { + + // + // decrease value by one + // + + *D_LO(result) = *D_LO(x) - 1; + *D_HI(result) = *D_HI(x); + + if (*D_LO(x) == 0) { + + // + // a borrow should propagate to the high order dword + // + + (*D_HI(result)) --; + } + } + + else if (x > 0 && y > x || + x < 0 && y < x) { + + // + // increase value by one + // + + *D_LO(result) = *D_LO(x) + 1; + *D_HI(result) = *D_HI(x); + + if (*D_LO(result) == 0) { + + // + // a carry should propagate to the high order dword + // + + (*D_HI(result)) ++; + } + } + + + // + // check if an exception should be raised + // + + + if ( IS_D_DENORM(result) ) { + + // + // should signal underflow and inexact + // and provide a properly scaled value + // + + double mant; + int exp; + + mant = _decomp(result, &exp); + result = _set_exp(mant, exp+IEEE_ADJUST); + + return _except2(FP_U|FP_P,OP_NEXTAFTER,x,y,result,savedcw); + } + + + + if ( IS_D_INF(result) || IS_D_MINF(result) ) { + + // + // should signal overflow and inexact + // and provide a properly scaled value + // + + double mant; + int exp; + + mant = _decomp(result, &exp); + result = _set_exp(mant, exp-IEEE_ADJUST); + + return _except2(FP_O|FP_P,OP_NEXTAFTER,x,y,result,savedcw); + } + + + RETURN(savedcw, result); +} + + + + +/*** +* _finite - +* +*Purpose: +* finite(x) returns the value TRUE if -INF < x < +INF and returns +* false otherwise [IEEE std] +* +*Entry: +* +*Exit: +* +*Exceptions: +* +* This routine is treated as a nonarithmetic operation, therefore +* it does not signal any floating point exceptions +* +*******************************************************************************/ + +int _finite(double x) +{ + if (IS_D_SPECIAL(x)) { + + // + // x is INF or NaN + // + + return 0; + } + return 1; +} + + + + + +/*** +* _isnan - +* +*Purpose: +* isnan(x) returns the value TRUE if x is a NaN, and returns FALSE +* otherwise. +* +* +*Entry: +* +*Exit: +* +*Exceptions: +* +* This routine is treated as a nonarithmetic operation, therefore +* it does not signal any floating point exceptions +* +*******************************************************************************/ + +int _isnan(double x) +{ + if (IS_D_SNAN(x) || IS_D_QNAN(x)) { + return 1; + } + return 0; +} + + + + +/*** +*double _fpclass(double x) - floating point class +* +*Purpose: +* Compute the floating point class of a number, according +* to the recommendations of the IEEE std. 754 +* +*Entry: +* +*Exit: +* +*Exceptions: +* This function is never exceptional, even when the argument is SNAN +* +*******************************************************************************/ + +int _fpclass(double x) +{ + int sign; + + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + return _FPCLASS_PINF; + case T_NINF: + return _FPCLASS_NINF; + case T_QNAN: + return _FPCLASS_QNAN; + default: //T_SNAN + return _FPCLASS_SNAN; + } + } + sign = (*D_EXP(x)) & 0x8000; + if (x == 0.0) + return sign? _FPCLASS_NZ : _FPCLASS_PZ; + + if (IS_D_DENORM(x)) + return sign? _FPCLASS_ND : _FPCLASS_PD; + + return sign? _FPCLASS_NN : _FPCLASS_PN; +} diff --git a/private/fp32/tran/ldexp.c b/private/fp32/tran/ldexp.c new file mode 100644 index 000000000..6eef65120 --- /dev/null +++ b/private/fp32/tran/ldexp.c @@ -0,0 +1,88 @@ +/*** +*ldexp.c - multiply by a power of two +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-13-92 GDP rewritten to support IEEE exceptions +* 5-05-92 GDP bug fix for x denormal +* 07-16-93 SRW ALPHA Merge +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> +#include <limits.h> + +/*** +*double ldexp(double x, int exp) +* +*Purpose: +* Compute x * 2^exp +* +*Entry: +* +*Exit: +* +*Exceptions: +* I U O P +* +*******************************************************************************/ +double ldexp(double x, int exp) +{ + unsigned int savedcw; + int oldexp; + long newexp; /* for checking out of bounds exponents */ + double result, mant; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + case T_NINF: + RETURN(savedcw,x); + case T_QNAN: + return _handle_qnan2(OP_LDEXP, x, (double)exp, savedcw); + default: //T_SNAN + return _except2(FP_I,OP_LDEXP,x,(double)exp,_s2qnan(x),savedcw); + } + } + + + if (x == 0.0) { + RETURN(savedcw,x); + } + + mant = _decomp(x, &oldexp); + + if (ABS(exp) > INT_MAX) + newexp = exp; // avoid possible integer overflow + else + newexp = oldexp + exp; + + + /* out of bounds cases */ + if (newexp > MAXEXP + IEEE_ADJUST) { + return _except2(FP_O|FP_P,OP_LDEXP,x,(double)exp,_copysign(D_INF,mant),savedcw); + } + if (newexp > MAXEXP) { + result = _set_exp(mant, newexp-IEEE_ADJUST); + return _except2(FP_O|FP_P,OP_LDEXP,x,(double)exp,result,savedcw); + } + if (newexp < MINEXP - IEEE_ADJUST) { + return _except2(FP_U|FP_P,OP_LDEXP,x,(double)exp,mant*0.0,savedcw); + } + if (newexp < MINEXP) { + result = _set_exp(mant, newexp+IEEE_ADJUST); + return _except2(FP_U|FP_P,OP_LDEXP,x,(double)exp,result,savedcw); + } + + result = _set_exp(mant, (int)newexp); + + RETURN(savedcw,result); +} diff --git a/private/fp32/tran/log.c b/private/fp32/tran/log.c new file mode 100644 index 000000000..ab3ce76f8 --- /dev/null +++ b/private/fp32/tran/log.c @@ -0,0 +1,126 @@ +/*** +*log.c - logarithmic functions +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Compute log(x) and log10(x) +* +*Revision History: +* 8-15-91 GDP written +* 12-20-91 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _log_hlp( double x, int flag); + +/* constants */ +static double const c0 = 0.70710678118654752440; /* sqrt(0.5) */ +static double const c1 = 0.69335937500000000000; +static double const c2 = -2.121944400546905827679e-4; +static double const c3 = 0.43429448190325182765; + +/* coefficients for rational approximation */ +static double const a0 = -0.64124943423745581147e2 ; +static double const a1 = 0.16383943563021534222e2 ; +static double const a2 = -0.78956112887491257267e0 ; +static double const b0 = -0.76949932108494879777e3 ; +static double const b1 = 0.31203222091924532844e3 ; +static double const b2 = -0.35667977739034646171e2 ; +/* b3=1.0 is not used -avoid multiplication by 1.0 */ + +#define A(w) (((w) * a2 + a1) * (w) + a0) +#define B(w) ((((w) + b2) * (w) + b1) * (w) + b0) + + +/*** +*double log(double x) - natural logarithm +*double log10(double x) - base-10 logarithm +* +*Purpose: +* Compute the natural and base-10 logarithm of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* I P Z +*******************************************************************************/ + +double log10(double x) +{ + return(_log_hlp(x,OP_LOG10)); +} + +double log(double x) +{ + return(_log_hlp(x,OP_LOG)); +} + +static double _log_hlp(double x, int opcode) +{ + unsigned int savedcw; + int n; + double f,result; + double z,w,znum,zden; + double rz,rzsq; + + /* save user fp control word */ + savedcw = _maskfp(); + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw, x); + case T_QNAN: + return _handle_qnan1(opcode, x, savedcw); + case T_SNAN: + return _except1(FP_I, opcode, x, _s2qnan(x), savedcw); + } + /* NINF will be handled in the x<0 case */ + } + + if (x <= 0.0) { + double qnan; + if (x == 0.0) { + return _except1(FP_Z,opcode,x,-D_INF,savedcw); + } + qnan = (opcode == OP_LOG ? QNAN_LOG : QNAN_LOG10); + return _except1(FP_I,opcode,x,qnan,savedcw); + } + + if (x == 1.0) { + // no precision ecxeption + RETURN(savedcw, 0.0); + } + + f = _decomp(x, &n); + + if (f > c0) { + znum = (f - 0.5) - 0.5; + zden = f * 0.5 + 0.5; + } + else { + n--; + znum = f - 0.5; + zden = znum * 0.5 + 0.5; + } + z = znum / zden; + w = z * z; + + rzsq = w * A(w)/B(w) ; + rz = z + z*rzsq; + + result = (n * c2 + rz) + n * c1; + if (opcode == OP_LOG10) { + result *= c3; + } + + RETURN_INEXACT1(opcode,x,result,savedcw); +} diff --git a/private/fp32/tran/makefile b/private/fp32/tran/makefile new file mode 100644 index 000000000..6ee4f43fa --- /dev/null +++ b/private/fp32/tran/makefile @@ -0,0 +1,6 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the components of NT OS/2 +# +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/private/fp32/tran/matherr.c b/private/fp32/tran/matherr.c new file mode 100644 index 000000000..1dad36a7c --- /dev/null +++ b/private/fp32/tran/matherr.c @@ -0,0 +1,34 @@ +/*** +*matherr.c - floating point exception handling +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* +*******************************************************************************/ + +#include <math.h> + +int _matherr_flag = 9876; + +/*** +*int _matherr(struct _exception *except) - handle math errors +* +*Purpose: +* Permits the user customize fp error handling by redefining +* this function. +* The default matherr does nothing and returns 0 +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ +int _matherr(struct _exception *except) +{ + return 0; +} diff --git a/private/fp32/tran/mips/acost.c b/private/fp32/tran/mips/acost.c new file mode 100644 index 000000000..df23d9ffb --- /dev/null +++ b/private/fp32/tran/mips/acost.c @@ -0,0 +1,150 @@ +#include <stdio.h> +#include <errno.h> +#include <excpt.h> +#include <math.h> + +#ifndef FALSE +#define FALSE 0 +#define TRUE 1 +#endif + +#define NUM_KNOWN_VALUES (sizeof(known_values) / (sizeof(double) * 2)) + +#define HIVALUE 1000.0 +#define LOVALUE -1000.0 +#define XXVALUE 10.0 +#define RZERO 1.5707963267948966 +#define PI 3.14159265358979323846 + +#define D_EXP(x) ((unsigned short *)&(x)+3) +#define D_HI(x) ((unsigned long *)&(x)+1) +#define D_LO(x) ((unsigned long *)&(x)) +#define D_IND_HI 0x7ff7ffff +#define D_IND_LO 0xffffffff +#define SET_DBL(msw, lsw) lsw, msw +#define D_ISINF(x) ((*D_HI(x) & 0x7fffffff) == 0x7ff00000 && *D_LO(x) == 0) +#define IS_D_SPECIAL(x) ((*D_EXP(x) & 0x7ff0) == 0x7ff0) +#define IS_D_NAN(x) (IS_D_SPECIAL(x) && !D_ISINF(x)) + +typedef union { + unsigned long ul[2]; + double dbl; + } _dbl; + +/* +_dbl _d_inf = {SET_DBL (0x7ff00000, 0x0) }; //positive infinity +_dbl _d_ind = {SET_DBL (D_IND_HI, D_IND_LO)}; //real indefinite +_dbl _d_max = {SET_DBL (0x7fefffff, 0xffffffff)}; //max double +_dbl _d_min = {SET_DBL (0x00100000, 0x00000000)}; //min normalized double +_dbl _d_mzero = {SET_DBL (0x80000000, 0x00000000)}; //negative zero +*/ + +extern _dbl _d_inf; +extern _dbl _d_ind; +extern _dbl _d_max; +extern _dbl _d_min; +extern _dbl _d_mzero; + + +void main() +{ + double result, value; + double known_values[9][2] = { {-1.0, PI}, {-0.75, 2.4188584057763776}, {-0.50, 2.0943951023931957} , {-0.25, 1.8234765819369751}, {0.0, 1.5707963267948966}, {0.25, 1.318116071652818}, {0.50, 1.0471975511965976} , {0.75, 0.72273424781341566}, {1.0, 0.0} }; + unsigned long loword, hiword; + int i, k; + + printf("\n\n"); + + k = 0; + + _fpreset(); + + /* be sure to test all known cases */ + + for (i = 0; i < NUM_KNOWN_VALUES; i++) { + result = acos(known_values[i][0]); + loword = (unsigned long) result; + hiword = *(((unsigned long*)&result)+1); + if (result != known_values[i][1]) { + printf("acos(%16.16g) != %16.16g, actual = %16.16g (0x%8.8x%8.8x)\n", known_values[i][0], known_values[i][1], result, hiword, loword); + k++; + } + } + + /* test a large range */ + for (value = LOVALUE; HIVALUE >= value ; value += XXVALUE) { + int known = FALSE; + + result = acos(value); + loword = (unsigned long) result; + hiword = *(((unsigned long*)&result)+1); + for (i = 0; i < NUM_KNOWN_VALUES; i++) { + if (known_values[i][0] == value) { + if (known_values[i][1] != result) { + printf("acos(%e) != %e, actual = %e (0x%8.8x%8.8x)\n", known_values[i][0], known_values[i][1], result, hiword, loword); + k++; + } + known = TRUE; + break; + } + } + + if (!known) { + if (value < -1.0 || value > 1.0) { + if (loword != _d_ind.ul[0] || hiword != _d_ind.ul[1]) { + printf("acos(%e) != %e, actual = %e (0x%8.8x%8.8x)\n", value, _d_ind.dbl, result, hiword, loword); + k++; + } + } else { + printf("UNKNOWN VALUE: acos(%e) = %e\n", value, result); + } + } + } + + /* special case tests */ + for (value = -1.00; 1.0 >= value ; value += 0.25) { + int known = FALSE; + + result = acos(value); + loword = (unsigned long) result; + hiword = *(((unsigned long*)&result)+1); + for (i = 0; i < NUM_KNOWN_VALUES; i++) { + if (known_values[i][0] == value) { + if (known_values[i][1] != result) { + printf("acos(%e) != %e, actual = %e (0x%8.8x%8.8x)\n", known_values[i][0], known_values[i][1], result, hiword, loword); + k++; + } + known = TRUE; + break; + } + } + if (!known) + printf("UNKNOWN VALUE: acos(%e) = %e\n", value, result); + } + + if (k) { + printf("\tacos failed %d tests...\n", k); + } else { + printf("\tacos passed all tests...\n"); + } + + /* hard coded tests */ + printf("\n\n"); + value = _d_inf.dbl; + result = acos(value); + printf("acos(%e) = %e, expected %e\n", value, result, _d_ind.dbl); + value = _d_ind.dbl; + result = acos(value); + printf("acos(%e) = %e, expected %e\n", value, result, _d_ind.dbl); + value = _d_max.dbl; + result = acos(value); + printf("acos(%e) = %e, expected %e\n", value, result, _d_ind.dbl); + value = _d_min.dbl; + result = acos(value); + printf("acos(%e) = %e, expected %e\n", value, result, RZERO); + value = _d_mzero.dbl; + result = acos(value); + printf("acos(%e) = %e, expected %e\n", value, result, RZERO); + printf("\n\n"); + +} diff --git a/private/fp32/tran/mips/asincosm.s b/private/fp32/tran/mips/asincosm.s new file mode 100644 index 000000000..69102fde1 --- /dev/null +++ b/private/fp32/tran/mips/asincosm.s @@ -0,0 +1,207 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: asincos.s,v 3000.7.1.10 92/01/29 15:51:20 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> +#include <trans.h> + +.extern _except1 +.extern _d_ind 8 + +#define OP_ACOS 13 // _FpCodeAcos from crt32\h\fpieee.h, <trans.h> +#define OP_ASIN 14 // _FpCodeAsin from crt32\h\fpieee.h, <trans.h> + +#define half 0.5 +#define eps 3.72529029846191406250e-9 +#define one 1.0 +#define p5 -0.69674573447350646411e+0 +#define p4 +0.10152522233806463645e+2 +#define p3 -0.39688862997504877339e+2 +#define p2 +0.57208227877891731407e+2 +#define p1 -0.27368494524164255994e+2 +#define q4 -0.23823859153670238830e+2 +#define q3 +0.15095270841030604719e+3 +#define q2 -0.38186303361750149284e+3 +#define q1 +0.41714430248260412556e+3 +#define q0 -0.16421096714498560795e+3 +#define pio2 1.57079632679489661923 +#define pi 3.14159265358979323846 + + +#define FSIZE 48 +.text .text$asincosm +.globl acos +.ent acos +acos: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + + /* + * Clear all bits in fsr to avoid side effects (including flag bits). + * This is the same as calling _maskfp() and clearing flag bits. + * 'Save' the callers fsr in v0 to restore upon exit. + */ + + cfc1 v0, $31 + ctc1 $0, $31 + + li.d $f8, half + abs.d $f14, $f12 + c.ole.d $f14, $f8 + li.d $f10, eps + li t7, OP_ACOS + bc1f acos2 + c.lt.d $f14, $f10 + mov.d $f0, $f12 + bc1t acos1 + mul.d $f2, $f12, $f12 + bal asincos2 +acos1: + li.d $f8, pio2 + sub.d $f0, $f8, $f0 + b acosx +acos2: + bal asincos1 + bltz t1, acos3 + neg.d $f0 + b acosx +acos3: + li.d $f8, pi + add.d $f0, $f8 +acosx: + /* restore callers fsr and return */ + ctc1 v0, $31 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end acos + + +.text .text$asincosm +.globl asin +.ent asin +asin: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + + /* + * Clear all bits in fsr to avoid side effects (including flag bits). + * This is the same as calling _maskfp() and clearing flag bits. + * 'Save' the callers fsr in v0 to restore upon exit. + */ + + cfc1 v0, $31 + ctc1 $0, $31 + + li.d $f8, half + abs.d $f14, $f12 + c.ole.d $f14, $f8 + li.d $f10, eps + li t7, OP_ASIN + bc1f asin2 + c.lt.d $f14, $f10 + mov.d $f0, $f12 + mul.d $f2, $f12, $f12 + bc1t asinx + bal asincos2 + b asinx + +asin2: + bal asincos1 + li.d $f8, pio2 + add.d $f0, $f8 + bgez t1, asinx + +asin3: + neg.d $f0 + +asinx: + /* restore callers fsr and return */ + ctc1 v0, $31 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra + +asincos1: + li.d $f10, one + mfc1 t1, $f13 + c.ole.d $f14, $f10 + sub.d $f0, $f10, $f14 + mul.d $f2, $f0, $f8 + bc1f error + sqrt.d $f0, $f2 + add.d $f0, $f0 + neg.d $f0 + /* fall through */ +asincos2: + li.d $f8, p5 + li.d $f10, q4 + mul.d $f4, $f2, $f8 + add.d $f6, $f2, $f10 + li.d $f8, p4 + mul.d $f6, $f2 + add.d $f4, $f8 + li.d $f10, q3 + mul.d $f4, $f2 + add.d $f6, $f10 + li.d $f8, p3 + mul.d $f6, $f2 + add.d $f4, $f8 + li.d $f10, q2 + mul.d $f4, $f2 + add.d $f6, $f10 + li.d $f8, p2 + mul.d $f6, $f2 + add.d $f4, $f8 + li.d $f10, q1 + mul.d $f4, $f2 + add.d $f6, $f10 + li.d $f8, p1 + mul.d $f6, $f2 + add.d $f4, $f8 + li.d $f10, q0 + mul.d $f4, $f2 + add.d $f6, $f10 + + div.d $f4, $f6 + mul.d $f4, $f0 + add.d $f0, $f4 + + /* return to asin/acos */ + j ra + +error: // |x| > 1 + li $4, FP_I // exception mask + move $5, t7 // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + l.d $f0, _d_ind + s.d $f0, 16(sp) // default result (indefinite) + xor v0, v0, 0xf80 // inverse exception enable bits of + sw v0, 24(sp) // ... callers fsr to pass to _except1 + jal _except1 + lw ra, FSIZE-4(sp) // jump back to asin/acos caller + addu sp, FSIZE + j ra + +#undef FSIZE +.end asin diff --git a/private/fp32/tran/mips/atanm.s b/private/fp32/tran/mips/atanm.s new file mode 100644 index 000000000..11f7cc0b3 --- /dev/null +++ b/private/fp32/tran/mips/atanm.s @@ -0,0 +1,264 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: atan.s,v 3000.10.1.3 91/07/17 14:05:38 zaineb Exp $ */ + + +/* These functions are based on the 4.3bsd algorithm. */ + +#include <kxmips.h> +#include <trans.h> + +#define OP_ATAN 15 +#define OP_ATAN2 16 + +#define r7_16 0.4375 +#define r11_16 0.6875 +#define r19_16 1.1875 +#define r39_16 2.4375 +#define one 1.0 +#define athfhi 4.6364760900080609352E-1 +#define athflo 4.6249969567426939759E-18 +#define PIo4 7.8539816339744827900E-1 +#define at1fhi 9.8279372324732905408E-1 +#define at1flo -2.4407677060164810007E-17 +#define PIo2 1.5707963267948965580E0 +#define PI 3.1415926535897931160E0 +#define p11 1.6438029044759730479E-2 +#define p10 -3.6700606902093604877E-2 +#define p9 4.9850617156082015213E-2 +#define p8 -5.8358371008508623523E-2 +#define p7 6.6614695906082474486E-2 +#define p6 -7.6919217767468239799E-2 +#define p5 9.0908906105474668324E-2 +#define p4 -1.1111110579344973814E-1 +#define p3 1.4285714278004377209E-1 +#define p2 -1.9999999999979536924E-1 +#define p1 3.3333333333333942106E-1 + + +/* double atan(double y) */ + +.text .text$atanm +.globl atan +.ent atan +atan: + .set noreorder + .frame sp, 0, ra + .prologue 0 + c.un.d $f12, $f12 /* if Y is NaN */ + mfc1 t0, $f13 /* save sign of Y */ + li t7, OP_ATAN + bc1t 91f + li t1, 0 /* sign of X */ + + abs.d $f12 + li.d $f14, one /* X = 1.0 */ + mov.d $f2, $f12 /* Y/X = Y */ + + cfc1 t4, $31 /* save FCSR, set round to nearest */ + ctc1 $0, $31 /* mode and no exceptions */ + + b atan0 + nop + .set reorder +.end atan + +/* double atan2(double y, double x) */ + +.text .text$atanm +.globl atan2 +.ent atan2 +atan2: + .frame sp, 0, ra + .prologue 0 + c.un.d $f12, $f14 /* if either Y or X is NaN */ + mfc1 t0, $f13 /* save signs of Y and X */ + mfc1 t1, $f15 + li t7, OP_ATAN2 + bc1t 90f + abs.d $f12 + abs.d $f14 + + cfc1 t4, $31 /* save FCSR, set round to nearest */ + ctc1 $0, $31 /* mode and no exceptions */ + + div.d $f2, $f12, $f14 /* atan2(y,x) = atan(y/x) */ + li t5, 0x7ff00000<<1 + sll t3, t1, 1 +#define _FPC_CSR_INVALID 0x00000040 + cfc1 t6, $31 /* check if it is a 0/0 */ + and t6, _FPC_CSR_INVALID + beq t3, t5, 80f + bne t6, 0, 78f + +atan0: + /* analyze range of y/x */ + li.d $f6, r19_16 + li.d $f8, r11_16 + c.lt.d $f2, $f6 + li.d $f6, r39_16 + bc1f 30f + c.lt.d $f2, $f8 + li.d $f8, r7_16 + bc1f 20f + c.lt.d $f2, $f8 + bc1f 10f + /* [0, 7/16] */ + li.d $f12, 0.0 + mov.d $f14, $f12 + b 70f +10: /* [7/16,11/16] */ + add.d $f4, $f14, $f14 + add.d $f2, $f12, $f12 + sub.d $f2, $f14 + add.d $f4, $f12 + div.d $f2, $f4 + li.d $f12, athfhi + li.d $f14, athflo + b 70f +20: /* [11/16,19/16] */ + sub.d $f2, $f12, $f14 + add.d $f4, $f12, $f14 + div.d $f2, $f4 + li.d $f12, PIo4 + li.d $f14, 0.0 + b 70f +30: /* >= 19/16 */ + c.lt.d $f2, $f6 + bc1f 40f + /* [19/16,39/16] */ + add.d $f2, $f12, $f12 + add.d $f2, $f12 + add.d $f4, $f14, $f14 + add.d $f2, $f4 + sub.d $f4, $f12, $f14 + add.d $f4, $f4 + sub.d $f4, $f14 + div.d $f2, $f4, $f2 + li.d $f12, at1fhi + li.d $f14, at1flo + b 70f +40: /* >= 39/16 */ + neg.d $f14 + div.d $f2, $f14, $f12 + li.d $f12, PIo2 + li.d $f14, 0.0 + +70: mul.d $f4, $f2, $f2 + li.d $f6, p11 + li.d $f8, p10 + mul.d $f0, $f4, $f6 + add.d $f0, $f8 + li.d $f6, p9 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p8 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p7 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p6 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p5 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p4 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p3 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p2 + mul.d $f0, $f4 + add.d $f0, $f6 + li.d $f6, p1 + mul.d $f0, $f4 + add.d $f0, $f6 + mul.d $f0, $f4 + mul.d $f0, $f2 + sub.d $f0, $f14, $f0 + add.d $f0, $f2 + + ctc1 t4, $31 + li.d $f6, PI + add.d $f0, $f12 + bgez t1, 72f + sub.d $f0, $f6, $f0 +72: + bgez t0, 74f + neg.d $f0 +74: + j ra + +78: /* x = y = 0 */ + j setup_atan2_error + li.d $f0, 0.0 + b 82f + +80: /* x = +-Infinity */ + li.d $f0, 0.0 + sll t2, t0, 1 + bne t2, t5, 82f + /* x = +-Infinity, y = +-Infinity */ + li.d $f0, PIo4 +82: li.d $f6, PI + bgez t1, 84f + sub.d $f0, $f6, $f0 +84: ctc1 t4, $31 + bgez t0, 86f + neg.d $f0 +86: j ra + +90: /* x NaN or y NaN */ + c.eq.d $f12, $f12 + bc1t 92f +91: mov.d $f0, $f12 + j ra +92: mov.d $f0, $f14 + j ra + +.end atan2 + + +.extern _except2 +.extern _d_ind 8 +#define QNAN_ATAN2 D_IND +#define D_IND _d_ind + +.ent setup_atan2_error +setup_atan2_error: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, FP_I // exception mask + move $5, t7 // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + s.d $f14, 16(sp) // arg2 + l.d $f0, QNAN_ATAN2 + s.d $f0, 24(sp) // default result + xor t7, t4, 0xf80 // inverse exception enable bits (t4 = saved FCSR) + sw t7, 32(sp) + jal _except2 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end setup_atan2_error diff --git a/private/fp32/tran/mips/cabsm.s b/private/fp32/tran/mips/cabsm.s new file mode 100644 index 000000000..b08389285 --- /dev/null +++ b/private/fp32/tran/mips/cabsm.s @@ -0,0 +1,41 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: cabs.s,v 3000.5.1.1 91/05/31 14:41:43 bettina Exp $ */ + +/* CABS(Z) + * RETURN THE ABSOLUTE VALUE OF THE DOUBLE PRECISION COMPLEX NUMBER + * + * double cabs(z) + * struct { double r, i;} z; + * { + * double hypot(); + * return hypot(z.r,z.i); + * } + */ + +#include <kxmips.h> + +.text + +.globl _cabs +.ent _cabs +_cabs: + .frame sp, 0, ra + .prologue 0 + mtc1.d $4, $f12 + mtc1.d $6, $f14 + j _hypot +.end _cabs diff --git a/private/fp32/tran/mips/coshm.s b/private/fp32/tran/mips/coshm.s new file mode 100644 index 000000000..1fa3066e9 --- /dev/null +++ b/private/fp32/tran/mips/coshm.s @@ -0,0 +1,111 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 52.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Drive | + * | Sunnyvale, CA 94086 | + * |-----------------------------------------------------------| + */ +/* $Header: cosh.s,v 3000.8.1.5 91/08/19 14:55:38 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + +#ifdef CRTDLL +.extern _HUGE_dll +#define _HUGE _HUGE_dll +#else +.extern _HUGE +#endif + +#define expmax 709.78271289338397 +#define coshmax 710.47586007394386 + +.text + +.globl cosh +.ent cosh +cosh: +#define FSIZE 16 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.d $f10, expmax + abs.d $f12 + c.ole.d $f12, $f10 + bc1f cosh3 + jal exp + li.d $f10, 0.5 + div.d $f2, $f10, $f0 + mul.d $f0, $f10 + add.d $f0, $f2 + j ret1 + +cosh3: + li.d $f6, coshmax + li.d $f8, 0.69316101074218750000 + c.ole.d $f12, $f6 + bc1f error + sub.d $f12, $f8 + jal exp + li.d $f6, 0.13830277879601902638e-4 + mul.d $f2, $f0, $f6 + add.d $f0, $f2 + j ret1 + +error: + // raise Overflow and return +Infinity + jal setup_cosh_err + j ret1 + mfc1 t0, $f13 + sll t0, 1 + srl t0, 20+1 + beq t0, 2047, 1f + li.d $f0, 0.898846567431158e308 + add.d $f0, $f0 + j ret +1: mov.d $f0, $f12 + j ret + +ret1: lw ra, FSIZE-4(sp) +ret: addu sp, FSIZE + j ra +.end cosh +#undef FSIZE + + +.extern _except1 + +.ent setup_cosh_err +setup_cosh_err: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, (FP_O | FP_P) // exception mask + li $5, OP_COSH // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + l.d $f0, _HUGE // api help says return HUGE_VAL for overflow + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end setup_cosh_err diff --git a/private/fp32/tran/mips/dtoul.s b/private/fp32/tran/mips/dtoul.s new file mode 100644 index 000000000..34249a219 --- /dev/null +++ b/private/fp32/tran/mips/dtoul.s @@ -0,0 +1,52 @@ +/*** +* dtoul.s - double to unsigned long conversion +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 3-11-92 GDP written +* +*******************************************************************************/ +#include <kxmips.h> + + +.globl _dtoul + +#define MAX 2.147483647e9 /* 2^31 - 1 */ +#define IMAX 2147483647 +#define UMAX (~0) /* default value for error return */ + +.text + + +.ent _dtoul + +_dtoul: + .frame sp,0,ra + .prologue 0 + + mfc1 t0, $f13 + and t0, (1<<31) + bne t0, 0, 1f + li.d $f4, MAX + c.ule.d $f12, $f4 + bc1f 2f + cvt.w.d $f6, $f12 + mfc1 v0, $f6 + j ra + +2f: sub.d $f12, $f4 + c.ule.d $f12, $f4 + bc1f 1f + cvt.w.d $f6, $f12 + mfc1 v0, $f6 + addu v0, IMAX + j ra + + +1f: li v0, UMAX + j ra + +.end _dtoul diff --git a/private/fp32/tran/mips/expm.s b/private/fp32/tran/mips/expm.s new file mode 100644 index 000000000..36617f19c --- /dev/null +++ b/private/fp32/tran/mips/expm.s @@ -0,0 +1,188 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: exp.s,v 3000.5.1.7 92/01/29 15:51:28 zaineb Exp $ */ + +/* Algorithm from + "Table-driven Implementation of the Exponential Function for + IEEE Floating Point", Peter Tang, Argonne National Laboratory, + December 3, 1987 + as implemented in C by M. Mueller, April 20 1988, Evans & Sutherland. + Coded in MIPS assembler by Earl Killian. + */ + +.globl exp +.extern _exptable + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + +#ifdef CRTDLL +.extern _HUGE_dll +#define _HUGE _HUGE_dll +#else +.extern _HUGE +#endif + +.text + +.ent exp +.extern errno 4 +#define ERANGE 34 +exp: + .frame sp, 0, ra + .prologue 0 + /* argument in f12 */ +.set noreorder + li.d $f10, 709.78271289338397 // expmax + cfc1 t4, $31 // read fp control/status + c.ole.d $f12, $f10 // check if special + li.d $f14, -744.44007192138122 // expmin + bc1f 90f // if NaN, +Infinity, + // or greater than expmax + c.lt.d $f12, $f14 // check for exp(x) = 0 + li t1, -4 + bc1t 80f // if less than expmin + and t1, t4 // rounding mode = nearest +.set reorder + ctc1 t1, $31 // write fp control/status + // argument reduction + li.d $f10, 46.166241308446828 + mul.d $f2, $f12, $f10 + li.d $f14, 2.1660849390173098e-2 + li.d $f10, -2.325192846878874e-12 + cvt.w.d $f2 + mfc1 t0, $f2 + and t1, t0, 31 // region + sra t2, t0, 5 // scale + cvt.d.w $f2 + mul.d $f4, $f2, $f14 + mul.d $f6, $f2, $f10 + sub.d $f4, $f12, $f4 + + add.d $f2, $f4, $f6 + li.d $f10, 1.3888949086377719e-3 + li.d $f14, 8.3333679843421958e-3 + mul.d $f8, $f2, $f10 + add.d $f0, $f8, $f14 + li.d $f10, 4.1666666666226079e-2 + li.d $f14, 1.6666666666526087e-1 + mul.d $f0, $f2 + add.d $f0, $f10 + mul.d $f0, $f2 + add.d $f0, $f14 + li.d $f10, 0.5 + mul.d $f0, $f2 + add.d $f0, $f10 + mul.d $f0, $f2 + mul.d $f0, $f2 + add.d $f0, $f6 + add.d $f0, $f4 + sll t1, 4 + la t1, _exptable(t1) + l.d $f10, 0(t1) + l.d $f14, 8(t1) + add.d $f8, $f10, $f14 + mul.d $f0, $f8 + add.d $f0, $f14 + add.d $f0, $f10 + beq t2, 0, 60f // early out for 0 scale + mfc1 t0, $f1 // get result high word + ctc1 t4, $31 // restore control/status + sll t1, t0, 1 // extract exponent + srl t1, 20+1 // ... + addu t1, t2 // add scale to check for denorm + sll t2, 20 + blez t1, 70f + addu t0, t2 // add scale + mtc1 t0, $f1 // put back in result high word + j ra +60: // scale = 0, just restore control/status and return + ctc1 t4, $31 + j ra +70: // denorm result + addu t2, 64<<20 + addu t0, t2 + mtc1 t0, $f1 + li.d $f2, 5.4210108624275222e-20 + mul.d $f0, $f2 + j ra + +80: // argument < expmin + li.d $f0, 0.0 // should raise underflow + j set_uflow_err + j ra + +90: // raise Overflow and return +Infinity + mfc1 t0, $f13 // extract argument exponent + sll t0, 1 + srl t0, 20+1 + beq t0, 2047, 91f // if NaN or Infinity + li.d $f0, 0.898846567431158e308 + add.d $f0, $f0 // raise Overflow +91: + j set_oflow_err +.end exp + + +.extern _except1 + +.ent set_uflow_err +set_uflow_err: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, (FP_U | FP_P) // exception mask + li $5, OP_EXP // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + li.d $f0, 0.0 // api help says return zero for overflow + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_uflow_err + +.ent set_oflow_err +set_oflow_err: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, (FP_O | FP_P) // exception mask + li $5, OP_EXP // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + l.d $f0, _HUGE // api help says return HUGE_VAL for overflow + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_oflow_err + diff --git a/private/fp32/tran/mips/exptable.s b/private/fp32/tran/mips/exptable.s new file mode 100644 index 000000000..4688e014c --- /dev/null +++ b/private/fp32/tran/mips/exptable.s @@ -0,0 +1,62 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: exptable.s,v 3000.6.1.1 91/05/31 14:42:22 bettina Exp $ */ + +.globl _exptable + +#define _MIPSEL +#ifdef _MIPSEL +# define D(h,l) l,h +#endif +#ifdef _MIPSEB +# define D(h,l) h,l +#endif + +.rdata + .align 3 +_exptable: + .word D(0x3FF00000,0x00000000), D(0x00000000,0x00000000) + .word D(0x3FF059B0,0xD3158540), D(0x3D0A1D73,0xE2A475B4) + .word D(0x3FF0B558,0x6CF98900), D(0x3CEEC531,0x7256E308) + .word D(0x3FF11301,0xD0125B40), D(0x3CF0A4EB,0xBF1AED93) + .word D(0x3FF172B8,0x3C7D5140), D(0x3D0D6E6F,0xBE462876) + .word D(0x3FF1D487,0x3168B980), D(0x3D053C02,0xDC0144C8) + .word D(0x3FF2387A,0x6E756200), D(0x3D0C3360,0xFD6D8E0B) + .word D(0x3FF29E9D,0xF51FDEC0), D(0x3D009612,0xE8AFAD12) + .word D(0x3FF306FE,0x0A31B700), D(0x3CF52DE8,0xD5A46306) + .word D(0x3FF371A7,0x373AA9C0), D(0x3CE54E28,0xAA05E8A9) + .word D(0x3FF3DEA6,0x4C123400), D(0x3D011ADA,0x0911F09F) + .word D(0x3FF44E08,0x60618900), D(0x3D068189,0xB7A04EF8) + .word D(0x3FF4BFDA,0xD5362A00), D(0x3D038EA1,0xCBD7F621) + .word D(0x3FF5342B,0x569D4F80), D(0x3CBDF0A8,0x3C49D86A) + .word D(0x3FF5AB07,0xDD485400), D(0x3D04AC64,0x980A8C8F) + .word D(0x3FF6247E,0xB03A5580), D(0x3CD2C7C3,0xE81BF4B7) + .word D(0x3FF6A09E,0x667F3BC0), D(0x3CE92116,0x5F626CDD) + .word D(0x3FF71F75,0xE8EC5F40), D(0x3D09EE91,0xB8797785) + .word D(0x3FF7A114,0x73EB0180), D(0x3CDB5F54,0x408FDB37) + .word D(0x3FF82589,0x994CCE00), D(0x3CF28ACF,0x88AFAB35) + .word D(0x3FF8ACE5,0x422AA0C0), D(0x3CFB5BA7,0xC55A192D) + .word D(0x3FF93737,0xB0CDC5C0), D(0x3D027A28,0x0E1F92A0) + .word D(0x3FF9C491,0x82A3F080), D(0x3CF01C7C,0x46B071F3) + .word D(0x3FFA5503,0xB23E2540), D(0x3CFC8B42,0x4491CAF8) + .word D(0x3FFAE89F,0x995AD380), D(0x3D06AF43,0x9A68BB99) + .word D(0x3FFB7F76,0xF2FB5E40), D(0x3CDBAA9E,0xC206AD4F) + .word D(0x3FFC199B,0xDD855280), D(0x3CFC2220,0xCB12A092) + .word D(0x3FFCB720,0xDCEF9040), D(0x3D048A81,0xE5E8F4A5) + .word D(0x3FFD5818,0xDCFBA480), D(0x3CDC9768,0x16BAD9B8) + .word D(0x3FFDFC97,0x337B9B40), D(0x3CFEB968,0xCAC39ED3) + .word D(0x3FFEA4AF,0xA2A490C0), D(0x3CF9858F,0x73A18F5E) + .word D(0x3FFF5076,0x5B6E4540), D(0x3C99D3E1,0x2DD8A18B) diff --git a/private/fp32/tran/mips/fabsf.s b/private/fp32/tran/mips/fabsf.s new file mode 100644 index 000000000..7c7a38c92 --- /dev/null +++ b/private/fp32/tran/mips/fabsf.s @@ -0,0 +1,29 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ + +/* $Header: fabsf.s,v 3000.5.1.1 91/05/31 14:42:32 bettina Exp $ */ +#include <kxmips.h> + +/* fabsf - single-precision floating absolute value */ + +.globl fabsf +.ent fabsf +fabsf: + .frame sp,0,ra + .prologue 0 + abs.s $f0,$f12 + j ra +.end fabsf diff --git a/private/fp32/tran/mips/fabsm.s b/private/fp32/tran/mips/fabsm.s new file mode 100644 index 000000000..5c8f90cfe --- /dev/null +++ b/private/fp32/tran/mips/fabsm.s @@ -0,0 +1,31 @@ + +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ + +/* $Header: fabs.s,v 3000.2.1.1 91/05/31 14:42:29 bettina Exp $ */ + +#include <kxmips.h> + +/* fabs - floating absolute value */ + +.globl fabs +.ent fabs +fabs: + .frame sp,0,ra + .prologue 0 + abs.d $f0,$f12 + j ra +.end fabs diff --git a/private/fp32/tran/mips/fasincos.s b/private/fp32/tran/mips/fasincos.s new file mode 100644 index 000000000..1ffb5743a --- /dev/null +++ b/private/fp32/tran/mips/fasincos.s @@ -0,0 +1,150 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fasincos.s,v 3000.7.1.3 91/08/01 18:34:39 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define half 0.5 +#define eps 3.72529029846191406250e-9 +#define one 1.0 +#define p2 -0.504400557e+0 +#define p1 +0.933935835e+0 +#define q1 -0.554846723e+1 +#define q0 +0.560363004e+1 +#define pio2 1.57079632679489661923 +#define pi 3.14159265358979323846 + +#define FSIZE 40 + +.text .text$fasincos +.globl acosf +.ent acosf +acosf: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.s $f8, half + abs.s $f14, $f12 + c.le.s $f14, $f8 + li.s $f10, eps + bc1f acosf2 + c.lt.s $f14, $f10 + mov.s $f0, $f12 + bc1t acosf1 + mul.s $f2, $f12, $f12 + bal fasincos2 +acosf1: + li.s $f8, pio2 + sub.s $f0, $f8, $f0 + b acosfret +acosf2: + bal fasincos1 + bltz t1, acosf3 + neg.s $f0 + b acosfret +acosf3: + li.s $f8, pi + add.s $f0, $f8 +acosfret: + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end acosf + +.text .text$fasincos +.globl asinf +.ent asinf +asinf: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.s $f8, half + abs.s $f14, $f12 + c.ole.s $f14, $f8 + li.s $f10, eps + bc1f asinf1 + c.lt.s $f14, $f10 + mul.s $f2, $f12, $f12 + mov.s $f0, $f12 + bc1t asinfret + + bal fasincos2 + b asinfret + +asinf1: + bal fasincos1 + li.s $f8, pio2 + add.s $f0, $f8 + bgez t1, asinfret + +asinf2: + neg.s $f0 + b asinfret +asinfret: + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end asinf + + +.text .text$fasincos +.ent fasincos1 +.aent fasincos2 +fasincos1: + .frame sp, 0, ra + .prologue 0 + li.s $f10, one + mfc1 t1, $f12 + c.ole.s $f14, $f10 + sub.s $f0, $f10, $f14 + mul.s $f2, $f0, $f8 + bc1f error + sqrt.s $f0, $f2 + add.s $f0, $f0 + neg.s $f0 + /* fall through */ +fasincos2: + li.s $f8, p2 + li.s $f10, q1 + mul.s $f4, $f2, $f8 + add.s $f6, $f2, $f10 + li.s $f8, p1 + mul.s $f6, $f2 + add.s $f4, $f8 + li.s $f10, q0 + mul.s $f4, $f2 + add.s $f6, $f10 + + div.s $f4, $f6 + mul.s $f4, $f0 + add.s $f0, $f4 + j ra + +error: // |x| > 1 + c.un.s $f12, $f12 // if x = NaN, return x + li.s $f0, 0.0 // else generate a NaN + bc1t 1f + div.s $f0, $f0 + j ra +1: + mov.s $f0, $f12 + j ra +.end fasincos1 diff --git a/private/fp32/tran/mips/fatan.s b/private/fp32/tran/mips/fatan.s new file mode 100644 index 000000000..15521a970 --- /dev/null +++ b/private/fp32/tran/mips/fatan.s @@ -0,0 +1,128 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fatan.s,v 3000.5.1.2 91/05/31 14:42:44 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + + +#define mpio2 -1.57079632679489661923 +#define pio6 0.52359877559829887308 +#define p1 -0.5090958253e-1 +#define p0 -0.4708325141e+0 +#define q0 0.1412500740e+1 +#define one 1.0 +#define twomr3 0.26794919243112270647 +#define sqrt3 1.73205080756887729353 +#define sqrt3m1 0.73205080756887729353 +#define pi 3.14159265358979323846 + + +.text .text$fatan +.globl atan2f +.ent atan2f +atan2f: + .frame sp, 0, t3 + .prologue 0 + cvt.d.s $f12 + cvt.d.s $f14 + abs.d $f0, $f12 + abs.d $f2, $f14 + c.le.d $f0, $f2 + mfc1 t0, $f13 # save signs of both operands + mfc1 t1, $f15 # ... + move t3, ra + bc1t fatan21 + div.d $f0, $f2, $f0 + li.d $f2, mpio2 + b fatan22 +fatan21: + beq t1, 0, fatan2z + div.d $f0, $f2 + li.d $f2, 0.0 +fatan22: + li.d $f10, twomr3 + bal atan1 + bge t1, 0, fatan23 + li.s $f2, pi + sub.s $f0, $f2, $f0 +fatan23: + bge t0, 0, fatan24 + neg.s $f0 + j t3 +fatan24: + j t3 + +fatan2z: + /* break 0 */ + j ra +.end atan2f + + +.text .text$fatan +.globl atanf +.ent atanf +.aent fatan +atanf: +fatan: + .frame sp, 0, t3 + .prologue 0 + mfc1 t0, $f12 + move t3, ra + abs.s $f0, $f12 + bge t0, 0, atan0 + bal atan0 + neg.s $f0 + j t3 + +atan0: li.d $f14, one + li.d $f10, twomr3 + cvt.d.s $f0 + c.le.d $f0, $f14 + li.d $f2, 0.0 + bc1t atan1 + div.d $f0, $f14, $f0 + li.d $f2, mpio2 +atan1: c.le.d $f0, $f10 + li.d $f14, sqrt3m1 + bc1t atan2 + li.d $f10, sqrt3 + mul.d $f6, $f0, $f14 + add.d $f4, $f0, $f10 + li.d $f14, one + sub.d $f6, $f14 + add.d $f0, $f6 + li.d $f14, pio6 + div.d $f0, $f4 + add.d $f2, $f14 +atan2: mul.d $f6, $f0, $f0 + li.d $f14, p1 + li.d $f10, p0 + mul.d $f4, $f6, $f14 + add.d $f4, $f10 + li.d $f14, q0 + mul.d $f12, $f4, $f6 + add.d $f4, $f6, $f14 + div.d $f4, $f12, $f4 + mul.d $f4, $f0 + add.d $f0, $f4 + mfc1 t4, $f3 + add.d $f0, $f2 + cvt.s.d $f0 + bge t4, 0, atan4 + neg.s $f0 +atan4: j ra +.end atanf diff --git a/private/fp32/tran/mips/fcosh.s b/private/fp32/tran/mips/fcosh.s new file mode 100644 index 000000000..dce85e721 --- /dev/null +++ b/private/fp32/tran/mips/fcosh.s @@ -0,0 +1,76 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fcosh.s,v 3000.5.1.2 91/05/31 14:42:52 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define expmax 88.7228317 +#define coshmax 89.4159851 + +.text + +.globl fcosh +.globl coshf +.ent fcosh +.aent coshf +fcosh: +coshf: + .frame sp, 16, ra + subu sp, 16 + sw ra, 16(sp) + .prologue 1 + li.s $f10, 88.7228317 + abs.s $f12 + c.ole.s $f12, $f10 + bc1f fcosh3 + jal fexp + li.s $f10, 0.5 + div.s $f2, $f10, $f0 + mul.s $f0, $f10 + add.s $f0, $f2 + j ret1 + +fcosh3: + li.s $f6, coshmax + li.s $f8, 0.69316101074218750000 + c.ole.s $f12, $f6 + bc1f error + sub.s $f12, $f8 + jal fexp + li.s $f6, 0.13830277879601902638e-4 + mul.s $f2, $f0, $f6 + add.s $f0, $f2 + j ret1 + +error: + // raise Overflow and return +Infinity + mfc1 t0, $f12 + sll t0, 1 + srl t0, 23+1 + beq t0, 255, 1f + li.s $f0, 2e38 + add.s $f0, $f0 + j ret +1: mov.s $f0, $f12 + j ret + +ret1: + lw ra, 16(sp) +ret: + addu sp, 16 + j ra +.end fcosh diff --git a/private/fp32/tran/mips/fexp.s b/private/fp32/tran/mips/fexp.s new file mode 100644 index 000000000..844cb991b --- /dev/null +++ b/private/fp32/tran/mips/fexp.s @@ -0,0 +1,85 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fexp.s,v 3000.4.1.2 91/05/31 14:42:59 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + + +.text + +.globl fexp +.globl expf +.ent fexp +.aent expf +fexp: +expf: + .frame sp, 0, ra + .prologue 0 + li.s $f6, 88.7228317 + li.s $f8, -103.2789299019278 + c.ole.s $f12, $f6 + li.s $f6, 1.4426950408889634074 + bc1f fexpovfl + c.lt.s $f12, $f8 + li.d $f8, 0.6931471805599453094172321 + bc1t fexpunfl + mul.s $f2, $f12, $f6 + cvt.w.s $f4, $f2 + cvt.d.w $f2, $f4 + mfc1 t0, $f4 + // check for t0 = 0? + mul.d $f2, $f8 + cvt.d.s $f12 + sub.d $f12, $f2 + + mul.d $f4, $f12, $f12 + li.d $f6, 0.41602886268e-2 + li.d $f8, 0.49987178778e-1 + mul.d $f0, $f4, $f6 + li.d $f6, 0.24999999950e+0 + mul.d $f2, $f4, $f8 + add.d $f0, $f6 + li.d $f8, 0.5 + mul.d $f0, $f12 + add.d $f2, $f8 + sub.d $f2, $f0 + /*li.d $f8, 0.5*/ + div.d $f0, $f2 + add.d $f0, $f8 + mfc1 t1, $f1 + addu t0, 1 + sll t0, 20 + addu t1, t0 + mtc1 t1, $f1 + cvt.s.d $f0 + j ra + +fexpovfl: + // raise Overflow and return +Infinity + mfc1 t0, $f12 + sll t0, 1 + srl t0, 23+1 + beq t0, 255, 1f + li.s $f0, 2e38 + add.s $f0, $f0 + j ra +1: mov.s $f0, $f12 + j ra +fexpunfl: + li.s $f0, 0.0 + j ra +.end fexp diff --git a/private/fp32/tran/mips/ffloor.s b/private/fp32/tran/mips/ffloor.s new file mode 100644 index 000000000..3f7747ac9 --- /dev/null +++ b/private/fp32/tran/mips/ffloor.s @@ -0,0 +1,85 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: ffloor.s,v 3000.5.1.2 91/05/31 14:43:08 bettina Exp $ */ + +#include <kxmips.h> + +.text .text$ffloor +.globl ftrunc +.globl truncf +.ent ftrunc +.aent truncf +ftrunc: +truncf: + .frame sp, 0, ra + .prologue 0 + mfc1 t0, $f12 + srl t2, t0, 23 + and t2, 0xFF + sub t2, 127 + bge t2, 0, ftrunc1 + mtc1 $0, $f0 + j ra +ftrunc1: + sub t2, 23 + bge t2, 0, ftrunc2 + neg t2 + srl t0, t2 + sll t0, t2 +ftrunc2: + mtc1 t0, $f0 + j ra +.end ftrunc + +.text .text$ffloor +.globl ffloor +.globl floorf +.ent ffloor +.aent floorf +ffloor: +floorf: + .frame sp, 0, t3 + .prologue 0 + move t3, ra + bal ftrunc + sub.s $f2, $f12, $f0 + mfc1 t0, $f2 + li.s $f2, 1.0 + sll t1, t0, 1 + bge t0, 0, 1f + beq t1, 0, 1f + sub.s $f0, $f2 +1: j t3 +.end ffloor + +.text .text$ffloor +.globl fceil +.globl ceilf +.ent fceil +.aent ceilf +fceil: +ceilf: + .frame sp, 0, t3 + .prologue 0 + move t3, ra + bal ftrunc + sub.s $f2, $f12, $f0 + mfc1 t0, $f2 + li.s $f2, 1.0 + ble t0, 0, 1f + add.s $f0, $f2 +1: j t3 +.end fceil diff --git a/private/fp32/tran/mips/fhypot.s b/private/fp32/tran/mips/fhypot.s new file mode 100644 index 000000000..369150370 --- /dev/null +++ b/private/fp32/tran/mips/fhypot.s @@ -0,0 +1,56 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fhypot.s,v 3000.6.1.2 91/05/31 14:43:11 bettina Exp $ */ + +#include <kxmips.h> + +#define half 0.5 + +/* This entrypoint provided for FCOM for CABS (complex absolute value), + because FCOM has a hard time calling FHYPOT directly. Also used by + FCOM when user writes INTRINSIC CABS. The latter must use pass by + reference, of course. */ +.text .text$fhypot +.globl c_abs_ +.ent c_abs_ +c_abs_: + .frame sp, 0, ra + .prologue 0 + l.s $f12, 0(a0) + l.s $f14, 4(a0) + b hypotf + /* just fall through */ +.end c_abs_ + +.text .text$fhypot +.globl fhypot +.globl hypotf +.ent fhypot +.aent hypotf +fhypot: +hypotf: + .frame sp, 0, ra + .prologue 0 + cvt.d.s $f12 + mul.d $f12, $f12 + cvt.d.s $f14 + mul.d $f14, $f14 + add.d $f12, $f14 + sqrt.d $f0, $f12 + cvt.s.d $f0 + j ra + +.end fhypot diff --git a/private/fp32/tran/mips/filter.c b/private/fp32/tran/mips/filter.c new file mode 100644 index 000000000..c582beb75 --- /dev/null +++ b/private/fp32/tran/mips/filter.c @@ -0,0 +1,623 @@ +/*** +* filter.c - IEEE filter routine +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Provide a user interface for IEEE fp exception handling +* +*Revision History: +* 3-10-92 GDP written +* +*******************************************************************************/ + +#include <trans.h> +#include <fpieee.h> +#include <excpt.h> +#include <nt.h> + + +#define FPREG 32 /* fp reg's have numbers from 32 to 64 */ +#define SUBCODE_CT 6 /* subcode for the CTC1 instruction */ + + +// +// Define floating status register bit masks. +// + +#define _FSR_ROUND 0x3 +#define _FSR_RN 0x0 +#define _FSR_RZ 0x1 +#define _FSR_RP 0x2 +#define _FSR_RM 0x3 + +#define _FSR_SI (1<<0x2) +#define _FSR_SU (1<<0x3) +#define _FSR_SO (1<<0x4) +#define _FSR_SZ (1<<0x5) +#define _FSR_SV (1<<0x6) +#define _FSR_EI (1<<0x7) +#define _FSR_EU (1<<0x8) +#define _FSR_EO (1<<0x9) +#define _FSR_EZ (1<<0xa) +#define _FSR_EV (1<<0xb) +#define _FSR_XI (1<<0xc) +#define _FSR_XU (1<<0xd) +#define _FSR_XO (1<<0xe) +#define _FSR_XZ (1<<0xf) +#define _FSR_XV (1<<0x10) +#define _FSR_XE (1<<0x11) +#define _FSR_CC (1<<0x17) +#define _FSR_FS (1<<0x18) + +#define _FSR_X (_FSR_XI|_FSR_XU|_FSR_XO|_FSR_XZ|_FSR_XV|_FSR_XE) +#define _FSR_E (_FSR_EI|_FSR_EU|_FSR_EO|_FSR_EZ|_FSR_EV) + + + +ULONG _get_destreg( + unsigned long code, PEXCEPTION_POINTERS p + ); + + +_FPIEEE_FORMAT _FindDestFormat(MIPS_INSTRUCTION *inst); + + +/*** +* _fpieee_flt - IEEE fp filter routine +* +*Purpose: +* Invokes the user's trap handler on IEEE fp exceptions and provides +* it with all necessary information +* +*Entry: +* unsigned long exc_code: the NT exception code +* PEXCEPTION_POINTERS p: a pointer to the NT EXCEPTION_POINTERS struct +* int handler (_FPIEEE_RECORD *): a user supplied ieee trap handler +* +*Exit: +* returns the value returned by handler +* +*Exceptions: +* +*******************************************************************************/ +int _fpieee_flt(unsigned long exc_code, + PEXCEPTION_POINTERS p, + int handler (_FPIEEE_RECORD *)) +{ + PEXCEPTION_RECORD pexc; + PCONTEXT pctxt; + _FPIEEE_RECORD ieee; + ULONG *pinfo; + MIPS_INSTRUCTION *instruction; + int format,fs,ft,fd,function; + int fsr,i,ret; + + + /* + * If the exception is not an IEEE exception, continue search + * for another handler + */ + + + if (exc_code != STATUS_FLOAT_DIVIDE_BY_ZERO && + exc_code != STATUS_FLOAT_INEXACT_RESULT && + exc_code != STATUS_FLOAT_INVALID_OPERATION && + exc_code != STATUS_FLOAT_OVERFLOW && + exc_code != STATUS_FLOAT_UNDERFLOW) { + + return EXCEPTION_CONTINUE_SEARCH; + } + + + + pexc = p->ExceptionRecord; + pinfo = pexc->ExceptionInformation; + pctxt = p->ContextRecord; + + // mask all exceptions + + _set_fsr(_get_fsr() & ~_FSR_E); + + /* + * Check for software generated exception + * By convention ExceptionInformation[0] is 0 for h/w exceptions, + * or contains a pointer to an _FPIEEE_RECORD for s/w exceptions + */ + + if (pexc->ExceptionInformation[0]) { + + /* + * we have a software exception: + * the first parameter points to the IEEE structure + */ + + return handler((_FPIEEE_RECORD *)(pinfo[0])); + + } + + + /* + * If control reaches here, then we have to deal with a + * hardware exception + * + * MIPS FP coprocessor has 4 types of instruction formats: + * + * Instr. IEEE Exception + * Inv Pr Ov Un Zr + * I-type: + * LWC1 + * LDC1 + * SWC1 + * SDC1 + * B-type: + * BC1T + * BC1F + * BC1TL + * BC1FL + * M-type: + * MTC1 + * MFC1 + * CTC1 I P O U Z + * CFC1 + * R-type: + * ADD I P O U + * SUB I P O U + * MUL I P O U + * DIV I P O U Z + * SQRT I P + * ABS I + * MOV + * NEG I + * CVT I P O U + * C I + * ROUND I P O + * TRUNC I P O + * CEIL I P O + * FLOOR I P O + * + * + * R-type instruction format: + * + * 31 0 + * ------------------------------------------------ + * | op | sub | ft | fs | fd |function | + * ------------------------------------------------ + * 6 5 5 5 5 6 + * + * field fd specifies the destination register + * + * + * CTC1 will not be handled by the IEEE filter routine. This is + * because the CTC1 instruction does not correspond to a numerical + * operation and it also may also generate multiple exceptions + * + */ + + + /* get the instruction that faulted */ + + instruction = (MIPS_INSTRUCTION *)(pexc->ExceptionAddress); + + /* check for CTC1 instruction */ + + if (instruction->r_format.Rs == SUBCODE_CT) { + return EXCEPTION_CONTINUE_SEARCH; + } + + + /* + * Set floating point operation code + */ + + switch (function = instruction->c_format.Function) { + case FLOAT_ADD: + ieee.Operation = _FpCodeAdd; + break; + case FLOAT_SUBTRACT: + ieee.Operation = _FpCodeSubtract; + break; + case FLOAT_MULTIPLY: + ieee.Operation = _FpCodeMultiply; + break; + case FLOAT_DIVIDE: + ieee.Operation = _FpCodeDivide; + break; + case FLOAT_SQUARE_ROOT: + ieee.Operation = _FpCodeSquareRoot; + break; + case FLOAT_ABSOLUTE: + ieee.Operation = _FpCodeFabs; + break; + case FLOAT_NEGATE: + ieee.Operation = _FpCodeNegate; + break; + case FLOAT_ROUND_LONGWORD: + ieee.Operation = _FpCodeRound; + break; + case FLOAT_TRUNC_LONGWORD: + ieee.Operation = _FpCodeTruncate; + break; + case FLOAT_CEIL_LONGWORD: + ieee.Operation = _FpCodeCeil; + break; + case FLOAT_FLOOR_LONGWORD: + ieee.Operation = _FpCodeFloor; + break; + case FLOAT_CONVERT_SINGLE: + case FLOAT_CONVERT_DOUBLE: + case FLOAT_CONVERT_LONGWORD: + ieee.Operation = _FpCodeConvert; + break; + default: + + /* + * there are 16 different function codes for + * floating point comparisons + */ + + if (function >= FLOAT_COMPARE && + function <= FLOAT_COMPARE + 15) { + ieee.Operation = _FpCodeCompare; + } + else { + ieee.Operation = _FpCodeUnspecified; + } + break; + } + + + switch ( instruction->c_format.Format ) { + case FORMAT_SINGLE: + format = _FpFormatFp32; + break; + case FORMAT_DOUBLE: + format = _FpFormatFp64; + break; + case FORMAT_WORD: + format = _FpFormatI32; + break; + } + + fs = instruction->c_format.Fs + FPREG; + ft = instruction->c_format.Ft + FPREG; + fd = instruction->c_format.Fd + FPREG; + + ieee.Operand1.OperandValid = 1; + ieee.Operand1.Format = format; + *(ULONG *)&ieee.Operand1.Value = _GetRegisterValue(fs, pctxt); + if (instruction->c_format.Format == FORMAT_DOUBLE) { + *(1+(ULONG *)&ieee.Operand1.Value) = _GetRegisterValue(fs+1, pctxt); + } + + /* + * add, subtract, mul, div, and compare instructions + * take two operands. The first four of these instructions + * have consecutive function codes + */ + + if (function >= FLOAT_ADD && function <= FLOAT_DIVIDE || + function >= FLOAT_COMPARE && function <= FLOAT_COMPARE + 15) { + + ieee.Operand2.OperandValid = 1; + ieee.Operand2.Format = format; + *(ULONG *)&ieee.Operand2.Value = _GetRegisterValue(ft, pctxt); + if (instruction->c_format.Format == FORMAT_DOUBLE) { + *(1+(ULONG *)&ieee.Operand2.Value) = _GetRegisterValue(ft+1, pctxt); + } + } + else { + + ieee.Operand2.OperandValid = 0; + } + + + + /* + * NT provides the IEEE result in the exception record + * in the following form: + * + * pinfo[0] NULL + * pinfo[1] continuation address + * pinfo[2] \ + * ... > IEEE result (_FPIEEE_VALUE) + * pinfo[6] / + */ + + for (i=0;i<5;i++) { + ieee.Result.Value.U32ArrayValue.W[i] = pinfo[i+2]; + } + + /* + * Until NT provides a fully qualified type in the exception + * record, fill in the OperandValid and Format fields + * manualy + */ + + ieee.Result.OperandValid = 1; + ieee.Result.Format = _FindDestFormat(instruction); + + + fsr = pctxt->Fsr; + + switch (fsr & _FSR_ROUND) { + case _FSR_RN: + ieee.RoundingMode = _FpRoundNearest; + break; + case _FSR_RZ: + ieee.RoundingMode = _FpRoundChopped; + break; + case _FSR_RP: + ieee.RoundingMode = _FpRoundPlusInfinity; + break; + case _FSR_RM: + ieee.RoundingMode = _FpRoundMinusInfinity; + break; + } + + ieee.Precision = _FpPrecisionFull; + + + ieee.Status.Inexact = fsr & _FSR_SI ? 1 : 0; + ieee.Status.Underflow = fsr & _FSR_SU ? 1 : 0; + ieee.Status.Overflow = fsr & _FSR_SO ? 1 : 0; + ieee.Status.ZeroDivide = fsr & _FSR_SZ ? 1 : 0; + ieee.Status.InvalidOperation = fsr & _FSR_SV ? 1 : 0; + + ieee.Enable.Inexact = fsr & _FSR_EI ? 1 : 0; + ieee.Enable.Underflow = fsr & _FSR_EU ? 1 : 0; + ieee.Enable.Overflow = fsr & _FSR_EO ? 1 : 0; + ieee.Enable.ZeroDivide = fsr & _FSR_EZ ? 1 : 0; + ieee.Enable.InvalidOperation = fsr & _FSR_EV ? 1 : 0; + + ieee.Cause.Inexact = fsr & _FSR_XI ? 1 : 0; + ieee.Cause.Underflow = fsr & _FSR_XU ? 1 : 0; + ieee.Cause.Overflow = fsr & _FSR_XO ? 1 : 0; + ieee.Cause.ZeroDivide = fsr & _FSR_XZ ? 1 : 0; + ieee.Cause.InvalidOperation = fsr & _FSR_XV ? 1 : 0; + + + + /* + * invoke user's handler + */ + + ret = handler(&ieee); + + if (ret == EXCEPTION_CONTINUE_EXECUTION) { + + // + // set the correct continuation address + // (this covers the case of an exception that occured in + // a delay slot), NT passes the cont. address in pinfo[1] + // + + pctxt->Fir = pinfo[1]; + + // + // Sanitize fsr + // + + pctxt->Fsr &= ~_FSR_X; + + // + // Especially for the fp compare instruction + // the result the user's handler has entered + // should be converted into the proper exc_code + // + + if (function >= FLOAT_COMPARE && + function <= FLOAT_COMPARE + 15) { + + // + // Fp comare instruction format: + // + // 31 0 + // ------------------------------------------------- + // | COP1 | fmt | ft | fs | 0 |FC | cond | + // ------------------------------------------------- + // 6 5 5 5 5 2 4 + // + // 'cond' field interpretation: + // bit corresponds to predicate + // cond2 less + // cond1 equal + // cond0 unordered + // + + ULONG condmask, condition; + + switch (ieee.Result.Value.CompareValue) { + case FpCompareEqual: + + // + //less = 0 + //equal = 1 + //unordered = 0 + // + + condmask = 2; + break; + + case FpCompareGreater: + + // + //less = 0 + //equal = 0 + //unordered = 0 + // + + condmask = 0; + break; + + case FpCompareLess: + + // + //less = 1 + //equal = 0 + //unordered = 0 + // + + condmask = 4; + break; + + case FpCompareUnordered: + + // + //less = 0; + //equal = 0; + //unordered = 1; + // + + condmask = 1; + break; + } + + if (*(ULONG *)instruction & condmask) { + + /* + * condition is true + */ + + pctxt->Fsr |= _FSR_CC; + } + + else { + + /* + * condition is false + */ + + pctxt->Fsr &= ~_FSR_CC; + } + + } + + else { + + // + // copy user's result to hardware destination register + // + + _SetRegisterValue(fd,ieee.Result.Value.U32ArrayValue.W[0],pctxt); + + if (instruction->c_format.Format == FORMAT_DOUBLE) { + _SetRegisterValue(fd+1,ieee.Result.Value.U32ArrayValue.W[1],pctxt); + } + } + + // + // make changes in the floating point environment + // take effect on continuation + // + + switch (ieee.RoundingMode) { + case _FpRoundNearest: + pctxt->Fsr = pctxt->Fsr & ~_FSR_ROUND | _FSR_RN & _FSR_ROUND; + break; + case _FpRoundChopped: + pctxt->Fsr = pctxt->Fsr & ~_FSR_ROUND | _FSR_RZ & _FSR_ROUND; + break; + case _FpRoundPlusInfinity: + pctxt->Fsr = pctxt->Fsr & ~_FSR_ROUND | _FSR_RP & _FSR_ROUND; + break; + case _FpRoundMinusInfinity: + pctxt->Fsr = pctxt->Fsr & ~_FSR_ROUND | _FSR_RM & _FSR_ROUND; + break; + } + + // + // the user is allowed to change the exception mask + // ignore changes in the precision field (not supported by MIPS) + // + + if (ieee.Enable.Inexact) + pctxt->Fsr |= _FSR_EI; + if (ieee.Enable.Underflow) + pctxt->Fsr |= _FSR_EU; + if (ieee.Enable.Overflow) + pctxt->Fsr |= _FSR_EO; + if (ieee.Enable.ZeroDivide) + pctxt->Fsr |= _FSR_EZ; + if (ieee.Enable.InvalidOperation) + pctxt->Fsr |= _FSR_EV; + + } + + + return ret; +} + + + +/*** +* _FindDestFormat - Find format of destination +* +*Purpose: +* return the format of the destination of a mips fp instruction +* assumes an R-type instruction that may generate IEEE ecxeptions +* (see table above) +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +_FPIEEE_FORMAT _FindDestFormat(MIPS_INSTRUCTION *inst) +{ + _FPIEEE_FORMAT format; + + switch (inst->c_format.Function) { + case FLOAT_ADD: + case FLOAT_SUBTRACT: + case FLOAT_MULTIPLY: + case FLOAT_DIVIDE: + case FLOAT_SQUARE_ROOT: + case FLOAT_ABSOLUTE: + case FLOAT_NEGATE: + + switch ( inst->c_format.Format ) { + case FORMAT_SINGLE: + format = _FpFormatFp32; + break; + case FORMAT_DOUBLE: + format = _FpFormatFp64; + break; + case FORMAT_WORD: + format = _FpFormatI32; + break; + } + break; + + case FLOAT_CONVERT_SINGLE: + format = _FpFormatFp32; + break; + case FLOAT_CONVERT_DOUBLE: + format = _FpFormatFp64; + break; + case FLOAT_CONVERT_LONGWORD: + format = _FpFormatI32; + break; + + case FLOAT_ROUND_LONGWORD: + case FLOAT_TRUNC_LONGWORD: + case FLOAT_CEIL_LONGWORD: + case FLOAT_FLOOR_LONGWORD: + format = _FpFormatI32; + break; + + default: + + /* + * there are 16 different function codes for + * floating point comparisons + */ + format = _FpFormatCompare; + break; + } + + return format; +} diff --git a/private/fp32/tran/mips/flog.s b/private/fp32/tran/mips/flog.s new file mode 100644 index 000000000..918187c7b --- /dev/null +++ b/private/fp32/tran/mips/flog.s @@ -0,0 +1,104 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: flog.s,v 3000.5.1.2 91/05/31 14:43:19 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define p0 -0.5527074855e+0 +#define q0 -0.6632718214e+1 +#define ln2 0.69314718055994530941 +#define one 1.0 +#define two 2.0 +#define loge 0.43429448190325182765 + +.text .text$flog +.globl flog +.globl logf +.ent flog +.aent logf +flog: +logf: + .frame sp, 0, ra + .prologue 0 + mfc1 t0, $f12 + srl t1, t0, 23 + ble t0, 0, flogerr + beq t1, 255, flognan + subu t1, 126 + sll t2, t1, 23 + subu t0, t2 + mtc1 t0, $f12 + li.s $f6, 0.70710678118654752440 + li.d $f8, one + c.lt.s $f6, $f12 + li.d $f6, two + bc1t flog1 + addu t0, (1<<23) + mtc1 t0, $f12 + subu t1, 1 +flog1: cvt.d.s $f12 + sub.d $f4, $f12, $f8 + mul.d $f4, $f6 + add.d $f0, $f12, $f8 + div.d $f4, $f0 + mul.d $f0, $f4, $f4 + li.d $f6, p0 + li.d $f8, q0 + mul.d $f2, $f0, $f6 + add.d $f0, $f0, $f8 + mtc1 t1, $f8 + div.d $f2, $f0 + mul.d $f2, $f4 + add.d $f2, $f4 + beq t1, 0, flog2 + li.d $f6, ln2 + cvt.d.w $f8 + mul.d $f8, $f6 + add.d $f2, $f8 +flog2: cvt.s.d $f0, $f2 + j ra +flogerr: + li.s $f2, 0.0 + sll t1, t0, 1 + beq t1, 0, flog0 + div.s $f0, $f2, $f2 + j ra +flog0: + li.s $f0, -1.0 + div.s $f0, $f2 + j ra +flognan: + mov.s $f0, $f12 + j ra +.end flog + +.text .text$flog +.globl flog10 +.globl log10f +.ent flog10 +.aent log10f +flog10: +log10f: + .frame sp, 0, t3 + .prologue 0 + move t3, ra + bal logf + li.d $f6, loge + mul.d $f2, $f6 + cvt.s.d $f0, $f2 + j t3 +.end flog10 diff --git a/private/fp32/tran/mips/floorm.s b/private/fp32/tran/mips/floorm.s new file mode 100644 index 000000000..54f24c448 --- /dev/null +++ b/private/fp32/tran/mips/floorm.s @@ -0,0 +1,122 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: floor.s,v 3000.5.1.3 91/05/31 14:43:29 bettina Exp $ */ + +#include <kxmips.h> + +#define one 1.0 + + +/* + An alternate algorithm would to check for numbers < 2**53, + set the rounding mode, add 2**53, and subtract 2**53. + */ + +.text .text$floorm +.globl trunc +.ent trunc +trunc: + .frame sp, 0, ra + .prologue 0 + mfc1 t1, $f13 + mfc1 t0, $f12 + srl t2, t1, 20 + and t2, 0x7FF + sub t2, 1023 + bge t2, 0, trunc1 + mtc1 $0, $f0 + mtc1 $0, $f1 + j ra +trunc1: + sub t2, 20 + bgt t2, 0, trunc2 + neg t2 + srl t1, t2 + sll t1, t2 + mtc1 $0, $f0 + mtc1 t1, $f1 + j ra +trunc2: + sub t2, 32 + bge t2, 0, trunc3 + neg t2 + srl t0, t2 + sll t0, t2 +trunc3: + mtc1 t0, $f0 + mtc1 t1, $f1 + j ra +.end trunc + + +#undef FSIZE +#define FSIZE 16 +.text .text$floorm +.globl floor +.ent floor +floor: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + cfc1 t4, $31 /* save FCSR, */ + li t2, 3 /* set rounding to down, */ + ctc1 t2, $31 /* and no exceptions */ + bal trunc + sub.d $f2, $f12, $f0 + mfc1 t0, $f3 + li.d $f6, one + sll t1, t0, 1 + bge t0, 0, 1f + beq t1, 0, 1f + sub.d $f0, $f6 +1: ctc1 t4, $31 /* restore FCSR */ + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end floor + + +#undef FSIZE +#define FSIZE 16 +.text .text$floorm +.globl ceil +.ent ceil +ceil: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + cfc1 t4, $31 /* save FCSR, */ + li t2, 2 /* set rounding to up, */ + ctc1 t2, $31 /* and no exceptions */ + bal trunc + sub.d $f2, $f12, $f0 + mfc1 t0, $f3 + mfc1 t1, $f2 + blt t0, 0, 2f + li.d $f6, one + bne t0, 0, 1f + beq t1, 0, 2f +1: add.d $f0, $f6 +2: ctc1 t4, $31 /* restore FCSR */ + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end ceil + diff --git a/private/fp32/tran/mips/fmodf.c b/private/fp32/tran/mips/fmodf.c new file mode 100644 index 000000000..191ce0dbf --- /dev/null +++ b/private/fp32/tran/mips/fmodf.c @@ -0,0 +1,27 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header */ +#include <math.h> + +float +modff (float x, float *y) +{ + double z, r; + r = modf((double)x, &z); + y = (float *)&z; + return (float)r; +} + diff --git a/private/fp32/tran/mips/fmodm.s b/private/fp32/tran/mips/fmodm.s new file mode 100644 index 000000000..3dfd8a6f2 --- /dev/null +++ b/private/fp32/tran/mips/fmodm.s @@ -0,0 +1,332 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fmod.s,v 3000.3.1.6 91/10/09 11:14:56 zaineb Exp $ */ + +.extern _except2 +.extern errno 4 + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + + +/* double fmod(double x, double y) */ +.text .text$fmodm +.ent fmod_small +fmod_small: + .frame sp, 16, ra + .mask 0x80000000, 0 + /* y is almost subnormal */ + /* f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20, + t3 = fcsr, fcsr = round-to-zero */ + /* scale both x and y, compute remainder, and unscale it */ + subu sp, 16 + sw ra, 16(sp) + .prologue 1 + li.d $f18, 1.2474001934591999e+291 /* 2^(1024-57) */ + li.d $f16, 1.4411518807585587e+17 /* 2^57 */ + c.lt.d $f0, $f18 + mul.d $f2, $f16 + bc1t 10f + /* x * 2^57 would overflow */ + /* first compute with unscaled x to chop it down to size */ + li t0, 0 + bal fmod1 + li t4, 1 + ctc1 t4, $31 + mfc1 t0, $f13 + li.d $f16, 1.4411518807585587e+17 /* 2^57 */ + mul.d $f0, $f16 + bal fmod2 + b 20f +10: mul.d $f0, $f16 + bal fmod1 +20: li.d $f16, 6.9388939039072284e-18 /* 2^-57 */ + mul.d $f0, $f16 + lw ra, 16(sp) + addu sp, 16 + j ra +.end fmod_small + + +.text .text$fmodm +.globl fmod +.ent fmod +fmod: + .frame sp, 0, ra + .prologue 0 +.set noreorder + c.un.d $f12, $f14 /* x NaN or y NaN? */ + mfc1 t0, $f13 /* sign and exponent of x */ + mfc1 t1, $f15 /* sign and exponent of y */ + bc1t 70f + cfc1 t3, $31 /* t3 = fcsr */ + abs.d $f0, $f12 /* f0 = |x| */ + abs.d $f2, $f14 /* f2 = |y| */ + li t2, (2047<<20) + c.lt.d $f0, $f2 + and t8, t0, t2 /* check for x = +-Infinity */ + and t9, t1, t2 + bc1t 30f + li t4, 1 + beq t8, t2, 80f + ctc1 t4, $31 /* set round to zero mode */ + beq t9, 0, 90f /* y is 0 or subnormal */ + li t8, 0x03900000 + bleu t9, t8, fmod_small /* almost subnormals */ + nop + +fmod1: /* entry from fmod_subnormal, fmod_small, and fmodf_punt */ + /* f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20, + t3 = fcsr, fcsr = round-to-zero */ + +20: /* x > y */ + div.d $f8, $f0, $f2 /* q = x/y (>= 1.0) */ + mfc1 t8, $f2 /* f4 = y with low 27 bits 0 */ + mfc1 t4, $f1 + mfc1 t5, $f3 + mov.d $f4, $f2 + srl t8, 27 + sll t8, 27 + mtc1 t8, $f4 + and t4, t2 + and t5, t2 + subu t4, t5 + subu t4, (25<<20) + bgtz t4, 40f + sub.d $f6, $f2, $f4 /* f6 = low 27 bits of y */ + +22: /* q < 2^26 */ + cvt.w.d $f16, $f8 /* truncate */ + cvt.d.w $f8, $f16 + mul.d $f4, $f8 /* exact (26 x 26 = 52 bits) */ + mul.d $f6, $f8 /* exact (27 x 26 = 53 bits) */ + sub.d $f0, $f4 /* exact */ + sub.d $f0, $f6 /* exact */ +fmod2: /* entry from fmod_subnormal and fmod_small */ + c.lt.d $f0, $f2 + nop + bc1f 20b + nop +.set reorder + +30: /* x < y */ + /* negate remainder if dividend was negative */ + bgez t0, 36f + neg.d $f0 +36: ctc1 t3, $31 + j ra + +40: /* q >= 2^26 */ + mfc1 t8, $f3 + mfc1 t9, $f5 + mov.d $f10, $f2 + addu t8, t4 + addu t9, t4 + mtc1 t8, $f11 + mtc1 t9, $f5 + div.d $f8, $f0, $f10 + sub.d $f6, $f10, $f4 + b 22b + +70: /* x NaN or y NaN */ + c.eq.d $f12, $f12 + bc1t 72f + mov.d $f0, $f12 + j ra +72: mov.d $f0, $f14 + j ra + +80: /* x = +-Infinity */ + ctc1 t3, $31 + sub.d $f0, $f12, $f12 /* raise Invalid, return NaN */ + j ra + +90: /* y is zero or subnormal */ + mfc1 t8, $f14 + sll t9, t1, 1 + bne t9, 0, fmod_subnormal + bne t8, 0, fmod_subnormal + + /* y = +-0 */ + ctc1 t3, $31 + div.d $f0, $f14, $f14 /* raise Invalid, return NaN */ + j set_fmod_err +.end fmod + +.text .text$fmodm +.ent fmod_subnormal +fmod_subnormal: + .frame sp, 16, ra + .mask 0x80000000, 0 + /* y is subnormal */ + /* f0 = |x|, f2 = |y|, t0 = sign of x, t2 = 2047<<20, + t3 = fcsr, fcsr = round-to-zero */ + /* scale both x and y, compute remainder, and unscale it */ + subu sp, 16 + sw ra, 16(sp) + .prologue 1 + li.d $f18, 8.6555775981267394e+273 /* 2^(1024-114) */ + li.d $f16, 2.0769187434139311e+34 /* 2^114 */ + c.lt.d $f0, $f18 + mul.d $f2, $f16 + bc1t 10f + /* x * 2^114 would overflow */ + /* first compute with unscaled x to chop it down to size */ + li t0, 0 + bal fmod1 + li t4, 1 + ctc1 t4, $31 + mfc1 t0, $f13 + li.d $f16, 2.0769187434139311e+34 /* 2^114 */ + mul.d $f0, $f16 + bal fmod2 + b 20f +10: mul.d $f0, $f16 + bal fmod1 +20: li.d $f16, 4.8148248609680896e-35 /* 2^-114 */ + mul.d $f0, $f16 + lw ra, 16(sp) + addu sp, 16 + j ra +.end fmod_subnormal + + +/* float fmodf(float x, float y) */ + +.text .text$fmodm +.globl fmodf +.ent fmodf +fmodf: + .frame sp, 0, ra + .prologue 0 +.set noreorder + c.un.s $f12, $f14 /* x NaN or y NaN? */ + mfc1 t0, $f12 /* sign and exponent of x */ + mfc1 t1, $f14 /* sign and exponent of y */ + bc1t 70f + cfc1 t3, $31 /* t3 = fcsr */ + abs.s $f0, $f12 /* f0 = |x| */ + abs.s $f2, $f14 /* f2 = |y| */ + li t2, (255<<23) + c.lt.s $f0, $f2 + and t8, t0, t2 /* check for x = +-Infinity */ + and t9, t1, t2 + bc1t 30f + li t4, 1 + beq t8, t2, 80f + ctc1 t4, $31 /* set round to zero mode */ + beq t9, 0, 90f /* y is 0 or subnormal */ + cvt.d.s $f4, $f2 +.set reorder + +20: /* x > y */ + div.s $f8, $f0, $f2 /* q = x/y (>= 1.0) */ + mfc1 t4, $f0 + mfc1 t5, $f2 + and t4, t2 + and t5, t2 + subu t4, t5 + subu t4, (23<<23) + bgtz t4, fmodf_punt + + /* q < 2^24 */ + cvt.w.s $f16, $f8 /* truncate */ + cvt.d.w $f8, $f16 + mul.d $f8, $f4 + cvt.d.s $f0 + sub.d $f0, $f8 + + c.lt.s $f0, $f4 + cvt.s.d $f0 + bc1f 20b + +30: /* x < y */ + /* negate remainder if dividend was negative */ + bgez t0, 36f + neg.s $f0 +36: ctc1 t3, $31 + j ra + +70: /* x NaN or y NaN */ + c.eq.s $f12, $f12 + bc1t 72f + mov.s $f0, $f12 + j ra +72: mov.s $f0, $f14 + j ra + +80: /* x = +-Infinity */ + ctc1 t3, $31 + sub.s $f0, $f12, $f12 /* raise Invalid, return NaN */ + mov.s $f0,$f12 + j ra + +90: /* y is zero or subnormal */ + sll t9, t1, 1 + bne t9, 0, fmodf_punt + + /* y = +-0 */ + ctc1 t3, $31 + div.s $f0, $f14, $f14 /* raise Invalid, return NaN */ + j ra +.end fmodf + +.text .text$fmodm +.ent fmodf_punt +fmodf_punt: + .frame sp, 16, ra + .mask 0x80000000, 0 + /* f0 = |x|, f2 = |y|, t0 = sign of x, + t3 = fcsr, fcsr = round-to-zero */ + subu sp, 16 + sw ra, 16(sp) + .prologue 1 + cvt.d.s $f12 + cvt.d.s $f14 + li t2, (2047<<20) + bal fmod1 + cvt.s.d $f0 + lw ra, 16(sp) + addu sp, 16 + j ra +.end fmodf_punt + + +.text .text$fmodm +.ent set_fmod_err +set_fmod_err: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, FP_I // exception mask + li $5, OP_FMOD // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + s.d $f14, 16(sp) // arg2 (TODO: pass 0.0 as arg2???, see above) + s.d $f0, 24(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 32(sp) + jal _except2 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_fmod_err + diff --git a/private/fp32/tran/mips/fpctrl.s b/private/fp32/tran/mips/fpctrl.s new file mode 100644 index 000000000..a7cff484d --- /dev/null +++ b/private/fp32/tran/mips/fpctrl.s @@ -0,0 +1,125 @@ +/*** +*fpctrl.s - fp low level control routines +* +* Copyright (c) 1985-91, Microsoft Corporation +* +*Purpose: +* IEEE control and status routines for internal use. +* These use machine specific constants for accessing the control word +* +*Revision History: +* +* 03-31-92 GDP written +* 10-30-92 GDP renamed _fpreset to _FPreset - moved _fpreset to ieee.c +* +*/ + +#include <kxmips.h> + +.globl _ctrlfp +.globl _statfp +.globl _clrfp +.globl _FPreset +.globl _set_statfp +.globl _get_fsr +.globl _set_fsr + +.text + +/* _ctrlfp(newctrl, mask) + * newctrl in a0 + * mask in a1 + */ + + +.ent _ctrlfp + +_ctrlfp: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 # v0 <- oldCw + and t0, a0, a1 # t0 <- newctrl & mask + xor v0, v0, 0xf80 # inverse exception enable bits + nor t1, a1, a1 # t1 <- ~mask + and t1, t1, v0 # t1 <- oldCw & ~mask + or t2, t0, t1 # t2 <- newtrl & mask | oldCw & ~mask + xor t2, t2, 0xf80 # inverse exception mask bits (enable exceptions) + ctc1 t2, $31 # set new control word + j ra + +.end _ctrlfp + + + +.ent _statfp + +_statfp: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 # v0 <- oldCw + j ra + +.end _statfp + + + +.ent _clrfp + +_clrfp: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 # v0 <- oldCw + and t0, v0, 0xffffff83 # clear flags + ctc1 t0, $31 # set new cw + j ra + +.end _clrfp + + +.ent _FPreset + +_FPreset: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 + and v0, v0, 1<<24 /* preserve FS bit */ + ctc1 v0, $31 + j ra + +.end _FPreset + + +.ent _set_statfp + +_set_statfp: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 + or v0, v0, a0 + ctc1 v0, $31 + j ra + +.end _set_statfp + + +.ent _set_fsr + +_set_fsr: + .frame sp,0,ra + .prologue 0 + ctc1 a0, $31 + j ra + +.end _set_fsr + + + +.ent _get_fsr + +_get_fsr: + .frame sp,0,ra + .prologue 0 + cfc1 v0, $31 + j ra + +.end _get_fsr diff --git a/private/fp32/tran/mips/fpow.c b/private/fp32/tran/mips/fpow.c new file mode 100644 index 000000000..1d0ce2cea --- /dev/null +++ b/private/fp32/tran/mips/fpow.c @@ -0,0 +1,30 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header */ +#include <math.h> + +float +fpow (float x, float y) +{ + return (float)pow((double)x, (double)y); +} + +float +powf (float x, float y) +{ + return (float)pow((double)x, (double)y); +} + diff --git a/private/fp32/tran/mips/frnd.s b/private/fp32/tran/mips/frnd.s new file mode 100644 index 000000000..aff63d74f --- /dev/null +++ b/private/fp32/tran/mips/frnd.s @@ -0,0 +1,61 @@ +/*** +*frnd.s - +* +* Copyright (c) 1991-91, Microsoft Corporation +* +*Purpose: +* +* +*Revision History: +* +* 10-20-91 GDP written +* 03-27-92 GDP return x if x >= 2^52 +*/ + +#include <kxmips.h> + +/*** +*double _frnd(double x) - round to integer +* +*Purpose: +* Round to integer according to the current rounding mode. +* NaN's or infinities are NOT handled +* +*Entry: +* +*Exit: +* +*Exceptions: +*******************************************************************************/ + +.globl _frnd +.ent _frnd +_frnd: + .frame sp, 0, ra + .prologue 0 + li.d $f4, 0.0 # f4 <- 0 + li.d $f2, 4503599627370496.0 # f2 <- 2^52 + c.eq.d $f12, $f4 # is arg 0? + bc1f nonzero + +retarg: + mov.d $f0, $f12 # return arg (preserve sign of 0) + j ra + + +nonzero: + abs.d $f6, $f12 + c.lt.d $f6, $f2 + bc1f retarg + c.lt.d $f12, $f4 # is arg negative? + bc1t negative + add.d $f0, $f12, $f2 + sub.d $f0, $f2 + j ra + +negative: + sub.d $f0, $f12, $f2 + add.d $f0, $f2 + j ra + +.end _frnd diff --git a/private/fp32/tran/mips/fsincos.s b/private/fp32/tran/mips/fsincos.s new file mode 100644 index 000000000..a64b554b7 --- /dev/null +++ b/private/fp32/tran/mips/fsincos.s @@ -0,0 +1,127 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fsincos.s,v 3000.5.1.3 91/05/31 14:44:25 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> +#define _RM_MASK 3 + +#define pio2 1.57079632679489661923 +#define pi 3.14159265358979323846 +#if defined(__ultrix) || defined(__osf__) +#define ymax 6746518852.0 +#else /* !__ultrix && !__osf__ */ +#define ymax 32000.0 +#endif /* !__ultrix && !__osf__ */ +#define oopi 0.31830988618379067154 +#define p4 0.2601903036e-5 +#define p3 -0.1980741872e-3 +#define p2 0.8333025139e-2 +#define p1 -0.1666665668e+0 +#define half 0.5 + +.text .text$fsincos +.globl fcos +.globl cosf +.ent fcos +.aent cosf +fcos: +cosf: + .frame sp, 0, ra + .prologue 0 + li.s $f6, ymax + abs.s $f12 // COS(-X) = COS(X) + cfc1 t1, $31 + c.olt.s $f12, $f6 + and t0, t1, ~_RM_MASK + bc1f sincos2 + ctc1 t0, $31 + // Reduce argument + li.s $f6, oopi + li.s $f8, half + mul.s $f2, $f12, $f6 + add.s $f2, $f8 + cvt.d.s $f8 + cvt.w.s $f4, $f2 + cvt.d.w $f2, $f4 + mfc1 t0, $f4 + sub.d $f2, $f8 + cvt.d.s $f10, $f12 + b sincos +.end fcos + +.text .text$fsincos +.globl fsin +.globl sinf +.ent fsin +.aent sinf +fsin: +sinf: + .frame sp, 0, ra + .prologue 0 + li.s $f8, pio2 + abs.s $f0, $f12 + c.olt.s $f0, $f8 + cfc1 t1, $31 + cvt.d.s $f10, $f12 + bc1t sincos1 + and t0, t1, ~_RM_MASK + li.s $f6, ymax + c.olt.s $f0, $f6 + li.s $f6, oopi + bc1f sincos2 + ctc1 t0, $31 + // Reduce argument + mul.s $f2, $f12, $f6 + cvt.w.s $f2 + mfc1 t0, $f2 + cvt.d.w $f2 +sincos: + // use extended precision arithmetic to subtract N*PI + li.d $f6, pi + and t0, 1 + mul.d $f2, $f6 + sub.d $f10, $f2 + beq t0, 0, sincos1 + neg.d $f10 +sincos1: + mul.d $f2, $f10, $f10 // g = f**2 + + // evaluate R(g) + li.d $f6, p4 + li.d $f8, p3 + mul.d $f4, $f2, $f6 + add.d $f4, $f8 + li.d $f8, p2 + mul.d $f4, $f2 + add.d $f4, $f8 + li.d $f8, p1 + mul.d $f4, $f2 + add.d $f4, $f8 + + // result is f+f*g*R(g) + mul.d $f4, $f2 + mul.d $f4, $f10 + add.d $f0, $f10, $f4 + ctc1 t1, $31 // restore rounding mode + cvt.s.d $f0 + j ra + +sincos2: + li.s $f0, 0.0 + div.s $f0, $f0 + j ra +.end fsin diff --git a/private/fp32/tran/mips/fsinh.s b/private/fp32/tran/mips/fsinh.s new file mode 100644 index 000000000..96ec24228 --- /dev/null +++ b/private/fp32/tran/mips/fsinh.s @@ -0,0 +1,122 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fsinh.s,v 3000.5.1.3 91/08/01 18:34:47 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define one 1.0 +#define eps 3.72529029846191406250e-9 +#define p1 -0.190333399e+0 +#define p0 -0.713793159e+1 +#define q0 -0.428277109e+2 +#define half 0.5 +#define expmax 88.7228317 +#define sinhmax 89.4159851 + +.text + +.globl fsinh +.globl sinhf + +.ent fsinh +.aent sinhf +fsinh: +sinhf: + + .frame sp, 16, ra + subu sp, 16 + sw ra, 16(sp) + .prologue 1 + li.s $f8, one + abs.s $f0, $f12 + c.ole.s $f0, $f8 + li.s $f8, eps + bc1f fsinh2 + c.lt.s $f0, $f8 + bc1t fsinh1 + +fsinh0: + cvt.d.s $f12 + mul.d $f2, $f12, $f12 + li.d $f10, p1 + li.d $f8, q0 + mul.d $f4, $f2, $f10 + li.d $f10, p0 + add.d $f6, $f2, $f8 + add.d $f4, $f10 + mul.d $f4, $f2 + div.d $f4, $f6 + mul.d $f4, $f12 + add.d $f0, $f12, $f4 + cvt.s.d $f0 + j ret + +fsinh1: + mov.s $f0, $f12 + j ret +fsinh2: + li.s $f8, expmax + s.s $f12, 20(sp) + c.ole.s $f0, $f8 + bc1f fsinh3 + mov.s $f12, $f0 + jal fexp + li.s $f8, half + div.s $f2, $f8, $f0 + mul.s $f0, $f8 + lw t0, 20(sp) + bltz t0, 1f + sub.s $f0, $f0, $f2 + j ret +1: sub.s $f0, $f2, $f0 + j ret + +fsinh3: + li.s $f6, sinhmax + li.s $f8, 0.69316101074218750000 + c.ole.s $f0, $f6 + bc1f error + sub.s $f12, $f0, $f8 + jal fexp + li.s $f6, 0.13830277879601902638e-4 + mul.s $f2, $f0, $f6 + lw t0, 20(sp) + bltz t0, 1f + add.s $f0, $f2 + j ret +1: add.s $f0, $f2 + neg.s $f0 + j ret + +error: + // raise Overflow and return +-Infinity + lw t0, 20(sp) + sll t1, t0, 1 + srl t1, 23+1 + beq t1, 255, 1f + li.s $f0, 2e38 + add.s $f0, $f0 +1: bltz t0, 2f + j ret1 +2: neg.s $f0 +ret: + lw ra, 16(sp) +ret1: + addu sp, 16 + j ra + +.end fsinh diff --git a/private/fp32/tran/mips/fsqrt.s b/private/fp32/tran/mips/fsqrt.s new file mode 100644 index 000000000..ac029a9fa --- /dev/null +++ b/private/fp32/tran/mips/fsqrt.s @@ -0,0 +1,32 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fsqrt.s,v 3000.6.1.4 91/05/31 14:44:37 bettina Exp $ */ + +#include <kxmips.h> + +.text + +.globl fsqrt +.globl sqrtf +.ent fsqrt +.aent sqrtf +fsqrt: +sqrtf: + .frame sp, 0, ra + .prologue 0 + sqrt.s $f0,$f12 + j ra +.end fsqrt diff --git a/private/fp32/tran/mips/fsqrthlp.s b/private/fp32/tran/mips/fsqrthlp.s new file mode 100644 index 000000000..26f858f6a --- /dev/null +++ b/private/fp32/tran/mips/fsqrthlp.s @@ -0,0 +1,29 @@ +/*** +*fsqrt.s - square root helper +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Square root helper routine to be used with the R4000 +* +*Revision History: +* 10-20-91 GDP written +* +*******************************************************************************/ +#ifdef R4000 + +#include <kxmips.h> + +.text +.globl _fsqrt + +.ent _fsqrt +_fsqrt: + .frame sp,0,ra + .prologue 0 + sqrt.d $f0, $f12 + j ra + +.end _fsqrt + +#endif diff --git a/private/fp32/tran/mips/ftan.s b/private/fp32/tran/mips/ftan.s new file mode 100644 index 000000000..ed2a1c495 --- /dev/null +++ b/private/fp32/tran/mips/ftan.s @@ -0,0 +1,94 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: ftan.s,v 3000.5.1.2 91/05/31 14:44:46 bettina Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> +#define _RM_MASK 3 + +#define pio4 0.78539816339744830961 +#define pio2 1.57079632679489661923132 +#define ymax 6433.0 +#define twoopi 0.63661977236758134308 +#define p0 -0.958017723e-1 +#define q1 0.971685835e-2 +#define q0 -0.429135777e+0 +#define one 1.0 + +.text + +.globl ftan +.globl tanf +.ent ftan +.aent tanf +ftan: +tanf: + .frame sp, 0, ra + .prologue 0 + li.s $f8, pio4 + abs.s $f0, $f12 + c.olt.s $f0, $f8 + cfc1 t1, $31 + cvt.d.s $f14, $f12 + li t0, 0 + bc1t ftan0 + and t2, t1, ~_RM_MASK + li.s $f8, ymax + c.olt.s $f0, $f8 + li.s $f8, twoopi + bc1f ftan2 + + mul.s $f2, $f12, $f8 + + // convert to integer using round-to-nearest + ctc1 t2, $31 + cvt.w.s $f2 + mfc1 t0, $f2 + and t0, 1 + + // argument reduction + cvt.d.w $f2 + li.d $f6, pio2 + mul.d $f2, $f6 + sub.d $f14, $f2 +ftan0: + // rational approximation + mul.d $f2, $f14, $f14 + li.d $f8, q1 + li.d $f6, p0 + mul.d $f10, $f2, $f8 + li.d $f8, q0 + mul.d $f4, $f2, $f6 + add.d $f10, $f8 + mul.d $f10, $f2 + li.d $f8, one + mul.d $f4, $f14 + add.d $f10, $f8 + add.d $f14, $f4 + ctc1 t1, $31 + bne t0, 0, ftan1 + div.d $f0, $f14, $f10 + cvt.s.d $f0 + j ra +ftan1: div.d $f0, $f10, $f14 + neg.d $f0 + cvt.s.d $f0 + j ra +ftan2: + li.s $f0, 0.0 + div.s $f0, $f0 + j ra +.end ftan diff --git a/private/fp32/tran/mips/ftanh.s b/private/fp32/tran/mips/ftanh.s new file mode 100644 index 000000000..9ee9690cd --- /dev/null +++ b/private/fp32/tran/mips/ftanh.s @@ -0,0 +1,105 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: ftanh.s,v 3000.5.1.4 92/02/05 18:21:20 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define ln3o2 0.54930614433405484570 +#define eps 3.72529029846191406250e-9 +#define p1 -0.3831010665e-2 +#define p0 -0.8237728127e+0 +#define q0 +0.2471319654e+1 +#define xbig 9.01091290 +#define FSIZE 16 + +.text + +.globl ftanh +.globl tanhf +.ent ftanh +.aent tanhf +ftanh: +tanhf: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.s $f8, ln3o2 + abs.s $f0, $f12 + c.lt.s $f8, $f0 + li.s $f8, eps + bc1t callftanh2 + c.lt.s $f0, $f8 + bc1t ftanh1 + mul.s $f2, $f0, $f0 + li.s $f10, p1 + li.s $f8, q0 + mul.s $f4, $f2, $f10 + li.s $f10, p0 + add.s $f6, $f2, $f8 + add.s $f4, $f10 + mul.s $f4, $f2 + div.s $f4, $f6 + mul.s $f4, $f12 + add.s $f0, $f4, $f12 + j ret + +ftanh1: + mov.s $f0, $f12 + j ret +callftanh2: + jal ftanh2 + j ret +.end ftanh + +.ent ftanh2 +ftanh2: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.s $f10, xbig + s.s $f12, 20(sp) + c.ole.s $f0, $f10 + add.s $f12, $f0, $f0 + bc1f ftanh4 + jal fexp + li.s $f10, 1.0 + li.s $f8, 2.0 + add.s $f0, $f10 + div.s $f0, $f8, $f0 + lw t0, 20(sp) + bltz t0, 1f + sub.s $f0, $f10, $f0 + j ret +1: sub.s $f0, $f0, $f10 + j ret + +ftanh4: + lw t0, 20(sp) + li.s $f0, 1.0 + bltz t0, 1f + j ret1 +1: neg.s $f0 + j ret1 + +ret: + lw ra, FSIZE-4(sp) +ret1: + addu sp, FSIZE + j ra +.end ftanh2 diff --git a/private/fp32/tran/mips/getsetrg.c b/private/fp32/tran/mips/getsetrg.c new file mode 100644 index 000000000..78481b3a5 --- /dev/null +++ b/private/fp32/tran/mips/getsetrg.c @@ -0,0 +1,1065 @@ +/*++ + +Copyright (c) 1991 Microsoft Corporation + +Module Name: + + getsetrg.c + +Abstract: + + This module implement the code necessary to get and set register values. + These routines are used during the emulation of unaligned data references + and floating point exceptions. + +Author: + + David N. Cutler (davec) 17-Jun-1991 + +Environment: + + Kernel mode only. + +Revision History: + GeorgioP 03-06-92 Port for use by the IEEE filter routine + + + +--*/ + +#include <nt.h> +#include <ntmips.h> + +ULONG +_GetRegisterValue ( + IN ULONG Register, + IN PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to get the value of a register from the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + returned. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Context - Supplies a pointer to a context + +Return Value: + + The value of the specified register is returned as the function value. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // Integer register zero. + // + + case 0: + return 0; + + // + // Integer register AT. + // + + case 1: + return Context->IntAt; + + // + // Integer register V0. + // + + case 2: + return Context->IntV0; + + // + // Integer register V1. + // + + case 3: + return Context->IntV1; + + // + // Integer register A0. + // + + case 4: + return Context->IntA0; + + // + // Integer register A1. + // + + case 5: + return Context->IntA1; + + // + // Integer register A2. + // + + case 6: + return Context->IntA2; + + // + // Integer register A3. + // + + case 7: + return Context->IntA3; + + // + // Integer register T0. + // + + case 8: + return Context->IntT0; + + // + // Integer register T1. + // + + case 9: + return Context->IntT1; + + // + // Integer register T2. + // + + case 10: + return Context->IntT2; + + // + // Integer register T3. + // + + case 11: + return Context->IntT3; + + // + // Integer register T4. + // + + case 12: + return Context->IntT4; + + // + // Integer register T5. + // + + case 13: + return Context->IntT5; + + // + // Integer register T6. + // + + case 14: + return Context->IntT6; + + // + // Integer register T7. + // + + case 15: + return Context->IntT7; + + // + // Integer register S0. + // + + case 16: + return Context->IntS0; + + // + // Integer register S1. + // + + case 17: + return Context->IntS1; + + // + // Integer register S2. + // + + case 18: + return Context->IntS2; + + // + // Integer register S3. + // + + case 19: + return Context->IntS3; + + // + // Integer register S4. + // + + case 20: + return Context->IntS4; + + // + // Integer register S5. + // + + case 21: + return Context->IntS5; + + // + // Integer register S6. + // + + case 22: + return Context->IntS6; + + // + // Integer register S7. + // + + case 23: + return Context->IntS7; + + // + // Integer register T8. + // + + case 24: + return Context->IntT8; + + // + // Integer register T9. + // + + case 25: + return Context->IntT9; + + // + // Integer register K0. + // + + case 26: + return 0; + + // + // Integer register K1. + // + + case 27: + return 0; + + // + // Integer register gp. + // + + case 28: + return Context->IntGp; + + // + // Integer register Sp. + // + + case 29: + return Context->IntSp; + + // + // Integer register S8. + // + + case 30: + return Context->IntS8; + + // + // Integer register Ra. + // + + case 31: + return Context->IntRa; + + // + // Floating register F0. + // + + case 32: + return Context->FltF0; + + // + // Floating register F1. + // + + case 33: + return Context->FltF1; + + // + // Floating register F2. + // + + case 34: + return Context->FltF2; + + // + // Floating register F3. + // + + case 35: + return Context->FltF3; + + // + // Floating register F4. + // + + case 36: + return Context->FltF4; + + // + // Floating register F5. + // + + case 37: + return Context->FltF5; + + // + // Floating register F6. + // + + case 38: + return Context->FltF6; + + // + // Floating register F7. + // + + case 39: + return Context->FltF7; + + // + // Floating register F8. + // + + case 40: + return Context->FltF8; + + // + // Floating register F9. + // + + case 41: + return Context->FltF9; + + // + // Floating register F10. + // + + case 42: + return Context->FltF10; + + // + // Floating register F11. + // + + case 43: + return Context->FltF11; + + // + // Floating register F12. + // + + case 44: + return Context->FltF12; + + // + // Floating register F13. + // + + case 45: + return Context->FltF13; + + // + // Floating register F14. + // + + case 46: + return Context->FltF14; + + // + // Floating register F15. + // + + case 47: + return Context->FltF15; + + // + // Floating register F16. + // + + case 48: + return Context->FltF16; + + // + // Floating register F17. + // + + case 49: + return Context->FltF17; + + // + // Floating register F18. + // + + case 50: + return Context->FltF18; + + // + // Floating register F19. + // + + case 51: + return Context->FltF19; + + // + // Floating register F20. + // + + case 52: + return Context->FltF20; + + // + // Floating register F21. + // + + case 53: + return Context->FltF21; + + // + // Floating register F22. + // + + case 54: + return Context->FltF22; + + // + // Floating register F23. + // + + case 55: + return Context->FltF23; + + // + // Floating register F24. + // + + case 56: + return Context->FltF24; + + // + // Floating register F25. + // + + case 57: + return Context->FltF25; + + // + // Floating register F26. + // + + case 58: + return Context->FltF26; + + // + // Floating register F27. + // + + case 59: + return Context->FltF27; + + // + // Floating register F28. + // + + case 60: + return Context->FltF28; + + // + // Floating register F29. + // + + case 61: + return Context->FltF29; + + // + // Floating register F30. + // + + case 62: + return Context->FltF30; + + // + // Floating register F31. + // + + case 63: + return Context->FltF31; + } +} + +VOID +_SetRegisterValue ( + IN ULONG Register, + IN ULONG Value, + OUT PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to set the value of a register in the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + stored. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Value - Supplies the value to be stored in the specified register. + + Context - Supplies a pointer to an context record. + +Return Value: + + None. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // Integer register zero. + // + + case 0: + return; + + // + // Integer register AT. + // + + case 1: + Context->IntAt = Value; + return; + + // + // Integer register V0. + // + + case 2: + Context->IntV0 = Value; + return; + + // + // Integer register V1. + // + + case 3: + Context->IntV1 = Value; + return; + + // + // Integer register A0. + // + + case 4: + Context->IntA0 = Value; + return; + + // + // Integer register A1. + // + + case 5: + Context->IntA1 = Value; + return; + + // + // Integer register A2. + // + + case 6: + Context->IntA2 = Value; + return; + + // + // Integer register A3. + // + + case 7: + Context->IntA3 = Value; + return; + + // + // Integer register T0. + // + + case 8: + Context->IntT0 = Value; + return; + + // + // Integer register T1. + // + + case 9: + Context->IntT1 = Value; + return; + + // + // Integer register T2. + // + + case 10: + Context->IntT2 = Value; + return; + + // + // Integer register T3. + // + + case 11: + Context->IntT3 = Value; + return; + + // + // Integer register T4. + // + + case 12: + Context->IntT4 = Value; + return; + + // + // Integer register T5. + // + + case 13: + Context->IntT5 = Value; + return; + + // + // Integer register T6. + // + + case 14: + Context->IntT6 = Value; + return; + + // + // Integer register T7. + // + + case 15: + Context->IntT7 = Value; + return; + + // + // Integer register S0. + // + + case 16: + Context->IntS0 = Value; + return; + + // + // Integer register S1. + // + + case 17: + Context->IntS1 = Value; + return; + + // + // Integer register S2. + // + + case 18: + Context->IntS2 = Value; + return; + + // + // Integer register S3. + // + + case 19: + Context->IntS3 = Value; + return; + + // + // Integer register S4. + // + + case 20: + Context->IntS4 = Value; + return; + + // + // Integer register S5. + // + + case 21: + Context->IntS5 = Value; + return; + + // + // Integer register S6. + // + + case 22: + Context->IntS6 = Value; + return; + + // + // Integer register S7. + // + + case 23: + Context->IntS7 = Value; + return; + + // + // Integer register T8. + // + + case 24: + Context->IntT8 = Value; + return; + + // + // Integer register T9. + // + + case 25: + Context->IntT9 = Value; + return; + + // + // Integer register K0. + // + + case 26: + return; + + // + // Integer register K1. + // + + case 27: + return; + + // + // Integer register gp. + // + + case 28: + Context->IntGp = Value; + return; + + // + // Integer register Sp. + // + + case 29: + Context->IntSp = Value; + return; + + // + // Integer register S8. + // + + case 30: + Context->IntS8 = Value; + return; + + // + // Integer register Ra. + // + + case 31: + Context->IntRa = Value; + return; + + // + // Floating register F0. + // + + case 32: + Context->FltF0 = Value; + return; + + // + // Floating register F1. + // + + case 33: + Context->FltF1 = Value; + return; + + // + // Floating register F2. + // + + case 34: + Context->FltF2 = Value; + return; + + // + // Floating register F3. + // + + case 35: + Context->FltF3 = Value; + return; + + // + // Floating register F4. + // + + case 36: + Context->FltF4 = Value; + return; + + // + // Floating register F5. + // + + case 37: + Context->FltF5 = Value; + return; + + // + // Floating register F6. + // + + case 38: + Context->FltF6 = Value; + return; + + // + // Floating register F7. + // + + case 39: + Context->FltF7 = Value; + return; + + // + // Floating register F8. + // + + case 40: + Context->FltF8 = Value; + return; + + // + // Floating register F9. + // + + case 41: + Context->FltF9 = Value; + return; + + // + // Floating register F10. + // + + case 42: + Context->FltF10 = Value; + return; + + // + // Floating register F11. + // + + case 43: + Context->FltF11 = Value; + return; + + // + // Floating register F12. + // + + case 44: + Context->FltF12 = Value; + return; + + // + // Floating register F13. + // + + case 45: + Context->FltF13 = Value; + return; + + // + // Floating register F14. + // + + case 46: + Context->FltF14 = Value; + return; + + // + // Floating register F15. + // + + case 47: + Context->FltF15 = Value; + return; + + // + // Floating register F16. + // + + case 48: + Context->FltF16 = Value; + return; + + // + // Floating register F17. + // + + case 49: + Context->FltF17 = Value; + return; + + // + // Floating register F18. + // + + case 50: + Context->FltF18 = Value; + return; + + // + // Floating register F19. + // + + case 51: + Context->FltF19 = Value; + return; + + // + // Floating register F20. + // + + case 52: + Context->FltF20 = Value; + return; + + // + // Floating register F21. + // + + case 53: + Context->FltF21 = Value; + return; + + // + // Floating register F22. + // + + case 54: + Context->FltF22 = Value; + return; + + // + // Floating register F23. + // + + case 55: + Context->FltF23 = Value; + return; + + // + // Floating register F24. + // + + case 56: + Context->FltF24 = Value; + return; + + // + // Floating register F25. + // + + case 57: + Context->FltF25 = Value; + return; + + // + // Floating register F26. + // + + case 58: + Context->FltF26 = Value; + return; + + // + // Floating register F27. + // + + case 59: + Context->FltF27 = Value; + return; + + // + // Floating register F28. + // + + case 60: + Context->FltF28 = Value; + return; + + // + // Floating register F29. + // + + case 61: + Context->FltF29 = Value; + return; + + // + // Floating register F30. + // + + case 62: + Context->FltF30 = Value; + return; + + // + // Floating register F31. + // + + case 63: + Context->FltF31 = Value; + return; + } +} diff --git a/private/fp32/tran/mips/huge.s b/private/fp32/tran/mips/huge.s new file mode 100644 index 000000000..9d2edce1e --- /dev/null +++ b/private/fp32/tran/mips/huge.s @@ -0,0 +1,15 @@ +#ifdef CRTDLL +.globl _HUGE_dll +#else +.globl _HUGE +#endif + +.data + +#ifdef CRTDLL +_HUGE_dll: +#else +_HUGE: +#endif + + .double 0x1.0h0x7ff diff --git a/private/fp32/tran/mips/hypotm.s b/private/fp32/tran/mips/hypotm.s new file mode 100644 index 000000000..e4148f6f4 --- /dev/null +++ b/private/fp32/tran/mips/hypotm.s @@ -0,0 +1,161 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: hypot.s,v 3000.6.1.6 91/09/12 15:40:21 suneel Exp $ */ + +/* Algorithm from 4.3 BSD cabs.c by K.C. Ng. */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + + +#define one 1.0 +#define two 2.0 +#define sqrt2 1.4142135623730951455E+00 /* 2^ 0 * 1.6A09E667F3BCD */ +#define r2p1hi 2.4142135623730949234E+00 /* 2^ 1 * 1.3504F333F9DE6 */ +#define r2p1lo 1.2537167179050217666E-16 /* 2^-53 * 1.21165F626CDD5 */ + +#define FSIZE 64 +.extern _d_ind 8 + +.text + +.globl _hypot +.ent _hypot +_hypot: + .frame sp, FSIZE, ra + .mask 0x80000000, -16 + subu sp, FSIZE + sw ra, FSIZE-16(sp) + .prologue 1 + + /* + * Clear all bits in fsr to avoid side effects (including flag bits). + * This is the same as calling _maskfp() and clearing flag bits. + * 'Save' the callers fsr in v0 to restore upon exit. + */ + + cfc1 v0, $31 + ctc1 $0, $31 + +.set noreorder + abs.d $f2, $f12 // f2 = |X| + abs.d $f4, $f14 // f4 = |Y| + mfc1 t0, $f3 // t0/ exponent word of |X| + mfc1 t1, $f5 // t1/ exponent word of |Y| + li t7, 2047 // exponent for NaN, Infinity + srl t2, t0, 20 // t2/ exponent of |X| + srl t3, t1, 20 // t3/ exponent of |Y| + beq t2, t7, 70f // if X NaN or Infinity + c.lt.d $f2, $f4 // |X| < |Y| + beq t3, t7, 75f // if Y NaN or Infinity + subu t4, t2, t3 // t4/ exponent difference + bc1f 10f // if |X| < |Y| then + slt t5, t4, 31 + abs.d $f2, $f14 // swap X and Y + abs.d $f4, $f12 // ... + move t1, t0 // ... + subu t4, t3, t2 // ... + slt t5, t4, 31 +10: + beq t5, 0, 20f // if exponent difference >= 31 + mfc1 t0, $f4 +11: + bne t1, 0, 12f + sub.d $f6, $f2, $f4 + beq t0, 0, 20f + nop +12: +.set reorder + s.d $f2, FSIZE-0(sp) + s.d $f4, FSIZE-8(sp) + c.lt.d $f4, $f6 + bc1f 13f + + div.d $f6, $f2, $f4 + li.d $f2, one + mul.d $f12, $f6, $f6 + add.d $f12, $f2 + sqrt.d $f0, $f12 + add.d $f6, $f0 + b 14f +13: + li.d $f10, two + div.d $f6, $f4 + add.d $f8, $f6, $f10 + mul.d $f8, $f6 + add.d $f12, $f8, $f10 + sqrt.d $f0, $f12 + li.d $f10, sqrt2 + add.d $f0, $f10 + div.d $f8, $f0 + add.d $f6, $f8 + li.d $f10, r2p1lo + li.d $f12, r2p1hi + add.d $f6, $f10 + add.d $f6, $f12 +14: + l.d $f4, FSIZE-8(sp) + div.d $f6, $f4, $f6 + l.d $f2, FSIZE-0(sp) + add.d $f0, $f6, $f2 + cfc1 t0, $31 // test Overflow flag bit + and t0, (1<<4) + beq t0, 0, hypotx + +/* call _except2 to process error condition */ + li $4, (FP_O | FP_P) // exception mask + li $5, OP_HYPOT // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + s.d $f14, 16(sp) // arg2 + l.d $f0, _d_ind + s.d $f0, 24(sp) // default result + xor v0, v0, 0xf80 // inverse exception enable bits of + sw v0, 32(sp) // ... callers fsr to pass to _except2 + jal _except2 + lw ra, FSIZE-16(sp) + addu sp, FSIZE + j ra + +20: /* exponent difference >= 31, or X Infinity */ + mov.d $f0, $f2 + b hypotx + +22: /* Y Infinity */ + mov.d $f0, $f4 + b hypotx + +70: /* X NaN or Infinity */ + c.eq.d $f12, $f12 + bc1t 20b + beq t3, t7, 75f + mov.d $f0, $f12 + b hypotx + +75: /* Y NaN or Infinity */ + c.eq.d $f14, $f14 + bc1t 22b + mov.d $f0, $f14 + +hypotx: + /* restore callers fsr and return */ + ctc1 v0, $31 + lw ra, FSIZE-16(sp) + addu sp, FSIZE + j ra + +#undef FSIZE +.end _hypot diff --git a/private/fp32/tran/mips/hypott.c b/private/fp32/tran/mips/hypott.c new file mode 100644 index 000000000..428086783 --- /dev/null +++ b/private/fp32/tran/mips/hypott.c @@ -0,0 +1,25 @@ +#include <stdio.h> +#include <errno.h> +#include <math.h> + +void main(void) +{ + double fResult; + double a1, a2; + + + a1 = 1.7e+308; + a2 = 1.7e+308; + + fResult = _hypot(a1, a2); + printf("_hypot(%e, %e) = %.4e\n", a1, a2, fResult); + + //A floating point reset causes the next calculation to pass. + //_fpreset(); + + a1 = -9.6412221495223150e+002; + a2 = -9.5463338659229547e+007; + + fResult = _hypot(a1, a2); + printf("_hypot(%e, %e) = %.4e\n", a1, a2, fResult); +} diff --git a/private/fp32/tran/mips/ieee.c b/private/fp32/tran/mips/ieee.c new file mode 100644 index 000000000..609026ea8 --- /dev/null +++ b/private/fp32/tran/mips/ieee.c @@ -0,0 +1,322 @@ +/*** +*ieee.c - ieee control and status routines +* +* Copyright (c) 1985-91, Microsoft Corporation +* +*Purpose: +* IEEE control and status routines. +* +*Revision History: +* +* 04-01-02 GDP Rewritten to use abstract control and status words +* 10-30-92 GDP fpreset now modifies the saved fp context if called +* from a signal handler +* +*/ + +#include <trans.h> +#include <float.h> +#include <nt.h> +#include <signal.h> + +static unsigned int _abstract_sw(unsigned int sw); +static unsigned int _abstract_cw(unsigned int cw); +static unsigned int _hw_cw(unsigned int abstr); + +extern unsigned int _get_fsr(); +extern void _set_fsr(unsigned int); + +#define STATUSMASK 0x0000007c +#define FS (1<<24) +#define CWMASK 0x01000fff + + +/*** +* _statusfp() - +* +*Purpose: +* return abstract fp status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statusfp() +{ + return _abstract_sw(_get_fsr()); +} + + +/*** +*_clearfp() - +* +*Purpose: +* return abstract status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clearfp() +{ + unsigned int status; + + status = _get_fsr(); + status &= ~STATUSMASK; + _set_fsr(status); + + return _abstract_sw(status); +} + + + +/*** _controlfp +*() - +* +*Purpose: +* return and set abstract user fp control word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _controlfp(unsigned int newctrl, unsigned int mask) +{ + unsigned int oldCw; + unsigned int newCw; + unsigned int oldabs; + unsigned int newabs; + + oldCw = _get_fsr(); + + oldabs = _abstract_cw(oldCw); + + newabs = (newctrl & mask) | (oldabs & ~mask); + + newCw = _hw_cw(newabs) & CWMASK | oldCw & ~CWMASK; + + _set_fsr(newCw); + + return newabs; +} /* _controlfp() */ + +/*** +* _fpreset() - reset fp system +* +*Purpose: +* reset fp environment to the default state +* Also reset saved fp environment if invoked from a user's +* signal handler +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _fpreset() +{ + PEXCEPTION_POINTERS excptrs = (PEXCEPTION_POINTERS) _pxcptinfoptrs; + + _FPreset(); + if (excptrs && + excptrs->ContextRecord->ContextFlags & CONTEXT_FLOATING_POINT) { + // _fpreset has been invoked by a signal handler which in turn + // has been invoked by the CRT filter routine. In this case + // the saved fp context should be cleared, so that the change take + // effect on continuation. + + excptrs->ContextRecord->Fsr = _get_fsr(); //use current FS bit + } +} + + + + +/*** +* _abstract_cw() - abstract control word +* +*Purpose: +* produce a fp control word in abstracted (machine independent) form +* +*Entry: +* cw: machine control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_cw(unsigned int cw) +{ + unsigned int abstr = 0; + + + // + // Set exception mask bits + // + + if ((cw & IEM_INVALID) == 0) + abstr |= _EM_INVALID; + if ((cw & IEM_ZERODIVIDE) == 0) + abstr |= _EM_ZERODIVIDE; + if ((cw & IEM_OVERFLOW) == 0) + abstr |= _EM_OVERFLOW; + if ((cw & IEM_UNDERFLOW) == 0) + abstr |= _EM_UNDERFLOW; + if ((cw & IEM_INEXACT) == 0) + abstr |= _EM_INEXACT; + + // + // Set rounding mode + // + + switch (cw & IMCW_RC) { + case IRC_NEAR: + abstr |= _RC_NEAR; + break; + case IRC_UP: + abstr |= _RC_UP; + break; + case IRC_DOWN: + abstr |= _RC_DOWN; + break; + case IRC_CHOP: + abstr |= _RC_CHOP; + break; + } + + // Precision mode is ignored + + // + // Set denormal control + // + + if (cw & FS) { + abstr |= _DN_FLUSH; + } + + return abstr; +} + + +/*** +* _hw_cw() - h/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _hw_cw(unsigned int abstr) +{ + + unsigned int cw = 0; + + // + // Set exception mask bits + // + + if ((abstr & _EM_INVALID) == 0) + cw |= IEM_INVALID; + if ((abstr & _EM_ZERODIVIDE) == 0) + cw |= IEM_ZERODIVIDE; + if ((abstr & _EM_OVERFLOW) == 0) + cw |= IEM_OVERFLOW; + if ((abstr & _EM_UNDERFLOW) == 0) + cw |= IEM_UNDERFLOW; + if ((abstr & _EM_INEXACT) == 0) + cw |= IEM_INEXACT; + + // + // Set rounding mode + // + + switch (abstr & _MCW_RC) { + case _RC_NEAR: + cw |= IRC_NEAR; + break; + case _RC_UP: + cw |= IRC_UP; + break; + case _RC_DOWN: + cw |= IRC_DOWN; + break; + case _RC_CHOP: + cw |= IRC_CHOP; + break; + } + + // + // Precision mode is ignored + // + + // + // Set denormal control + // + + if ((abstr & _MCW_DN) == _DN_FLUSH) { + cw |= FS; + } + + return cw; +} + + + +/*** +* _abstract_sw() - abstract fp status word +* +*Purpose: +* produce an abstract (machine independent) fp status word +* +* +*Entry: +* sw: machine status word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_sw(unsigned int sw) +{ + unsigned int abstr = 0; + + + if (sw & ISW_INVALID) + abstr |= _EM_INVALID; + if (sw & ISW_ZERODIVIDE) + abstr |= _EM_ZERODIVIDE; + if (sw & ISW_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (sw & ISW_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (sw & ISW_INEXACT) + abstr |= _EM_INEXACT; + + return abstr; +} diff --git a/private/fp32/tran/mips/linkt.c b/private/fp32/tran/mips/linkt.c new file mode 100644 index 000000000..a59267cf4 --- /dev/null +++ b/private/fp32/tran/mips/linkt.c @@ -0,0 +1,73 @@ +/* + * Just ensure that there are the correct entry points. + */ +#include <stdio.h> +#include <math.h> + +void main() +{ + double d1, d2, rd; + float f1, f2, rf; + long l; + int i; + const char cc; + struct _exception e; + struct _complex c; + + rd = abs(i); + rd = acos(d1); + rd = asin(d1); + rd = atan(d1); + rd = atan2(d1, d2); + rd = atof(&cc); + rd = _cabs(c); + rd = ceil(d1); + rd = cos(d1); + rd = cosh(d1); + rd = exp(d1); + rd = fabs(d1); + rd = floor(d1); + rd = fmod(d1, d2); + rd = frexp(d1, &i ); + rd = _hypot(d1, d2 ); + rd = _j0(d1); + rd = _j1(d1); + rd = _jn(i, d2); + rd = labs(l); + rd = ldexp(d1, i); + rd = log(d1); + rd = log10(d1); + rd = _matherr(&e); + rd = modf(d1, &d2 ); + rd = pow(d1, d2); + rd = sin(d1); + rd = sinh(d1); + rd = sqrt(d1); + rd = tan(d1); + rd = tanh(d1); + rd = _y0(d1); + rd = _y1(d1); + rd = _yn(i, d2); + + rf = acosf( f1 ); + rf = asinf( f1 ); + rf = atanf( f1 ); + rf = atan2f( f1 , f2 ); + rf = cosf( f1 ); + rf = sinf( f1 ); + rf = tanf( f1 ); + rf = coshf( f1 ); + rf = sinhf( f1 ); + rf = tanhf( f1 ); + rf = expf( f1 ); + rf = logf( f1 ); + rf = log10f( f1 ); + rf = modff( f1 , &f2 ); + rf = powf( f1 , f2 ); + rf = sqrtf( f1 ); + rf = ceilf( f1 ); + rf = fabsf( f1 ); + rf = floorf( f1 ); + rf = fmodf( f1 , f2 ); + +} diff --git a/private/fp32/tran/mips/logm.s b/private/fp32/tran/mips/logm.s new file mode 100644 index 000000000..b53cb3a26 --- /dev/null +++ b/private/fp32/tran/mips/logm.s @@ -0,0 +1,207 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: log.s,v 3000.5.1.4 91/08/14 09:31:27 zaineb Exp $ */ + +/* Algorithm from + "Table-driven Implementation of the Logarithm Functions for IEEE + Floating-Point Arithmetic", Peter Tang, Argonne National Laboratory, + February 2, 1989 + Coded in MIPS assembler by Earl Killian. + */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + +#define loge 0.43429448190325182765 + +.extern _except1 + +.text +#undef FSIZE +#define FSIZE 48 +.globl log +.ent log +log: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li t7, OP_LOG + b logmain +.globl log10 +.aent log10 +log10: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li t7, OP_LOG10 +logmain: + /* argument in f12 */ +.set noreorder + cfc1 t5, $31 /* save FCSR, set round to nearest */ + ctc1 $0, $31 /* mode and no exceptions */ + and t6, t5, -4 + li.d $f10, 1.0644944589178595e+00 // ceil( exp( 1/16)) + li.d $f16, 9.3941306281347570e-01 // floor(exp(-1/16)) + c.ult.d $f12, $f10 + mfc1 t0, $f13 + bc1f 1f + srl t1, t0, 20 + c.olt.d $f16, $f12 + li.d $f10, 0.0 + bc1t 5f + c.ule.d $f12, $f10 + nop + bc1t 8f + nop + beq t1, 0, 4f +1: li t4, 2047 + beq t1, t4, 7f + subu t1, 1023 + sll t2, t1, 20 + subu t2, t0, t2 + mtc1 t2, $f13 +.set reorder +2: ctc1 t6, $31 + li.d $f16, 3.5184372088832000e+13 // 2^(53-8) + mtc1 t1, $f8 + add.d $f18, $f12, $f16 + la t4, _logtable + sub.d $f14, $f18, $f16 + mfc1 t3, $f18 + sub.d $f18, $f12, $f14 + sll t3, 4 + add.d $f14, $f12 + l.d $f10, 128*16+0(t4) // log2head + div.d $f18, $f14 + cvt.d.w $f8 + l.d $f16, 128*16+8(t4) // log2trail + mul.d $f0, $f8, $f10 + addu t3, t4 + l.d $f4, -128*16+0(t3) + mul.d $f2, $f8, $f16 + add.d $f0, $f4 + l.d $f6, -128*16+8(t3) + add.d $f18, $f18 + li.d $f10, 1.2500053168098584e-02 + mul.d $f4, $f18, $f18 + add.d $f2, $f6 + li.d $f16, 8.3333333333039133e-02 + mul.d $f6, $f4, $f10 + add.d $f6, $f16 + mul.d $f6, $f4 + mul.d $f6, $f18 + add.d $f6, $f2 + add.d $f6, $f18 + add.d $f0, $f6 + j ret +4: /* denorm */ + li.d $f10, 4.4942328371557898e+307 // 2^1022 + mul.d $f12, $f10 + mfc1 t0, $f13 + srl t1, t0, 20 + subu t1, 1023 + sll t2, t1, 20 + subu t2, t0, t2 + mtc1 t2, $f13 + addu t1, -1022 + b 2b + +5: /* exp(-1/16) < x < exp(1/16) */ + /* use special approximation */ + ctc1 t6, $31 + li.d $f10, 1.0 + sub.d $f14, $f12, $f10 + add.d $f12, $f10 + div.d $f12, $f10, $f12 + cvt.s.d $f18, $f14 + cvt.d.s $f18 + sub.d $f8, $f14, $f18 + add.d $f2, $f14, $f14 + mul.d $f2, $f12 + mul.d $f4, $f2, $f2 + li.d $f10, 4.3488777770761457e-04 + li.d $f16, 2.2321399879194482e-03 + mul.d $f6, $f4, $f10 + add.d $f6, $f16 + li.d $f10, 1.2500000003771751e-02 + mul.d $f6, $f4 + add.d $f6, $f10 + li.d $f16, 8.3333333333331788e-02 + mul.d $f6, $f4 + add.d $f6, $f16 + mul.d $f6, $f4 + mul.d $f6, $f2 + cvt.s.d $f0, $f2 + cvt.d.s $f0 + sub.d $f14, $f0 + add.d $f14, $f14 + mul.d $f18, $f0 + sub.d $f14, $f18 + mul.d $f8, $f0 + sub.d $f14, $f8 + mul.d $f14, $f12 + add.d $f14, $f6 + add.d $f0, $f14 + j ret + +7: /* log(+Infinity) = +Infinity */ + /* log(NaN) = NaN */ + mov.d $f0, $f12 + j ret + +8: /* x <= 0 or x = NaN */ + /* is it zero? ($f10 == 0.0) */ + c.eq.d $f12, $f10 + bc1f 9f + li.d $f0, -1.0 // generate -INF + li.d $f10, 0.0 + div.d $f0, $f10 + li t6, FP_Z + j set_log_err + +9: /* x < 0.0 or x == NaN */ + c.eq.d $f12, $f12 + bc1f 7b + li.d $f0, 0.0 + div.d $f0, $f10 // generate a NaN + li t6, FP_I + +set_log_err: + move $4, t6 // exception mask + move $5, t7 // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + s.d $f0, 16(sp) // default result + xor t5, t5, 0xf80 // inverse exception enable bits + sw t5, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra + +ret: + li t6, OP_LOG10 + bne t7,t6,retf + li.d $f2, loge + mul.d $f0, $f2 +retf: + ctc1 t5, $31 /* restore FCSR */ + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end log diff --git a/private/fp32/tran/mips/logt.c b/private/fp32/tran/mips/logt.c new file mode 100644 index 000000000..f988d1e5d --- /dev/null +++ b/private/fp32/tran/mips/logt.c @@ -0,0 +1,56 @@ +#include <stdio.h> +#include <math.h> + +int main(void) +{ + double x, result, answer; + char str[80]; + int i; + int k = 0; + +#if 0 + x = 0.0; + answer = 1.0; + result = log(x); + + if (result != answer) { + printf("log(%g) = %g, should be %g\n", x, result, answer); + } +#endif + + x = -1.0; + result = log(x); + + sprintf(str, "%g", result); + if (strcmp(str, "1.#INF")) { + printf("log(%g) = %g, should be %s\n", x, result, "1.#INF"); + } + +#if 0 + x = -1.1e300; + result = log(x); + + sprintf(str, "%le", result); + if (strcmp(str, "-1.#INF00e+000")) { + printf("log(%g) = %g, should be %s\n", x, result, "-1.#INF00e+000"); + } + + for (i = 1, x = 0.0; i < 1000; i++) { + answer = 1.0; + result = log(x); + +/* + if (result != answer) { + printf("log(%g) = %g, should be %g\n", x, result, answer); + } +*/ + } +#endif + + if (k) { + printf("\n\tFailed %d tests...\n", k); + } else { + printf("\n\tPassed all tests...\n", k); + } + +} diff --git a/private/fp32/tran/mips/logtable.s b/private/fp32/tran/mips/logtable.s new file mode 100644 index 000000000..022be86ec --- /dev/null +++ b/private/fp32/tran/mips/logtable.s @@ -0,0 +1,160 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: logtable.s,v 3000.5.1.2 91/05/31 14:46:26 bettina Exp $ */ + +.globl _logtable + +#define _MIPSEL // force little endian + +#ifdef _MIPSEL +# define D(h,l) l,h +#endif +#ifdef _MIPSEB +# define D(h,l) h,l +#endif + +.rdata + .align 3 +_logtable: + .word D(0x00000000,0x00000000), D(0x00000000,0x00000000) + .word D(0x3F7FE02A,0x6B200000), D(0xBD6F30EE,0x07912DF9) + .word D(0x3F8FC0A8,0xB1000000), D(0xBD5FE0E1,0x83092C59) + .word D(0x3F97B91B,0x07D80000), D(0xBD62772A,0xB6C0559C) + .word D(0x3F9F829B,0x0E780000), D(0x3D298026,0x7C7E09E4) + .word D(0x3FA39E87,0xBA000000), D(0xBD642A05,0x6FEA4DFD) + .word D(0x3FA77458,0xF6340000), D(0xBD62303B,0x9CB0D5E1) + .word D(0x3FAB42DD,0x71180000), D(0x3D671BEC,0x28D14C7E) + .word D(0x3FAF0A30,0xC0100000), D(0x3D662A66,0x17CC9717) + .word D(0x3FB16536,0xEEA40000), D(0xBD60A3E2,0xF3B47D18) + .word D(0x3FB341D7,0x961C0000), D(0xBD4717B6,0xB33E44F8) + .word D(0x3FB51B07,0x3F060000), D(0x3D383F69,0x278E686A) + .word D(0x3FB6F0D2,0x8AE60000), D(0xBD62968C,0x836CC8C2) + .word D(0x3FB8C345,0xD6320000), D(0xBD5937C2,0x94D2F567) + .word D(0x3FBA926D,0x3A4A0000), D(0x3D6AAC6C,0xA17A4554) + .word D(0x3FBC5E54,0x8F5C0000), D(0xBD4C5E75,0x14F4083F) + .word D(0x3FBE2707,0x6E2A0000), D(0x3D6E5CBD,0x3D50FFFC) + .word D(0x3FBFEC91,0x31DC0000), D(0xBD354555,0xD1AE6607) + .word D(0x3FC0D77E,0x7CD10000), D(0xBD6C69A6,0x5A23A170) + .word D(0x3FC1B72A,0xD52F0000), D(0x3D69E80A,0x41811A39) + .word D(0x3FC29552,0xF8200000), D(0xBD35B967,0xF4471DFC) + .word D(0x3FC371FC,0x201F0000), D(0xBD6C22F1,0x0C9A4EA8) + .word D(0x3FC44D2B,0x6CCB0000), D(0x3D6F4799,0xF4F6543E) + .word D(0x3FC526E5,0xE3A20000), D(0xBD62F217,0x46FF8A47) + .word D(0x3FC5FF30,0x70A80000), D(0xBD6B0B0D,0xE3077D7E) + .word D(0x3FC6D60F,0xE71A0000), D(0xBD56F1B9,0x55C4D1DA) + .word D(0x3FC7AB89,0x02110000), D(0xBD537B72,0x0E4A694B) + .word D(0x3FC87FA0,0x65210000), D(0xBD5B77B7,0xEFFB7F41) + .word D(0x3FC9525A,0x9CF40000), D(0x3D65AD1D,0x904C1D4E) + .word D(0x3FCA23BC,0x1FE30000), D(0xBD62A739,0xB23B93E1) + .word D(0x3FCAF3C9,0x4E810000), D(0xBD600349,0xCC67F9B2) + .word D(0x3FCBC286,0x742E0000), D(0xBD6CCA75,0x818C5DBC) + .word D(0x3FCC8FF7,0xC79B0000), D(0xBD697794,0xF689F843) + .word D(0x3FCD5C21,0x6B500000), D(0xBD611BA9,0x1BBCA682) + .word D(0x3FCE2707,0x6E2B0000), D(0xBD3A342C,0x2AF0003C) + .word D(0x3FCEF0AD,0xCBDC0000), D(0x3D664D94,0x8637950E) + .word D(0x3FCFB918,0x6D5E0000), D(0x3D5F1546,0xAAA3361C) + .word D(0x3FD04025,0x94B50000), D(0xBD67DF92,0x8EC217A5) + .word D(0x3FD0A324,0xE2738000), D(0x3D50E35F,0x73F7A018) + .word D(0x3FD1058B,0xF9AE8000), D(0xBD6A9573,0xB02FAA5A) + .word D(0x3FD1675C,0xABAB8000), D(0x3D630701,0xCE63EAB9) + .word D(0x3FD1C898,0xC1698000), D(0x3D59FAFB,0xC68E7540) + .word D(0x3FD22941,0xFBCF8000), D(0xBD3A6976,0xF5EB0963) + .word D(0x3FD2895A,0x13DE8000), D(0x3D3A8D7A,0xD24C13F0) + .word D(0x3FD2E8E2,0xBAE10000), D(0x3D5D309C,0x2CC91A85) + .word D(0x3FD347DD,0x9A988000), D(0xBD25594D,0xD4C58092) + .word D(0x3FD3A64C,0x55698000), D(0xBD6D0B1C,0x68651946) + .word D(0x3FD40430,0x86868000), D(0x3D63F1DE,0x86093EFA) + .word D(0x3FD4618B,0xC21C8000), D(0xBD609EC1,0x7A426426) + .word D(0x3FD4BE5F,0x95778000), D(0xBD3D7C92,0xCD9AD824) + .word D(0x3FD51AAD,0x872E0000), D(0xBD3F4BD8,0xDB0A7CC1) + .word D(0x3FD57677,0x17458000), D(0xBD62C9D5,0xB2A49AF9) + .word D(0x3FD5D1BD,0xBF580000), D(0x3D4394A1,0x1B1C1EE4) + .word D(0x3FD62C82,0xF2BA0000), D(0xBD6C3568,0x48506EAD) + .word D(0x3FD686C8,0x1E9B0000), D(0x3D54AEC4,0x42BE1015) + .word D(0x3FD6E08E,0xAA2B8000), D(0x3D60F1C6,0x09C98C6C) + .word D(0x3FD739D7,0xF6BC0000), D(0xBD67FCB1,0x8ED9D603) + .word D(0x3FD792A5,0x5FDD8000), D(0xBD6C2EC1,0xF512DC03) + .word D(0x3FD7EAF8,0x3B828000), D(0x3D67E1B2,0x59D2F3DA) + .word D(0x3FD842D1,0xDA1E8000), D(0x3D462E92,0x7628CBC2) + .word D(0x3FD89A33,0x86C18000), D(0xBD6ED2A5,0x2C73BF78) + .word D(0x3FD8F11E,0x87368000), D(0xBD5D3881,0xE8962A96) + .word D(0x3FD94794,0x1C210000), D(0x3D56FABA,0x4CDD147D) + .word D(0x3FD99D95,0x81180000), D(0xBD5F7534,0x56D113B8) + .word D(0x3FD9F323,0xECBF8000), D(0x3D584BF2,0xB68D766F) + .word D(0x3FDA4840,0x90E58000), D(0x3D6D8515,0xFE535B87) + .word D(0x3FDA9CEC,0x9A9A0000), D(0x3D40931A,0x909FEA5E) + .word D(0x3FDAF129,0x32478000), D(0xBD3E53BB,0x31EED7A9) + .word D(0x3FDB44F7,0x7BCC8000), D(0x3D4EC519,0x7DDB55D3) + .word D(0x3FDB9858,0x96930000), D(0x3D50FB59,0x8FB14F89) + .word D(0x3FDBEB4D,0x9DA70000), D(0x3D5B7BF7,0x861D37AC) + .word D(0x3FDC3DD7,0xA7CD8000), D(0x3D66A6B9,0xD9E0A5BD) + .word D(0x3FDC8FF7,0xC79A8000), D(0x3D5A21AC,0x25D81EF3) + .word D(0x3FDCE1AF,0x0B860000), D(0xBD482909,0x05A86AA6) + .word D(0x3FDD32FE,0x7E010000), D(0xBD542A9E,0x21373414) + .word D(0x3FDD83E7,0x258A0000), D(0x3D679F28,0x28ADD176) + .word D(0x3FDDD46A,0x04C20000), D(0xBD6DAFA0,0x8CECADB1) + .word D(0x3FDE2488,0x1A7C8000), D(0xBD53D9E3,0x4270BA6B) + .word D(0x3FDE7442,0x61D68000), D(0x3D3E1F8D,0xF68DBCF3) + .word D(0x3FDEC399,0xD2468000), D(0x3D49802E,0xB9DCA7E7) + .word D(0x3FDF128F,0x5FAF0000), D(0x3D3BB2CD,0x720EC44C) + .word D(0x3FDF6123,0xFA700000), D(0x3D645630,0xA2B61E5B) + .word D(0x3FDFAF58,0x8F790000), D(0xBD49C24C,0xA098362B) + .word D(0x3FDFFD2E,0x08580000), D(0xBD46CF54,0xD05F9367) + .word D(0x3FE02552,0xA5A5C000), D(0x3D60FEC6,0x9C695D7F) + .word D(0x3FE04BDF,0x9DA94000), D(0xBD692D9A,0x033EFF75) + .word D(0x3FE0723E,0x5C1CC000), D(0x3D6F404E,0x57963891) + .word D(0x3FE0986F,0x4F574000), D(0xBD55BE8D,0xC04AD601) + .word D(0x3FE0BE72,0xE4254000), D(0xBD657D49,0x676844CC) + .word D(0x3FE0E449,0x85D1C000), D(0x3D5917ED,0xD5CBBD2D) + .word D(0x3FE109F3,0x9E2D4000), D(0x3D592DFB,0xC7D93617) + .word D(0x3FE12F71,0x95940000), D(0xBD6043AC,0xFEDCE638) + .word D(0x3FE154C3,0xD2F4C000), D(0x3D65E9A9,0x8F33A396) + .word D(0x3FE179EA,0xBBD88000), D(0x3D69A0BF,0xC60E6FA0) + .word D(0x3FE19EE6,0xB467C000), D(0x3D52DD98,0xB97BAEF0) + .word D(0x3FE1C3B8,0x1F714000), D(0xBD3EDA1B,0x58389902) + .word D(0x3FE1E85F,0x5E704000), D(0x3D1A07BD,0x8B34BE7C) + .word D(0x3FE20CDC,0xD192C000), D(0xBD64926C,0xAFC2F08A) + .word D(0x3FE23130,0xD7BEC000), D(0xBD17AFA4,0x392F1BA7) + .word D(0x3FE2555B,0xCE990000), D(0xBD506987,0xF78A4A5E) + .word D(0x3FE2795E,0x1289C000), D(0xBD5DCA29,0x0F81848D) + .word D(0x3FE29D37,0xFEC2C000), D(0xBD5EEA6F,0x465268B4) + .word D(0x3FE2C0E9,0xED448000), D(0x3D5D1772,0xF5386374) + .word D(0x3FE2E474,0x36E40000), D(0x3D334202,0xA10C3491) + .word D(0x3FE307D7,0x334F0000), D(0x3D60BE1F,0xB590A1F5) + .word D(0x3FE32B13,0x39120000), D(0x3D6D7132,0x0556B67B) + .word D(0x3FE34E28,0x9D9D0000), D(0xBD6E2CE9,0x146D277A) + .word D(0x3FE37117,0xB5474000), D(0x3D4ED717,0x74092113) + .word D(0x3FE393E0,0xD3564000), D(0xBD65E656,0x3BBD9FC9) + .word D(0x3FE3B684,0x4A000000), D(0xBD3EEA83,0x8909F3D3) + .word D(0x3FE3D902,0x6A714000), D(0x3D66FAA4,0x04263D0B) + .word D(0x3FE3FB5B,0x84D18000), D(0xBD60BDA4,0xB162AFA3) + .word D(0x3FE41D8F,0xE8468000), D(0xBD5AA337,0x36867A17) + .word D(0x3FE43F9F,0xE2F9C000), D(0x3D5CCEF4,0xE4F736C2) + .word D(0x3FE4618B,0xC21C4000), D(0x3D6EC27D,0x0B7B37B3) + .word D(0x3FE48353,0xD1EA8000), D(0x3D51BEE7,0xABD17660) + .word D(0x3FE4A4F8,0x5DB04000), D(0xBD244FDD,0x840B8591) + .word D(0x3FE4C679,0xAFCD0000), D(0xBD61C64E,0x971322CE) + .word D(0x3FE4E7D8,0x11B74000), D(0x3D6BB09C,0xB0985646) + .word D(0x3FE50913,0xCC018000), D(0xBD6794B4,0x34C5A4F5) + .word D(0x3FE52A2D,0x265BC000), D(0x3D46ABB9,0xDF22BC57) + .word D(0x3FE54B24,0x67998000), D(0x3D6497A9,0x15428B44) + .word D(0x3FE56BF9,0xD5B40000), D(0xBD58CD7D,0xC73BD194) + .word D(0x3FE58CAD,0xB5CD8000), D(0xBD49DB3D,0xB43689B4) + .word D(0x3FE5AD40,0x4C358000), D(0x3D6F2CFB,0x29AAA5F0) + .word D(0x3FE5CDB1,0xDC6C0000), D(0x3D67648C,0xF6E3C5D7) + .word D(0x3FE5EE02,0xA9240000), D(0x3D667570,0xD6095FD2) + .word D(0x3FE60E32,0xF4478000), D(0x3D51B194,0xF912B417) + .word D(0x3FE62E42,0xFEFA4000), D(0xBD48432A,0x1B0E2634) diff --git a/private/fp32/tran/mips/mkacost.cmd b/private/fp32/tran/mips/mkacost.cmd new file mode 100644 index 000000000..77322efa7 --- /dev/null +++ b/private/fp32/tran/mips/mkacost.cmd @@ -0,0 +1,6 @@ +setlocal +set include=g:\585f\nt\public\sdk\inc\crt;g:\585f\nt\public\sdk\inc +set lib=g:\585f\nt\public\sdk\lib\mips +cl -Zl -Z7 -Od -Bd acost.c /link -debug:full -debugtype:both -pdb:none ..\obj\mips\asincosm.obj libc.lib kernel32.lib +@rem cl -Zl -Z7 -Od -Bd acost.c /link -debug:full -debugtype:both -pdb:none ..\obj\mips\asincos.obj libc.lib kernel32.lib +endlocal diff --git a/private/fp32/tran/mips/mkhypott.cmd b/private/fp32/tran/mips/mkhypott.cmd new file mode 100644 index 000000000..0e1125391 --- /dev/null +++ b/private/fp32/tran/mips/mkhypott.cmd @@ -0,0 +1,6 @@ +setlocal +set include=g:\585f\nt\public\sdk\inc\crt;g:\585f\nt\public\sdk\inc +set lib=g:\585f\nt\public\sdk\lib\mips +cl -Zl -Z7 -Od -Bd hypott.c /link -debug:full -debugtype:both -pdb:none ..\obj\mips\hypotm.obj libc.lib kernel32.lib +@rem cl -Zl -Z7 -Od -Bd hypott.c /link -debug:full -debugtype:both -pdb:none ..\obj\mips\hypot.obj libc.lib kernel32.lib +endlocal diff --git a/private/fp32/tran/mips/mklogt.cmd b/private/fp32/tran/mips/mklogt.cmd new file mode 100644 index 000000000..065be3d05 --- /dev/null +++ b/private/fp32/tran/mips/mklogt.cmd @@ -0,0 +1,6 @@ +setlocal +set include=d:\nt\public\sdk\inc\crt;d:\nt\public\sdk\inc +set lib=d:\nt\public\sdk\lib\mips +rem cl -Fc -Zl -Ox -Zi -Bd logt.c /link -debug:full -debugtype:cv -pdb:none ..\obj\mips\logm.obj libc.lib kernel32.lib +cl -Fc -Zl -Ox -Z7 -Bd logt.c /link -debug:full -debugtype:cv -pdb:none ..\obj\mips\logm.obj libc.lib kernel32.lib +endlocal diff --git a/private/fp32/tran/mips/mkpowt.cmd b/private/fp32/tran/mips/mkpowt.cmd new file mode 100644 index 000000000..b347b749b --- /dev/null +++ b/private/fp32/tran/mips/mkpowt.cmd @@ -0,0 +1,5 @@ +setlocal +set include=d:\nt\public\sdk\inc\crt;d:\nt\public\sdk\inc +set lib=d:\nt\public\sdk\lib\mips;c:\585f\nt\public\sdk\lib\mips +cl -Fc -Zl -Ox -Zi -Bd powt.c /link -debug:full -debugtype:both -pdb:none ..\obj\mips\powm.obj libc.lib kernel32.lib +endlocal diff --git a/private/fp32/tran/mips/powm.s b/private/fp32/tran/mips/powm.s new file mode 100644 index 000000000..6dc919efd --- /dev/null +++ b/private/fp32/tran/mips/powm.s @@ -0,0 +1,776 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: pow.s,v 3000.4.1.15 92/02/13 10:37:44 zaineb Exp $ */ + +/* Algorithm from + "Table-driven Implementation of the Power Function + in IEEE Floating-Point Arithmetic", Peter Tang and Earl Killian + Coded in MIPS assembler by Earl Killian. + */ + +/* + Jun-06-94 Xzero not always setting $f0 before calling _set_pow_err. + */ + +.globl pow /* double pow(double x, double y) */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + + +#define OVERFLOW_EXC_BIT 0x4000 +#define UNDERFLOW_EXC_BIT 0x2000 + +.extern _logtable +.extern _exptable + +.extern _d_ind 8 +#define D_IND _d_ind + +.text + +.ent pow +pow: +#define FSIZE 64 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + +#define X $f12 +#define Xhi $f13 +#define Xlo $f12 +#define Y $f14 +#define Yhi $f15 +#define Ylo $f14 + +#define one $f18 + +.set noreorder + c.un.d X, Y /* test if either X or Y is NaN */ + s.d $f12, 5*8(sp) + mfc1 t3, Yhi + mfc1 t2, Ylo + mfc1 t1, Xhi + mfc1 t0, Xlo + li.d one, 1.0 + bc1t XYNaN + nop + + bne t2, 0, 10f /* fast test to rule out special cases */ + sll t4, t3, 12 /* that is Y != +-0.0, +-1.0, 2.0, Infinity */ + bne t4, 0, 10f + sll t8, t3, 1 + /* Y may be a special case */ + beq t8, 0, retOne /* Y = +-0 */ + li t7, 0x3ff00000 + beq t3, t7, retX /* Y = 1 */ + li t8, 0x40000000 + beq t3, t8, retXsq /* Y = 2 */ + sll t4, t3, 1 + li t8, 0x7ff00000<<1 + beq t4, t8, Yinfinite /* Y = +-Infinity */ + nop +10: + cfc1 t6, $31 /* save rounding mode, etc. */ + ctc1 $0, $31 /* set round to nearest */ + + bltz t1, Xnegative + li t5, 0 /* result sign */ +12: + bne t0, 0, 14f /* eliminate X = 0 and X = Infinity */ + li t7, 0x7ff00000 + beq t1, 0, Xzero /* X = +-0 */ + nop + beq t1, t7, Xinfinite /* X = Infinity */ + nop + +.set reorder + +14: /* save registers */ + s.d $f20, 0*8(sp) + s.d $f22, 1*8(sp) + s.d $f24, 2*8(sp) + /*s.d $f30, 5*8(sp)*/ + + abs.d $f4, Y + li.d $f6, 3.1965771613006643e18 + c.lt.d $f6, $f4 + li.d $f0, 0.984375 + li.d $f2, 1.015625 + bc1t Ybig + + /* finally we've eliminated all the special cases and can get + down to computing X^Y */ + + /* Procedure L */ +/* outputs */ +#define z1 $f0 +#define z2 $f2 + + /* save more registers */ + s.d $f26, 3*8(sp) + s.d $f28, 4*8(sp) + .fmask 0x3FF00000, -FSIZE + c.lt.d $f0, X + bc1f Lnormal + c.lt.d X, $f2 + bc1f Lnormal + + /* Procedure Lsmall */ + +#define g $f6 +#define f $f0 +#define f1 $f2 +#define F2 $f4 +#define u $f8 +#define u1 $f20 +#define v $f10 +#define q $f22 +#define c1 $f16 +#define c2 $f18 + + add.d g, X, one /* g = 1.0 / (1.0 + X) */ + div.d g, one, g + sub.d f, X, one /* f = X - 1.0 */ + cvt.s.d f1, f /* f1 = (float)f */ + cvt.d.s f1 + sub.d F2, f, f1 /* f2 = f - f1 */ + add.d u, f, f /* u = 2 * f * g */ + mul.d u, g + mul.d v, u, u /* v = u * u */ + cvt.s.d u1, u /* u1 = (float)u */ + cvt.d.s u1 + + /* q = u * (v * (C1 + v * (C2 + v * (C3 + v * (C4 + v * C5))))) */ + + li.d c2, 4.4072021372392785e-04 /* C5 */ + mul.d q, v, c2 + add.d q, c2 + li.d c1, 2.2321412321046185e-03 /* C3 */ + mul.d q, v + add.d q, c1 + li.d c2, 1.2500000000155512e-02 /* C2 */ + mul.d q, v + add.d q, c2 + li.d c1, 8.3333333333333329e-02 /* C1 */ + mul.d q, v + add.d q, c1 + mul.d q, v + mul.d q, u + + sub.d f, u1 /* u2 = 2 * (f - u1) */ + add.d f, f + mul.d f1, u1 /* u2 = u2 - u1*f1 * u1*f2 */ + mul.d F2, u1 + sub.d f, f1 + sub.d f, F2 + + mul.d z2, g, z1 /* z2 = g * u2 + q */ + add.d z2, q + add.d z1, u1, z2 /* z1 = (float)(u1 + z2) */ + cvt.s.d z1 + cvt.d.s z1 + sub.d u1, z1 /* z2 = z2 + (u1 - z1) */ + add.d z2, u1 + + j M + +#undef g +#undef f +#undef f1 +#undef F2 +#undef u +#undef u1 +#undef v +#undef q +#undef c1 +#undef c2 + + + /* Procedure Lnormal */ + +#define l1 $f0 +#define l2 $f2 +#define F $f8 +#define f $f4 +#define g $f6 +#define u $f20 +#define m $f2 +#define c0 $f22 +#define d0 $f0 +#define d1 $f2 +#define d2 $f10 +#define d3 $f16 +#define d4 $f26 +#define d5 $f28 +#define v $f10 +#define u1 $f16 +#define f1 $f22 +#define F2 $f24 +#define u2 $f26 +#define d6 $f28 +#define d7 $f26 +#define d8 $f6 +#define d9 $f10 +#define d10 $f12 +#define d11 $f18 +#define q $f6 +#define d12 $f4 +#define d13 $f8 +#define d14 $f10 +#define d15 $f18 +#define c1 $f18 +#define c2 $f12 + +Lnormal: + li.d d9, 3.5184372088832000e+13 /* 2^(52-7) */ + srl t8, t1, 20 + beq t8, 0, Xdenorm + + subu t2, t8, 1023 + sll t3, t2, 20 + subu t1, t3 + mtc1 t1, Xhi +16: /* Xdenorm returns here */ + add.d d0, X, d9 /* j = rint(x * 128) */ + mfc1 t1, d0 + sll t1, 4 + sub.d F, d0, d9 /* F = j/128 */ + add.d d1, F, X /* g = 1.0 / (F + x) */ + div.d g, one, d1 + sub.d f, X, F /* f = x - F */ + la t4, _logtable + addu t1, t4 + l.d d2, 128*16+0(t4) /* log2head */ + l.d d3, 128*16+8(t4) /* log2trail */ + mtc1 t2, m + cvt.d.w m /* m */ + mul.d l1, m, d2 /* a1 = m * log2lead */ + mul.d l2, m, d3 /* a2 = m * log2trail */ + l.d d4, -128*16+0(t1) + l.d d5, -128*16+8(t1) + add.d l1, d4 /* a1 = a1 + logtable[j-128] */ + add.d l2, d5 /* a2 = a2 + logtable[j-128] */ + + add.d u, f, f /* u = 2 * f * g */ + mul.d u, g + cvt.s.d u1, u /* u1 = (float)u */ + cvt.d.s u1 + mul.d v, u, u /* v = u * u */ + + mul.d c0, Y, l1 /* c = abs(Y * a1) */ + abs.d c0 + li.d d7, 16.0 + c.lt.d c0, d7 + bc1t 20f + + /* c >= 16.0 */ + + cvt.s.d f1, f /* f1 = (float)f */ + cvt.d.s f1 + sub.d F2, f, f1 /* f2 = f - f1 */ + + mul.d u2, u1, F /* u2 = 2 * (f - u1 * F) */ + sub.d u2, f, u2 + add.d u2, u2 + mul.d d15, u1, f1 /* u2 = u2 - u1 * f1 */ + sub.d u2, d15 + mul.d d6, u1, F2 /* u2 = u2 - u1 * f2 */ + sub.d u2, d6 + mul.d u2, g /* u2 = u2 * g */ + + /* q = u * (v * (A1 + v * (A2 + v * A3))) */ + li.d q, 2.2321229798769144e-03 /* A3 */ + li.d c2, 1.2500000000716587e-02 /* A2 */ + mul.d q, v + add.d q, c2 + li.d c1, 8.3333333333333329e-02 /* A1 */ + mul.d q, v + add.d q, c1 + + j 30f + +20: + /* c < 16.0 */ + + /* u2 = g * (2 * (f - u1 * F) - u1 * f) */ + mul.d d10, u1, F + sub.d d10, f, d10 + add.d d10, d10 + mul.d d11, u1, f + sub.d d10, d11 + mul.d u2, d10, g + li.d d8, 0.125 + c.lt.d c0, d8 + bc1t 25f + + /* c >= 0.125 */ + + /* q = u * (v * (A1 + v * (A2 + v * A3))) */ + li.d q, 2.2321229798769144e-03 /* A3 */ + li.d c2, 1.2500000000716587e-02 /* A2 */ + mul.d q, v + add.d q, c2 + li.d c1, 8.3333333333333329e-02 /* A1 */ + mul.d q, v + add.d q, c1 + j 30f + +25: + /* c < 0.125 */ + + /* q = u * (v * (B1 + v * B2)) */ + li.d q, 1.2500055860192138e-02 /* B2 */ + li.d c1, 8.3333333333008588e-02 /* B1 */ + mul.d q, v + add.d q, c1 + +30: + mul.d q, v + mul.d q, u + + add.d d12, l1, u1 /* t = a1 + u1 */ + sub.d d13, l1, d12 /* a2 = a2 + ((a1 - t) + u1) */ + add.d d13, u1 + add.d l2, d13 + add.d q, u2 /* p = u2 + q */ + add.d z2, q, l2 /* z2 = p + a2 */ + add.d z1, d12, z2 /* z1 = (float)(t + z2) */ + cvt.s.d z1 + cvt.d.s z1 + sub.d d14, d12, z1 /* z2 = z2 + (t - z1) */ + add.d z2, d14 + +#undef l1 +#undef l2 +#undef F +#undef f +#undef g +#undef u +#undef m +#undef c +#undef d0 +#undef d1 +#undef d2 +#undef d3 +#undef d4 +#undef d5 +#undef v +#undef u1 +#undef f1 +#undef F2 +#undef u2 +#undef d6 +#undef d7 +#undef d8 +#undef d9 +#undef d10 +#undef d11 +#undef q +#undef d12 +#undef d13 +#undef d14 +#undef c1 +#undef c2 + + +M: /* restore registers */ + l.d $f20, 0*8(sp) + l.d $f22, 1*8(sp) + l.d $f24, 2*8(sp) + l.d $f26, 3*8(sp) + l.d $f28, 4*8(sp) + /*l.d $f30, 5*8(sp)*/ + + /* Procedure M */ + +#define w1 $f6 +#define w2 $f8 +#define y1 $f16 +#define y2 $f18 +#define d0 $f12 +#define d1 $f4 + cvt.s.d y1, Y + cvt.d.s y1 + sub.d y2, Y, y1 + mul.d d0, y2, z1 + mul.d d1, y2, z2 + mul.d w2, y1, z2 + add.d d0, d1 + mul.d w1, y1, z1 + add.d w2, d0 + + /* Procedure E */ + + li.d $f10, 4.6166241308446828e+01 /* Inv_L */ + mul.d $f10, w1 + + /* Check for gross overflow or underflow. */ + li.d $f16, 2000.0 + neg.d $f18, $f16 + c.lt.d w1, $f16 + bc1f Overflow + c.lt.d w1, $f18 + bc1t Underflow + + cvt.w.d $f10 + mfc1 t0, $f10 + and t1, t0, 31 /* region */ + sra t2, t0, 5 /* scale */ + cvt.d.w $f12, $f10 + li.d $f2, 2.1660849390173098e-02 /* L1 */ + mul.d $f2, $f12 + sub.d $f10, w1, $f2 + add.d $f10, w2 + li.d $f4, 2.3251928468788740e-12 /* L2 */ + mul.d $f4, $f12 + sub.d $f10, $f4 + + li.d $f0, 1.3888944287816253e-03 /* P5 */ + li.d $f2, 8.3333703801026920e-03 /* P4 */ + mul.d $f0, $f10 + li.d $f4, 4.1666666666361998e-02 /* P3 */ + add.d $f0, $f2 + mul.d $f0, $f10 + li.d $f2, 1.6666666666505991e-01 /* P2 */ + add.d $f0, $f4 + mul.d $f0, $f10 + li.d $f4, 5.0000000000000000e-01 /* P1 */ + add.d $f0, $f2 + mul.d $f0, $f10 + add.d $f0, $f4 + mul.d $f0, $f10 + mul.d $f0, $f10 + add.d $f0, $f10 + + addu t4, t2, 1023 + sll t3, t4, 20 + or t3, t5 + mtc1 t3, $f9 + mtc1 $0, $f8 + + sll t1, 4 + la t1, _exptable(t1) + l.d $f2, 0(t1) + l.d $f4, 8(t1) + add.d $f6, $f2, $f4 + + mul.d $f0, $f6 + add.d $f0, $f4 + +.set noreorder + ctc1 t6, $31 /* restore rounding mode */ + slt t7, t4, 2047 + blez t4, 90f + add.d $f0, $f2 /* add in high part */ + beq t7, 0, 90f + nop + mul.d $f0, $f8 /* sign and exponent */ + cfc1 t0, $31 /* Check for overflow/underflow */ + nop + and t1, t0, OVERFLOW_EXC_BIT + bne t1, 0, Overflow + and t2, t0, UNDERFLOW_EXC_BIT + bne t2, 0, Underflow + nop + j ret + nop + +90: /* scale is outside of 2^-1022 to 2^1023 -- do it the slow way */ + + mfc1 t0, $f1 /* get result high word */ + nop + srl t1, t0, 20 + addu t1, t2 /* add scale to check for denorm */ + blez t1, 92f + slt t7, t1, 2047 + beq t7, 0, Overflow + sll t2, 20 + addu t0, t2 /* add scale */ + or t0, t5 /* add sign */ + mtc1 t0, $f1 /* put back in result high word */ + nop + j ret + nop +92: /* denorm result */ + addu t1, 64 + blez t1, Underflow + addu t2, 64 + sll t2, 20 + addu t0, t2 + mtc1 t0, $f1 + li.d $f2, 5.4210108624275222e-20 /* 2^-64 */ + nop + mul.d $f0, $f2 + cfc1 t0, $31 /* Check for overflow/underflow */ + nop + and t1, t0, OVERFLOW_EXC_BIT + bne t1, 0, Overflow + and t2, t0, UNDERFLOW_EXC_BIT + bne t2, 0, Underflow + nop + j ret + nop + +Ybig: + li.d $f2, 1.0 + abs.d $f0, X + c.eq.d $f0, $f2 + nop + bc1t retOne + c.lt.d $f0, $f2 + bc1t Underflow + nop + /* fall through to Overflow */ + +Overflow: + ctc1 t6, $31 /* restore rounding mode */ +94: li.d $f0, 8.9884656743115795e+307 + bne t5, 0, 96f + mov.d $f0, Y + li.d $f2, 0.0 + l.d $f12,5*8(sp) + c.le.d $f0, $f2 + nop + bc1t Underf + nop + li t8, FP_O + j Calerr + nop +Underf: + li t8, FP_U +Calerr: + jal set_pow_err + nop + j ret + mul.d $f0, $f0 + +96: neg.d $f2, $f0 + l.d $f12,5*8(sp) + li t8, FP_O + jal set_pow_err + mul.d $f0, $f2 + j ret + nop + +Underflow: + li.d $f0, 0.0 + beq t5, 0, 1f + ctc1 t6, $31 + nop + neg.d $f0 +1: + l.d $f12,5*8(sp) + li t8, FP_U + jal set_pow_err + nop + j ret + nop + +.set reorder +Xdenorm: + li.d $f0, 1.8446744073709552e+19 /* 2^64 */ + mul.d X, $f0 + mfc1 t0, Xhi + sll t1, t0, 1 + srl t1, 21 + subu t2, t1, 1023 + sll t3, t2, 20 + subu t0, t3 + mtc1 t0, Xhi + subu t2, 64 + j 16b + +.set noreorder +Xnegative: + li.d $f2, 9.0071992547409920e+15 /* 2^53 */ + abs.d $f0, Y + neg.d X + c.lt.d $f0, $f2 + add.d $f4, $f2, $f0 + mfc1 t1, Xhi + bc1f 12b /* if abs(Y) >= 2^53, then it is an even + integer */ + sub.d $f4, $f2 + c.eq.d $f4, $f0 + li.d $f2, 4.5035996273704960e+15 /* 2^52 */ + bc1t 12b /* if (abs(Y)+2^53)-2^53 = Y then it is + an even integer */ + c.lt.d $f0, $f2 + add.d $f4, $f2, $f0 + li t5, 0x80000000 /* result is negative */ + bc1f 12b /* if abs(Y) >= 2^52, then it is an integer */ + sub.d $f4, $f2 + c.eq.d $f4, $f0 + nop + bc1t 12b /* if (abs(Y)+2^52)-2^52 = Y then it is + an integer */ + nop + /* Y is not an integer */ + bne t1, 0, retNaN + ctc1 t6, $31 + bne t0, 0, retNaN + nop + bgez t3, 1f + mov.d $f0, X /* +0 */ + div.d $f0, one, $f0 /* -Infinity */ + nop +1: j ret + nop + +Xzero: /* X = +-0 */ + /* t3 = Yhi */ + /* t2 = Ylo */ + bnez t2, 1f + nop + beqz t3, retOne + nop +1: + bgtz t3, retZero + nop + div.d $f0, one, X + bnel t5, 0, 2f + neg.d $f0 +2: li t8, FP_Z + jal set_pow_err + nop + j ret + nop + +Xinfinite: + bgez t3, 1f + ctc1 t6, $31 + nop + div.d X, one, X +1: + beq t5, 0, retX + nop + neg.d $f0, X + j ret + nop + +/* below here, FCSR does not need to be restored */ + +XYNaN: + bne t2, 0, 1f + c.eq.d X, X + sll t8, t3, 1 + beq t8, 0, retOne +1: nop + bc1f retX + mov.d $f0, Y + nop + j ret + nop + +Yinfinite: + abs.d $f0, X + c.eq.d $f0, one + bc1t retNaN + c.lt.d $f0, one + nop + bc1t 1f + nop + bltz t3, retZero + nop + mov.d $f0, Y + nop + j ret + nop +1: bgez t3, retZero + nop + neg.d $f0, Y + nop + j ret + nop + +.set reorder +retNaN: + li.d $f0, 0.0 // generate a NaN + div.d $f0, $f0 + l.d Y, 5*8(sp) // restore the second argument + li t8, FP_I + jal set_pow_err + j ret + +retXsq: + /* t1 = Xhi */ + /* t0 = Xlo */ + bnez t0, 1f + beqz t1, retZero /* x = +-0 */ +1: + mul.d $f0, X, X + j ret + +retOne: + mov.d $f0, one + j ret + +retZero: + li.d $f0, 0.0 + j ret + +retX: + mov.d $f0, X + j ret + +retooX: + div.d $f0, one, X + bne t1, 0, ret /* X = +-0 */ + li t8, FP_I + l.d $f12, 5*8(sp) + jal set_pow_err + j ret + +ret: lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra + +.end pow + + +.extern _except2 + +.text + +.set reorder + +/* t8 = exception mask, $f0 = default result, $f12 = arg1, $f14 = arg2 */ +.ent set_pow_err +set_pow_err: +#define FMSIZE 48 + .frame sp, FMSIZE, ra + .mask 0x80000000, -4 + subu sp, FMSIZE + sw ra, FMSIZE-4(sp) + .prologue 1 + move $4, t8 // exception mask + li $5, OP_POW // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + s.d $f14, 16(sp) // arg2 + s.d $f0, 24(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 32(sp) + jal _except2 + lw ra, FMSIZE-4(sp) + addu sp, FMSIZE + j ra +.end set_pow_err + diff --git a/private/fp32/tran/mips/powt.c b/private/fp32/tran/mips/powt.c new file mode 100644 index 000000000..9609a7ea8 --- /dev/null +++ b/private/fp32/tran/mips/powt.c @@ -0,0 +1,167 @@ +#include <stdio.h> +#include <math.h> +#include <errno.h> + +int test1(void); +int test2(void); + +int main(int argc, char **argv) +{ + int k = 0; + + k += test1(); + + k += test2(); + + if (k) { + printf("\n\tFailed %d tests...\n", k); + } else { + printf("\n\tPassed all tests...\n", k); + } + + return k; +} + +int test1() +{ + double x, y, result, answer; + char str[80]; + int i; + int k = 0; + + x = y = 0.0; + answer = 1.0; + result = pow(x,y); + + if (result != answer) { + printf("pow(%g,%g) = %g, should be %g\n", x, y, result, answer); + k++; + } + + x = 0.0; + y = -1.0; + result = pow(x,y); + + sprintf(str, "%g", result); + if (strcmp(str, "1.#INF")) { + printf("pow(%g,%g) = %g, should be %s\n", x, y, result, "1.#INF"); + k++; + } + + x = -1.1e300; + y = 21.0; + result = pow(x,y); + + sprintf(str, "%le", result); + if (strcmp(str, "-1.#INF00e+000")) { + printf("pow(%g,%g) = %g, should be %s\n", x, y, result, "-1.#INF00e+000"); + k++; + } + + for (i = 1, x = 0.0; i < 1000; i++) { + y = i; + answer = 0.0; + result = pow(x,y); + + if (result != answer) { + printf("pow(%g,%g) = %g, should be %g\n", x, y, result, answer); + k++; + } + } + + return (k); +} + + +typedef union { + long lng[2]; + double dbl; + } dbl; + +dbl d_inf = { 0x0, 0x7ff00000 }; + +#define D_INF (d_inf.dbl) + +typedef struct { + double x; + double y; + double answer; +} _test; + +int test2(void) +{ + _test tests[] = { + { 21.0, -1.1e300, 0.0 }, // -D_INF??? + { 21.0, 1.1e-300, 1.0 }, // +D_INF??? + { -1.1e300, 21.0, -D_INF }, + { 21.0, 1.1e300, D_INF }, + { 1.0e100, 21.0, D_INF }, + { 21.0, 1.0e100, D_INF }, + { 1.0e100, 0.0, 1.0 }, + { 1.0e100, 1.0, 1.0e100 }, + { 1.0e100, 2.0, 1.0e200 }, + { 1.0e300, 1.0, 1.0e300 }, + { 1.0e300, 2.0, D_INF }, + }; + double result; + int i; + int k = 0; + char buf[BUFSIZ]; + + dbl foo = {0x78b58c40, 0x4415af1d}; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + result = pow(tests[i].x,tests[i].y); + if (result != tests[i].answer) { + // sprintf(buf, "%e", result); + // result = atof(buf); + // if (result != tests[i].answer) { + k++; + printf("pow(%e,%e) = %e, should be %e\n", + tests[i].x, + tests[i].y, + result, + tests[i].answer + ); + // } + } + } + + return(k); +} + + +int test3() +{ + _test tests[] = { + { 0.0, -2.0, D_INF }, + { 0.0, -1.0, D_INF }, + { 0.0, 0.0, 1.0 }, + { 0.0, 1.0, 0.0 }, + { 0.0, 2.0, 0.0 }, + { 0.0, 3.0, 0.0 }, + { 0.0, 4.0, 0.0 }, + }; + double result; + int i; + int k = 0; + char buf[BUFSIZ]; + + for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) { + result = pow(tests[i].x,tests[i].y); + if (result != tests[i].answer) { + if (result != tests[i].answer) { + k++; + printf("pow(%e,%e) = %e, should be %e\n", + tests[i].x, + tests[i].y, + result, + tests[i].answer + ); + } + } + } + + return(k); +} + diff --git a/private/fp32/tran/mips/rint.s b/private/fp32/tran/mips/rint.s new file mode 100644 index 000000000..b55b6786f --- /dev/null +++ b/private/fp32/tran/mips/rint.s @@ -0,0 +1,50 @@ +/* + *------------------------------------------------------------- + *| RESTRICTED RIGHTS LEGEND | + *| Use, duplication, or disclosure by the Government is | + *| subject to restrictions as set forth in subparagraph | + *| (c)(1)(ii) of the Rights in Technical Data and Computer | + *| Software Clause at DFARS 252.227-7013. | + *| MIPS Computer Systems, Inc. | + *| 928 Arques Avenue | + *| Sunnyvale, CA 94086 | + *------------------------------------------------------------- + */ +/* --------------------------------------------------- */ +/* | Copyright (c) 1989 MIPS Computer Systems, Inc. | */ +/* | All Rights Reserved. | */ +/* --------------------------------------------------- */ +#include <kxmips.h> + +/* Double-precision round to integer using current rounding mode */ + +.globl _frnd +.ent _frnd +_frnd: + .frame sp, 0, ra + .prologue 0 + li.d $f4, 4503599627370496.0 /* 2^52 */ + abs.d $f2, $f12 /* |arg| */ + c.olt.d $f2, $f4 /* if |arg| >= 2^52 or arg is NaN */ + mfc1 t0, $f13 + mov.d $f0, $f12 + bc1f 4f /* then done */ + /* < 2^52 */ + sll t1, t0, 1 + bgez t0, 2f /* if input negative, negate result */ + /* negative */ + beq t1, 0, 3f /* possible -0 */ +1: sub.d $f0, $f12, $f4 + add.d $f0, $f4 + j ra +2: /* positive */ + add.d $f0, $f12, $f4 /* bias by 2^52 to force non-integer + bits off end */ + sub.d $f0, $f4 /* unbias */ + j ra + +3: /* msw = 80000000 */ + mfc1 t1, $f12 /* if -0, return -0 */ + bne t1, 0, 1b /* if negative denorm, process that */ +4: j ra +.end _frnd diff --git a/private/fp32/tran/mips/sinhm.s b/private/fp32/tran/mips/sinhm.s new file mode 100644 index 000000000..7ccae731f --- /dev/null +++ b/private/fp32/tran/mips/sinhm.s @@ -0,0 +1,161 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 52.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Drive | + * | Sunnyvale, CA 94086 | + * |-----------------------------------------------------------| + */ +/* $Header: sinh.s,v 3000.5.1.9 92/01/29 15:51:37 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + +#define one 1.0 +#define eps 3.72529029846191406250e-9 +#define p3 -0.78966127417357099479e+0 +#define p2 -0.16375798202630751372e+3 +#define p1 -0.11563521196851768270e+5 +#define p0 -0.35181283430177117881e+6 +#define q2 -0.27773523119650701667e+3 +#define q1 +0.36162723109421836460e+5 +#define q0 -0.21108770058106271242e+7 +#define expmax 709.78271289338397 +#define sinhmax 710.47586007394386 +#define half 0.5 + +.text + +.globl sinh +.ent sinh +sinh: +#define FSIZE 16 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + .prologue 1 + li.d $f8, one + abs.d $f0, $f12 + c.ole.d $f0, $f8 + li.d $f8, eps + bc1f sinh2 + c.lt.d $f0, $f8 + bc1t sinh1 + + mul.d $f2, $f0, $f0 + li.d $f10, p3 + li.d $f8, q2 + mul.d $f4, $f2, $f10 + add.d $f6, $f2, $f8 + li.d $f10, p2 + mul.d $f6, $f2 + add.d $f4, $f10 + li.d $f8, q1 + mul.d $f4, $f2 + add.d $f6, $f8 + li.d $f10, p1 + mul.d $f6, $f2 + add.d $f4, $f10 + li.d $f8, q0 + mul.d $f4, $f2 + li.d $f10, p0 + add.d $f6, $f8 + add.d $f4, $f10 + div.d $f4, $f6 + mul.d $f4, $f2 + mul.d $f4, $f12 + add.d $f0, $f4, $f12 + j ret3 + +sinh1: + mov.d $f0, $f12 +ret3: lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra + +sinh2: + li.d $f8, expmax + swc1 $f13, FSIZE-8(sp) // save sign + c.ole.d $f0, $f8 + bc1f sinh3 + mov.d $f12, $f0 + jal exp + li.d $f8, half + div.d $f2, $f8, $f0 + mul.d $f0, $f8 + lw t0, FSIZE-8(sp) + bltz t0, 1f + sub.d $f0, $f0, $f2 + j ret1 +1: sub.d $f0, $f2, $f0 + j ret1 + +sinh3: + li.d $f6, sinhmax + li.d $f8, 0.69316101074218750000 + c.ole.d $f0, $f6 + bc1f error + sub.d $f12, $f0, $f8 + jal exp + li.d $f6, 0.13830277879601902638e-4 + mul.d $f2, $f0, $f6 + lw t0, FSIZE-8(sp) + bltz t0, 2f + add.d $f0, $f2 + j ret1 +2: add.d $f0, $f2 + neg.d $f0 + j ret1 + +error: + // raise Overflow and return +-Infinity + jal set_sinh_err +ret1: lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end sinh +#undef FSIZE + + +.extern _except1 +.extern _HUGE + +.ent set_sinh_err +set_sinh_err: +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li $4, (FP_O | FP_P) // exception mask + li $5, OP_SINH // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + // return +/-INF for overflow + li.d $f20, 0.0 + li.d $f0, 1.0 + c.lt.d $f12, $f20 + bc1f 1f + neg.d $f0 +1: + div.d $f0,$f20 + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // fp control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_sinh_error diff --git a/private/fp32/tran/mips/sqrt3000.c b/private/fp32/tran/mips/sqrt3000.c new file mode 100644 index 000000000..ef88274c3 --- /dev/null +++ b/private/fp32/tran/mips/sqrt3000.c @@ -0,0 +1,131 @@ +/*** +*sqrt.c - square root +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 1-29-91 GDP Kahan's algorithm for final rounding +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +#define SQRT_APPROX(x) ( .41731 + .59016 * x ) /* Hart et al. */ + +#define _STICKY (0x7c) + +static double const SQRTP5 = 0.70710678118654752440; + + + + +/*** +*double sqrt(double x) - square root +* +*Purpose: +* Compute the square root of a number. +* This function should be provided by the underlying +* hardware (IEEE spec). +*Entry: +* +*Exit: +* +*Exceptions: +* I P +*******************************************************************************/ +double sqrt(double x) +{ + unsigned int savedcw; + double result,t; + double f,y; + int n,j; + unsigned int stat,rc; + + savedcw = _controlfp(_RC_DOWN | _MCW_EM | _PC_53, + _MCW_RC | _MCW_EM | _MCW_PC); + + /* handle special cases here in order to support matherr */ + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + case T_QNAN: + RETURN(savedcw, x); + case T_SNAN: + return _except1(FP_I,OP_SQRT,x,QNAN_SQRT,savedcw); + } + /* -INF will be handled in the x<0 case */ + } + if (x < 0.0) { + return _except1(FP_I, OP_SQRT, x, QNAN_SQRT,savedcw); + } + + + if (x == 0.0) { + RETURN (savedcw, x); + } + + f = _decomp(x,&n); + + + y = SQRT_APPROX(f); /* first approximation */ + for (j=1;j<4;j++) { + y = y + f/y ; + y = _add_exp(y, -1); + } + + + if (n & 0x1) { + // n is odd + n++; + y *= SQRTP5; + } + + + n >>= 1; + result = _add_exp(y,n); /* this should not overflow */ + + + (void) _clearfp(); + t = x / result; + // get status and restore sticky bits + stat = _controlfp(savedcw, _STICKY); + + if (! (stat & _SW_INEXACT)) { + // exact + if (t == result) { + RETURN(savedcw, result); + } + else { + // t = t-1 + if (*D_LO(t) == 0) { + (*D_HI(t)) --; + } + (*D_LO(t)) --; + } + + } + + rc = savedcw & _MCW_RC; + if (rc == _RC_UP || rc == RC_NEAR) { + // t = t+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + if (rc == _RC_UP) { + // y = y+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + } + } + + result = 0.5 * (t + result); + + RETURN_INEXACT1(OP_SQRT, x, result, savedcw); + +} diff --git a/private/fp32/tran/mips/sqrt4000.c b/private/fp32/tran/mips/sqrt4000.c new file mode 100644 index 000000000..67e68adc2 --- /dev/null +++ b/private/fp32/tran/mips/sqrt4000.c @@ -0,0 +1,85 @@ +/*** +*sqrt4000.c - square root for the R4000 +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 3-31-92 GDP written +*******************************************************************************/ +#ifdef R4000 + +#include <math.h> +#include <trans.h> + +static double scale = 4503599627370496.0; // 2^52 +static double lgscale = 26; // log2(2^52) / 2 + +#define SWMASK 0x78 +#define INEXACT_MASK 0x4 + + +/*** +*double sqrt(double x) - square root +* +*Purpose: +* Compute the square root of a number. +* This function should be provided by the underlying +* hardware (IEEE spec). +*Entry: +* +*Exit: +* +*Exceptions: +* I P +*******************************************************************************/ +double sqrt(double x) +{ + unsigned int savedcw,cw,sw; + double result,savedx; + int scaled = 0; + + // mask exceptions, keep user's rounding mode + savedcw = _ctrlfp(ICW, IMCW & ~IMCW_RC); + + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw, x); + case T_QNAN: + return _handle_qnan1(OP_SQRT, x, savedcw); + case T_SNAN: + return _except1(FP_I,OP_SQRT,x,QNAN_SQRT,savedcw); + } + /* -INF will be handled in the x<0 case */ + } + if (x < 0.0) { + return _except1(FP_I, OP_SQRT, x, QNAN_SQRT,savedcw); + } + + savedx = x; + + if (IS_D_DENORM(x)) { + x *= scale; + scaled = 1; + } + + sw = _clrfp(); + result = _fsqrt(x); + cw = _get_fsr(); + + _set_fsr(cw & ~SWMASK | sw & SWMASK); // restore all but inexact + + if (scaled) { + result = _add_exp(result, -lgscale); + } + + if ((cw & INEXACT_MASK) == INEXACT_MASK) { + return _except1(FP_P,OP_SQRT,savedx,result,savedcw); + } + + RETURN(savedcw,result); +} + +#endif diff --git a/private/fp32/tran/mips/sqrtm.s b/private/fp32/tran/mips/sqrtm.s new file mode 100644 index 000000000..8aff5455d --- /dev/null +++ b/private/fp32/tran/mips/sqrtm.s @@ -0,0 +1,90 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: fsqrt.s,v 3000.6.1.4 91/05/31 14:44:37 bettina Exp $ */ + +/* v-rogerl 02/14/94 Added error processing for x < 0.0 */ +/* Return errno = EDOM and result = 0.0 */ + +#include <kxmips.h> +#include <trans.h> +#include <fpieee.h> + +.extern _except1 +.extern _d_ind 8 + +.text + +.globl sqrt +.ent sqrt +sqrt: + .frame sp, 0, ra + .prologue 0 + + /* argument in f12 */ + li.d $f0, 0.0 + c.le.d $f12, $f0 + /* x > 0 */ + bc1f 1f + c.lt.d $f12, $f0 + /* x == 0 */ + bc1f 2f + /* x < 0 */ + li t6, FP_I + j set_sqrt_err +1: + + /* + * Clear all bits in fsr to avoid side effects (including flag bits). + * This is the same as calling _maskfp() and clearing flag bits. + * 'Save' the callers fsr in v0 to restore upon exit. + */ + + cfc1 v0, $31 + ctc1 zero, $31 + + sqrt.d $f0,$f12 + + /* restore callers fsr */ + ctc1 v0, $31 +2: + j ra + +.end sqrt + +#define FSIZE 48 + +.ent set_sqrt_err +set_sqrt_err: + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + move $4, t6 // exception mask + li $5, OP_SQRT // operation code (funtion name index) + mfc1.d $6, $f12 // arg1 + l.d $f0, _d_ind + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // floating point control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_sqrt_err + diff --git a/private/fp32/tran/mips/tanhm.s b/private/fp32/tran/mips/tanhm.s new file mode 100644 index 000000000..0b961b88c --- /dev/null +++ b/private/fp32/tran/mips/tanhm.s @@ -0,0 +1,111 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: tanh.s,v 3000.5.1.3 92/02/03 16:50:49 zaineb Exp $ */ +/* Algorithm from Cody and Waite. */ + +#include <kxmips.h> + +#define ln3o2 0.54930614433405484570 +#define eps 3.72529029846191406250e-9 +#define p2 -0.96437492777225469787e+0 +#define p1 -0.99225929672236083313e+2 +#define p0 -0.16134119023996228053e+4 +#define q2 +0.11274474380534949335e+3 +#define q1 +0.22337720718962312926e+4 +#define q0 +0.48402357071988688686e+4 +#define xbig 19.061547465398498 +#define FSIZE 16 + +.text + +.globl tanh +.ent tanh +tanh: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.d $f8, ln3o2 + abs.d $f0, $f12 + c.olt.d $f8, $f0 + li.d $f8, eps + bc1t calltanh2 + c.ult.d $f0, $f8 + bc1t tanh1 + mul.d $f2, $f0, $f0 + li.d $f10, p2 + li.d $f8, q2 + mul.d $f4, $f2, $f10 + add.d $f6, $f2, $f8 + li.d $f10, p1 + mul.d $f6, $f2 + add.d $f4, $f10 + li.d $f8, q1 + mul.d $f4, $f2 + add.d $f6, $f8 + li.d $f10, p0 + mul.d $f6, $f2 + add.d $f4, $f10 + li.d $f8, q0 + mul.d $f4, $f2 + add.d $f6, $f8 + div.d $f4, $f6 + mul.d $f4, $f12 + add.d $f0, $f4, $f12 + j ret + +tanh1: + mov.d $f0, $f12 + j ret +calltanh2: + jal tanh2 + j ret +.end tanh + +.ent tanh2 +tanh2: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li.d $f10, xbig + swc1 $f13, 20(sp) + c.ole.d $f0, $f10 + add.d $f12, $f0, $f0 + bc1f tanh4 + jal exp + li.d $f10, 1.0 + li.d $f8, 2.0 + add.d $f0, $f10 + div.d $f0, $f8, $f0 + lw t0, 20(sp) + bltz t0, 1f + sub.d $f0, $f10, $f0 + j ret +1: sub.d $f0, $f0, $f10 + j ret + +tanh4: + lw t0, 20(sp) + li.d $f0, 1.0 + bltz t0, 1f + j ret1 +1: neg.d $f0 + j ret1 +ret: lw ra, FSIZE-4(sp) +ret1: addu sp, FSIZE + j ra +.end tanh2 diff --git a/private/fp32/tran/mips/trigm.s b/private/fp32/tran/mips/trigm.s new file mode 100644 index 000000000..595b81a2f --- /dev/null +++ b/private/fp32/tran/mips/trigm.s @@ -0,0 +1,553 @@ +/* + * |-----------------------------------------------------------| + * | Copyright (c) 1991, 1990 MIPS Computer Systems, Inc. | + * | All Rights Reserved | + * |-----------------------------------------------------------| + * | Restricted Rights Legend | + * | Use, duplication, or disclosure by the Government is | + * | subject to restrictions as set forth in | + * | subparagraph (c)(1)(ii) of the Rights in Technical | + * | Data and Computer Software Clause of DFARS 252.227-7013. | + * | MIPS Computer Systems, Inc. | + * | 950 DeGuigne Avenue | + * | Sunnyvale, California 94088-3650, USA | + * |-----------------------------------------------------------| + */ +/* $Header: trig.s,v 3000.3.1.2 91/06/10 15:18:21 karen Exp $ */ +/* Algorithm from 4.3bsd */ + +/* + * Original fcsr is saved in t6. Do not use t6 as a temp register! + */ + + +#include <kxmips.h> +#include "trans.h" +#include "fpieee.h" + +#define PIo4 7.8539816339744828E-1 +#define OoPIo2 6.3661977236758138E-1 +#define PIo2hi 1.5707963109016418 +#define PIo2lo 1.5893254712295857E-8 +#define Xmax 105414357.85197645 +#define half 0.5 +#define one 1.0 +#define thresh 5.2234479296242364e-01 +#define Ymax 2.98156826864790199324e8; /* 2^(53/2)*PI/2 */ + + +#define S0 -1.6666666666666463126E-1 +#define S1 8.3333333332992771264E-3 +#define S2 -1.9841269816180999116E-4 +#define S3 2.7557309793219876880E-6 +#define S4 -2.5050225177523807003E-8 +#define S5 1.5868926979889205164E-10 + +#define C0 4.1666666666666504759E-2 +#define C1 -1.3888888888865301516E-3 +#define C2 2.4801587269650015769E-5 +#define C3 -2.7557304623183959811E-7 +#define C4 2.0873958177697780076E-9 +#define C5 -1.1250289076471311557E-11 + +#undef FSIZE +#define FSIZE 16 + +.text .text$trigm +.globl cos /* double cos(double x) */ +.ent cos +cos: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + cfc1 t6, $31 // t6 original fcsr + ctc1 $0, $31 // set round to zero, no traps + li t7, 1 + + // if |x| >= 2^63 generate _TLOSS and return indefinite + li.d $f16, Ymax + abs.d $f0, $f12 + c.olt.d $f0, $f16 + li.d $f10, PIo4 + bc1f cos_err + + /* reduce to [-PI/4,+PI/4] */ + c.olt.d $f0, $f10 + li.d $f16, Xmax + bc1t cos1 // in range, no reduction necessary + + c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special + li.d $f18, OoPIo2 + bc1f 8f // argument reduction + +1: mul.d $f2, $f12, $f18 // round(x/(PI/2)) + cvt.w.d $f4, $f2 // ... + cvt.d.w $f2, $f4 // ... + /* f2 <= 2^26 */ + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + mfc1 t0, $f4 + addu t7, t0 + + abs.d $f0, $f12 + c.le.d $f0, $f10 + and t0, t7, 1 + bc1f 1b + + and t1, t7, 2 + bne t0, 0, cos1 + + beq t1, 0, sin1 + neg.d $f12 + b sin1 + +8: /* |round(x/(PI/2))| > 2^26 or x is NaN */ + mfc1 t0, $f13 + li t1, 0x7ff00000 + and t0, t1 + subu t2, t0, (1023+25)<<20 + beq t0, t1, 9f + li.d $f2, OoPIo2 + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mfc1 t3, $f3 + mfc1 t4, $f7 + mfc1 t5, $f9 + subu t3, t2 + addu t4, t2 + addu t5, t2 + mtc1 t3, $f3 + mtc1 t4, $f7 + mtc1 t5, $f9 + mul.d $f2, $f12 + cvt.w.d $f4, $f2 + cvt.d.w $f2, $f4 + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + + abs.d $f0, $f12 + c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special + li t0, (1<<20) + bc1f 8b // argument reduction + bne t2, t0, 1b + mfc1 t0, $f4 + sll t0, 1 + addu t7, t0 + b 1b + +cos_err: + // |x| >= 2^63 + ctc1 t6, $31 // restore original fcsr + li a1, OP_COS // operation code (funtion name index) + jal set_trigm_err + b cosret1 + +9: /* x is NaN or Infinity */ + /* sub.d $f0, $f12, $f12 */ + mov.d $f0, $f12 + +cosret: + ctc1 t6, $31 // restore original fcsr +cosret1: + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end cos + + +.text .text$trigm +.globl sin /* double sin(double x) */ +.ent sin +sin: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + cfc1 t6, $31 // t6 original fcsr + ctc1 $0, $31 // set round to zero, no traps + li t7, 0 + + // if |x| >= 2^63 generate _TLOSS and return indefinite + li.d $f16, Ymax + abs.d $f0, $f12 + c.olt.d $f0, $f16 + li.d $f10, PIo4 + bc1f sin_err + + /* reduce to [-PI/4,+PI/4] */ + c.olt.d $f0, $f10 + li.d $f16, Xmax + bc1t sin1 // in range, no reduction necessary + + c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special + li.d $f18, OoPIo2 + bc1f 8f // argument reduction + +1: mul.d $f2, $f12, $f18 + cvt.w.d $f4, $f2 + cvt.d.w $f2, $f4 + /* f2 <= 2^26 */ + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + mfc1 t0, $f4 + addu t7, t0 + + abs.d $f0, $f12 + c.le.d $f0, $f10 + and t0, t7, 1 + bc1f 1b + + and t1, t7, 2 + bne t0, 0, cos1 + + beq t1, 0, 2f + neg.d $f12 +2: + +sin1: /* compute sin(x) for x in [-PI/4,PI/4] */ + /* z = x*x, sin(x) = x + x*z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */ + mul.d $f8, $f12, $f12 + li.d $f0, S5 + mul.d $f0, $f8 + li.d $f4, S4 + add.d $f0, $f4 + mul.d $f0, $f8 + li.d $f6, S3 + add.d $f0, $f6 + mul.d $f0, $f8 + li.d $f4, S2 + add.d $f0, $f4 + mul.d $f0, $f8 + li.d $f6, S1 + add.d $f0, $f6 + mul.d $f0, $f8 + li.d $f4, S0 + add.d $f0, $f4 + mul.d $f0, $f8 + mul.d $f0, $f12 + add.d $f0, $f12 + b sinret + +cos1: /* compute cos(x) for x in [-PI/4,PI/4] */ + /* z = x*x, c = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */ + mul.d $f8, $f12, $f12 + li.d $f0, C5 + mul.d $f0, $f8 + li.d $f4, C4 + add.d $f0, $f4 + mul.d $f0, $f8 + li.d $f6, C3 + add.d $f0, $f6 + mul.d $f0, $f8 + li.d $f4, C2 + add.d $f0, $f4 + mul.d $f0, $f8 + li.d $f6, C1 + add.d $f0, $f6 + mul.d $f0, $f8 + li.d $f4, C0 + add.d $f0, $f4 + mul.d $f0, $f8 + mul.d $f0, $f8 + + li.d $f6, thresh + li.d $f16, 0.5 + c.lt.d $f8, $f6 + mul.d $f14 $f16, $f8 + bc1t 4f + /* z >= thresh, cos(x) = 0.5-((z/2-0.5)-c) */ + sub.d $f8, $f14, $f16 + sub.d $f8, $f0 + b 5f +4: /* z < thresh, cos(x) = 1.0-(z/2-c) */ + li.d $f16, one + sub.d $f8, $f14, $f0 +5: + and t0, t7, 2 + bne t0, 0, 6f + sub.d $f0, $f16, $f8 + b sinret +6: sub.d $f0, $f8, $f16 + b sinret + +8: /* |round(x/(PI/2))| > 2^26 or x is NaN */ + mfc1 t0, $f13 + li t1, 0x7ff00000 + and t0, t1 + subu t2, t0, (1023+25)<<20 + beq t0, t1, 9f + li.d $f2, OoPIo2 + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mfc1 t3, $f3 + mfc1 t4, $f7 + mfc1 t5, $f9 + subu t3, t2 + addu t4, t2 + addu t5, t2 + mtc1 t3, $f3 + mtc1 t4, $f7 + mtc1 t5, $f9 + mul.d $f2, $f12 + cvt.w.d $f4, $f2 + cvt.d.w $f2, $f4 + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + + abs.d $f0, $f12 + c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special + li t0, (1<<20) + bc1f 8b // argument reduction + bne t2, t0, 1b + mfc1 t0, $f4 + sll t0, 1 + addu t7, t0 + b 1b + +sin_err: + // |x| >= 2^63 + ctc1 t6, $31 // restore original fcsr + li a1, OP_SIN // operation code (funtion name index) + jal set_trigm_err + b sinret1 + +9: /* x is NaN or Infinity */ + /* sub.d $f0, $f12, $f12 */ + mov.d $f0,$f12 + +sinret: + ctc1 t6, $31 // restore original fcsr +sinret1: + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end sin + + +/* leave tan in its own section */ +.text +.globl tan /* double tan(double x) */ +.ent tan +tan: + .frame sp, FSIZE, ra + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + cfc1 t6, $31 // t6 original fcsr + ctc1 $0, $31 // set round to zero, no exceptions + li t7, 0 + + // if |x| >= 2^63 generate _TLOSS and return indefinite + li.d $f16, Ymax + abs.d $f0, $f12 + c.olt.d $f0, $f16 + li.d $f10, PIo4 + bc1f tan_err + + /* reduce to [-PI/4,+PI/4] */ + c.olt.d $f0, $f10 + li.d $f16, Xmax + bc1t 3f + + c.olt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, need special + li.d $f18, OoPIo2 + bc1f 8f // argument reduction + +1: mul.d $f2, $f12, $f18 // round(x/(PI/2)) + cvt.w.d $f4, $f2 // ... + cvt.d.w $f2, $f4 // ... + /* f2 <= 2^26 */ + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + mfc1 t0, $f4 + addu t7, t0 + + abs.d $f0, $f12 + c.le.d $f0, $f10 + and t0, t7, 1 + bc1f 1b + + beq t0, 0, 2f + neg.d $f12 +2: + +3: /* compute sin(x) and cos(x) for x in [-PI/4,PI/4] */ + /* z = x*x */ + /* (f0) cc = z*z*(C0+z*(C1+z*(C2+z*(C3+z*(C4+z*C5))))) */ + /* (f2) ss = z*(S0+z*(S1+z*(S2+z*(S3+z*(S4+z*S5))))) */ + mul.d $f8, $f12, $f12 + li.d $f2, S5 + li.d $f0, C5 + mul.d $f2, $f8 + mul.d $f0, $f8 + + li.d $f4, S4 + li.d $f6, C4 + add.d $f2, $f4 + add.d $f0, $f6 + mul.d $f2, $f8 + mul.d $f0, $f8 + + li.d $f4, S3 + li.d $f6, C3 + add.d $f2, $f4 + add.d $f0, $f6 + mul.d $f2, $f8 + mul.d $f0, $f8 + + li.d $f4, S2 + li.d $f6, C2 + add.d $f2, $f4 + add.d $f0, $f6 + mul.d $f2, $f8 + mul.d $f0, $f8 + + li.d $f4, S1 + li.d $f6, C1 + add.d $f2, $f4 + add.d $f0, $f6 + mul.d $f2, $f8 + mul.d $f0, $f8 + + li.d $f4, S0 + li.d $f6, C0 + add.d $f2, $f4 + add.d $f0, $f6 + mul.d $f2, $f8 + mul.d $f0, $f8 + mul.d $f0, $f8 + + li.d $f6, thresh + li.d $f16, 0.5 + c.lt.d $f8, $f6 + mul.d $f14 $f16, $f8 + bc1t 4f + + /* z >= thresh, c = 0.5-((z/2-0.5)-cc) */ + sub.d $f6, $f14, $f16 + sub.d $f6, $f0 + b 5f + +4: /* z < thresh, c = 1.0-(z/2-cc) */ + li.d $f16, one + sub.d $f6, $f14, $f0 + +5: /* ss in $f2, c in $f6 */ + sub.d $f6, $f16, $f6 + and t0, t7, 1 + bne t0, 0, 6f + + /* tan(x) = x + (x*(z/2-(cc-ss)))/c */ + sub.d $f4, $f0, $f2 + sub.d $f0, $f14, $f4 + mul.d $f0, $f12 + div.d $f0, $f6 + add.d $f0, $f12 + b tanret + +6: /* tan(x) = c/(x+x*ss) */ + mul.d $f2, $f12 + add.d $f2, $f12 + div.d $f0, $f6, $f2 + b tanret + +8: /* |round(x/(PI/2))| > 2^26 or x is NaN */ + mfc1 t0, $f13 + li t1, 0x7ff00000 + and t0, t1 + subu t2, t0, (1023+25)<<20 + beq t0, t1, 9f + li.d $f2, OoPIo2 + li.d $f6, PIo2hi + li.d $f8, PIo2lo + mfc1 t3, $f3 + mfc1 t4, $f7 + mfc1 t5, $f9 + subu t3, t2 + addu t4, t2 + addu t5, t2 + mtc1 t3, $f3 + mtc1 t4, $f7 + mtc1 t5, $f9 + mul.d $f2, $f12 + cvt.w.d $f4, $f2 + cvt.d.w $f2, $f4 + mul.d $f6, $f2 // exact (26 x 26 = 52 bits) + mul.d $f8, $f2 // exact (27 x 26 = 53 bits) + sub.d $f12, $f6 // exact + sub.d $f12, $f8 // exact + + abs.d $f0, $f12 + c.lt.d $f0, $f16 // if |round(x/(PI/2))| > 2^26, continue special + li t0, (1<<20) + bc1f 8b /// argument reduction + bne t2, t0, 1b + mfc1 t0, $f4 + sll t0, 1 + addu t7, t0 + b 1b + +tan_err: + // |x| >= 2^63 + ctc1 t6, $31 // restore original fcsr + li a1, OP_TAN // operation code (funtion name index) + jal set_trigm_err + b tanret1 + +// REVIEW is this correct? +9: /* x is NaN or Infinity */ + sub.d $f0, $f12, $f12 + mov.d $f2, $f0 + +tanret: + ctc1 t6, $31 // restore original fcsr +tanret1: + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +.end tan + +.extern _except1 + +.text .text$trigm +.ent set_trigm_err +set_trigm_err: +#undef FSIZE +#define FSIZE 48 + .frame sp, FSIZE, ra + .mask 0x80000000, -4 + subu sp, FSIZE + sw ra, FSIZE-4(sp) + .prologue 1 + li a0, (FP_TLOSS | FP_I) // exception mask + // a1 passed by caller = operation code (funtion name index) + mfc1.d a2, $f12 // arg1 + li.d $f0, 0.0 // generate a NaN + div.d $f0, $f0 + s.d $f0, 16(sp) // default result + cfc1 t7, $31 // fp control/status register + xor t7, t7, 0xf80 // inverse exception enable bits + sw t7, 24(sp) // goes on parameter stack + jal _except1 + lw ra, FSIZE-4(sp) + addu sp, FSIZE + j ra +#undef FSIZE +.end set_trigm_err diff --git a/private/fp32/tran/mips/xsqrt.s b/private/fp32/tran/mips/xsqrt.s new file mode 100644 index 000000000..56364905c --- /dev/null +++ b/private/fp32/tran/mips/xsqrt.s @@ -0,0 +1,224 @@ +/* + *------------------------------------------------------------- + *| RESTRICTED RIGHTS LEGEND | + *| Use, duplication, or disclosure by the Government is | + *| subject to restrictions as set forth in subparagraph | + *| (c)(1)(ii) of the Rights in Technical Data and Computer | + *| Software Clause at DFARS 252.227-7013. | + *| MIPS Computer Systems, Inc. | + *| 928 Arques Avenue | + *| Sunnyvale, CA 94086 | + *------------------------------------------------------------- + */ +/* --------------------------------------------------------- */ +/* | Copyright (c) 1986, 1989 MIPS Computer Systems, Inc. | */ +/* | All Rights Reserved. | */ +/* --------------------------------------------------------- */ +/* $Header: sqrt.s,v 2021.3 90/07/19 16:01:33 zaineb Exp $ */ + +/* Accurate but horribly slow and horribly big sqrt based on algorithm + by Kahan. */ + +#include <kxmips.h> + +.rdata + .align 3 +.globl _sqrttable +_sqrttable: + .word 83599 + .word 71378 + .word 60428 + .word 50647 + .word 41945 + .word 34246 + .word 27478 + .word 21581 + .word 16499 + .word 12183 + .word 8588 + .word 5674 + .word 3403 + .word 1742 + .word 661 + .word 130 + .word 0 + .word 1204 + .word 3062 + .word 5746 + .word 9193 + .word 13348 + .word 18162 + .word 23592 + .word 29598 + .word 36145 + .word 43202 + .word 50740 + .word 58733 + .word 67158 + .word 75992 + .word 85215 + +.text .text$xsqrt +.globl _fsqrt +.ent _fsqrt +_fsqrt: + .frame sp, 0, ra + .prologue 0 + + /* 64 or so cycles in common code */ + mfc1 t0, $f13 + li t2, -(1023<<19)+(1023<<20) + sra t1, t0, 20 + li t3, 2047 + blez t1, 8f + srl t0, 1 + beq t1, t3, 9f + srl t1, t0, 15-2 + and t1, 31<<2 + lw t1, _sqrttable(t1) + addu t0, t2 + subu t0, t1 + mtc1 t0, $f1 + mtc1 $0, $f0 + cfc1 t4, $31 + or t5, t4, 3 + ctc1 t5, $31 + + /* 8 -> 18 bits */ + li t2, (1<<20) + div.d $f2, $f12, $f0 + /* 17 cycle interlock */ + add.d $f0, $f2 + /* 1 cycle interlock (2 cycle stall) */ + mfc1 t0, $f1 + add t1, t2, 6 /* 17 -> 18 bits */ + subu t0, t1 + mtc1 t0, $f1 + /* nop */ + + /* 18 -> 37 */ + div.d $f2, $f12, $f0 + /* 17 cycle interlock */ + add.d $f0, $f2 + /* 1 cycle interlock (2 cycle stall) */ + + /* Kahan's algorithm to convert 1 ulp error to .5 ulp error. */ + /* 65 additional cycles, in common case */ + + .set noreorder /* take matters into our own hands */ +#define INEXACT (1<<12) +#define STICKY_INEXACT (1<<2) + + /* 37 -> 75 (53) */ + div.d $f2, $f12, $f0 + mfc1 t0, $f1 + li t1, (2<<20) + subu t0, t1 + mtc1 t0, $f1 + li t0, ~INEXACT + /* 12 cycle interlock */ + add.d $f0, $f2 + /* 1 cycle interlock (2 cycle stall) */ + + /* chopped quotient */ + div.d $f2, $f12, $f0 /* t = x / y */ + /* 17 cycle interlock */ + + /* read inexact bit */ + cfc1 t5, $31 + and t4, t0 /* clear final inexact bit */ + and t5, INEXACT + bne t5, 0, 3f + and t5, t4, 1 + + /* exact */ + c.eq.d $f0, $f2 /* if t = y, return y */ + mfc1 t0, $f2 /* t = t - 1 */ + bc1t 7f + mfc1 t1, $f3 + bne t0, 0, 2f + subu t0, 1 + subu t1, 1 + mtc1 t1, $f3 +2: mtc1 t0, $f2 + +3: bne t5, 0, 6f + or t4, INEXACT|STICKY_INEXACT /* set final inexact bit */ + /* if round mode is nearest or +inf */ + mfc1 t0, $f2 /* t = t + 1 */ + mfc1 t1, $f3 + addu t0, 1 + bne t0, 0, 5f + mtc1 t0, $f2 + addu t1, 1 + mtc1 t1, $f3 + +5: and t5, t4, 3 /* if round mode is +inf */ + beq t5, 0, 6f + mfc1 t0, $f0 /* y = y + 1 */ + mfc1 t1, $f1 + addu t0, 1 + bne t0, 0, 55f + mtc1 t0, $f0 + addu t1, 1 + mtc1 t1, $f1 +55: + nop + +6: /* y = (y + t) / 2 */ + add.d $f0, $f2 + /* 1 cycle interlock (2 cycle stall) */ + mfc1 t0, $f1 + nop + subu t0, t2 + mtc1 t0, $f1 +7: ctc1 t4, $31 /* restore rounding mode, set/reset inexact */ + j ra + nop + .set reorder + +8: /* sign = 1 or biased exponent = 0 */ + mfc1 t3, $f12 + sll t2, t0, 1 + bne t2, 0, 1f + bne t3, 0, 1f +9: /* x = 0.0, -0.0, +Infinity, or NaN */ + mov.d $f0, $f12 + j ra +1: /* x < 0 or x = denorm */ + move t8, ra + bgez t0, denorm_sqrt + +#if 0 + +#if defined(__STDC__) + .extern errno 4 +#define EDOM 33 + li t7, EDOM + sw t7, errno +#endif + +#endif + c.un.d $f12, $f12 + li.d $f0, 0.0 + bc1t 9b + div.d $f0, $f0 + j ra +.end _fsqrt + +.text .text$xsqrt +.ent denorm_sqrt +denorm_sqrt: + .frame sp, 0, t8 + .prologue 0 + li t1, ((1023+1022)<<20) + mtc1 t1, $f1 + mtc1 $0, $f0 + mul.d $f12, $f0 + jal _fsqrt + mfc1 t1, $f1 + subu t1, (511<<20) + mtc1 t1, $f1 + j t8 + /* nop */ +.end denorm_sqrt diff --git a/private/fp32/tran/modf.c b/private/fp32/tran/modf.c new file mode 100644 index 000000000..72c4d393e --- /dev/null +++ b/private/fp32/tran/modf.c @@ -0,0 +1,64 @@ +/*** +*modf.c - modf() +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-24-91 GDP written +* 1-13-92 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +extern double _frnd(double); + +/*** +*double modf(double x, double *intptr) +* +*Purpose: +* Split x into fractional and integer part +* The signed fractional portion is returned +* The integer portion is stored as a floating point value at intptr +* +*Entry: +* +*Exit: +* +*Exceptions: +* I +*******************************************************************************/ +static unsigned int newcw = (ICW & ~IMCW_RC) | (IRC_CHOP & IMCW_RC); + +double modf(double x, double *intptr) +{ + unsigned int savedcw; + double result,intpart; + + /* save user fp control word */ + savedcw = _ctrlfp(newcw,IMCW); /* round towards 0 */ + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x)){ + *intptr = QNAN_MODF; + switch (_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I, OP_MODF, x, QNAN_MODF, savedcw); + case T_QNAN: + return _handle_qnan1(OP_MODF, x, savedcw); + default: //T_SNAN + return _except1(FP_I, OP_MODF, x, _s2qnan(x), savedcw); + } + } + + + intpart = _frnd(x); //fix needed: this may set the P exception flag + //and pollute the fp status word + + *intptr = intpart; + result = x - intpart; + RETURN(savedcw,result); +} diff --git a/private/fp32/tran/nmake.mak b/private/fp32/tran/nmake.mak new file mode 100644 index 000000000..7912bc8d9 --- /dev/null +++ b/private/fp32/tran/nmake.mak @@ -0,0 +1,62 @@ +#### +#nmake.mak - makefile +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# Build 32bit NT i386 libs in OMF format +# +#Revision History: +# 8-21-90 GDP +# 3-04-92 GDP Drop support for multiple source/target OS's & CPU's +# +################################################################################ +!include ..\def.mak + + +OBJS = \ + $(OBJDIR)\bessel.obj \ + $(OBJDIR)\ceil.obj \ + $(OBJDIR)\fabs.obj \ + $(OBJDIR)\floor.obj \ + $(OBJDIR)\fpexcept.obj \ + $(OBJDIR)\frexp.obj \ + $(OBJDIR)\hypot.obj \ + $(OBJDIR)\ldexp.obj \ + $(OBJDIR)\matherr.obj \ + $(OBJDIR)\modf.obj \ + $(OBJDIR)\powhlp.obj \ + $(OBJDIR)\util.obj \ + $(OBJDIR)\fpieee.obj \ + \ + $(OBJDIR)\ftol.obj \ + $(OBJDIR)\huge.obj \ + $(OBJDIR)\ieee87.obj \ + $(OBJDIR)\ieee.obj \ + $(OBJDIR)\frnd.obj \ + $(OBJDIR)\fsqrt.obj \ + $(OBJDIR)\87cdisp.obj \ + $(OBJDIR)\87disp.obj \ + $(OBJDIR)\87ctran.obj \ + $(OBJDIR)\87tran.obj \ + $(OBJDIR)\87ctrig.obj \ + $(OBJDIR)\87trig.obj \ + $(OBJDIR)\87ctriga.obj \ + $(OBJDIR)\87triga.obj \ + $(OBJDIR)\87ctrigh.obj \ + $(OBJDIR)\87trigh.obj \ + $(OBJDIR)\87csqrt.obj \ + $(OBJDIR)\87sqrt.obj \ + $(OBJDIR)\87fmod.obj \ + $(OBJDIR)\87except.obj + + + +$(LIBDIR)\tran$(TARGETNAMESUFFIX).lib: $(OBJS) + if exist $@ erase $@ + $(LIBEXE) @<< +$@ +y +$(OBJS) +$(LIBDIR)\tran$(TARGETNAMESUFFIX).map; +<< diff --git a/private/fp32/tran/pow.c b/private/fp32/tran/pow.c new file mode 100644 index 000000000..c2b8a36af --- /dev/null +++ b/private/fp32/tran/pow.c @@ -0,0 +1,363 @@ +/*** +*pow.c - raise to a power +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-20-91 GDP support IEEE exceptions & denormals +* 1-11-92 GDP special handling of small powers +* special handling of u1, u2 when cancellation occurs +* 3-22-92 GDP changed handling of int exponents, pow(0, neg) +* added check to avoid internal overflow due to large y +* 11-09-92 GDP adjusted special return values according to NCEG spec +* pow(0,0) now returns 1 +* 07-16-93 SRW ALPHA Merge +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> +#include <float.h> + +static double _reduce(double); + +static double const a1[18] = { + 0.00000000000000000000e+000, /* dummy element */ + 1.00000000000000000000e+000, + 9.57603280698573646910e-001, + 9.17004043204671231754e-001, + 8.78126080186649741555e-001, + 8.40896415253714543073e-001, + 8.05245165974627154042e-001, + 7.71105412703970411793e-001, + 7.38413072969749655712e-001, + 7.07106781186547524436e-001, + 6.77127773468446364133e-001, + 6.48419777325504832961e-001, + 6.20928906036742024317e-001, + 5.94603557501360533344e-001, + 5.69394317378345826849e-001, + 5.45253866332628829604e-001, + 5.22136891213706920173e-001, + 5.00000000000000000000e-001 +}; + +static double const a2[9] = { + 0.00000000000000000000e+000, /* dummy element */ + -5.31259064517897172664e-017, + 1.47993596544271355242e-017, + 1.23056946577104753260e-017, + -1.74014448683923461658e-017, + 3.84891771232354074073e-017, + 2.33103467084383453312e-017, + 4.45607092891542322377e-017, + 4.27717757045531499216e-017 +}; + +static double const log2inv = 1.44269504088896340739e+0; // 1/log(2) +static double const K = 0.44269504088896340736e+0; + +static double const p1 = 0.83333333333333211405e-1; +static double const p2 = 0.12500000000503799174e-1; +static double const p3 = 0.22321421285924258967e-2; +static double const p4 = 0.43445775672163119635e-3; + +#define P(v) (((p4 * v + p3) * v + p2) * v + p1) + +static double const q1 = 0.69314718055994529629e+0; +static double const q2 = 0.24022650695909537056e+0; +static double const q3 = 0.55504108664085595326e-1; +static double const q4 = 0.96181290595172416964e-2; +static double const q5 = 0.13333541313585784703e-2; +static double const q6 = 0.15400290440989764601e-3; +static double const q7 = 0.14928852680595608186e-4; + +#define Q(w) ((((((q7 * w + q6) * w + q5) * w + q4) * w + \ + q3) * w + q2) * w + q1) + + +/* + * Thresholds for over/underflow that results in an adjusted value + * too big/small to be represented as a double. An infinity or 0 + * is delivered to the trap handler instead + */ + +static _dbl const _ovfx ={SET_DBL(0x40e40000,0)}; // 16*log2(XMAX*2^IEEE_ADJ) +static _dbl const _uflx ={SET_DBL(0xc0e3fc00,0)}; // 16*log2(XMIN*2^(-IEEE_ADJ)) + +#define OVFX _ovfx.dbl +#define UFLX _uflx.dbl + +#define INT_POW_LIMIT 128.0 + +static double ymax = 1e20; + +static double _reduce(double x) +{ + return 0.0625 * _frnd( 16.0 * x); +} + +/*** +*double pow(double x, double y) - x raised to the power of y +* +*Purpose: +* Calculate x^y +* Algorithm from Cody & Waite +* +*Entry: +* +*Exit: +* +*Exceptions: +* +* All 5 IEEE exceptions may occur +* +*******************************************************************************/ +double pow(double x, double y) +{ + unsigned int savedcw; + int m,mprim; + int p,pprim; + int i,iw1; + int iy; + int newexp; + double diw1; + double sign; + double g,z,bigz,v,rz,result; + double u1,u2,y1,y2,w,w1,w2; + double savedx; + + /* save user fp control word */ + savedcw = _maskfp(); + savedx = x; // save original value of first argument + + /* check for infinity or NAN */ + if (IS_D_SPECIAL(x) || IS_D_SPECIAL(y)){ + if (IS_D_SNAN(x) || IS_D_SNAN(y)){ + return _except2(FP_I,OP_POW,savedx,y,_d_snan2(x,y),savedcw); + } + if (IS_D_QNAN(x) || IS_D_QNAN(y)){ + return _handle_qnan2(OP_POW,x,y,savedcw); + } + /* there is at least one infinite argument ... */ + if (_powhlp(x, y, &result) < 0) { + return _except2(FP_I,OP_POW,savedx,y,result,savedcw); + } + RETURN(savedcw,result); + } + + if (y == 0.0) { + RETURN(savedcw, 1.0); + } + + if (x == 0.0) { + int type; + + type = _d_inttype(y); + + if (y < 0.0) { + result = (type == _D_ODD ? _copysign(D_INF,x) : D_INF); + return _except2(FP_Z,OP_POW,savedx,y,result,savedcw); + } + else { + result = (type == _D_ODD ? x : 0.0); + RETURN(savedcw,result); + } + } + + sign = 1.0; + if (x < 0) { + switch (_d_inttype(y)) { + case _D_ODD: /* y is an odd integral value */ + sign = -1.0; + /* NO BREAK */ + case _D_EVEN: + x = -x; + break; + default: /* y is not an integral value */ + return _except2(FP_I,OP_POW,savedx,y,D_IND,savedcw); + } + } + + // + // This is here in order to prevent internal overflows + // due to a large value of y + // The following relation holds on overflow with a scaled + // result out of range + // (lg stands for log base 2) + // |y| * |lg(x)| > MAXEXP + IEEE_ADJUST <=> + // |y| > 2560 / |lg(x)| + // The values of lg(x) closer to 0 are: + // x lg(x) + // 3fefffffffffffff (0,99...9) -1.601e-16 + // 3ff0000000000000 (1.0) 0.0 + // 3ff0000000000001 (1.00...1) 3.203e-16 + // + // So if |y| > 2560/1.6e-16 = 1.6e19 overflow occurs + // We set ymax to 1e20 in order to have a safety margin + // + + if (ABS(y) > ymax) { + if (y < 0) { + y = -y; + // + // this may cause an underflow + // there is no problem with fp sw pollution because + // a FP_U exception is going to be raised anyway. + // + x = 1.0 / x; + } + if (x > 1.0) { + return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw); + } + else if (x < 1.0){ + return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw); + } + else { + RETURN(savedcw, sign*1.0); + } + } + + + /* determine m, g */ + g = _decomp(x, &m); + + + /* handle small integer powers + * for small integer powers this is faster that Cody&Waite's + * algorithm, and yields better precision + * Without this piece of code there was not enough precision + * to satisfy all requirements of the 'paranoia' test. + * We choose INT_POW_LIMIT such that (1) no overflow or underflow + * occurs while computing bigz (g is in the range + * [0.5, 1.0) or (1.0, 2.0] so INT_POW_LIMIT should be less than + * approximately 10^3) and (2) no extraordinary loss of precision + * occurs because of repeated multiplications (this practically + * restricts the maximum INT_POW_LIMIT to 128). + */ + + if (y <= INT_POW_LIMIT && + _d_inttype(x) != _D_NOINT && + _d_inttype(y) != _D_NOINT && + y > 0.0 ) { + + iy = (int)y; + mprim = m * iy; + + for (bigz=1 ; iy ; iy >>= 1, g *= g) { + if (iy & 0x1) + bigz *= g; + } + + newexp = _get_exp(bigz) + mprim; + if (newexp > MAXEXP + IEEE_ADJUST) { +#ifdef _M_PPC + return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, sign), savedcw); +#else + return _except2(FP_O | FP_P, OP_POW, savedx, y, _copysign(D_INF, bigz), savedcw); +#endif + } + if (newexp < MINEXP - IEEE_ADJUST) { +#ifdef _M_PPC + return _except2(FP_U | FP_P, OP_POW, savedx, y, sign*0.0, savedcw); +#else + return _except2(FP_U | FP_P, OP_POW, savedx, y, bigz*0.0, savedcw); +#endif + } + + } + + + else { + + /* determine p using binary search */ + p = 1; + if (g <= a1[9]) + p = 9; + if (g <= a1[p+4]) + p += 4; + if (g <= a1[p+2]) + p += 2; + + + /* C&W's algorithm is not very accurate when m*16-p == 1, + * because there is cancellation between u1 and u2. + * Handle this separately. + */ + if (ABS(m*16-p) == 1) { + u1 = log(x) * log2inv; + u2 = 0.0; + } + else { + /* determine z */ + z = ( (g - a1[p+1]) - a2[(p+1)/2] ) / ( g + a1[p+1] ); + z += z; + + + /* determine u2 */ + v = z * z; + rz = P(v) * v * z; + rz += K * rz; + u2 = (rz + z * K) + z; + + u1 = (m * 16 - p) * 0.0625; + } + + /* determine w1, w2 */ + y1 = _reduce(y); + y2 = y - y1; + w = u2 * y + u1 * y2; + w1 = _reduce(w); + w2 = w - w1; + w = w1 + u1 * y1; + w1 = _reduce(w); + w2 += w - w1; + w = _reduce(w2); + diw1 = 16 * (w1 + w); /* iw1 might overflow here, so use diw1 */ + w2 -= w; + + if (diw1 > OVFX) { + return _except2(FP_O | FP_P,OP_POW,savedx,y,_copysign(D_INF,sign),savedcw); + } + if (diw1 < UFLX) { + return _except2(FP_U | FP_P,OP_POW,savedx,y,sign*0.0,savedcw); + } + + iw1 = (int) diw1; /* now it is safe to cast to int */ + + + /* make sure w2 <= 0 */ + if (w2 > 0) { + iw1 += 1; + w2 -= 0.0625; + } + + /* determine mprim, pprim */ + i = iw1 < 0 ? 0 : 1; + mprim = iw1 / 16 + i; + pprim = 16 * mprim - iw1; + + /* determine 2^w2 */ + bigz = Q(w2) * w2; + + /* determine final result */ + bigz = a1[pprim + 1] + a1[pprim + 1] * bigz; + newexp = _get_exp(bigz) + mprim; + } + + + if (newexp > MAXEXP) { + result = sign * _set_exp(bigz, newexp - IEEE_ADJUST); + return _except2(FP_O | FP_P, OP_POW, savedx, y, result, savedcw); + } + if (newexp < MINEXP) { + result = sign * _set_exp(bigz, newexp + IEEE_ADJUST); + return _except2(FP_U | FP_P, OP_POW, savedx, y, result, savedcw); + } + + result = sign * _set_exp(bigz, newexp); + RETURN_INEXACT2(OP_POW, savedx, y, result, savedcw); + +} diff --git a/private/fp32/tran/powhlp.c b/private/fp32/tran/powhlp.c new file mode 100644 index 000000000..25601c086 --- /dev/null +++ b/private/fp32/tran/powhlp.c @@ -0,0 +1,115 @@ +/*** +*powhlp.c - pow() helper routines for handling special cases +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* pow(x,y) helper routine. Handles +inf, -inf +* +*Revision History: +* 11-09-91 GDP +* 11-09-92 GDP adjusted return values according to the NCEG spec +* +*******************************************************************************/ +#include <trans.h> + + +/*** +*int _powhlp(double x, double y, double * result) - pow() helper +* +*Purpose: +* Calculate x^(sign)inf +* +*Entry: +* double x: the base +* int sign: the sign of the infinite exponent (0: pos, non-0: neg) +* double *result: pointer to the result +* +*Exit: +* 0: normal exit +* -1: indicates domain error for pow(x,inf) +* +*Exceptions: +* +***************************************************************************/ + +int _powhlp(double x, double y, double * result) +{ + double absx; + int err = 0; + + + absx = ABS(x); + + if (IS_D_INF(y)) { + if (absx > 1.0) { + *result = D_INF; + } + else if (absx < 1.0) { + *result = 0.0; + } + else { + *result = D_IND; + err = -1; + } + } + + else if (IS_D_MINF(y)) { + if (absx > 1.0) { + *result = 0.0; + } + else if (absx < 1.0) { + *result = D_INF; + } + else { + *result = D_IND; + err = -1; + } + } + + else if (IS_D_INF(x)) { + if (y > 0) + *result = D_INF; + else if (y < 0.0) + *result = 0.0; + else { + *result = 1.0; + } + } + + else if (IS_D_MINF(x)) { + int type; + + type = _d_inttype(y); + + if (y > 0.0) { + *result = (type == _D_ODD ? -D_INF : D_INF); + } + else if (y < 0.0) { + *result = (type == _D_ODD ? D_MZERO : 0.0); + } + else { + *result = 1; + } + + } + + return err; +} + + + + +int _d_inttype(double y) +{ + double rounded; + /* check if y is an integral value */ + rounded = _frnd(y); + if (rounded == y) { + if (_frnd(y/2.0) == y/2.0) + return _D_EVEN; + else + return _D_ODD; + } + return _D_NOINT; +} diff --git a/private/fp32/tran/ppc/dtoul.c b/private/fp32/tran/ppc/dtoul.c new file mode 100644 index 000000000..d60dd605f --- /dev/null +++ b/private/fp32/tran/ppc/dtoul.c @@ -0,0 +1,36 @@ + +/*++ + +Copyright (c) 1993 IBM Corporation + +Module Name: + + dtoul.c + +Abstract: + + This module converts floats to unsigned longs. Appears this is + a hold over because of compiler problem on another platform. + +Author: + + Mark D. Johnson (1993) + +Environment: + + User mode. + +Revision History: + + mdj 10-12-93 This (initial) version written in C ... will let the + compiler do its job. Will rewrite if its called much. + +--*/ + +unsigned long _dtoul(double x) { + + unsigned long y; + + y=x; + return(y); +} diff --git a/private/fp32/tran/ppc/filter.c b/private/fp32/tran/ppc/filter.c new file mode 100644 index 000000000..cb1563e48 --- /dev/null +++ b/private/fp32/tran/ppc/filter.c @@ -0,0 +1,547 @@ +/*** +* filter.c - IEEE filter routine +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* Provide a user interface for IEEE fp exception handling +* +*Revision History: +* 3-10-92 GDP written +* +*******************************************************************************/ + +#include <trans.h> +#include <fpieee.h> +#include <excpt.h> +#include <nt.h> + +#define FPREG 32 /* fp reg's have numbers from 32 to 63 */ +#define SUBCODE_CT 6 /* subcode for the CTC1 instruction */ + +#define FPSCR_CO (1<<0x17) + + +ULONG _get_destreg( + unsigned long code, PEXCEPTION_POINTERS p + ); + + +_FPIEEE_FORMAT _FindDestFormat(PPC_INSTRUCTION *inst); + + +/*** +* _fpieee_flt - IEEE fp filter routine +* +*Purpose: +* Invokes the user's trap handler on IEEE fp exceptions and provides +* it with all necessary information +* +*Entry: +* unsigned long exc_code: the NT exception code +* PEXCEPTION_POINTERS p: a pointer to the NT EXCEPTION_POINTERS struct +* int handler (_FPIEEE_RECORD *): a user supplied ieee trap handler +* +*Exit: +* returns the value returned by handler +* +*Exceptions: +* +*******************************************************************************/ +int _fpieee_flt(unsigned long exc_code, + PEXCEPTION_POINTERS p, + int handler (_FPIEEE_RECORD *)) +{ + PEXCEPTION_RECORD pexc; + PCONTEXT pctxt; + _FPIEEE_RECORD ieee; + ULONG *pinfo; + PPC_INSTRUCTION *instruction; + int format,fs,ft,fd,function; + int fsr,i,ret=0; + + /* + * If the exception is not an IEEE exception, continue search + * for another handler + */ + + + if (exc_code != STATUS_FLOAT_DIVIDE_BY_ZERO && + exc_code != STATUS_FLOAT_INEXACT_RESULT && + exc_code != STATUS_FLOAT_INVALID_OPERATION && + exc_code != STATUS_FLOAT_OVERFLOW && + exc_code != STATUS_FLOAT_UNDERFLOW) { + + return EXCEPTION_CONTINUE_SEARCH; + } + +#if 0 // PPC TO DO + + + pexc = p->ExceptionRecord; + pinfo = pexc->ExceptionInformation; + pctxt = p->ContextRecord; + + // mask all exceptions + + _set_fsr(_get_fsr() & ~IMCW_IEM); + + /* + * Check for software generated exception + * By convention ExceptionInformation[0] is 0 for h/w exceptions, + * or contains a pointer to an _FPIEEE_RECORD for s/w exceptions + */ + + if (pexc->ExceptionInformation[0]) { + + /* + * we have a software exception: + * the first parameter points to the IEEE structure + */ + + return handler((_FPIEEE_RECORD *)(pinfo[0])); + + } + + + /* + * If control reaches here, then we have to deal with a hardware + * exception. + * + * MCRFS, MFFS, MTFSB[0|1], MTFSF, MTFSFI will not be handled by + * the IEEE filter routine. This is because these instructions do + * not correspond to a numerical operation and they also may also + * generate multiple exceptions + * + */ + + + /* get the instruction that faulted */ + + instruction = (PPC_INSTRUCTION *)(pexc->ExceptionAddress); + + /* check for non-numeric FP instruction */ + + if (instruction->Xform_XO == MFFS_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } else if ( instruction->Xform_XO == MCRFS_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } else if ( instruction->Xform_XO == MTFSFI_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } else if ( instruction->XFLform_XO == MTFSF_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } else if ( instruction->Xform_XO == MTFSB0_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } else if ( instruction->Xform_XO == MTFSB1_OP ) { + return EXCEPTION_CONTINUE_SEARCH; + } + + + /* + * Set floating point operation code + */ + + switch (function = instruction->Xform_XO) { + case FABS_OP: + ieee.Operation = _FpCodeFabs; + break; + case FNABS_OP: + ieee.Operation = _FpCodeNabs; + break; + case FRSP_OP: + ieee.Operation = _FpCodeRoundToSingle; + break; +/*+++ + case ROUND_LONGWORD: + ieee.Operation = _FpCodeRound; + break; + case TRUNC_LONGWORD: + ieee.Operation = _FpCodeTruncate; + break; + case CEIL_LONGWORD: + ieee.Operation = _FpCodeCeil; + break; + case FLOOR_LONGWORD: + ieee.Operation = _FpCodeFloor; + break; + case CONVERT_SINGLE: + case CONVERT_DOUBLE: + case CONVERT_LONGWORD: + ieee.Operation = _FpCodeConvert; + break; +---*/ + default: + + switch (function = instruction->Aform_XO) { + case FADD_OP: + ieee.Operation = _FpCodeAdd; + break; + case FSUB_OP: + ieee.Operation = _FpCodeSub; + break; + case FMUL_OP: + ieee.Operation = _FpCodeMultiply; + break; + case FDIV_OP: + ieee.Operation = _FpCodeDivide; + break; + case FSQRT_OP: + ieee.Operation = _FpCodeSquareRoot; + break; + case FMADD_OP: + ieee.Operation = _FpCodeMultAdd; + break; + case FMSUB_OP: + ieee.Operation = _FpCodeMultSub; + break; + case FNMADD_OP: + ieee.Operation = _FpCodeNMultAdd; + break; + case FNMSUB_OP: + ieee.Operation = _FpCodeNMultSub; + break; + case FCMPO_OP: + ieee.Operation = _FpCodeCompare; + break; + case FCMPU_OP: + ieee.Operation = _FpCodeCompare; + break; + default: + + ieee.Operation = _FpCodeUnspecified; + break; + } + } + + + switch ( instruction->PrimaryOp ) { + case X59_OP: + format = _FpFormatFp32; + break; + case X63_OP: + format = _FpFormatFp64; + break; + case FORMAT_WORD: + format = _FpFormatI32; + break; + } + + fs = instruction->c_format.Fs + FPREG; + ft = instruction->c_format.Ft + FPREG; + fd = instruction->c_format.Fd + FPREG; + + ieee.Operand1.OperandValid = 1; + ieee.Operand1.Format = format; + *(ULONG *)&ieee.Operand1.Value = _GetRegisterValue(fs, pctxt); + if (instruction->c_format.Format == FORMAT_DOUBLE) { + *(1+(ULONG *)&ieee.Operand1.Value) = _GetRegisterValue(fs+1, pctxt); + } + + /* + * add, subtract, mul, div, and compare instructions + * take two operands. The first four of these instructions + * have consecutive function codes + */ + + if (function >= FADD_OP && function <= FLOAT_DIVIDE || + function >= FLOAT_COMPARE && function <= FLOAT_COMPARE + 15) { + + ieee.Operand2.OperandValid = 1; + ieee.Operand2.Format = format; + *(ULONG *)&ieee.Operand2.Value = _GetRegisterValue(ft, pctxt); + if (instruction->c_format.Format == FORMAT_DOUBLE) { + *(1+(ULONG *)&ieee.Operand2.Value) = _GetRegisterValue(ft+1, pctxt); + } + } + else { + + ieee.Operand2.OperandValid = 0; + } + + + + /* + * NT provides the IEEE result in the exception record + * in the following form: + * + * pinfo[0] NULL + * pinfo[1] continuation address + * pinfo[2] \ + * ... > IEEE result (_FPIEEE_VALUE) + * pinfo[6] / + */ + + for (i=0;i<5;i++) { + ieee.Result.Value.U32ArrayValue.W[i] = pinfo[i+2]; + } + + /* + * Until NT provides a fully qualified type in the exception + * record, fill in the OperandValid and Format fields + * manualy + */ + + ieee.Result.OperandValid = 1; + ieee.Result.Format = _FindDestFormat(instruction); + + + fsr = pctxt->Fsr; + + switch (fsr & IMCW_RC) { + case IRC_NEAR: + ieee.RoundingMode = _FpRoundNearest; + break; + case IRC_CHOP: + ieee.RoundingMode = _FpRoundChopped; + break; + case IRC_UP: + ieee.RoundingMode = _FpRoundPlusInfinity; + break; + case IRC_DOWN: + ieee.RoundingMode = _FpRoundMinusInfinity; + break; + } + + ieee.Precision = _FpPrecisionFull; + + + ieee.Status.Inexact = fsr & ISW_INEXACT ? 1 : 0; + ieee.Status.Underflow = fsr & ISW_UNDERFLOW ? 1 : 0; + ieee.Status.Overflow = fsr & ISW_OVERFLOW ? 1 : 0; + ieee.Status.ZeroDivide = fsr & ISW_ZERODIVIDE ? 1 : 0; + ieee.Status.InvalidOperation = fsr & ISW_INVALID ? 1 : 0; + + ieee.Enable.Inexact = fsr & IEM_INEXACT ? 1 : 0; + ieee.Enable.Underflow = fsr & IEM_UNDERFLOW ? 1 : 0; + ieee.Enable.Overflow = fsr & IEM_OVERFLOW ? 1 : 0; + ieee.Enable.ZeroDivide = fsr & IEM_ZERODIVIDE ? 1 : 0; + ieee.Enable.InvalidOperation = fsr & IEM_INVALID ? 1 : 0; + +// ieee.Cause.Inexact = fsr & ICS_INEXACT ? 1 : 0; +// ieee.Cause.Underflow = fsr & ICS_UNDERFLOW ? 1 : 0; +// ieee.Cause.Overflow = fsr & ICS_OVERFLOW ? 1 : 0; +// ieee.Cause.ZeroDivide = fsr & ICS_ZERODIVIDE ? 1 : 0; +// ieee.Cause.InvalidOperation = fsr & ICS_INVALID ? 1 : 0; + + + + /* + * invoke user's handler + */ + + ret = handler(&ieee); + + if (ret == EXCEPTION_CONTINUE_EXECUTION) { + + // + // set the correct continuation address + // (this covers the case of an exception that occured in + // a delay slot), NT passes the cont. address in pinfo[1] + // + + pctxt->Fir = pinfo[1]; + + // + // Sanitize fsr + // + + pctxt->Fsr &= ~IMCW_ICS; + + // + // Especially for the fp compare instruction + // the result the user's handler has entered + // should be converted into the proper exc_code + // + + if (function >= FLOAT_COMPARE && + function <= FLOAT_COMPARE + 15) { + + // + // Fp comare instruction format: + // + // 31 0 + // ------------------------------------------------- + // | COP1 | fmt | ft | fs | 0 |FC | cond | + // ------------------------------------------------- + // 6 5 5 5 5 2 4 + // + // 'cond' field interpretation: + // bit corresponds to predicate + // cond2 less + // cond1 equal + // cond0 unordered + // + + ULONG condmask, condition; + + switch (ieee.Result.Value.CompareValue) { + case FpCompareEqual: + + // + //less = 0 + //equal = 1 + //unordered = 0 + // + + condmask = 2; + break; + + case FpCompareGreater: + + // + //less = 0 + //equal = 0 + //unordered = 0 + // + + condmask = 0; + break; + + case FpCompareLess: + + // + //less = 1 + //equal = 0 + //unordered = 0 + // + + condmask = 4; + break; + + case FpCompareUnordered: + + // + //less = 0; + //equal = 0; + //unordered = 1; + // + + condmask = 1; + break; + } + + if (*(ULONG *)instruction & condmask) { + + /* + * condition is true + */ + + pctxt->Fsr |= FPSCR_CO; + } + + else { + + /* + * condition is false + */ + + pctxt->Fsr &= ~FPSCR_CO; + } + + } + + else { + + // + // copy user's result to hardware destination register + // + + _SetRegisterValue(fd,ieee.Result.Value.U32ArrayValue.W[0],pctxt); + + if (instruction->c_format.Format == FORMAT_DOUBLE) { + _SetRegisterValue(fd+1,ieee.Result.Value.U32ArrayValue.W[1],pctxt); + } + } + + // + // make changes in the floating point environment + // take effect on continuation + // + + switch (ieee.RoundingMode) { + case _FpRoundNearest: + pctxt->Fsr = pctxt->Fsr & ~IMCW_RC | IRC_NEAR & IMCW_RC; + break; + case _FpRoundChopped: + pctxt->Fsr = pctxt->Fsr & ~IMCW_RC | IRC_CHOP & IMCW_RC; + break; + case _FpRoundPlusInfinity: + pctxt->Fsr = pctxt->Fsr & ~IMCW_RC | IRC_UP & IMCW_RC; + break; + case _FpRoundMinusInfinity: + pctxt->Fsr = pctxt->Fsr & ~IMCW_RC | IRC_DOWN & IMCW_RC; + break; + } + + // + // the user is allowed to change the exception mask + // ignore changes in the precision field (not supported by MIPS) + // + + if (ieee.Enable.Inexact) + pctxt->Fsr |= IEM_INEXACT; + if (ieee.Enable.Underflow) + pctxt->Fsr |= IEM_UNDERFLOW; + if (ieee.Enable.Overflow) + pctxt->Fsr |= IEM_OVERFLOW; + if (ieee.Enable.ZeroDivide) + pctxt->Fsr |= IEM_ZERODIVIDE; + if (ieee.Enable.InvalidOperation) + pctxt->Fsr |= IEM_INVALID; + + } + +#endif + + return ret; +} + + + +/*** +* _FindDestFormat - Find format of destination +* +*Purpose: +* return the format of the destination of a mips fp instruction +* assumes an R-type instruction that may generate IEEE ecxeptions +* (see table above) +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +_FPIEEE_FORMAT _FindDestFormat(PPC_INSTRUCTION *inst) +{ + _FPIEEE_FORMAT format; + + /* X59_OP instructions are single prec floating point */ + + if ( inst->Primary_Op == X59_OP ) + format = _FpFormatFp32; + + /* Floating Point Round to Single Precision */ + + else if ( inst->Xform_XO == FRSP_OP ) + format = _FpFormatFp32; + + /* Floating Point Convert to Integer Word (w & w/o Round to Zero) /* + + else if ( (inst->Xform_XO == FCTIW_OP) || (inst->Xform_XO == FCTIW_OP) ) + format = _FpFormatFp32; + + /* Floating Point Compare */ + + else if ( (inst->Xform_XO == FCMPO_OP) || (inst->Xform_XO == FCMPU_OP) ) + format = _FpFormatCompare; + + /* Otherwise, 64-bit Floating Point Result */ + + else + format =_FpFormatFp64; + + return format; +} diff --git a/private/fp32/tran/ppc/fpctrl.s b/private/fp32/tran/ppc/fpctrl.s new file mode 100644 index 000000000..721ae0d8b --- /dev/null +++ b/private/fp32/tran/ppc/fpctrl.s @@ -0,0 +1,239 @@ +// +// Copyright (c) 1993-1995 IBM Corporation +// +// Module Name: +// +// fpctrl.s +// +// Abstract: +// +// This module contains the lowest level routines for +// manipulating the floating point status control register +// (FPSCR) on the PPC. +// +// Author: +// +// Mark D. Johnson (1993) +// +// Environment: +// +// User mode. +// +// Revision History: +// +// Curt Fawcett 01-31-95 Fixed _ctrlfp to disable +// exceptions rather than +// enable them +// +// Peter Johnston 03-27-95 Fixed _ctrlfp to not enable +// exceptions not covered in the +// mask. Also simplified it. +// Fixed stack offsets to actually +// use slack space rather than +// scribbling in the caller's +// frame header. +// Changed clrfp to use lfd instead +// of lfs to load 0. +// + +#include <kxppc.h> + +// +// Define temporary storage +// +// N.B. Uses stack beyond the stack pointer. +// + .struct 0 +t: + .double 0.0 +t1: + +// +// Define local values +// + +#define ICW 0xf8 +#define dnm_msk 0x4 + +// +// UNSIGED INT _ctrlfp( IN UNSIGNED INT newctrl, +// IN UNSIGNED INT mask) +// +// Routine Description: +// +// Set specified bits in FPSCR. +// +// NOTE: The newctrl value passed in is the abstract FP +// control word. This function converts the abstract +// values to the correct FPSCR value. It also converts +// the current FPSCR values to the correct abstract +// values before returning. +// +// Return Value: +// +// Old FPSCR converted to the abstract FP control word +// + + LEAF_ENTRY(_ctrlfp) + + mffs f.0 // f.0 <- Old fpscr + stfd f.0,(t-t1)(r.sp) // get fpscr into gpr via memory + xori r.5,r.3,ICW // invert new exception enable bits + lwz r.3,(t-t1)(r.sp) // r.3 <- Old fpscr + and r.5,r.5,r.4 // r.5 <- newctrl & mask + andc r.6,r.3,r.4 // r.6 <- Old fpscr & ~mask + or r.6,r.5,r.6 // r.6 <- new mask + stw r.6,(t-t1)(r.sp) // get fpscr in fpr via memory + lfd f.0,(t-t1)(r.sp) // get new fpscr value + xori r.3,r.3,ICW // invert exception enable bits in ret val + mtfsf 0xff,f.0 // set fpscr + + LEAF_EXIT(_ctrlfp) + +// +// UNSIGNED INT _statfp() +// +// Routine Description: +// +// Fetch current value of FPSCR +// +// Return Value: +// +// Current FPSCR +// + + LEAF_ENTRY(_statfp) + + mffs f.0 // Get FPSCR value + stfd f.0,(t-t1)(r.sp) // Store FPSCR value + lwz r.3,(t-t1)(r.sp) // Load FPSCR value + + LEAF_EXIT(_statfp) + +// +// UNSIGNED INT _clrfp() +// +// Routine Description: +// +// Clear sticky exception status bits, which are +// bits 0-12 and 23. Actually clear three uppermost +// fields in FPSCR because bits 0-2 "don't matter". +// The 'mtfsf' instruction cannot directly set bits 1-2, +// and bit 0 should be '0' after execution of this +// instruction anyway. +// +// +// Return Value: +// +// Current FPSCR +// + + LEAF_ENTRY(_clrfp) + + mffs f.0 // Get FPSCR Value + li r.12,0 // Get constant zero + stw r.12,(t-t1)(r.sp) // Store constant 0.0 + stw r.12,(t-t1+4)(r.sp)// + lfd f.1,(t-t1)(r.sp) // Load constant 0.0 + stfd f.0,(t-t1)(r.sp) // Store FPSCR value + mtfsf 0xe0,f.1 // Zero FPSCR under mask + lwz r.3,(t-t1)(r.sp) // Load FPSCR value + mtfsb0 0x17 // Set bit 23 to zero + mtfsb0 0x0c // Set bit 12 to zero + + LEAF_EXIT(_clrfp) + +// +// VOID _FPreset() +// +// Routine Description: +// +// Reset all FPSCR bits except Flpt-Non-IEEE mode +// (let denormals flush to zero if user has set this +// mode) +// +// +// Return Value: +// +// None. +// + + LEAF_ENTRY(_FPreset) + + mffs f.0 // Get FPSCR value + li r.3,dnm_msk // Get denorm mask + stfd f.0,(t-t1)(r.sp) // Store FPSCR + lwz r.5,(t-t1)(r.sp) // Load FPSCR + and r.5,r.3,r.5 // Clear bits + stw r.5,(t-t1)(r.sp) // Store new FPSCR + lfd f.1,(t-t1)(r.sp) // Load new FPSCR + mtfsf 0xff,f.1 // Reset FPSCR + + LEAF_EXIT(_FPreset) + +// +// VOID _set_statfp(IN UNSIGNED INT sw); +// +// Routine Description: +// +// Reset all FPSCR bits except Flpt-Non-IEEE mode +// (let denormals flush to zero if user has set this +// mode) +// +// +// Return Value: +// +// None. +// + + LEAF_ENTRY(_set_statfp) + + mffs f.0 // Get FPSCR value + stfd f.0,(t-t1)(r.sp) // Store FPSCR value + lwz r.5,(t-t1)(r.sp) // Load FPSCR value + or r.5,r.3,r.5 // Zero all but non-ieee bit + stw r.5,(t-t1)(r.sp) // Store new FPSCR + lfd f.1,(t-t1)(r.sp) // Load new FPSCR + mtfsf 0xff,f.1 // Reset FPSCR + + LEAF_EXIT(_set_statfp) + +// +// VOID _set_fsr( IN UNSIGNED INT newctrl) +// +// Routine Description: +// +// Set FPSCR to specified value. +// +// Return Value: +// +// None. +// + + LEAF_ENTRY(_set_fsr) + + stw r.3,(t-t1)(r.sp) // Store new FPSCR value + lfd f.1,(t-t1)(r.sp) // Load new FPSCR value + mtfsf 0xff,f.1 // Set new FPSCR value + + LEAF_EXIT(_set_fsr) + +// +// UNSIGED INT _get_fsr() +// +// Routine Description: +// +// Return current FPSCR. +// +// Return Value: +// +// Current FPSCR. +// + + LEAF_ENTRY(_get_fsr) + + mffs f.0 // Get FPSCR value + stfd f.0,(t-t1)(r.sp) // Store FPSCR value + lwz r.3,(t-t1)(r.sp) // Load FPSCR value + + LEAF_EXIT(_get_fsr) diff --git a/private/fp32/tran/ppc/frnd.s b/private/fp32/tran/ppc/frnd.s new file mode 100644 index 000000000..e3a746cde --- /dev/null +++ b/private/fp32/tran/ppc/frnd.s @@ -0,0 +1,104 @@ +// TITLE("Round to Integer") +//++ +// +// Copyright (c) 1993 IBM Corporation +// +// Module Name: +// +// frnd.s +// +// Abstract: +// +// Round double to integer as dictated by the current rounding mode +// +// Author: +// +// Mark D. Johnson +// +// Environment: +// +// User mode. +// +// Revision History: +// +//-- + +#include <kxppc.h> + +// +// Define local (volitile) registers +// + + + SBTTL("Round to Integer") +//++ +// +// DOUBLE +// frnd( +// in DOUBLE x +// ) +// +// Routine Description: +// +// +// Arguments: +// +// x - 64 bit flpt value to be rounded to integer value +// +// Return Value: +// +// Floating point (64 bit) integer value. +// +//-- + + .data +LOCAL_DATA: + .align 3 +F_ZERO: + .double 0.0 +F_BIG_NUM: + .word 0x00000000,0x43300000 // 4503599627370496.0 + + .text + + LEAF_ENTRY(_frnd) + + lwz r.12,[toc].data(r.2) + lfd f.4,(F_ZERO-LOCAL_DATA)(r.12) + lfd f.5,(F_BIG_NUM-LOCAL_DATA)(r.12) + + fcmpu cr.0,f.1,f.4 // f.1 contains input value of x + beq- _frndExit // if zero, return with orig value + // in f.1 (preserve sign of flpt 0.0) +// +// Non-zero ... check to see if number lg enough that couldn't have a +// fractional part +// + + fabs f.6,f.5 // F_BIG_NUM == 2^52 + fcmpu cr.0,f.1,f.6 // if (abs(x)>F_BIG_NUM) return + bgt- _frndExit + +// +// Need to round fractional part using current mode +// + + fcmpu cr.0,f.1,f.4 + blt negative // if <0, reverse order of operations + + fadd f.4,f.1,f.5 + fsub f.1,f.4,f.5 // f.1 <- ((x+F_BIG_NUM)-F_BIG_NUM) + b _frndExit + +negative: + + fsub f.4,f.1,f.5 + fadd f.1,f.4,f.5 // f.1 <- ((x-F_BIG_NUM)+F_BIG_NUM) + +// +// Exit +// + +_frndExit: + + LEAF_EXIT(_frnd) diff --git a/private/fp32/tran/ppc/fsincos.s b/private/fp32/tran/ppc/fsincos.s new file mode 100644 index 000000000..ee10cb179 --- /dev/null +++ b/private/fp32/tran/ppc/fsincos.s @@ -0,0 +1,196 @@ +// TITLE("Sine and Cosine") +//++ +// +// Copyright (c) 1995 IBM Corporation +// +// Module Name: +// +// sincosp.s +// +// Abstract: +// +// sin() - Returns sin() 64-bit +// cos() - Returns cos() 64-bit +// +// Author: +// +// James B. Shearer +// +// Environment: +// +// User mode only. +// +// Revision History: +// +//-- +// +#include <ksppc.h> + + .set dw12,0 + + LEAF_ENTRY(sin) + + lwz 4,[toc].data(2) + fabs 9,1 // xa=abs(x) + fmr 8,1 // x + lfd 7,s0 -ttrig(4) // 1 + fmul 1,1,1 // xr2=xr1*xr1 + lfd 6,s7 -ttrig(4) // s7 + lfd 4,s6 -ttrig(4) // s6 + fcmpu 1,7,9 // 1 gets 1 ? xa + fmadd 2,6,1,4 // ao=s7*xr2+s6 + lfd 0,s5 -ttrig(4) // s5 + fmul 5,1,1 // xr4=xr2*xr2 + lfd 6,s4 -ttrig(4) // s4 + fmadd 2,1,2,0 // ao=xr2*ao+s5 + lfd 4,s2 -ttrig(4) // s2 + fmadd 0,5,6,4 // ae=xr4*s4+s2 + lfd 7,s3 -ttrig(4) // s3 + fmadd 2,5,2,7 // ao=xr4*ao+s3 + lfd 4,s1 -ttrig(4) // s1 + fmadd 0,1,0,4 // ae=xr2*ae+s1 + lfd 7,rpi2 -ttrig(4) // rpi2 + fmul 1,1,8 // xr2*xr1 + lfd 3,xadd -ttrig(4) // xadd + fmadd 0,5,2,0 // xr4*ao+ae + fmsub 10,7,8,3 // xn=rpi2*x-xadd + stfd 10,-8(1) // xni + fmadd 1,1,0,8 // dsin=(ae*xr2+ao)*xr2*xr1+xr1 + bgtlr 1 // + lfd 4,xpi2h -ttrig(4) // xpi2h + fadd 6,3,10 // xn=xadd+xn + lfd 0,xpi2m -ttrig(4) // xpi2m + lfd 7,xlim -ttrig(4) // xlim + fnmsub 3,4,6,8 // xr1=-xpi2h*xn+x + lwz 0,-8+dw12(1) // itemp + fcmpu 1,7,9 // 1 gets xa ? xlim + lfd 4,xpi2l -ttrig(4) // xpi2l + andi. 3,0,1 // 0 gets iand(itemp,1) ? 0 + fnmsub 5,0,6,3 // xr1=-xpi2m*xn+xr1 + rlwinm 0,0,0,0x00000002 // iand(itemp,2) + fnmsub 3,4,6,5 // xr1=-xpi2l*xn+xr1 + cmpwi 6,0,0 // 6 gets iand(itemp,2) ? 0 + fmul 9,5,5 // xr2=xr1*xr1 + bne pcos // +psin: // s7 + lfd 6,s7 -ttrig(4) // s7 + lfd 4,s6 -ttrig(4) // s6 + fmadd 2,6,9,4 // ao=s7*xr2+s6 + lfd 0,s5 -ttrig(4) // s5 + fmul 5,9,9 // xr4=xr2*xr2 + lfd 6,s4 -ttrig(4) // s4 + bng 1,rnan + fmadd 2,9,2,0 // ao=xr2*ao+s5 + lfd 4,s2 -ttrig(4) // s2 + fmadd 0,5,6,4 // ae=xr4*s4+s2 + lfd 8,s3 -ttrig(4) // s3 + fmadd 2,5,2,8 // ao=xr4*ao+s3 + lfd 4,s1 -ttrig(4) // s1 + fmadd 0,9,0,4 // ae=xr2*ae+s1 + fmul 6,9,3 // xr3=xr2*xr1 + fmadd 0,5,2,0 // a=xr4*ao+ae + fmadd 1,6,0,3 // dsin=xr3*a+xr1 + beqlr 6 // + fnmadd 1,6,0,3 // dsin=-dsin + blr // + + + ALTERNATE_ENTRY(cos) + + lwz 4,[toc].data(2) + fabs 9,1 // xa=abs(x) + fmr 8,1 // x + lfd 7,c0 -ttrig(4) // + fmul 1,1,1 // xr2=xr1*xr1 + lfd 6,c7 -ttrig(4) // c7 + lfd 4,c6 -ttrig(4) // c6 + fcmpu 1,7,9 // 1 gets 1 ? xa + fmadd 2,6,1,4 // ao=c7*xr2+c6 + lfd 0,c5 -ttrig(4) // c5 + fmul 5,1,1 // xr4=xr2*xr2 + lfd 6,c4 -ttrig(4) // c4 + fmadd 2,1,2,0 // ao=xr2*ao+c5 + lfd 4,c2 -ttrig(4) // c2 + fmadd 0,5,6,4 // ae=xr4*s4+s2 + lfd 10,c3 -ttrig(4) // c3 + fmadd 2,5,2,10 // ao=xr4*ao+c3 + lfd 4,c1 -ttrig(4) // c1 + fmadd 0,1,0,4 // ae=xr2*ae+c1 + lfd 6,rpi2 -ttrig(4) // rpi2 + fmadd 0,5,2,0 // xr4*ao+ae + lfd 3,xadd -ttrig(4) // xadd + fmsub 10,6,8,3 // xn=rpi2*x-xadd + stfd 10,-8(1) // xni + fmadd 1,1,0,7 // xr2*(xr4*ae+ao)+c0 + bgtlr 1 + lfd 4,xpi2h -ttrig(4) // xpi2h + lfd 11,xpi2m -ttrig(4) // xpi2m + fadd 6,3,10 // xn=xadd+xn + lfd 7,xlim -ttrig(4) // xlim + fmsub 3,4,6,8 // xr1=xpi2h*xn-x + lwz 0,-8+dw12(1) // itemp + fcmpu 1,7,9 // 1 gets xlim ? xa + lfd 0,xpi2l -ttrig(4) // xpi2l + andi. 3,0,1 // 0 gets iand(itemp,1) ? 0 + fmadd 5,11,6,3 // xr1=xpi2m*xn+xr1 + rlwinm 0,0,0,0x00000002 // iand(itemp,2) + fmadd 3,0,6,5 // xr1=xpi2l*xn+xr1 + cmpwi 6,0,0 // 6 gets iand(itemp,2) ? 0 + fmul 9,5,5 // xr2=xr1*xr1 + bne psin // +pcos: + lfd 6,c7 -ttrig(4) // c7 + lfd 4,c6 -ttrig(4) // c6 + fmadd 2,6,9,4 // ao=c7*xr2+c6 + lfd 0,c5 -ttrig(4) // c5 + fmul 5,9,9 // xr4=xr2*xr2 + lfd 6,c4 -ttrig(4) // c4 + fmadd 2,9,2,0 // ao=xr2*ao+c5 + lfd 4,c2 -ttrig(4) // c2 + fmadd 0,5,6,4 // ae=xr4*s4+s2 + lfd 7,c3 -ttrig(4) // c3 + fmadd 2,5,2,7 // ao=xr4*ao+c3 + lfd 4,c1 -ttrig(4) // c1 + bng 1,rnan // + fmadd 0,9,0,4 // ae=xr2*ae+c1 + lfd 7,c0 -ttrig(4) // c0 + fmadd 0,5,2,0 // a=xr4*ao+ae + fmadd 1,9,0,7 // dcos=xr2*a+c0 + beqlr 6 + fnmadd 1,9,0,7 // dcos=-dcos + blr // +rnan: + lfd 1,xnan -ttrig(4) // x3df + + LEAF_EXIT(cos) + + .data +ttrig: + .align 6 +// minimax polynomial coefficients +s7: .double -0.753213484933210972E-12 +s6: .double 0.160571285514715856E-09 +s5: .double -0.250520918387633290E-07 +s4: .double 0.275573191453906794E-05 +s3: .double -0.198412698410701448E-03 +s2: .double 0.833333333333309209E-02 +s1: .double -0.166666666666666657E+00 +s0: .double 0.100000000000000000E+01 +c7: .double -0.112753632738365317E-10 +c6: .double 0.208735047247632818E-08 +c5: .double -0.275572911309937875E-06 +c4: .double 0.248015871681607202E-04 +c3: .double -0.138888888885498984E-02 +c2: .double 0.416666666666625843E-01 +c1: .double -0.499999999999999833E+00 +c0: .double 0.100000000000000000E+01 +rpi2: .double 0.636619772367581341e+00 +xpi2h: .double 0.157079632679489656e+01 +xpi2m: .double 0.612323399573676480e-16 +xpi2l: .double 0.108285667160535624e-31 +xadd: .long 0 + .long 0xc3380000 +xlim: .long 0x54442d19 + .long 0x432921fb +xnan: .long 0 + .long 0x7ff80000 diff --git a/private/fp32/tran/ppc/getsetrg.c b/private/fp32/tran/ppc/getsetrg.c new file mode 100644 index 000000000..6608dbae6 --- /dev/null +++ b/private/fp32/tran/ppc/getsetrg.c @@ -0,0 +1,512 @@ +/*++ + +Copyright (c) 1993 IBM Corporation + +Module Name: + + getsetrg.c + +Abstract: + + This module implement the code necessary to get and set register values. + These routines are used during the emulation of unaligned data references + and floating point exceptions. + +Author: + + Mark D. Johnson (1993) Based on MIPS version by David Cutler. + +Environment: + + Kernel mode only. + +Revision History: + + + +--*/ + +#include <nt.h> +#include <ntppc.h> + +ULONG +_GetRegisterValue ( + IN ULONG Register, + IN PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to get the value of a register from the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + returned. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Context - Supplies a pointer to a context + +Return Value: + + The value of the specified register is returned as the function value. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // General Purpose Registers + // + + case 0: + return Context->Gpr0; + + case 1: + return Context->Gpr1; + + case 2: + return Context->Gpr2; + + case 3: + return Context->Gpr3; + + case 4: + return Context->Gpr4; + + case 5: + return Context->Gpr5; + + case 6: + return Context->Gpr6; + + case 7: + return Context->Gpr7; + + case 8: + return Context->Gpr8; + + case 9: + return Context->Gpr9; + + case 10: + return Context->Gpr10; + + case 11: + return Context->Gpr11; + + case 12: + return Context->Gpr12; + + case 13: + return Context->Gpr13; + + case 14: + return Context->Gpr14; + + case 15: + return Context->Gpr15; + + case 16: + return Context->Gpr16; + + case 17: + return Context->Gpr17; + + case 18: + return Context->Gpr18; + + case 19: + return Context->Gpr19; + + case 20: + return Context->Gpr20; + + case 21: + return Context->Gpr21; + + case 22: + return Context->Gpr22; + + case 23: + return Context->Gpr23; + + case 24: + return Context->Gpr24; + + case 25: + return Context->Gpr25; + + case 26: + return Context->Gpr26; + + case 27: + return Context->Gpr27; + + case 28: + return Context->Gpr28; + + case 29: + return Context->Gpr29; + + case 30: + return Context->Gpr30; + + case 31: + return Context->Gpr31; + + // + // Floating Point Registers + // + + case 32: + return Context->Fpr0; + + case 33: + return Context->Fpr1; + + case 34: + return Context->Fpr2; + + case 35: + return Context->Fpr3; + + case 36: + return Context->Fpr4; + + case 37: + return Context->Fpr5; + + case 38: + return Context->Fpr6; + + case 39: + return Context->Fpr7; + + case 40: + return Context->Fpr8; + + case 41: + return Context->Fpr9; + + case 42: + return Context->Fpr10; + + case 43: + return Context->Fpr11; + + case 44: + return Context->Fpr12; + + case 45: + return Context->Fpr13; + + case 46: + return Context->Fpr14; + + case 47: + return Context->Fpr15; + + case 48: + return Context->Fpr16; + + case 49: + return Context->Fpr17; + + case 50: + return Context->Fpr18; + + case 51: + return Context->Fpr19; + + case 52: + return Context->Fpr20; + + case 53: + return Context->Fpr21; + + case 54: + return Context->Fpr22; + + case 55: + return Context->Fpr23; + + case 56: + return Context->Fpr24; + + case 57: + return Context->Fpr25; + + case 58: + return Context->Fpr26; + + case 59: + return Context->Fpr27; + + case 60: + return Context->Fpr28; + + case 61: + return Context->Fpr29; + + case 63: + return Context->Fpr30; + + case 64: + return Context->Fpr31; + + + } +} + +VOID +_SetRegisterValue ( + IN ULONG Register, + IN ULONG Value, + OUT PCONTEXT Context + ) + +/*++ + +Routine Description: + + This function is called to set the value of a register in the specified + exception or trap frame. + +Arguments: + + Register - Supplies the number of the register whose value is to be + stored. Integer registers are specified as 0 - 31 and floating + registers are specified as 32 - 63. + + Value - Supplies the value to be stored in the specified register. + + Context - Supplies a pointer to an context record. + +Return Value: + + None. + +--*/ + +{ + + // + // Dispatch on the register number. + // + + switch (Register) { + + // + // General Purpose Registers + // + + case 0: + Context->Gpr0 = Value; return; + + case 1: + Context->Gpr1 = Value; return; + + case 2: + Context->Gpr2 = Value; return; + + case 3: + Context->Gpr3 = Value; return; + + case 4: + Context->Gpr4 = Value; return; + + case 5: + Context->Gpr5 = Value; return; + + case 6: + Context->Gpr6 = Value; return; + + case 7: + Context->Gpr7 = Value; return; + + case 8: + Context->Gpr8 = Value; return; + + case 9: + Context->Gpr9 = Value; return; + + case 10: + Context->Gpr10 = Value; return; + + case 11: + Context->Gpr11 = Value; return; + + case 12: + Context->Gpr12 = Value; return; + + case 13: + Context->Gpr13 = Value; return; + + case 14: + Context->Gpr14 = Value; return; + + case 15: + Context->Gpr15 = Value; return; + + case 16: + Context->Gpr16 = Value; return; + + case 17: + Context->Gpr17 = Value; return; + + case 18: + Context->Gpr18 = Value; return; + + case 19: + Context->Gpr19 = Value; return; + + case 20: + Context->Gpr20 = Value; return; + + case 21: + Context->Gpr21 = Value; return; + + case 22: + Context->Gpr22 = Value; return; + + case 23: + Context->Gpr23 = Value; return; + + case 24: + Context->Gpr24 = Value; return; + + case 25: + Context->Gpr25 = Value; return; + + case 26: + Context->Gpr26 = Value; return; + + case 27: + Context->Gpr27 = Value; return; + + case 28: + Context->Gpr28 = Value; return; + + case 29: + Context->Gpr29 = Value; return; + + case 30: + Context->Gpr30 = Value; return; + + case 31: + Context->Gpr31 = Value; return; + + // + // Floating Point Registers + // + + case 32: + Context->Fpr0 = Value; return; + + case 33: + Context->Fpr1 = Value; return; + + case 34: + Context->Fpr2 = Value; return; + + case 35: + Context->Fpr3 = Value; return; + + case 36: + Context->Fpr4 = Value; return; + + case 37: + Context->Fpr5 = Value; return; + + case 38: + Context->Fpr6 = Value; return; + + case 39: + Context->Fpr7 = Value; return; + + case 40: + Context->Fpr8 = Value; return; + + case 41: + Context->Fpr9 = Value; return; + + case 42: + Context->Fpr10 = Value; return; + + case 43: + Context->Fpr11 = Value; return; + + case 44: + Context->Fpr12 = Value; return; + + case 45: + Context->Fpr13 = Value; return; + + case 46: + Context->Fpr14 = Value; return; + + case 47: + Context->Fpr15 = Value; return; + + case 48: + Context->Fpr16 = Value; return; + + case 49: + Context->Fpr17 = Value; return; + + case 50: + Context->Fpr18 = Value; return; + + case 51: + Context->Fpr19 = Value; return; + + case 52: + Context->Fpr20 = Value; return; + + case 53: + Context->Fpr21 = Value; return; + + case 54: + Context->Fpr22 = Value; return; + + case 55: + Context->Fpr23 = Value; return; + + case 56: + Context->Fpr24 = Value; return; + + case 57: + Context->Fpr25 = Value; return; + + case 58: + Context->Fpr26 = Value; return; + + case 59: + Context->Fpr27 = Value; return; + + case 60: + Context->Fpr28 = Value; return; + + case 61: + Context->Fpr29 = Value; return; + + case 63: + Context->Fpr30 = Value; return; + + case 64: + Context->Fpr31 = Value; return; + + + } + +} diff --git a/private/fp32/tran/ppc/huge.s b/private/fp32/tran/ppc/huge.s new file mode 100644 index 000000000..5e7e67573 --- /dev/null +++ b/private/fp32/tran/ppc/huge.s @@ -0,0 +1,42 @@ + + +//++ +// +// Copyright (c) 1993 IBM Corporation +// +// Module Name: +// +// huge.s +// +// Abstract: +// +// Define the largest power of two that can be represented by a PPC double +// +// Author: +// +// Mark D. Johnson +// +// Environment: +// +// User mode. +// +// Revision History: +// +//-- + +#ifdef CRTDLL +.globl _HUGE_dll +#else +.globl _HUGE +#endif + +.align 3 +.data + +#ifdef CRTDLL +_HUGE_dll: +#else +_HUGE: +#endif + + .word 0x00000000,0x7ff00000 diff --git a/private/fp32/tran/ppc/ieee.c b/private/fp32/tran/ppc/ieee.c new file mode 100644 index 000000000..16550767d --- /dev/null +++ b/private/fp32/tran/ppc/ieee.c @@ -0,0 +1,328 @@ +/*** +*ieee.c - ieee control and status routines +* +* Copyright (c) 1993 IBM Corporation +* +*Purpose: +* +* IEEE control and status routines. +* +*Revision History: +* +* 10-12-93 MDJ Written, based on Microsoft/MIPS version +* +*/ + +#include <trans.h> +#include <float.h> +#include <nt.h> +#include <signal.h> + +static unsigned int _abstract_sw(unsigned int sw); +static unsigned int _abstract_cw(unsigned int cw); +static unsigned int _hw_cw(unsigned int abstr); + +extern unsigned int _get_fsr(); +extern void _set_fsr(unsigned int); + + +#define FX 0x80000000 // Flpt exception summary + +#define FS 0x00000004 // denorm flush to zero + +#define STATUSMASK (FX|IMCW_SW|IMCW_VX) // sticky status/summary bits + +#define CWMASK (FS|STATUSMASK|IMCW_EM|IMCW_RC) + + +/*** +* _statusfp() - +* +*Purpose: +* return abstract fp status word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _statusfp() +{ + return _abstract_sw(_get_fsr()); +} + + +/*** +*_clearfp() - +* +*Purpose: +* return abstract status word and clear status +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _clearfp() +{ + unsigned int status; + + status = _get_fsr(); + status &= ~STATUSMASK; + _set_fsr(status); + + return _abstract_sw(status); +} + + + +/*** +* _controlfp() - +* +*Purpose: +* return and set abstract user fp control word +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _controlfp(unsigned int newctrl, unsigned int mask) +{ + + unsigned int oldCw; + unsigned int newCw; + unsigned int oldabs; + unsigned int newabs; + + oldCw = _get_fsr(); + + oldabs = _abstract_cw(oldCw); + + newabs = (newctrl & mask) | (oldabs & ~mask); + + newCw = _hw_cw(newabs) & CWMASK | oldCw & ~CWMASK; + + _set_fsr(newCw); + + return newabs; +} /* _controlfp() */ + +/*** +* _fpreset() - reset fp system +* +*Purpose: +* reset fp environment to the default state +* Also reset saved fp environment if invoked from a user's +* signal handler +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ +void _fpreset() +{ + PEXCEPTION_POINTERS excptrs = (PEXCEPTION_POINTERS) _pxcptinfoptrs; + + _FPreset(); + if (excptrs && + excptrs->ContextRecord->ContextFlags & CONTEXT_FLOATING_POINT) { + + // _fpreset has been invoked by a signal handler which in turn + // has been invoked by the CRT filter routine. In this case + // the saved fp context should be cleared, so that the change take + // effect on continuation. + + excptrs->ContextRecord->Fpscr = _get_fsr(); //use current FS bit + } +} + + + + +/*** +* _abstract_cw() - abstract control word +* +*Purpose: +* produce a fp control word in abstracted (machine independent) form +* +*Entry: +* cw: machine control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_cw(unsigned int cw) +{ + unsigned int abstr = 0; + + + // + // Set exception mask bits + // + + if ((cw & IEM_INVALID) == 0) + abstr |= _EM_INVALID; + if ((cw & IEM_ZERODIVIDE) == 0) + abstr |= _EM_ZERODIVIDE; + if ((cw & IEM_OVERFLOW) == 0) + abstr |= _EM_OVERFLOW; + if ((cw & IEM_UNDERFLOW) == 0) + abstr |= _EM_UNDERFLOW; + if ((cw & IEM_INEXACT) == 0) + abstr |= _EM_INEXACT; + + // + // Set rounding mode + // + + switch (cw & IMCW_RC) { + case IRC_NEAR: + abstr |= _RC_NEAR; + break; + case IRC_UP: + abstr |= _RC_UP; + break; + case IRC_DOWN: + abstr |= _RC_DOWN; + break; + case IRC_CHOP: + abstr |= _RC_CHOP; + break; + } + + // Precision mode is ignored + + // + // Set denormal control + // + + if (cw & FS) { + abstr |= _DN_FLUSH; + } + + return abstr; +} + + +/*** +* _hw_cw() - h/w control word +* +*Purpose: +* produce a machine dependent fp control word +* +* +*Entry: +* abstr: abstract control word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _hw_cw(unsigned int abstr) +{ + + unsigned int cw = 0; + + // + // Set exception mask bits + // + + if ((abstr & _EM_INVALID) == 0) + cw |= IEM_INVALID; + if ((abstr & _EM_ZERODIVIDE) == 0) + cw |= IEM_ZERODIVIDE; + if ((abstr & _EM_OVERFLOW) == 0) + cw |= IEM_OVERFLOW; + if ((abstr & _EM_UNDERFLOW) == 0) + cw |= IEM_UNDERFLOW; + if ((abstr & _EM_INEXACT) == 0) + cw |= IEM_INEXACT; + + // + // Set rounding mode + // + + switch (abstr & _MCW_RC) { + case _RC_NEAR: + cw |= IRC_NEAR; + break; + case _RC_UP: + cw |= IRC_UP; + break; + case _RC_DOWN: + cw |= IRC_DOWN; + break; + case _RC_CHOP: + cw |= IRC_CHOP; + break; + } + + // + // Precision mode is ignored + // + + // + // Set denormal control + // + + if ((abstr & _MCW_DN) == _DN_FLUSH) { + cw |= FS; + } + + return cw; +} + + + +/*** +* _abstract_sw() - abstract fp status word +* +*Purpose: +* produce an abstract (machine independent) fp status word +* +* +*Entry: +* sw: machine status word +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +unsigned int _abstract_sw(unsigned int sw) +{ + unsigned int abstr = 0; + + + if (sw & ISW_INVALID) + abstr |= _EM_INVALID; + if (sw & ISW_ZERODIVIDE) + abstr |= _EM_ZERODIVIDE; + if (sw & ISW_OVERFLOW) + abstr |= _EM_OVERFLOW; + if (sw & ISW_UNDERFLOW) + abstr |= _EM_UNDERFLOW; + if (sw & ISW_INEXACT) + abstr |= _EM_INEXACT; + + return abstr; +} diff --git a/private/fp32/tran/ppc/tanp.s b/private/fp32/tran/ppc/tanp.s new file mode 100644 index 000000000..fcef67856 --- /dev/null +++ b/private/fp32/tran/ppc/tanp.s @@ -0,0 +1,104 @@ +// TITLE("Tangent") +//++ +// +// Copyright (c) 1995 IBM Corporation +// +// Module Name: +// +// tanp.s +// +// Abstract: +// +// Returns 64-bit tangent +// +// +// Author: +// +// James B. Shearer +// +// Environment: +// +// User mode only. +// +// Revision History: +// +//-- +// +#include<ksppc.h> + + .set dw12,0 + + LEAF_ENTRY(tan) + + .globl ..__ttrig + lwz 4,[toc].data(2) + lfd 0,rpi2 -ttrig(4) // rpi2 + lfd 3,xadd -ttrig(4) // xadd + fmsub 8,0,1,3 // xn=rpi2*x-xadd + stfd 8,-8(1) // xni + lfd 4,xpi2h -ttrig(4) // xpi2h + fadd 6,3,8 // xn=xn+xadd + lfd 0,xpi2m -ttrig(4) // xpi2m + fabs 2,1 // xa=abs(x) + lfd 7,xlim -ttrig(4) // xlim + fnmsub 3,4,6,1 // xr1=-xn*xpi2h+x + lfd 8,xpi2l -ttrig(4) // xpi2l + fcmpu 1,7,2 // 1 gets xa ? xlim + lwz 0,-8+dw12(1) // itemp + fnmsub 5,0,6,3 // xr1=-xn*xpi2m+xr1 + lfd 7,ts3 -ttrig(4) // ts3 + lfd 4,ts2 -ttrig(4) // ts2 + fnmsub 3,8,6,5 // xr1=-xn*xpi2l+xr1 + andi. 3,0,1 // 0 gets iand(itemp,1) ? 0 + fmul 1,5,5 // xr2=xr1*xr1 + lfd 6,tc3 -ttrig(4) // tc3 + fmadd 2,7,1,4 // as=ts3*xr2+ts2 + lfd 8,tc2 -ttrig(4) // tc2 + fmadd 0,6,1,8 // ac=tc3*xr2+tc2 + lfd 4,ts1 -ttrig(4) // ts1 + fmadd 2,1,2,4 // as=xr2*as+ts1 + lfd 8,tc1 -ttrig(4) // tc1 + fmadd 0,1,0,8 // ac=xr2*ac+tc1 + lfd 4,ts0 -ttrig(4) // ts0 + fmadd 2,1,2,4 // as=xr2*as+ts0 + lfd 8,tc0 -ttrig(4) // tc0 + fmadd 0,1,0,8 // ac=xr2*ac+tc0 + bne fcot + fmul 6,3,1 // xr3=xr1*xr2 + fdiv 2,2,0 // a=as/ac + fnmsub 1,6,2,3 // dtan=-xr3*a+xr1 + bgtlr 1 // + lfd 1,xnan -ttrig(4) + blr + .align 5 +fcot: + fmsub 2,1,2,0 // a=xr2*as-ac + fmul 1,3,2 // a=a*xr1 + fdiv 1,0,1 // dtan=ac/a + bgtlr 1 + lfd 1,xnan -ttrig(4) + LEAF_EXIT(tan) + + .data +ttrig: + .align 6 +// minimax polynomial coefficients +rpi2: .double 0.636619772367581341e+00 +xpi2h: .double 0.157079632679489656e+01 +xpi2m: .double 0.612323399573676480e-16 +xpi2l: .double 0.108285667160535624e-31 +xadd: .long 0 + .long 0xc3380000 +xlim: .long 0x54442d19 + .long 0x432921fb +xnan: .long 0 + .long 0x7ff80000 +// minimax rational function coefficients +ts3: .double 0.181017336383229927e-07 +tc3: .double -0.256590857271311164e-03 +ts2: .double -0.245391301343844510e-03 +tc2: .double 0.245751217306830032e-01 +ts1: .double 0.214530914428992319e-01 +tc1: .double -0.464359274328689195e+00 +ts0: .double -0.333333333333333464e+00 +tc0: .double 0.100000000000000000e+01 diff --git a/private/fp32/tran/ppc/uitrunc.c b/private/fp32/tran/ppc/uitrunc.c new file mode 100644 index 000000000..1e57174f0 --- /dev/null +++ b/private/fp32/tran/ppc/uitrunc.c @@ -0,0 +1,85 @@ +unsigned int _uitrunc(double x) { + + // _uitrunc converts the IEEE floating point long number x into a 32-bit + // unsigned int. + // + // The result is truncated towards zero, as in the Fortran INT function, + // regardless of the current rounding mode. If x > 2**32 or is + // +infinity, the result is 2**32. + + // Be VERY CAREFUL when compiling this program with an optimizing compiler. + // The difference NaN-NaN is computed TWICE in order to cause an exception + // if the input is invalid. Smart compilers may recognize that x-x is 0.0 + // and avoid generating the "fsub". No folding of floating point compu- + // tations should be done by the compiler! + + unsigned answer; + volatile double NanMinusNan; + union { + double sum; + struct { + unsigned int lo; + unsigned int hi; + } ; + } xshifted; + + // We are about to declare a magic number, *pf5243 == 2^52 + 2^43 in + // IEEE format. It needs to have bit 43 set because adding 2^52 + // to a negative number could cause a borrow and in some + // weird cases (which I can't remember off the top of my head) the + // borrow could go all the way to the top of the fraction and thus cause + // the result to be shifted left by one bit, which messes up the lower + // 32 bits. To summarize: the 2^43 is added so that there will be a bit + // above bit 2^32 from which a subtract can always borrow. It could + // be any bit above 2^32. + // + // Positive numbers are not affected one way or the other by the 2^43 + // bit. + + const static unsigned int f5243[2] = {0x00000000, 0x43300800 }; + const static double *pf5243 = (double *)f5243; + + // fNAN == +NAN (Not A Number) in IEEE format + + const static unsigned int fNAN[2] = { 0x00000000, 0x7ff00000 }; + const static double *pfNAN = (double *)fNAN; + + const static double LargestUnsignedInt = 4294967295.0; + const static double DoubleZero = 0.0; + + // If x is NaN or is negative, set the bit for invalid IEEE exception. + // Because NanMinusNan is volatile the assignment will not removed by + // dead store elimination. The status is set as for a compare-ordered + // of x with zero, and the integer result is undefined (ie. FPCC and + // VXVC are set, and VXSNAN may be). I am not sure what this SHOULD do + // for the ill-defined cases; the above is a guess at what IEEE dictates, + // if anything (HSW 6/87). + + if (( x != x ) || (x < DoubleZero)) { + NanMinusNan = *pfNAN - *pfNAN; + return(0); + } + + // If x is too big, we can't convert it, either + + if (x > LargestUnsignedInt) { + NanMinusNan = *pfNAN - *pfNAN; + return(0xffffffff); + } + else if (x == LargestUnsignedInt) + return(0xffffffff); + + // Add the magic number; see the introductory note. The following + // mysterious comment appears in the original: + // 51 - 31 = 20, thus shift the integer part right by 20 + + xshifted.sum = x + *pf5243; + answer = xshifted.lo; + + // If result was > x, it must have been rounded up. Correct by + // subtracting one. + + if ((xshifted.sum - *pf5243) > x ) return answer - 1; + return answer; + } + diff --git a/private/fp32/tran/sincos.c b/private/fp32/tran/sincos.c new file mode 100644 index 000000000..1935511af --- /dev/null +++ b/private/fp32/tran/sincos.c @@ -0,0 +1,228 @@ +/*** +*sincos.c - sine and cosine +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 9-29-91 GDP added missing ABS() for cosine +* 12-26-91 GDP IEEE exceptions support +* 03-11-91 GDP use 66 significant bits for representing pi +* support FP_TLOSS, use _frnd for rounding +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +static double _sincos(double x, double y, double sin); + +/* constants */ +static double const EPS = 1.05367121277235079465e-8; /* 2^(-53/2) */ +static double const PI = 3.14159265358979323846; +static double const PI2 = 1.57079632679489661923; /* pi/2 */ +static double const PI_INV = 0.31830988618379067154; /* 1/pi */ +static double const YMAX = 2.2e8; /* approx. pi * 2 ^(t/2), where t=53 */ + +// +// The sum of C1 and C2 is a representation of PI with 66 bits in the +// significand (same as x87). (PI = 4 * 0.c90fdaa2 2168c234 c h) +// + +static _dbl _C1 = {SET_DBL (0x400921fb, 0x54400000)}; +static _dbl _C2 = {SET_DBL (0x3de0b461, 0x1a600000)}; +#define C1 (_C1.dbl) +#define C2 (_C2.dbl) + +/* constants for the polynomial approximation of sin, cos */ +static double const r1 = -0.16666666666666665052e+0; +static double const r2 = 0.83333333333331650314e-2; +static double const r3 = -0.19841269841201840457e-3; +static double const r4 = 0.27557319210152756119e-5; +static double const r5 = -0.25052106798274584544e-7; +static double const r6 = 0.16058936490371589114e-9; +static double const r7 = -0.76429178068910467734e-12; +static double const r8 = 0.27204790957888846175e-14; + +#define R(g) ((((((((r8 * (g) + r7) * (g) + r6) * (g) + r5) * (g) + r4) \ + * (g) + r3) * (g) + r2) * (g) + r1) * (g)) + + +/*** +*double sin(double x) - sine +* +*Purpose: +* Compute the sine of a number. +* The algorithm (reduction / polynomial approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* I, P +* if x is denormal: return x +*******************************************************************************/ +double sin (double x) +{ + unsigned int savedcw; + double result; + double sign,y; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I,OP_SIN,x,QNAN_SIN1,savedcw); + case T_QNAN: + return _handle_qnan1(OP_SIN, x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_SIN,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) { + // no P exception + RETURN(savedcw,x); + } + + if (x < 0) { + sign = -1; + y = -x; + } + else { + sign = 1; + y = x; + } + if (y >= YMAX) { + + // The argument is too large to produce a meaningful result, + // so this is treated as an invalid operation. + // We also set the (extra) FP_TLOSS flag for matherr + // support + + return _except1(FP_TLOSS | FP_I,OP_SIN,x,QNAN_SIN2,savedcw); + } + + result = _sincos(x,y,sign); + + RETURN_INEXACT1(OP_SIN,x,result,savedcw); +} + + +/*** +*double cos(double x) - cosine +* +*Purpose: +* Compute the cosine of a number. +* The algorithm (reduction / polynomial approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* P, I +* if x is denormal: return 1 +*******************************************************************************/ + +double cos (double x) +{ + unsigned int savedcw; + double result,y; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I,OP_COS,x,QNAN_COS1,savedcw); + case T_QNAN: + return _handle_qnan1(OP_COS,x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_COS,x,_s2qnan(x),savedcw); + } + } + + /* this will handle small arguments */ + if (ABS(x) < EPS) { + if (x == 0.0) { + RETURN(savedcw,1.0); + } + result = 1.0; + } + + else { + y = ABS(x) + PI2; + if (y >= YMAX) { + + // The argument is too large to produce a meaningful result, + // so this is treated as an invalid operation. + // We also set the (extra) FP_TLOSS flag for matherr + // support + + return _except1(FP_TLOSS | FP_I,OP_COS,x,QNAN_COS2,savedcw); + } + + result = _sincos(x,y,1.0); + } + + RETURN_INEXACT1(OP_COS,x,result,savedcw); +} + + + +/*** +*double _sincos(double x, double y,double sign) - cos sin helper +* +*Purpose: +* Help computing sin or cos of a valid, within correct range +* number. +* The algorithm (reduction / polynomial approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +static double _sincos(double x, double y, double sign) +{ + unsigned long n; + double xn,f,g,r,result; + + xn = _frnd(y * PI_INV); + n = (int) xn; + + if (n & 0x1) { + /* n is odd */ + sign = -sign; + } + if (ABS(x) != y) { + /* cosine wanted */ + xn -= .5; + } + + /* assume there is a guard digit for addition */ + f = (ABS(x) - xn * C1) - xn * C2; + if (ABS(f) < EPS) + result = f; + else { + g = f*f; + r = R(g); + result = f + f*r; + } + result *= sign; + + return result; +} diff --git a/private/fp32/tran/sincosh.c b/private/fp32/tran/sincosh.c new file mode 100644 index 000000000..7eb07bf2f --- /dev/null +++ b/private/fp32/tran/sincosh.c @@ -0,0 +1,205 @@ +/*** +*sincosh.c - hyperbolic sine and cosine +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-20-91 GDP support IEEE exceptions +* 02-03-92 GDP use _exphlp for computing e^x +* 07-16-93 SRW ALPHA Merge +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +extern double _exphlp(double, int *); + +static double const EPS = 5.16987882845642297e-26; /* 2^(-53) / 2 */ +/* exp(YBAR) should be close to but less than XMAX + * and 1/exp(YBAR) should not underflow + */ +static double const YBAR = 7.00e2; + +/* WMAX=ln(OVFX)+0.69 (Cody & Waite),ommited LNV, used OVFX instead of BIGX */ + +static double const WMAX = 1.77514678223345998953e+003; + +/* constants for the rational approximation */ +static double const p0 = -0.35181283430177117881e+6; +static double const p1 = -0.11563521196851768270e+5; +static double const p2 = -0.16375798202630751372e+3; +static double const p3 = -0.78966127417357099479e+0; +static double const q0 = -0.21108770058106271242e+7; +static double const q1 = 0.36162723109421836460e+5; +static double const q2 = -0.27773523119650701667e+3; +/* q3 = 1 is not used (avoid myltiplication by 1) */ + +#define P(f) (((p3 * (f) + p2) * (f) + p1) * (f) + p0) +#define Q(f) ((((f) + q2) * (f) + q1) * (f) + q0) + +/*** +*double sinh(double x) - hyperbolic sine +* +*Purpose: +* Compute the hyperbolic sine of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* I P +* no exception if x is denormal: return x +*******************************************************************************/ + +double sinh(double x) +{ + unsigned int savedcw; + double result; + double y,f,z,r; + int newexp; + int sgn; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + RETURN(savedcw,x); + case T_QNAN: + return _handle_qnan1(OP_SINH, x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_SINH,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) { + RETURN(savedcw,x); // no precision ecxeption + } + + y = ABS(x); + sgn = x<0 ? -1 : +1; + + if (y > 1.0) { + if (y > YBAR) { + if (y > WMAX) { + // result too large, even after scaling + return _except1(FP_O | FP_P,OP_SINH,x,_copysign(D_INF,x),savedcw); + } + + // + // result = exp(y)/2 + // + + result = _exphlp(y, &newexp); + newexp --; //divide by 2 + if (newexp > MAXEXP) { + result = _set_exp(result, newexp-IEEE_ADJUST); + return _except1(FP_O|FP_P,OP_SINH,x,result,savedcw); + } + else { + result = _set_exp(result, newexp); + } + + } + else { + z = _exphlp(y, &newexp); + z = _set_exp(z, newexp); + result = (z - 1/z) / 2; + } + + if (sgn < 0) { + result = -result; + } + } + else { + if (y < EPS) + result = x; + else { + f = x * x; + r = f * (P(f) / Q(f)); + result = x + x * r; + } + } + + RETURN_INEXACT1(OP_SINH,x,result,savedcw); +} + + + +/*** +*double cosh(double x) - hyperbolic cosine +* +*Purpose: +* Compute the hyperbolic cosine of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* I P +* no exception if x is denormal: return 1 +*******************************************************************************/ +double cosh(double x) +{ + unsigned int savedcw; + double y,z,result; + int newexp; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + RETURN(savedcw,D_INF); + case T_QNAN: + return _handle_qnan1(OP_COSH, x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_COSH,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) { + RETURN(savedcw,1.0); + } + + y = ABS(x); + if (y > YBAR) { + if (y > WMAX) { + return _except1(FP_O | FP_P,OP_COSH,x,D_INF,savedcw); + } + + // + // result = exp(y)/2 + // + + result = _exphlp(y, &newexp); + newexp --; //divide by 2 + if (newexp > MAXEXP) { + result = _set_exp(result, newexp-IEEE_ADJUST); + return _except1(FP_O|FP_P,OP_COSH,x,result,savedcw); + } + else { + result = _set_exp(result, newexp); + } + } + else { + z = _exphlp(y, &newexp); + z = _set_exp(z, newexp); + result = (z + 1/z) / 2; + } + + RETURN_INEXACT1(OP_COSH,x,result,savedcw); +} diff --git a/private/fp32/tran/sources b/private/fp32/tran/sources new file mode 100644 index 000000000..d358dce2c --- /dev/null +++ b/private/fp32/tran/sources @@ -0,0 +1,177 @@ +#### +#sources - +# +# Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +# +#Purpose: +# Specify components for 'build' +# +#Revision History: +# 9-26-91 GDP +# 1-13-94 RDL New MIPS TRAN funtions +# 1-17-94 RDL Added single precision MIPS TRAN funtions +# 2-28-94 TVB New Alpha tran functions +# +################################################################################ + +!INCLUDE ..\fp32.def + +MAJORCOMP=fp +MINORCOMP=tran + +TARGETNAME=tran$(TARGETNAMESUFFIX) +TARGETPATH=..\obj +386_STDCALL=0 +# TARGETTYPE is defined in fp32.def + +INCLUDES=..\include;..\inc;..\inc\i386;..\..\crt32\h + +!IF $(ALPHA) +USER_C_FLAGS=-d2"dpml_exception_linkage __dpml_exception" +!ENDIF + +SOURCES= \ + bessel.c \ + fpexcept.c \ + frexp.c \ + ldexp.c \ + matherr.c \ + util.c \ + ieeemisc.c + +i386_SOURCES= \ + ceil.c \ + fabs.c \ + floor.c \ + hypot.c \ + modf.c \ + powhlp.c \ + i386\87cdisp.asm \ + i386\87csqrt.asm \ + i386\87ctrig.asm \ + i386\87ctriga.asm \ + i386\87ctrigh.asm \ + i386\87ctran.asm \ + i386\87fsqrt.asm \ + i386\87ftrig.asm \ + i386\87ftriga.asm \ + i386\87ftrigh.asm \ + i386\87ftran.asm \ + i386\87disp.asm \ + i386\87sqrt.asm \ + i386\87tran.asm \ + i386\87trig.asm \ + i386\87triga.asm \ + i386\87trigh.asm \ + i386\87fmod.asm \ + i386\87except.c \ + i386\ftol.asm \ + i386\fpctrl.c \ + i386\ieee87.c \ + i386\huge.asm \ + i386\frnd.c \ + i386\fsqrt.c \ + i386\filter.c + +MIPS_SOURCES= \ + modf.c \ + mips\asincosm.s \ + mips\atanm.s \ + mips\trigm.s \ + mips\floorm.s \ + mips\cabsm.s \ + mips\hypotm.s \ + mips\coshm.s \ + mips\sinhm.s \ + mips\expm.s \ + mips\exptable.s \ + mips\fabsm.s \ + mips\fmodm.s \ + mips\logm.s \ + mips\powm.s \ + mips\sqrtm.s \ + mips\tanhm.s \ + mips\logtable.s \ + mips\fasincos.s \ + mips\fatan.s \ + mips\fsincos.s \ + mips\ftan.s \ + mips\fsinh.s \ + mips\fcosh.s \ + mips\ftanh.s \ + mips\fabsf.s \ + mips\fexp.s \ + mips\ffloor.s \ + mips\fhypot.s \ + mips\flog.s \ + mips\fpow.c \ + mips\fsqrt.c \ + mips\fmodf.c \ + mips\filter.c \ + mips\getsetrg.c \ + mips\fpctrl.s \ + mips\ieee.c \ + mips\huge.s \ + mips\frnd.s \ + mips\fsqrthlp.s \ + mips\dtoul.s + +ALPHA_SOURCES= \ + sincosh.c \ + tanh.c \ + alpha\asinacos.s \ + alpha\atan2s.s \ + alpha\atans.s \ + alpha\ceils.s \ + alpha\coss.s \ + alpha\dpml_exc.c \ + alpha\exph.c \ + alpha\exps.s \ + alpha\fabss.s \ + alpha\filter.c \ + alpha\floors.s \ + alpha\fmods.s \ + alpha\fpctrl.s \ + alpha\fpint.s \ + alpha\frnd.s \ + alpha\getsetrg.c \ + alpha\huge.s \ + alpha\hypoth.c \ + alpha\hypots.s \ + alpha\ieee.c \ + alpha\log10s.s \ + alpha\logs.s \ + alpha\modfs.s \ + alpha\pows.s \ + alpha\sins.s \ + alpha\sqrts.s \ + alpha\tans.s \ + alpha\trig_rdx.s \ + alpha\trig_tab.s + +PPC_SOURCES= \ + asincos.c \ + atan.c \ + ceil.c \ + exp.c \ + hypot.c \ + fabs.c \ + fmod.c \ + floor.c \ + log.c \ + modf.c \ + pow.c \ + powhlp.c \ + sincosh.c \ + sqrt.c \ + tanh.c \ + ppc\dtoul.c \ + ppc\filter.c \ + ppc\fpctrl.s \ + ppc\frnd.s \ + ppc\fsincos.s \ + ppc\getsetrg.c \ + ppc\huge.s \ + ppc\ieee.c \ + ppc\tanp.s \ + ppc\uitrunc.c diff --git a/private/fp32/tran/sqrt.c b/private/fp32/tran/sqrt.c new file mode 100644 index 000000000..6d9896d6c --- /dev/null +++ b/private/fp32/tran/sqrt.c @@ -0,0 +1,175 @@ +/*** +*sqrt.c - square root +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 1-29-91 GDP Kahan's algorithm for final rounding +* 3-11-92 GDP new interval and initial approximation +* +*******************************************************************************/ +#ifndef R4000 + +#include <math.h> +#include <trans.h> + +// +// Coefficients for initial approximation (Hart & al) +// + +static double p00 = .2592768763e+0; +static double p01 = .1052021187e+1; +static double p02 = -.3163221431e+0; + + +/*** +*double sqrt(double x) - square root +* +*Purpose: +* Compute the square root of a number. +* This function should be provided by the underlying +* hardware (IEEE spec). +*Entry: +* +*Exit: +* +*Exceptions: +* I P +*******************************************************************************/ +double sqrt(double x) +{ + unsigned int savedcw, sw; + double result,t; + unsigned int stat,rc; + + savedcw = _ctrlfp(ICW, IMCW); + + if (IS_D_SPECIAL(x)){ + switch (_sptype(x)) { + case T_PINF: + RETURN(savedcw, x); + case T_QNAN: + return _handle_qnan1(OP_SQRT, x, savedcw); + case T_SNAN: + return _except1(FP_I,OP_SQRT,x,QNAN_SQRT,savedcw); + } + /* -INF will be handled in the x<0 case */ + } + if (x < 0.0) { + return _except1(FP_I, OP_SQRT, x, QNAN_SQRT,savedcw); + } + + if (x == 0.0) { + RETURN (savedcw, x); + } + + + result = _fsqrt(x); + + _ctrlfp(IRC_DOWN, IMCW_RC); + + + // + // Kahan's algorithm + // + + sw = _clrfp(); + t = x / result; + stat = _statfp(); + if (! (stat & ISW_INEXACT)) { + // exact + if (t == result) { + _set_statfp(sw); // restore status word + RETURN(savedcw, result); + } + else { + // t = t-1 + if (*D_LO(t) == 0) { + (*D_HI(t)) --; + } + (*D_LO(t)) --; + } + + } + + rc = savedcw & IMCW_RC; + if (rc == IRC_UP || rc == IRC_NEAR) { + // t = t+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + if (rc == IRC_UP) { + // y = y+1 + (*D_LO(t)) ++; + if (*D_LO(t) == 0) { + (*D_HI(t)) ++; + } + } + } + + result = 0.5 * (t + result); + + _set_statfp(sw | ISW_INEXACT); // update status word + RETURN_INEXACT1(OP_SQRT, x, result, savedcw); +} + + + +/*** +* _fsqrt - non IEEE conforming square root +* +*Purpose: +* compute a square root of a normal number without performing +* IEEE rounding. The argument is a finite number (no NaN or INF) +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +double _fsqrt(double x) +{ + double f,y,result; + int n; + + f = _decomp(x,&n); + + if (n & 0x1) { + // n is odd + n++; + f = _add_exp(f, -1); + } + + // + // approximation for sqrt in the interval [.25, 1] + // (Computer Approximationsn, Hart & al.) + // gives more than 7 bits of accuracy + // + + y = p00 + f * (p01 + f * p02); + + y += f / y; + y = _add_exp(y, -1); + + y += f / y; + y = _add_exp(y, -1); + + y += f / y; + y = _add_exp(y, -1); + + n >>= 1; + result = _add_exp(y,n); + + return result; +} + + + +#endif diff --git a/private/fp32/tran/tan.c b/private/fp32/tran/tan.c new file mode 100644 index 000000000..2100595a7 --- /dev/null +++ b/private/fp32/tran/tan.c @@ -0,0 +1,125 @@ +/*** +*tan.c - tangent +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-30-91 GDP support IEEE exceptions +* 03-11-91 GDP use 66 significant bits for representing pi +* support FP_TLOSS +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +/* constants */ +static double const TWO_OVER_PI = 0.63661977236758134308; +static double const EPS = 1.05367121277235079465e-8; /* 2^(-53/2) */ +static double const YMAX = 2.98156826864790199324e8; /* 2^(53/2)*PI/2 */ + +// +// The sum of C1 and C2 is a representation of PI/2 with 66 bits in the +// significand (same as x87). (PI/2 = 2 * 0.c90fdaa2 2168c234 c h) +// + +static _dbl _C1 = {SET_DBL (0x3ff921fb, 0x54400000)}; +static _dbl _C2 = {SET_DBL (0x3dd0b461, 0x1a600000)}; +#define C1 (_C1.dbl) +#define C2 (_C2.dbl) + +/* constants for the rational approximation */ +/* p0 = 1.0 is not used (avoid mult by 1) */ +static double const p1 = -0.13338350006421960681e+0; +static double const p2 = 0.34248878235890589960e-2; +static double const p3 = -0.17861707342254426711e-4; +static double const q0 = 0.10000000000000000000e+1; +static double const q1 = -0.46671683339755294240e+0; +static double const q2 = 0.25663832289440112864e-1; +static double const q3 = -0.31181531907010027307e-3; +static double const q4 = 0.49819433993786512270e-6; + + +#define Q(g) ((((q4 * (g) + q3) * (g) + q2) * (g) + q1) * (g) + q0) +#define P(g,f) (((p3 * (g) + p2) * (g) + p1) * (g) * (f) + (f)) + +#define ISODD(i) ((i)&0x1) + + +/*** +*double tan(double x) - tangent +* +*Purpose: +* Compute the tangent of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* P, I +* no exception if x is denormal: return x +*******************************************************************************/ +double tan(double x) +{ + unsigned int savedcw; + unsigned long n; + double xn,xnum,xden; + double f,g,result; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + case T_NINF: + return _except1(FP_I,OP_TAN,x,QNAN_TAN1,savedcw); + case T_QNAN: + return _handle_qnan1(OP_TAN, x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_TAN,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) + RETURN(savedcw, x); + + if (ABS(x) > YMAX) { + + // The argument is too large to produce a meaningful result, + // so this is treated as an invalid operation. + // We also set the (extra) FP_TLOSS flag for matherr + // support + + return _except1(FP_TLOSS | FP_I,OP_TAN,x,QNAN_TAN2,savedcw); + } + + xn = _frnd(x * TWO_OVER_PI); + n = (unsigned long) xn; + + + /* assume there is a guard digit for addition */ + f = (x - xn * C1) - xn * C2; + if (ABS(f) < EPS) { + xnum = f; + xden = 1; + } + else { + g = f*f; + xnum = P(g,f); + xden = Q(g); + } + + if (ISODD(n)) { + xnum = -xnum; + result = xden/xnum; + } + else + result = xnum/xden; + + RETURN_INEXACT1(OP_TAN,x,result,savedcw); +} diff --git a/private/fp32/tran/tanh.c b/private/fp32/tran/tanh.c new file mode 100644 index 000000000..6ab2ba5e7 --- /dev/null +++ b/private/fp32/tran/tanh.c @@ -0,0 +1,98 @@ +/*** +*tanh.c - hyperbolic tangent +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* +*Revision History: +* 8-15-91 GDP written +* 12-22-91 GDP support IEEE exceptions +* +*******************************************************************************/ +#include <math.h> +#include <trans.h> + +/* constants */ +static double const EPS = 5.16987882845642297e-26; /* 2^(-53) / 2 */ +static double const XBIG = 1.90615474653984960096e+001; /* ln(2)(53+2)/2 */ +static double const C0 = 0.54930614433405484570; /* ln(3)/2 */ + +/* constants for rational approximation */ +static double const p0 = -0.16134119023996228053e+4; +static double const p1 = -0.99225929672236083313e+2; +static double const p2 = -0.96437492777225469787e+0; +static double const q0 = 0.48402357071988688686e+4; +static double const q1 = 0.22337720718962312926e+4; +static double const q2 = 0.11274474380534949335e+3; +static double const q3 = 0.10000000000000000000e+1; + + +#define Q(g) ((((g) + q2) * (g) + q1) * (g) + q0) +#define R(g) ((((p2 * (g) + p1) * (g) + p0) * (g)) / Q(g)) + + +/*** +*double tanh(double x) - hyperbolic tangent +* +*Purpose: +* Compute the hyperbolic tangent of a number. +* The algorithm (reduction / rational approximation) is +* taken from Cody & Waite. +* +*Entry: +* +*Exit: +* +*Exceptions: +* I P +* no exception if x is denormal: return x +*******************************************************************************/ +double tanh(double x) +{ + unsigned int savedcw; + double f,g; + double result; + + /* save user fp control word */ + savedcw = _maskfp(); + + if (IS_D_SPECIAL(x)){ + switch(_sptype(x)) { + case T_PINF: + RETURN(savedcw,1.0); + case T_NINF: + RETURN(savedcw,-1.0); + case T_QNAN: + return _handle_qnan1(OP_TANH, x, savedcw); + default: //T_SNAN + return _except1(FP_I,OP_TANH,x,_s2qnan(x),savedcw); + } + } + + if (x == 0.0) { + // no precision exception + RETURN(savedcw,x); + } + + f = ABS(x); + if (f > XBIG) { + result = 1; + } + else if (f > C0) { + result = 0.5 - 1.0 / (exp(f+f) + 1.0); + result = result + result; + } + else if (f < EPS) { + result = f; + } + else { + g = f * f; + result = f + f * R(g); + } + + if (x < 0) + result = -result; + + RETURN_INEXACT1(OP_TANH,x,result,savedcw); +} diff --git a/private/fp32/tran/util.c b/private/fp32/tran/util.c new file mode 100644 index 000000000..ff5f6217a --- /dev/null +++ b/private/fp32/tran/util.c @@ -0,0 +1,130 @@ +/*** +*util.c - utilities for fp transcendentals +* +* Copyright (c) 1991-1991, Microsoft Corporation. All rights reserved. +* +*Purpose: +* _set_exp and _add_exp are as those defined in Cody & Waite +* +*Revision History: +* 08-15-91 GDP written +* 10-20-91 GDP removed _rint, unsafe_intrnd +* 02-05-92 GDP added _fpclass +* 03-27-92 GDP added _d_min +* 06-23-92 GDP added _d_mzero +* +*******************************************************************************/ +#include "trans.h" + +/* define special values */ + +_dbl _d_inf = {SET_DBL (0x7ff00000, 0x0) }; //positive infinity +_dbl _d_ind = {SET_DBL (D_IND_HI, D_IND_LO)}; //real indefinite +_dbl _d_max = {SET_DBL (0x7fefffff, 0xffffffff)}; //max double +_dbl _d_min = {SET_DBL (0x00100000, 0x00000000)}; //min normalized double +_dbl _d_mzero = {SET_DBL (0x80000000, 0x00000000)}; //negative zero + + + +double _set_exp(double x, int exp) +/* does not check validity of exp */ +{ + double retval; + int biased_exp; + retval = x; + biased_exp = exp + D_BIASM1; + *D_EXP(retval) = (unsigned short) (*D_EXP(x) & 0x800f | (biased_exp << 4)); + return retval; +} + + +int _get_exp(double x) +{ + signed short exp; + exp = (signed short)((*D_EXP(x) & 0x7ff0) >> 4); + exp -= D_BIASM1; //unbias + return (int) exp; +} + + +double _add_exp(double x, int exp) +{ + return _set_exp(x, INTEXP(x)+exp); +} + + +double _set_bexp(double x, int bexp) +/* does not check validity of bexp */ +{ + double retval; + retval = x; + *D_EXP(retval) = (unsigned short) (*D_EXP(x) & 0x800f | (bexp << 4)); + return retval; +} + + +int _sptype(double x) +{ + if (IS_D_INF(x)) + return T_PINF; + if (IS_D_MINF(x)) + return T_NINF; + if (IS_D_QNAN(x)) + return T_QNAN; + if (IS_D_SNAN(x)) + return T_SNAN; + return 0; +} + + + +/*** +*double _decomp(double x, double *expptr) +* +*Purpose: +* decompose a number to a normalized mantisa and exponent +* +*Entry: +* +*Exit: +* +*Exceptions: +* +*******************************************************************************/ + +double _decomp(double x, int *pexp) +{ + int exp; + double man; + + if (x == 0) { + man = 0; + exp = 0; + } + else if (IS_D_DENORM(x)) { + int neg; + + exp = 1-D_BIASM1; + neg = x < 0.0; + while((*D_EXP(x) & 0x0010) == 0) { + /* shift mantissa to the left until bit 52 is 1 */ + (*D_HI(x)) <<= 1; + if (*D_LO(x) & 0x80000000) + (*D_HI(x)) |= 0x1; + (*D_LO(x)) <<= 1; + exp--; + } + (*D_EXP(x)) &= 0xffef; /* clear bit 52 */ + if (neg) { + (*D_EXP(x)) |= 0x8000; /* set sign bit */ + } + man = _set_exp(x,0); + } + else { + man = _set_exp(x,0); + exp = INTEXP(x); + } + + *pexp = exp; + return man; +} |