/*
   KDSQRTWC.C

   Fixed Point Square Root example for Watcom C/C++ 10.0 or above.
   Made for 32-bit DOS.  The comments are for general non-compiler-specific
   ANSI C, so it's easier to adapt to another compiler if needed.  Only the
   inline assembler "fixedsqrt()" routine below must be changed if the
   compiler is not Watcom.  Sticking with 32-bit is HIGHLY recommended.

   by Chris Hargrove (Kiwidog-Terraformer/Hornet), 4/26/96
   Written for Imphobia 12

   Compile with: WCC386 -bt=dos KDSQRTWC
                 (add other optimization options as you like).
   Link with: WLINK system dos4g file KDSQRTWC
              (you can also use pmodew; both work fine).
*/

#include <math.h>   /* for using the standard sqrt() operation. */
#include <stdio.h>  /* for printf() and scanf() */

short sqrt_table[512];   /* the global n lookup table.  The values should
                            be 16-bit, so if "short" doesn't mean 16 bits
                            on your compiler, change the type accordingly.*/

void make_sqrt_table(void)
{
  float curfloat;
  unsigned long *curfloatint;  /* should be 32 bits. */
  unsigned int count;

  /* curfloat is the current n we want to find the square root of for the
     table.  Since we have to manipulate n manually, we'll use *curfloatint
     to point to curfloat, allowing integer logical operations on curfloat's
     bits.  If you get a "pointer type mismatch" warning because of this,
     don't worry about it... it's deliberate. */

  curfloatint = &curfloat;

  /* Now we fill in the table... */

  for (count=0; count<0x100; count++)
  {

    /* clear the bits, just for safety. */

    *curfloatint = 0;

    /* Use the count to make our 8-bit mantissa, by bit-shifting it into
       the proper place.  Set the exponent to 0, which in floating point is
       saved as 127 (since it's E+127). */

    *curfloatint = (count << 15) | (127 << 23);

    /* We've got our floating point value, so go ahead and get the sqrt().*/

    curfloat = sqrt(curfloat);

    /* Now all we need is the mantissa, so take the most-significant 8 bits
       of it, and put them in our happy little table... */

    sqrt_table[count] = (*curfloatint >> 15) & 0xFF;

    /* Now we do the same thing again, but for when the exponent is 1 (since
       if we don't store when E is 1, we lose that bit when E is divided by
       2 during the main routine).  It's the same deal, except instead of
       E being 0 (127), it's 1 (128), and the value goes in the upper half
       of the table, from indices 256 through 511. */

    *curfloatint = 0;
    *curfloatint = (count << 15) | (128 << 23);
    curfloat = sqrt(curfloat);
    sqrt_table[count+0x100] = (*curfloatint >> 15) & 0xFF;

    /* That's all we need! :) */
  }
}

/* unsigned int fixedsqrt(unsigned int radicand, short *tableaddy);

   This is the exact assembler mentioned in the article; no changes of any
   kind.  It takes in a single 16.16 fixed point radicand (passed in EAX)
   and the address of the table (in EBX), and returns the 16.16 square root
   back in EAX.  As you can see, these are predominantly 32-bit operations,
   so not using 32-bit protected mode will not only make it a pain in the
   neck to port the routine in general.... it will also make it phenomenally
   slower, due to the DB 66h prefix required for 32-bit operations in real
   mode.  Since each DB 66h is a clock cycle hit, you drop the speed of the
   function dramatically.  In other words, if you want performance, use
   protected mode, end of story. :)

   The register assignments come from the clauses at the end of the pragma:
   "parm [eax] [ebx]" for the two arguments, "value [eax]" for the return
   value, and "modify [eax ebx ecx edx]" for the registers declared as
   changed by the routine.

   Step by step:

     bsr, jz         ECX = index of the highest set bit of the radicand.  A
                     zero radicand sets the zero flag and jumps straight to
                     "done", so the 0 still in EAX is what gets returned.
     ror eax, cl     Rotate the leading 1 down to bit 0; the bits that were
                     just below it wrap around to the top of EAX.
     sub ecx, 16     Turn the bit index into the exponent E relative to the
                     16.16 binary point (so E runs from -16 to 15).
     shr eax, 24     Keep only the top 8 bits: the 8 radicand bits just
                     below the leading 1, i.e. the 8-bit mantissa.
     mov, and,       Copy the low bit of E into bit 8 of EAX, so an odd E
     shl, or         selects the upper half of the table (indices 256-511).
     sar ecx, 1      Halve E with an arithmetic (sign-preserving) shift.
     mov eax, [...]  Load the table entry at index EAX (2 bytes per entry).
     and eax, 0FFFFh The load is 32 bits wide, so mask off everything but
                     the 16-bit entry.
     or eax, 100h    Put the implicit leading 1 back on top of the 8-bit
                     mantissa read from the table.
     add ecx, 8      ECX becomes the shift count that moves that 1.8 value
                     into its final 16.16 position.
     js / shl / shr  Shift left by CL when the count is non-negative,
                     otherwise negate it and shift right.

   NOTE(review): the 32-bit load reads two bytes beyond the 16-bit entry it
   wants; for index 511 of a 512-entry table like sqrt_table that is two
   bytes past the end of the array.  The extra bytes are masked off, so the
   result does not depend on them -- confirm the over-read is acceptable on
   your target.

   NOTE(review): since bsr yields 0..31 for a non-zero radicand, ECX after
   "add ecx, 8" appears to always be in 0..15, which would make the
   "shiftright" path unreachable -- verify before relying on it. */

unsigned int fixedsqrt(unsigned int radicand, short *tableaddy);
#pragma aux fixedsqrt = \
        "bsr ecx, eax" \
        "jz  done" \
        "ror eax, cl" \
        "sub ecx, 16" \
        "shr eax, 24" \
        "mov edx, ecx" \
        "and edx, 1" \
        "shl edx, 8" \
        "or eax, edx" \
        "sar ecx, 1" \
        "mov eax, [ebx+eax*2]" \
        "and eax, 0FFFFh" \
        "or eax, 100h" \
        "add ecx, 8" \
        "js shiftright" \
        "shl eax, cl" \
        "jmp done" \
        "shiftright:" \
        "neg cl" \
        "shr eax, cl" \
        "done:" \
        value [eax] parm [eax] [ebx] modify [eax ebx ecx edx];


/* Demo driver: builds the lookup table, reads one radicand from standard
   input, runs it through fixedsqrt() as a 16.16 fixed point value and
   prints the result.

   Returns 0 on success, or 1 if no number could be read or the number does
   not fit in unsigned 16.16 fixed point. */
int main(void)
{
  float data, result;
  unsigned int dataint, resultint;

  printf("Building table...\n");
  make_sqrt_table();

  printf("Please enter your radicand, between 0.0001 and 65535.0 :\n");

  /* Without this check, bad input would leave "data" uninitialized. */
  if (scanf("%f", &data) != 1)
  {
    fprintf(stderr, "Could not read a number.\n");
    return 1;
  }

  /* Converting a float that is negative, or too large for the target type,
     to an unsigned integer is undefined, so reject anything that will not
     fit in 16.16 once scaled.  Written as a negated test so that a NaN is
     rejected as well. */
  if (!(data >= 0.0f && data < 65536.0f))
  {
    fprintf(stderr, "The radicand must be at least 0 and below 65536.\n");
    return 1;
  }

  data *= 0x10000;  /* Adjust to 16.16 fixed point... */
  dataint = (unsigned int)data;
  resultint = fixedsqrt(dataint, sqrt_table);
  result = (float)resultint;
  result /= 0x10000;   /* ...and go back to float again for output. */
  printf("The square root is %f\n",result);

  return 0;
}

