[Tinyos-help] long int square root in TinyOs

Giorgos Mazarakis gemazar at mail.ntua.gr
Fri Mar 2 06:18:26 PST 2007


Dear all,

 

I am trying to implement a fast 32 bit integer square root in TinyOs 1.x on
Mica2 motes.

The fastest code in C I found so far was the fred_sqrt which code I copy in
the end of this message should anyone needs it. The version found on the net
doesn't use casting but it won't work on TinyOs if casting is not used (for
me at least).

 

Compiling the code and simulating it with AVRStudio I found that an
ATMega128  needs about 2000 cycles to execute. 

I found an other 32bit integer square root written in assembly which only
needs about 1100 cycles.

My problem is that I can't figure out how to include this piece of assembly
code in my TinyOs program. I think that WinAVR uses a different syntax for
assembly code than the code written with AVRStudio and I don't know how to
translate it. Can anyone help because I'm lost in translation? 

The assembly code is: 

 

unsigned int lsqrt(unsigned long x)

{

#asm

    push r20

    push r21

    ld   r26,y+

    ld   r27,y+

    ld   r24,y+

    ld   r25,y+

    clr  r0

    clr  r1

    clr  r20

    clr  r21

    clr  r22

    ldi  r23,0x80

    clr  r30

    clr  r31

    clt

__lsqrt0:

    movw r20,r22

    lsr  r21

    ror  r20

    ror  r1

    ror  r0

    or   r20,r30

    or   r21,r31

    brts __lsqrt1

    cp   r26,r0

    cpc  r27,r1

    cpc  r24,r20

    cpc  r25,r21

    brcs __lsqrt2

__lsqrt1:

    sub  r26,r0

    sbc  r27,r1

    sbc  r24,r20

    sbc  r25,r21

    or   r30,r22

    or   r31,r23

__lsqrt2:

    bst  r25,7

    lsl  r26

    rol  r27

    rol  r24

    rol  r25

    lsr  r23

    ror  r22

    brcc __lsqrt0

    pop  r21

    pop  r20

    ret

#endasm

}

 

 

 

The c code for 32 bit integer sqrt is:

 

 

/*          32 bit integer Square root
----------------------------------------------------------------------------
--------

/* 

** Restriction: x <= 0xffffffff 

*/

uint32_t fred_sqrt(uint32_t x) 

{ 

            uint8_t sqq_table[] = { 

        0,   16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,


        59,  61,  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,


        84,  86,  87,  89,  90,  91,  93,  94,  96,  97,  98,  99,  101,
102, 

        103, 104, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117,
118, 

        119, 120, 121, 122, 123, 124, 125, 126, 128, 128, 129, 130, 131,
132, 

        133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 144,
145, 

        146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, 156,
157, 

        158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167,
168, 

        169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178,
178, 

        179, 180, 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187,
188, 

        189, 189, 190, 191, 192, 192, 193, 193, 194, 195, 195, 196, 197,
197, 

        198, 199, 199, 200, 201, 201, 202, 203, 203, 204, 204, 205, 206,
206, 

        207, 208, 208, 209, 209, 210, 211, 211, 212, 212, 213, 214, 214,
215, 

        215, 216, 217, 217, 218, 218, 219, 219, 220, 221, 221, 222, 222,
223, 

        224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, 230,
231, 

        231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238,
238, 

        239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245,
246, 

        246, 247, 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252,
253, 

        253, 254, 254, 255 

    }; 

 

    uint32_t   xn; 

 

    if (x >= 0x10000) 

        if (x >= 0x1000000) 

            if (x >= 0x10000000) 

                if (x >= 0x40000000) { 

                    if (x >= 65535UL * 65535UL) 

                        return 65535; 

                    xn = (uint32_t) sqq_table[x >> 24] << 8; 

                } else 

                    xn = (uint32_t) sqq_table[x >> 22] << 7; 

            else if (x >= 0x4000000) 

                xn = (uint32_t) sqq_table[x >> 20] << 6; 

            else 

                xn = (uint32_t) sqq_table[x >> 18] << 5; 

        else { 

            if (x >= 0x100000) 

                if (x >= 0x400000) 

                    xn = (uint32_t) sqq_table[x >> 16] << 4; 

                else 

                    xn = (uint32_t) sqq_table[x >> 14] << 3; 

            else if (x >= 0x40000) 

                xn = (uint32_t) sqq_table[x >> 12] << 2; 

            else 

                xn = (uint32_t) sqq_table[x >> 10] << 1; 

  

            goto nr1; 

        } 

    else if (x >= 0x100) { 

        if (x >= 0x1000) 

            if (x >= 0x4000) 

                xn = (uint32_t) (sqq_table[x >> 8] >> 0) + 1; 

            else 

                xn = (uint32_t) (sqq_table[x >> 6] >> 1) + 1; 

        else if (x >= 0x400) 

            xn = (uint32_t) (sqq_table[x >> 4] >> 2) + 1; 

        else 

            xn = (uint32_t) (sqq_table[x >> 2] >> 3) + 1; 

   

        goto adj; 

    } else 

        return sqq_table[x] >> 4; 

 

 

    xn = (xn + 1 + x / xn) / 2; 

nr1: 

    xn = (xn + 1 + x / xn) / 2; 

adj: 

    if (xn * xn > x) 

        xn--; 

 

    return xn; 

}

 

 

Thanks

Giorgos M

-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mail.millennium.berkeley.edu/pipermail/tinyos-help/attachments/20070302/774e5ffd/attachment.htm


More information about the Tinyos-help mailing list