/////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////
// (c) 1997 Intel Corp. /////////////////////////////////////////
// Modified under licence 1997 EPCL /////////////////////////////
/////////////////////////////////////////////////////////////////
        .file   "i64support.s"
        .data
        .align 16
        .bss
        .align 16
/////////////////////////////////////////////////////////////////
// 64 bit unsigned integer shift: EDX:EAX >> CL
        .text
        .align 16
        .globl __ushr64
__ushr64:
        cmpb    $63, %cl
        ja      .hugel          // count > 63: result is zero (label in __ashl64 below)
        cmpb    $31, %cl
        ja      .mediumu
        shrdl   %edx,%eax       // ,%cl
        shrl    %cl,%edx
        ret
.mediumu:                       // 32 <= count <= 63
        andb    $31,%cl
        shrl    %cl,%edx
        movl    %edx,%eax
        xorl    %edx,%edx
        ret
        .type   __ushr64,@function
        .size   __ushr64,.-__ushr64
/////////////////////////////////////////////////////////////////
// 64 bit signed integer shift (arithmetic): EDX:EAX >> CL
        .text
        .align 16
        .globl __ashr64
__ashr64:
        cmpb    $63, %cl
        ja      .huge
        cmpb    $31, %cl
        ja      .medium
        shrdl   %edx,%eax       // ,%cl
        sarl    %cl,%edx
        ret
.medium:                        // 32 <= count <= 63
        movl    %edx,%eax
        andb    $31,%cl
        sarl    %cl,%eax
        sarl    $31,%edx
        ret
.huge:                          // count > 63: result is the sign fill
        sarl    $31,%edx
        movl    %edx,%eax
        ret
        .type   __ashr64,@function
        .size   __ashr64,.-__ashr64
/////////////////////////////////////////////////////////////////
// 64 bit shift: EDX:EAX << CL
        .text
        .align 16
        .globl __ashl64
__ashl64:
        cmpb    $63, %cl
        ja      .hugel
        cmpb    $31, %cl
        ja      .mediuml
        shldl   %eax,%edx       // ,%cl
        shll    %cl,%eax
        ret
.hugel:                         // count > 63: result is zero
        xorl    %edx,%edx
        xorl    %eax,%eax
        ret
.mediuml:                       // 32 <= count <= 63
        movl    %eax,%edx
        andb    $31,%cl
        xorl    %eax,%eax
        shll    %cl,%edx
        ret
        .type   __ashl64,@function
        .size   __ashl64,.-__ashl64
/////////////////////////////////////////////////////////////////
// 64 bit integer remainder:
//      Divisor at top of FP stack, dividend at top-1
//      Pop operands and push result onto FP stack
        .text
        .align 16
        .globl __arem64
__arem64:
        subl    $0x14,%esp      // 20 byte scratch area at 8(%esp)..27(%esp)
        pushl   %ebx
        pushl   %edi
        fistpll 20(%esp)        // divisor:  low word at 20(%esp), high at 24(%esp)
        fistpll 12(%esp)        // dividend: low word at 12(%esp), high at 16(%esp)
        movl    16(%esp),%eax
        xorl    %edi,%edi
        testl   %eax,%eax
        jge     .rem1           // dividend >= 0
        movl    12(%esp),%edx   // dividend < 0: remember the sign and negate
        incl    %edi
        negl    %eax
        negl    %edx
        .byte   0x83, 0xD8, 0x00        // sbbl $0,%eax
        movl    %eax,16(%esp)
        movl    %edx,12(%esp)
.rem1:
        movl    24(%esp),%eax
        testl   %eax,%eax
        jge     .rem2           // divisor >= 0
        movl    20(%esp),%edx   // divisor < 0: negate
        negl    %eax
        negl    %edx
        .byte   0x83, 0xD8, 0x00        // sbbl $0,%eax
        movl    %eax,24(%esp)
        movl    %edx,20(%esp)
.rem2:
        testl   %eax,%eax
        jnz     .rem3           // divisor does not fit in 32 bits
        movl    20(%esp),%ecx   // 32 bit divisor: two chained 32 bit divides
        movl    16(%esp),%eax
        xorl    %edx,%edx
        divl    %ecx
        movl    12(%esp),%eax
        divl    %ecx
        movl    %edx,%eax
        xorl    %edx,%edx
        decl    %edi
        jns     .remskip        // dividend was negative: negate the remainder
        jmp     .remxit
.rem3:                          // 64 bit divisor: estimate the quotient
        movl    %eax,%ebx
        movl    20(%esp),%ecx
        movl    16(%esp),%edx
        movl    12(%esp),%eax
.rem4:                          // shift divisor and dividend right together
        shrl    $1,%ebx
        rcrl    $1,%ecx
        shrl    $1,%edx
        rcrl    $1,%eax
        testl   %ebx,%ebx
        jnz     .rem4
        divl    %ecx
        movl    %eax,%ecx       // ECX = quotient estimate
        mull    24(%esp)        // estimate * divisor high word
        xchgl   %ecx,%eax
        mull    20(%esp)        // estimate * divisor low word
        addl    %ecx,%edx       // EDX:EAX = estimate * divisor
        jc      .rem5
        cmpl    16(%esp),%edx
        ja      .rem5           // product > dividend: estimate was one too high
        jb      .rem6
        cmpl    12(%esp),%eax
        jbe     .rem6
.rem5:
        subl    20(%esp),%eax   // take back one divisor
        sbbl    24(%esp),%edx
.rem6:
        subl    12(%esp),%eax   // quotient*divisor - dividend = -remainder
        sbbl    16(%esp),%edx
        decl    %edi
        jns     .remxit         // dividend was negative: keep the minus sign
.remskip:
        negl    %edx
        negl    %eax
        sbbl    $0,%edx
.remxit:
        movl    %eax,12(%esp)
        movl    %edx,16(%esp)
        fildll  12(%esp)        // push the result onto the FP stack
        popl    %edi            // restore the saved registers
        popl    %ebx
        addl    $0x14,%esp      // release the scratch area
        ret
        .type   __arem64,@function
        .size   __arem64,.-__arem64
/////////////////////////////////////////////////////////////////
// 64 bit integer shift: EDX:EAX >> CL (ECX negative) or
//                       EDX:EAX << CL (ECX positive)
        .text
        .align 16
        .globl __ash64
__ash64:
        cmp     $0,%ecx
        jge     __ashl64        // signed test: count >= 0 shifts left
        neg     %ecx
        jmp     __ashr64        // negative count: arithmetic right shift
        .type   __ash64,@function
        .size   __ash64,.-__ash64
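/////////////////////////////////////////////////////////////////
// Reference semantics (comment only, a sketch derived from the code
// above; the C names and prototypes are illustrative, since the real
// entry points take their operands in EDX:EAX and CL, or on the x87
// stack in the case of __arem64, rather than via the C argument list):
//
// typedef unsigned long long int ULLI;
// typedef long long int LLI;
//
// /* logical right shift; counts above 63 give zero */
// ULLI ushr64(ULLI x, unsigned char cl)
// { return cl > 63 ? 0 : x >> cl; }
//
// /* arithmetic right shift; counts above 63 give the sign fill.  */
// /* The sketch assumes '>>' on a signed operand is arithmetic,   */
// /* as it is with the IA-32 compilers this file is written for.  */
// LLI ashr64(LLI x, unsigned char cl)
// { return cl > 63 ? (x < 0 ? -1LL : 0LL) : x >> cl; }
//
// /* left shift; counts above 63 give zero */
// ULLI ashl64(ULLI x, unsigned char cl)
// { return cl > 63 ? 0 : x << cl; }
//
// /* combined shift: non-negative counts shift left, negative right */
// LLI ash64(LLI x, int n)
// { return n >= 0 ? (LLI)ashl64((ULLI)x, n) : ashr64(x, -n); }
//
// /* remainder with the sign of the dividend, as for C's '%' */
// LLI arem64(LLI dividend, LLI divisor)
// { return dividend % divisor; }
//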
/////////////////////////////////////////////////////////////////
// <=64 bit circular shift
//      see C version below for specification.
        .text
        .align 16
        .globl __cshft64
__cshft64:
        pushl   %edi
        pushl   %esi
        pushl   %ebp
        pushl   %ebx
        movl    28(%esp), %ebx          // EBX = shift
        subl    $56, %esp
        testl   %ebx, %ebx
        je      .B1.15                  // shift == 0: nothing to do
        jg      .B1.4
        movl    %ebx, %eax              // shift < 0
        negl    %eax
        movl    %eax, %esi
        andl    $63, %esi               // ESI = actualShift = (-shift) & 63
        jmp     .B1.5
.B1.4:
        movl    %ebx, %esi
        andl    $63, %esi               // ESI = actualShift = shift & 63
.B1.5:
        movl    88(%esp), %ebp          // EBP = bits
        cmpl    $64, %ebp
        je      .B1.11
        movl    %ebp, %eax
        movl    %eax, %ecx
        movl    $-1, %eax
        movl    $-1, %edx
        call    __ashl64                // EDX:EAX = m1 = -1 << bits
        movl    %eax, %ecx
        movl    %edx, %edi
        notl    %edi
        notl    %ecx                    // EDI:ECX = m2 = ~m1
        movl    %eax, 24(%esp)
        movl    %edx, 28(%esp)
        movl    %ecx, 16(%esp)
        movl    %edi, 20(%esp)
        movl    %ebx, 40(%esp)
        movl    76(%esp), %ebx          // opnd, low word
        movl    %esi, 44(%esp)
        movl    80(%esp), %esi          // opnd, high word
        movl    %ebp, 48(%esp)
        movl    %ebx, %ebp
        movl    %ebx, 52(%esp)
        movl    %esi, %ebx
        andl    %ebp, %eax
        andl    %ebx, %edx              // EDX:EAX = untouched = opnd & m1
        movl    52(%esp), %ebx
        movl    48(%esp), %ebp
        andl    %ebx, %ecx
        andl    %esi, %edi              // EDI:ECX = field = opnd & m2
        movl    %eax, 8(%esp)
        movl    %edx, 12(%esp)
        movl    %ecx, (%esp)
        movl    %edi, 4(%esp)
        movl    44(%esp), %esi
        cmpl    %ebp, %esi
        movl    40(%esp), %ebx
        jl      .B1.8
        movl    %esi, %eax              // actualShift >= bits:
        cltd                            //   reduce it modulo bits
        idivl   %ebp
        imull   %ebp, %eax
        subl    %eax, %esi
.B1.8:
        testl   %ebx, %ebx
        jle     .B1.13                  // shift < 0: rotate the field right
        movl    (%esp), %ebx            // shift > 0: rotate the field left
        movl    4(%esp), %edi
        movl    %esi, %eax
        movl    %eax, %ecx
        movl    %ebx, %eax
        movl    %edi, %edx
        call    __ashl64                // field << actualShift
        movl    %ebp, %ecx
        movl    %edx, %ebp
        movl    16(%esp), %edx
        movl    %edi, 40(%esp)
        movl    20(%esp), %edi
        movl    %ebx, 44(%esp)
        movl    %eax, %ebx
        andl    %edx, %ebx
        andl    %edi, %ebp              // masked with m2
        subl    %esi, %ecx              // bits - actualShift
        movl    40(%esp), %edi
        movl    %ecx, %eax
        movl    %eax, %ecx
        movl    44(%esp), %eax
        movl    %edi, %edx
        call    __ushr64                // field >> (bits - actualShift)
        orl     %ebx, %eax
        orl     %ebp, %edx
        movl    %eax, 32(%esp)
        movl    %edx, 36(%esp)
.B1.10:                                 // combine untouched bits and shifted field
        movl    8(%esp), %eax
        movl    12(%esp), %edx
        movl    32(%esp), %ecx
        movl    36(%esp), %ebx
        orl     %ecx, %eax
        orl     %ebx, %edx
        addl    $56, %esp
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
.B1.11:                                 // bits == 64: full 64 bit rotate
        testl   %ebx, %ebx
        jge     .B1.14
        movl    76(%esp), %ebx          // shift < 0: rotate right
        movl    80(%esp), %ebp
        movl    %esi, %eax
        movl    %eax, %ecx
        movl    %ebx, %eax
        movl    %ebp, %edx
        call    __ushr64                // opnd >> actualShift
        movl    %ebx, %ecx
        movl    %eax, %ebx
        movl    %ebp, %eax
        movl    %edx, %ebp
        negl    %esi
        movl    %eax, %edx
        addl    $64, %esi               // 64 - actualShift
        movl    %esi, %eax
        movl    %ecx, %esi
        movl    %eax, %ecx
        movl    %esi, %eax
        call    __ashl64                // opnd << (64 - actualShift)
        movl    %ebx, %ecx
        orl     %eax, %ecx
        orl     %edx, %ebp
        movl    %ecx, %eax
        movl    %ebp, %edx
        addl    $56, %esp
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
.B1.13:                                 // bits != 64, shift < 0: rotate the field right
        movl    (%esp), %ebx
        movl    4(%esp), %edi
        movl    %esi, %eax
        movl    %eax, %ecx
        movl    %ebx, %eax
        movl    %edi, %edx
        call    __ushr64                // field >> actualShift
        movl    %ebx, %ecx
        movl    %eax, %ebx
        movl    %ebp, %eax
        movl    %edx, %ebp
        subl    %esi, %eax              // bits - actualShift
        movl    %ecx, %edx
        movl    %eax, %ecx
        movl    %edx, %eax
        movl    %edi, %edx
        call    __ashl64                // field << (bits - actualShift)
        movl    16(%esp), %ecx
        movl    20(%esp), %esi
        andl    %ecx, %eax
        andl    %esi, %edx              // masked with m2
        orl     %eax, %ebx
        orl     %edx, %ebp
        movl    %ebx, 32(%esp)
        movl    %ebp, 36(%esp)
        jmp     .B1.10
.B1.14:                                 // bits == 64, shift > 0: rotate left
        movl    76(%esp), %ebx
        movl    80(%esp), %ebp
        movl    %esi, %eax
        movl    %eax, %ecx
        movl    %ebx, %eax
        movl    %ebp, %edx
        call    __ashl64                // opnd << actualShift
        movl    %ebx, %ecx
        movl    %eax, %ebx
        movl    %ebp, %eax
        movl    %edx, %ebp
        negl    %esi
        movl    %eax, %edx
        addl    $64, %esi               // 64 - actualShift
        movl    %esi, %eax
        movl    %ecx, %esi
        movl    %eax, %ecx
        movl    %esi, %eax
        call    __ushr64                // opnd >> (64 - actualShift)
        movl    %ebx, %ecx
        orl     %eax, %ecx
        orl     %edx, %ebp
        movl    %ecx, %eax
        movl    %ebp, %edx
        addl    $56, %esp
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
.B1.15:                                 // shift == 0: return opnd unchanged
        movl    76(%esp), %eax
        movl    80(%esp), %edx
        addl    $56, %esp
        popl    %ebx
        popl    %ebp
        popl    %esi
        popl    %edi
        ret
        .type   __cshft64,@function
        .size   __cshft64,.-__cshft64
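/////////////////////////////////////////////////////////////////
// Example call (comment only, a sketch): __cshft64 takes its three
// arguments on the stack in the order of the C prototype below and
// returns the 64 bit result in EDX:EAX; the caller-cleanup shown
// assumes the usual cdecl convention.  Rotating the low 16 bits of
// EDX:EAX left by 2 might then look like:
//
//      pushl   $16             // bits
//      pushl   $2              // shift (positive = left)
//      pushl   %edx            // opnd, high word
//      pushl   %eax            // opnd, low word
//      call    __cshft64       // result in EDX:EAX
//      addl    $16, %esp       // caller pops the arguments
//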
//
// typedef unsigned long long int ULLI;
//
// ULLI __cshft64(ULLI opnd, int shift, int bits)
// {
//     /* Perform cyclic shift of rightmost 'bits' bits of 'opnd', shifting */
//     /* 'shift' bits to the left (if 'shift' positive) or the right.      */
//     /* Leave the other bits of 'opnd' untouched and return the composite */
//     /* result.                                                           */
//
//     int actualShift;
//
//     if (shift == 0)
//         return opnd;
//
//     if (shift > 0)
//         actualShift = shift & 63;
//     else
//         actualShift = (-shift) & 63;
//
//     if (bits == 64) {
//         if (shift < 0)
//             return (opnd >> actualShift) | (opnd << (64-actualShift));
//         else
//             return (opnd << actualShift) | (opnd >> (64-actualShift));
//
//     } else {
//         ULLI m1 = (-1LL) << bits;    /* mask for untouched bits */
//         ULLI m2 = ~m1;               /* mask for shifted bits   */
//         ULLI untouched = opnd & m1;
//         ULLI field = opnd & m2;
//         ULLI shifted;
//
//         if (actualShift >= bits)
//             actualShift -= (actualShift / bits) * bits;
//
//         if (shift > 0) {
//             shifted = field << actualShift;
//             shifted = (shifted & m2) | (field >> (bits - actualShift));
//         } else {
//             shifted = field >> actualShift;
//             shifted = shifted | ((field << (bits - actualShift)) & m2);
//         }
//         return untouched | shifted;
//     }
// }
//
// #include <stdio.h>
//
// typedef struct values {
//     ULLI pattern;
//     int  bits;
//     int  shift;
// } Values;
//
// Values table[] = {
//     {0x8181818181814003ULL, 16,   2},
//     {0x0123456789abcdefULL,  4,   0},
//     {0x0123456789abcdefULL, 60,  -4},
//     {0x0123456789abcdefULL, 64,  -4},
//     {0x0123456789abcdefULL, 64,   8},
//     {0x0123456789abcdefULL, 12,  20},
//     {0x0123456789abcdefULL, 12, -20},
//     {0x0123456789abcdefULL, 63,   1},
//     {0x0123456789abcdefULL, 63,  -1},
// };
//
// int main()
// {
//     ULLI t1;
//     ULLI r1;
//     unsigned *p;
//     unsigned *q = (unsigned *)&r1;
//     int i;
//
//     for (i = 0; i < (sizeof(table)/sizeof(Values)); i += 1) {
//         r1 = __cshft64(table[i].pattern, table[i].shift, table[i].bits);
//         p = (unsigned *)&table[i].pattern;
//         printf("rightmost %2d bits of %08X%08X shifted %3d -> %08X%08X\n",
//                table[i].bits, *(p+1), *p, table[i].shift, *(q+1), *q);
//
//     }
//     return 0;
// }
//
//
// rightmost 16 bits of 8181818181814003 shifted   2 -> 818181818181000D
// rightmost  4 bits of 0123456789ABCDEF shifted   0 -> 0123456789ABCDEF
// rightmost 60 bits of 0123456789ABCDEF shifted  -4 -> 0F123456789ABCDE
// rightmost 64 bits of 0123456789ABCDEF shifted  -4 -> F0123456789ABCDE
// rightmost 64 bits of 0123456789ABCDEF shifted   8 -> 23456789ABCDEF01
// rightmost 12 bits of 0123456789ABCDEF shifted  20 -> 0123456789ABCFDE
// rightmost 12 bits of 0123456789ABCDEF shifted -20 -> 0123456789ABCEFD
// rightmost 63 bits of 0123456789ABCDEF shifted   1 -> 02468ACF13579BDE
// rightmost 63 bits of 0123456789ABCDEF shifted  -1 -> 4091A2B3C4D5E6F7
//
/////////////////////////////////////////////////////////////////
// the end //////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////