Gerd Isenberg
Joined: 08 Mar 2006 Posts: 1785 Location: Hattingen, Germany
|
Post subject: Re: New simple and fast bitboard move generator Posted: Sun May 06, 2007 11:16 am |
|
|
In assembly I would try something like this: use a shared index array, with the bitscan destination registers preinitialized to {0 (implicit), 7, 56, 63}, so that the four directions can still be distinguished when the bitscan finds no inner blocking square. The four 2-byte index lookups then touch at most two cache lines (if the index array is properly 64-byte cache-line aligned):
| Code: |
;-----------------------------------------------------------------------
; _rookAttacks (MASM, x64)
; In:    ecx = sq   (square number, used as a table row number)
;        rdx = occ  (occupancy bitboard)
; Out:   rax = QWORD loaded from rookLookup[idx], where idx is the OR of
;              four WORD entries taken from index[sq]
; Clobb: rcx, rdx, r8, r9, r10, r11, flags
;
; Tables (defined elsewhere; layout as assumed by the addressing below):
;   bits       - four QWORD masks per square at byte offsets 0/8/16/24,
;                row stride 32 bytes
;   index      - WORD entries selected by a bit number 0..63, so one row
;                is 64 * 2 = 128 bytes
;   rookLookup - QWORD entries
;
; NOTE(review): when a masked occupancy is zero, bsf/bsr must leave the
; destination register unchanged so the preloaded 0/7/56/63 survives.
; AMD documents that behaviour; Intel documents the destination as
; undefined - confirm for the CPUs you target.
;-----------------------------------------------------------------------
_rookAttacks PROC
        shl     ecx, 5                          ; rcx = sq*32: byte offset of bits[sq]
        mov     rax, rdx                        ; one copy of occ per mask
        mov     r8, rdx
        mov     r9, rdx
        mov     r10d, 7                         ; bit number kept if the 2nd scan finds nothing
        mov     r11d, 56                        ; bit number kept if the 3rd scan finds nothing
        and     rax, QWORD PTR bits[rcx]        ; rax == 0 here is the implicit default 0
        and     r8, QWORD PTR bits[rcx + 8]
        and     r9, QWORD PTR bits[rcx + 16]
        and     rdx, QWORD PTR bits[rcx + 24]
        shl     ecx, 2                          ; rcx = sq*128: byte offset of index[sq] (64 WORDs)
        bsf     rax, rax
        bsf     r10, r8
        mov     r8d, 63                         ; r8 has been consumed; reuse it with default 63
        bsr     r11, r9
        bsr     r8, rdx
        movzx   eax, WORD PTR index[rcx + 2*rax] ; zero-extend so rax is a clean table index
        or      ax, WORD PTR index[rcx + 2*r10]
        or      ax, WORD PTR index[rcx + 2*r11]
        or      ax, WORD PTR index[rcx + 2*r8]
        mov     rax, QWORD PTR rookLookup[8*rax]
        ret     0
_rookAttacks ENDP
|
But if I try this C code, the generated assembly unfortunately looks very ugly.
| Code: |
/*
 * Look up an entry of rookLookup for square `sq` under occupancy `occ`.
 *
 * For each of the four masks bits[sq][0..3], the masked occupancy is
 * bit-scanned (forward for masks 0 and 1, reverse for masks 2 and 3).
 * The resulting bit numbers select four entries of index[sq], which are
 * OR-ed together to form the index into rookLookup.
 *
 * When a masked occupancy is empty, the bit numbers 0, 7, 56 and 63 are
 * used instead. The intrinsics do not promise to leave *Index untouched
 * for a zero mask, so the defaults are applied from the return value
 * (zero means "no bit found") rather than by pre-initialising the output.
 *
 * NOTE(review): the names ln/le/ls/lw suggest north/east/south/west rays;
 * confirm against the code that builds `bits` and `index`.
 */
u64 rookAttacks(u64 occ, u32 sq) {
    unsigned long ln, le, ls, lw;

    if (!_BitScanForward64(&ln, occ & bits[sq][0])) ln = 0;
    if (!_BitScanForward64(&le, occ & bits[sq][1])) le = 7;
    if (!_BitScanReverse64(&ls, occ & bits[sq][2])) ls = 56;
    if (!_BitScanReverse64(&lw, occ & bits[sq][3])) lw = 63;

    u32 idx = index[sq][ln] | index[sq][le] | index[sq][ls] | index[sq][lw];
    return rookLookup[idx];
}
|
|
|