Nice try, but it has eight lookups and a bunch of instructions and even cmov. I don't think it is faster than a rotate of a lookup with 0x88 square differences:
Desperado wrote:
Hello again,
last time it was just curiosity, thinking about this topic.
Now i want to have a solution for implementation.
So i started today thinking about it again.
So here is a simple try, and I would be curious
if this is competitive with the _x88_Lookup_approach.
of course, as ever there is room for other ideas and improvements.
thoughts:
- assuming sq0!=sq1 we can simply return(tmp)
- but the _switchDirectionPart_ may be improved
because there will always be only one direction...
- well, at least this is branchless code without any
lookups. So my hope is, it is good enough for practical
use (besides the zillion other approaches)
Code: Select all
//--------inBetween--------------------- //-------------------------------------- BTB_T inBetween(SQR_T sq0,SQR_T sq1) { const BTB_T m = (BTB_T) -1; BTB_T tmp = 0; //switchDirection tmp = fileMask(sq0) & fileMask(sq1); tmp|= rankMask(sq0) & rankMask(sq1); tmp|= diagMask(sq0) & diagMask(sq1); tmp|= antiMask(sq0) & antiMask(sq1); //innerBits tmp&= (m<<sq0) ^ (m<<sq1); tmp&= tmp - 1; return(sq0==sq1 ? 0 : tmp); }
Michael
Code: Select all
;-----------------------------------------------------------------------
; Compiler listing for inBetween(sq0, sq1)
; In:    cl = sq0, dl = sq1
; Out:   rax = result bitboard (0 when sq0 == sq1)
; Uses:  rbx (saved to and restored from [rsp+8]), rcx, rdx, r8, r10, r11
; NOTE(review): register usage looks like the Microsoft x64 convention,
;               with [rsp+8] being the caller-provided home slot -- confirm.
;-----------------------------------------------------------------------
        mov     QWORD PTR [rsp+8], rbx          ; preserve rbx
        movzx   r11d, cl                        ; r11 = sq0
        lea     rcx, OFFSET FLAT:__ImageBase    ; base for the table accesses
        movzx   ebx, dl                         ; rbx = sq1

        ; --- switchDirection: OR together the four mask intersections ---
        mov     r10, QWORD PTR aMask[rcx+rbx*8] ; aMask[sq1]
        mov     rax, QWORD PTR dMask[rcx+rbx*8] ; dMask[sq1]
        or      rdx, -1                         ; rdx = m (all ones)
        and     rax, QWORD PTR dMask[rcx+r11*8] ; dMask[sq1] & dMask[sq0]
        and     r10, QWORD PTR aMask[rcx+r11*8] ; aMask[sq1] & aMask[sq0]
        mov     r8, rdx                         ; second copy of m
        or      r10, rax
        mov     rax, QWORD PTR rMask[rcx+rbx*8]
        and     rax, QWORD PTR rMask[rcx+r11*8] ; rank intersection
        or      r10, rax
        mov     rax, QWORD PTR fMask[rcx+rbx*8]
        and     rax, QWORD PTR fMask[rcx+r11*8] ; file intersection
        movzx   ecx, r11b                       ; cl = sq0 (shift count)
        or      r10, rax                        ; r10 = combined line mask

        ; --- innerBits: (m << sq0) ^ (m << sq1) ---
        shl     r8, cl                          ; m << sq0
        movzx   ecx, bl                         ; cl = sq1 (shift count)
        shl     rdx, cl                         ; m << sq1
        xor     eax, eax                        ; rax = 0, the sq0 == sq1 result
        xor     r8, rdx
        and     r10, r8                         ; tmp &= range mask
        lea     rcx, QWORD PTR [r10-1]          ; tmp - 1
        and     r10, rcx                        ; tmp &= tmp - 1

        ; --- return (sq0 == sq1 ? 0 : tmp) ---
        cmp     r11b, bl
        mov     rbx, QWORD PTR [rsp+8]          ; restore rbx; mov leaves flags intact
        cmove   r10, rax                        ; equal squares -> 0
        mov     rax, r10
        ret     0
PS: wishing everyone a happy new year
Code: Select all
// Lookup table indexed by x88diff(from, to); each entry is rotated left by
// `from` in obstructed() to yield the final bitboard.
// Size: 240 entries * 8 bytes = 1920 bytes, i.e. 2 KByte minus 128 bytes.
U64 obstructedBy0x88Diff[240]; // 1920 bytes, 2KByte - 128 Byte
/*
 * Maps a (from, to) square pair to a table index.
 *
 * For a square s, s + (s|7) equals 16*(s/8) + (s%8) + 7, so the result is
 * the difference of the two squares in 16-per-rank (0x88) coordinates,
 * biased by 120. For f and t in 0..63 the result lies in 1..239.
 *
 * f: origin square index
 * t: target square index
 */
int x88diff(int f, int t) {
    int idx = t - f;    /* plain 0..63 square difference */
    idx += t | 7;       /* add target with its file bits forced to 7 */
    idx -= f | 7;       /* subtract origin with its file bits forced to 7 */
    idx += 120;         /* bias so the smallest difference (-119) maps to 1 */
    return idx;
}
/*
 * Looks up the table entry selected by x88diff(from, to) and rotates it
 * left by `from`.
 */
U64 obstructed(int from, int to) {
    const int idx = x88diff(from, to);
    const U64 pattern = obstructedBy0x88Diff[idx];
    return rotateLeft(pattern, from);
}
Code: Select all
;-----------------------------------------------------------------------
; Inline sequence for: U64 obstructed(int from, int to)
; In:    ecx = from, edx = to
; Out:   rax = obstructedBy0x88Diff[x88diff(from, to)] rotated left by from
; Clobb: rax, rdx, r8, flags
;-----------------------------------------------------------------------
        mov     eax, edx                ; to
        mov     r8d, ecx                ; from
        or      eax, 7                  ; t|7
        or      r8d, 7                  ; f|7
        sub     eax, ecx                ; (t|7) - f
        sub     eax, r8d                ; (t|7) - f - (f|7); can be negative
        ; The 32-bit subtractions zero-extend into rax, so a negative
        ; intermediate looks like a large positive 64-bit value. Writing the
        ; 32-bit destination truncates the sum to 32 bits and zero-extends
        ; it, giving the intended index (1..239 for squares 0..63) and also
        ; ignoring whatever the upper half of rdx holds.
        lea     eax, [rax + rdx + 120]  ; index = ... + t + 120
        lea     rdx, OFFSET FLAT:__ImageBase
        mov     rax, QWORD PTR obstructedBy0x88Diff[rdx+8*rax]
        rol     rax, cl                 ; rotate left by from
Gerd