@Martin - If you haven't given up yet on SISSY then I have the following code I'd like you to try. If you have given up then what is your conclusion? I'm kinda hanging out in the wind here.
Because I'm going with an assembler function, I decided on a single flat ray array, like so.
u64* ray = new u64[131072];
The only difference in the init is the addressing into the array.
*(ray + (l << 14) + (sq << 8) + i) = bb;
Of course you can change that to something else if you wish. And I'm not 100% sure my math is correct.
Here is the initialization.
Code: Select all
// Fills the `ray` lookup table and the per-square `bob` / `rob` masks.
//
// For every square `sq`, every 8-bit value `i`, and every byte position `l`
// (bit offset k = 8 * l), the blocker set is b = i << k, i.e. `i` placed in
// byte `l` of an otherwise empty 64-bit word. From `sq` the code walks the
// four diagonal and then the four orthogonal directions, setting the bit of
// each visited square in `bb`; a walk stops after it has included a square
// whose bit is set in `b`. The result is stored at
// ray[(l << 14) + (sq << 8) + i].
//
// `bob[sq]` collects every square visited on the diagonal walks and `rob[sq]`
// every square visited on the orthogonal walks. Because the i == 0 pass has
// no blockers, each ends up holding the full set of squares on those lines.
//
// `ray`, `bob`, `rob` and `ONE` are declared outside this listing
// (ONE is presumably (u64)1 -- TODO confirm).
// NOTE(review): the listing stops right after the table store; the closing
// braces of the three loops and of the function are not shown.
void InitializeRay() {
u08 sq, sqr, k, l;
s08 x, dx, y, dy;
s32 i;
u64 b, bb;
for (sq = 0; sq < 64; sq++) {
// Split the square index into row (y) and column (x).
y = sq >> 3;
x = sq & 7;
bob[sq] = 0;
rob[sq] = 0;
// i enumerates every possible 8-bit occupancy of one byte of the board.
for (i = 0; i < 256; i++) {
// k is the bit offset of the byte being considered, l its index (k == 8 * l).
for (k = 0, l = 0; k <= 56; k += 8, l++) {
bb = 0;
// Blockers for this entry: only byte l is populated.
b = (u64)i << k;
// Diagonal walk: x increasing, y increasing.
for (dx = +1, dy = +1; x + dx < +8 && y + dy < +8; dx++, dy++) {
sqr = (((y + dy) << 3) + x + dx);
bb |= ONE << sqr;
bob[sq] |= ONE << sqr;
// The blocking square itself is included before the walk stops.
if ((ONE << sqr) & b) break;
}
// Diagonal walk: x decreasing, y increasing.
for (dx = -1, dy = +1; x + dx > -1 && y + dy < +8; dx--, dy++) {
sqr = (((y + dy) << 3) + x + dx);
bb |= ONE << sqr;
bob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Diagonal walk: x increasing, y decreasing.
for (dx = +1, dy = -1; x + dx < +8 && y + dy > -1; dx++, dy--) {
sqr = (((y + dy) << 3) + x + dx);
bb |= ONE << sqr;
bob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Diagonal walk: x decreasing, y decreasing.
for (dx = -1, dy = -1; x + dx > -1 && y + dy > -1; dx--, dy--) {
sqr = (((y + dy) << 3) + x + dx);
bb |= ONE << sqr;
bob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Orthogonal walk: x decreasing along the same row.
for (dx = -1; x + dx > -1; dx--) {
sqr = (y << 3) + x + dx;
bb |= ONE << sqr;
rob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Orthogonal walk: x increasing along the same row.
for (dx = +1; x + dx < +8; dx++) {
sqr = (y << 3) + x + dx;
bb |= ONE << sqr;
rob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Orthogonal walk: y increasing along the same column.
for (dy = +1; y + dy < +8; dy++) {
sqr = ((y + dy) << 3) + x;
bb |= ONE << sqr;
rob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Orthogonal walk: y decreasing along the same column.
for (dy = -1; y + dy > -1; dy--) {
sqr = ((y + dy) << 3) + x;
bb |= ONE << sqr;
rob[sq] |= ONE << sqr;
if ((ONE << sqr) & b) break;
}
// Table layout in u64 elements: 16384 per byte index l, 256 per square, then i.
*(ray + (l << 14) + (sq << 8) + i) = bb;
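The offsets in the assembler code look different because C pointer arithmetic on a u64* is scaled by the 8-byte element size, whereas assembler addresses are plain byte offsets: sq << 8 elements becomes sq << 11 bytes, and l << 14 elements becomes l * 131072 bytes. I'm only spelling out these obvious things because I'm not 100% sure about them. Please tell me if my thinking is off.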
Here is the code for the assembler version.
Code: Select all
;-----------------------------------------------------------------------
; u64 RayAttacks(sq, ray, occ)
;
; Syntax: MASM (ml64), Intel operand order.
; ABI:    Microsoft x64.
;
; In:     rcx = sq   square index, 0..63
;         rdx = ray  base address of the lookup table, laid out in u64
;                    elements as [8 ranks][64 squares][256 occupancies]
;         r8  = occ  64-bit occupancy, one byte per rank (rank 1 = bits 0..7)
; Out:    rax = AND of the eight table entries selected by (rank, sq,
;               occupancy byte of that rank)
; Clobb:  rcx, rdx, r8, r9, flags  (all volatile under this ABI)
; Stack:  none; leaf routine.
;
; The occupancy bytes are peeled off in registers, so the routine touches
; no writable memory of its own and is safe to call from several threads.
;-----------------------------------------------------------------------

SQ_SHIFT    EQU 11                      ; log2(256 entries * 8 bytes) per square
RANK_STRIDE EQU 131072                  ; 64 squares * 256 entries * 8 bytes per rank

_TEXT SEGMENT

RayAttacks PROC
        ; Win64 leaves the upper bits of a narrow integer argument
        ; undefined; masking also keeps the index inside the table.
        and     ecx, 63
        shl     ecx, SQ_SHIFT           ; byte offset of this square (zero-extends into rcx)
        add     rdx, rcx                ; rdx -> rank-1 row for sq

        ; Two accumulators (rax, rcx) keep the loads independent of each
        ; other; they are merged once at the end.
        movzx   r9d, r8b                ; rank 1 occupancy
        mov     rax, QWORD PTR [rdx + r9 * 8]
        shr     r8, 8
        movzx   r9d, r8b                ; rank 2 occupancy
        mov     rcx, QWORD PTR [rdx + r9 * 8 + RANK_STRIDE]

        shr     r8, 8
        movzx   r9d, r8b                ; rank 3 occupancy
        and     rax, QWORD PTR [rdx + r9 * 8 + (2 * RANK_STRIDE)]
        shr     r8, 8
        movzx   r9d, r8b                ; rank 4 occupancy
        and     rcx, QWORD PTR [rdx + r9 * 8 + (3 * RANK_STRIDE)]

        shr     r8, 8
        movzx   r9d, r8b                ; rank 5 occupancy
        and     rax, QWORD PTR [rdx + r9 * 8 + (4 * RANK_STRIDE)]
        shr     r8, 8
        movzx   r9d, r8b                ; rank 6 occupancy
        and     rcx, QWORD PTR [rdx + r9 * 8 + (5 * RANK_STRIDE)]

        shr     r8, 8
        movzx   r9d, r8b                ; rank 7 occupancy
        and     rax, QWORD PTR [rdx + r9 * 8 + (6 * RANK_STRIDE)]
        shr     r8, 8                   ; 56 bits shifted out: r8 is now exactly the rank 8 byte
        and     rcx, QWORD PTR [rdx + r8 * 8 + (7 * RANK_STRIDE)]

        and     rax, rcx                ; merge the two partial results
        ret
RayAttacks ENDP

_TEXT ENDS
END