Main idea is to mask ray with occupancy and use bitscan to find lsbit/msbit, then use this to build cut mask to remove blocked parts.
Two variants (one with 4 bitscans per slider, another using lsbit isolation trick to build the cut mask).
It seems to work, but I haven't done any rigorous tests. Most likely slower than fancy magics, but it only needs a 4k table
and is super-easy to setup.
Full pseudo-C++ code follows; ulong = uint64_t, and the ul suffix likewise marks 64-bit constants. bsfl/bsrl are 64-bit forward/reverse bitscans corresponding to the x86-64 BSF/BSR instructions.
The code assumes that A1 maps to bit 0 and H8 to 63, but it should be easy to adapt to other bitboard layouts (adjust shifts, reorder masks that use bsf vs bsr).
Code:
//-----------------------------------------------------------------------------
// Sliding-piece attacks along one line (diagonal or orthogonal) for a given
// occupancy.  mask[0]/mask[1] are the two rays that grow toward higher bit
// indices (first blocker found via lsbit), mask[2]/mask[3] grow toward lower
// indices (first blocker found via msbit).  The first blocker square stays in
// the result; an unblocked ray is returned in full.
ulong line_attacks(ulong occ, const ulong mask[4])
{
	ulong res = 0;
	/* // variant 1 (with bsf)
	auto bit0 = bsfl(occ & mask[0] | (1ul << 63));
	res = mask[0] & ((2ul << bit0)-1);
	auto bit1 = bsfl(occ & mask[1] | (1ul << 63));
	res |= mask[1] & ((2ul << bit1)-1);
	*/
	// variant 2 (without bsf): isolate the lowest blocker bit b via b & -b;
	// then 2*b - 1 covers every bit up to and including b.  With no blocker
	// b == 0 and 2*b - 1 wraps to all ones, keeping the whole ray; a blocker
	// on bit 63 wraps the same way, which is equally correct.
	for (int dir = 0; dir < 2; dir++)
	{
		ulong b = occ & mask[dir];
		b &= ulong(0) - b;              // lsbit isolation (0 if ray is empty)
		res |= mask[dir] & (2 * b - 1); // keep ray up to and incl. blocker
	}
	// upper rays: bsr finds the highest blocker; OR-ing in bit 0 makes the
	// scan argument nonzero and yields index 0 (i.e. the full mask survives)
	// when the ray is empty.
	for (int dir = 2; dir < 4; dir++)
	{
		auto top = bsrl((occ & mask[dir]) | 1);
		res |= mask[dir] & ~((1ul << top) - 1); // keep bits >= blocker
	}
	return res;
}
//-----------------------------------------------------------------------------
// Move every bit one rank up (A1 = bit 0, so "up" is toward H8); bits shifted
// past the 8th rank fall off the top of the 64-bit word.
inline ulong shift_up(ulong bb)
{
	return bb << 8;
}
//-----------------------------------------------------------------------------
// Move every bit one rank down; bits on the 1st rank drop out of the word.
inline ulong shift_down(ulong bb)
{
	return bb >> 8;
}
//-----------------------------------------------------------------------------
// Move every bit one file toward the A-file.  Masking off the H-file after
// the shift keeps bits from wrapping onto the previous rank.
inline ulong shift_left(ulong bb)
{
	return (bb >> 1) & 0x7f7f7f7f7f7f7ful;
}
//-----------------------------------------------------------------------------
// Move every bit one file toward the H-file.  Masking off the A-file after
// the shift keeps bits from wrapping onto the next rank.
inline ulong shift_right(ulong bb)
{
	return (bb << 1) & 0xfefefefefefefefeul;
}
// Per-square pseudo-attack masks: 8 x 8 bytes = 64 bytes per entry, so the
// alignas(64) keeps each entry on its own cache line and the whole table at
// 64 * 64 = 4kb.
struct alignas(64) at_table_t
{
	// diagonal masks: [0]/[1] are lower rays (lsbit scan), [2]/[3] upper (bsr)
	ulong dmask[4];
	// orthogonal masks, same direction ordering
	ulong omask[4];
}; // fixed: the original was missing the ';' that must end a struct definition
// attack table
at_table_t at_table[64];
//-----------------------------------------------------------------------------
void compute_pseudo_diag(int sq)
{
ulong bb = 1ul << sq;
ulong bb0 = bb;
ulong bb1 = bb;
ulong bb2 = bb;
ulong bb3 = bb;
for (int i=0; i<7; i++)
{
bb0 |= shift_left(shift_up(bb0));
bb1 |= shift_right(shift_up(bb1));
bb2 |= shift_left(shift_down(bb2));
bb3 |= shift_right(shift_down(bb3));
}
// bsf
at_table[sq].dmask[0] = bb0 & ~bb;
// bsf
at_table[sq].dmask[1] = bb1 & ~bb;
// bsr
at_table[sq].dmask[2] = bb2 & ~bb;
// bsr
at_table[sq].dmask[3] = bb3 & ~bb;
}
//-----------------------------------------------------------------------------
void compute_pseudo_ortho(int sq)
{
ulong bb = 1ul << sq;
ulong bb0 = bb;
ulong bb1 = bb;
ulong bb2 = bb;
ulong bb3 = bb;
for (int i=0; i<7; i++)
{
bb0 |= shift_right(bb0);
bb1 |= shift_up(bb1);
bb2 |= shift_left(bb2);
bb3 |= shift_down(bb3);
}
// bsf
at_table[sq].omask[0] = bb0 & ~bb;
// bsf
at_table[sq].omask[1] = bb1 & ~bb;
// bsr
at_table[sq].omask[2] = bb2 & ~bb;
// bsr
at_table[sq].omask[3] = bb3 & ~bb;
}
//-----------------------------------------------------------------------------
// One-time initialization: fill the pseudo-attack mask table for all 64
// squares (both diagonal and orthogonal rays per square).
void compute_tables()
{
	for (int sq = 0; sq < 64; sq++)
	{
		compute_pseudo_diag(sq);
		compute_pseudo_ortho(sq);
	}
}
//-----------------------------------------------------------------------------
// Bishop-style attacks from sq under occupancy occ: run the generic line
// scan over the four precomputed diagonal rays.
inline ulong diag_attacks(int sq, ulong occ)
{
	const ulong* rays = at_table[sq].dmask;
	return line_attacks(occ, rays);
}
//-----------------------------------------------------------------------------
// Rook-style attacks from sq under occupancy occ: run the generic line scan
// over the four precomputed rank/file rays.
inline ulong ortho_attacks(int sq, ulong occ)
{
	const ulong* rays = at_table[sq].omask;
	return line_attacks(occ, rays);
}