After a lot of test, this is my "state of the art", related to finding first/last bit and the bit index:
Code: Select all
const int log2ff[] =
{
0x00,
0x01,
0x02, 0x02,
0x03, 0x03, 0x03, 0x03,
0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};
inline int GetSquareCol(const uint64_t &boSquare)
{
uint64_t bo = (boSquare * BOARD_H_COL) >> 56;
return log2ff[(int)bo] - 1;
}
inline int GetSquareRow(const uint64_t &boSquare)
{
#if 0
return (GetSquareCol((((boSquare * BOARD_1_ROW) & BOARD_A_COL) * H1A8) >> 56) - 1) & 0x07;
#else
int row = 0;
uint64_t col = boSquare;
if ((col & 0xffffffffULL) == EMPTY)
{
col >>= 32;
row = 4;
}
if ((col & 0xffffULL) == EMPTY)
{
col >>= 16;
row += 2;
}
if ((col & 0xffULL) == EMPTY)
{
row += 1;
}
return row;
#endif
}
inline uint64_t FirstBit(const uint64_t &bo)
{
// const uint64_t k = bo - 1;
// 10011100 bo
// 10011011 k = bo-1
// 10011111 | bo
// 00000100 ^ k
//return (k | bo) ^ k;
// 10011100 bo
// 10011011 k = bo-1
// 01100100 ~ bo - 1
// 00000100 & k
return bo & ~(bo - 1);
}
inline uint64_t PopFirstBit(uint64_t &boBits)
{
uint64_t boFirst = FirstBit(boBits);
boBits ^= boFirst;
return boFirst;
}
inline uint64_t LastBit(const uint64_t &bo)
{
#if USE_INTRINSIC
unsigned long index;
#ifdef _M_X64
if (_BitScanReverse64(&index, bo)) return squares[index]; // index ? (1ULL << index) : 1;
#else
if (_BitScanReverse(&index, bo >> 32)) return squares[index + 32];
if (_BitScanReverse(&index, (uint32_t)bo)) return squares[index];
#endif
return 0;
#else
uint64_t k = bo;
uint64_t boLast;
for (boLast = 0ULL; k != 0ULL; boLast = PopFirstBit(k));
return boLast;
#endif
}
inline uint64_t PopLastBit(uint64_t &boBits)
{
uint64_t boLast = LastBit(boBits);
boBits ^= boLast;
return boLast;
}
#if USE_INTRINSIC
inline int GetSquareIndex(const uint64_t &boSquare)
{
unsigned long idx;
#ifdef _M_X64
return _BitScanForward64(&idx, boSquare) == 0 ? 0 : idx;
#else
return _BitScanForward(&idx, boSquare >> 32) == 0 ? (_BitScanForward(&idx, uint32_t(boSquare)) == 0 ? 0 : idx) : idx + 32;
#endif
}
#else
// 3 fastest
#define QIDX 3
#if QIDX==1
inline int GetSquareIndex(const uint64_t &boSquare)
{
/**
* trailingZeroCount
* @param bb bitboard to scan
* @return index (0..63) of least significant one bit, 64 if bb is zero
*/
static const int lookup67[67 + 1] = {
64, 0, 1, 39, 2, 15, 40, 23,
3, 12, 16, 59, 41, 19, 24, 54,
4, -1, 13, 10, 17, 62, 60, 28,
42, 30, 20, 51, 25, 44, 55, 47,
5, 32, -1, 38, 14, 22, 11, 58,
18, 53, 63, 9, 61, 27, 29, 50,
43, 46, 31, 37, 21, 57, 52, 8,
26, 49, 45, 36, 56, 7, 48, 35,
6, 34, 33, -1 };
return lookup67[(boSquare & -boSquare) % 67];
}
#elif QIDX==2
#include <intrin.h>
#ifdef __INTRIN_H_x
inline int GetSquareIndex(const uint64_t &boSquare)
{
//unsigned char _BitScanForward64(unsigned long * Index, unsigned __int64 Mask);
//unsigned char _BitScanReverse64(unsigned long * Index, unsigned __int64 Mask); 1
unsigned long index /* = 0 */;
/* if(boSquare!=0ull) */ _BitScanForward64(&index, boSquare);
return index;
}
#endif
#elif QIDX==3
inline int GetSquareIndex(const uint64_t &boSquare)
{
return GetSquareRow(boSquare) * 8 + GetSquareCol(boSquare);
}
#elif QIDX==4
inline int GetSquareIndex(const uint64_t &boSquare)
{
const int deb[64] =
{
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12,
};
return deb[((boSquare & -boSquare) * 0x022fdd63cc95386d) >> 58];
}
#elif QIDX==5
inline int GetSquareIndex(const uint64_t &boSquare)
{
return boSquare != 0 ? log2((double)boSquare) : 0;
}
#endif
#endif // USE_INTRINSIC
Using intrinsic function is the fast way but, if you disable intrinsic, you can experiment with various solutions and choose the right one for you.