Here's my code for first/last bit. Changing USE_INTRINSIC and QIDX you can test various version of the same functions. The one choose are those faster, on my environment. Some function of QIDX are just copy from the web but I don't use it, they seems to be slower than other solutions:
Code: Select all
inline int GetSquareCol(const uint64_t &boSquare)
{
uint64_t bo = (boSquare * BOARD_H_COL) >> 56;
return log2ff[(int)bo] - 1;
}
inline int GetSquareRow(const uint64_t &boSquare)
{
#if 0
return (GetSquareCol((((boSquare * BOARD_1_ROW) & BOARD_A_COL) * H1A8) >> 56) - 1) & 0x07;
#else
int row = 0;
uint64_t col = boSquare;
if ((col & 0xffffffffULL) == EMPTY)
{
col >>= 32;
row = 4;
}
if ((col & 0xffffULL) == EMPTY)
{
col >>= 16;
row += 2;
}
if ((col & 0xffULL) == EMPTY)
{
row += 1;
}
return row;
#endif
}
inline uint64_t FirstBit(const uint64_t &bo)
{
// const uint64_t k = bo - 1;
// 10011100 bo
// 10011011 k = bo-1
// 10011111 | bo
// 00000100 ^ k
//return (k | bo) ^ k;
// 10011100 bo
// 10011011 k = bo-1
// 01100100 ~ bo - 1
// 00000100 & k
return bo & ~(bo - 1);
}
inline uint64_t PopFirstBit(uint64_t &boBits)
{
uint64_t boFirst = FirstBit(boBits);
boBits ^= boFirst;
return boFirst;
}
inline uint64_t LastBit(const uint64_t &bo)
{
#if USE_INTRINSIC
unsigned long index;
#ifdef _M_X64
if (_BitScanReverse64(&index, bo)) return squares[index]; // index ? (1ULL << index) : 1;
#else
if (_BitScanReverse(&index, bo >> 32)) return squares[index + 32];
if (_BitScanReverse(&index, (uint32_t)bo)) return squares[index];
#endif
return 0;
#else
uint64_t k = bo;
uint64_t boLast;
for (boLast = 0ULL; k != 0ULL; boLast = PopFirstBit(k));
return boLast;
#endif
}
inline uint64_t PopLastBit(uint64_t &boBits)
{
uint64_t boLast = LastBit(boBits);
boBits ^= boLast;
return boLast;
}
#if USE_INTRINSIC
inline int GetSquareIndex(const uint64_t &boSquare)
{
unsigned long idx;
#ifdef _M_X64
return _BitScanForward64(&idx, boSquare) == 0 ? 0 : idx;
#else
return _BitScanForward(&idx, boSquare >> 32) == 0 ? (_BitScanForward(&idx, uint32_t(boSquare)) == 0 ? 0 : idx) : idx + 32;
#endif
}
#else
#define QIDX 3
#if QIDX==1
inline int GetSquareIndex(const uint64_t &boSquare)
{
/**
* trailingZeroCount
* @param bb bitboard to scan
* @return index (0..63) of least significant one bit, 64 if bb is zero
*/
static const int lookup67[67 + 1] = {
64, 0, 1, 39, 2, 15, 40, 23,
3, 12, 16, 59, 41, 19, 24, 54,
4, -1, 13, 10, 17, 62, 60, 28,
42, 30, 20, 51, 25, 44, 55, 47,
5, 32, -1, 38, 14, 22, 11, 58,
18, 53, 63, 9, 61, 27, 29, 50,
43, 46, 31, 37, 21, 57, 52, 8,
26, 49, 45, 36, 56, 7, 48, 35,
6, 34, 33, -1 };
return lookup67[(boSquare & -boSquare) % 67];
}
#elif QIDX==2
#include <intrin.h>
#ifdef __INTRIN_H_x
inline int GetSquareIndex(const uint64_t &boSquare)
{
//unsigned char _BitScanForward64(unsigned long * Index, unsigned __int64 Mask);
//unsigned char _BitScanReverse64(unsigned long * Index, unsigned __int64 Mask); 1
unsigned long index /* = 0 */;
/* if(boSquare!=0ull) */ _BitScanForward64(&index, boSquare);
return index;
}
#endif
#elif QIDX==3
inline int GetSquareIndex(const uint64_t &boSquare)
{
return GetSquareRow(boSquare) * 8 + GetSquareCol(boSquare);
}
#elif QIDX==4
inline int GetSquareIndex(const uint64_t &boSquare)
{
const int deb[64] =
{
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12,
};
return deb[((boSquare & -boSquare) * 0x022fdd63cc95386d) >> 58];
}
#endif
#endif // USE_INTRINSIC
PS: GetSquareRow is not optimal, it can be faster but I don't use it with intrinsic.