Remove MSB

mar · Post by **mar** » Mon Dec 28, 2015 4:04 am

hgm wrote:Conversion to double, and extracting the exponent still seems the way to go to me.

Indeed, that's clever. It still won't work for numbers like ~0ull-1, but such bitboards won't happen.

int Log2(uint64_t l)
{
	// assume little endian
	const int BIG_ENDIAN = 0;
	double d = (double)l;
	unsigned exp = ((const unsigned *)&d)[!BIG_ENDIAN];
	return (int)(exp>>20) - 1023;
}

No masking with 2047 required since it's always >= 0.
Not sure though that this is competitive with Peter's DeBruijn, I haven't compared.

Also not sure if this breaks strict aliasing but I've also found that for example
extracting sign bit from float using it's binary representation is faster than comparison to zero, i.e.

Code: Select all

unsigned float_sign = *(const unsigned *)&f >> 31;

is faster than sign = f<0;
Recently I used this for fast (and accurate) rounding of float to integer:

Code: Select all

int RoundFloatToInt(float f)
{
	static const float ofs[2] = {0.5f, -0.5f};
	return (int)(f + ofs[FloatToBinary(f) >> 31]);
}

Not only is this 5x faster than (int)floor(f+0.5f), it also rounds -0.5f correctly to -1.

stegemma · Post by **stegemma** » Wed Dec 30, 2015 3:07 pm

lucasart wrote:Remove LSB: b &= b-1

Is there an equivalent trick to remove the MSB ?

After a lot of test, this is my "state of the art", related to finding first/last bit and the bit index:

Code: Select all

const int log2ff[] =
{
	0x00,
	0x01,
	0x02, 0x02,
	0x03, 0x03, 0x03, 0x03,
	0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08
};

inline int GetSquareCol(const uint64_t &boSquare)
{
	uint64_t bo = (boSquare * BOARD_H_COL) >> 56;
	return log2ff[(int)bo] - 1;
}

inline int GetSquareRow(const uint64_t &boSquare)
{
#if 0
	return (GetSquareCol((((boSquare * BOARD_1_ROW) & BOARD_A_COL) * H1A8) >> 56) - 1) & 0x07;
#else
	int row = 0;
	uint64_t col = boSquare;
	if ((col & 0xffffffffULL) == EMPTY)
	{
		col >>= 32;
		row = 4;
	}
	if ((col & 0xffffULL) == EMPTY)
	{
		col >>= 16;
		row += 2;
	}
	if ((col & 0xffULL) == EMPTY)
	{
		row += 1;
	}
	return row;
#endif
}

inline uint64_t FirstBit(const uint64_t &bo)
{
	// const uint64_t k = bo - 1;		
	// 10011100 bo
	// 10011011 k = bo-1
	// 10011111 | bo
	// 00000100 ^ k
	//return (k | bo) ^ k;

	// 10011100 bo
	// 10011011 k = bo-1
	// 01100100 ~ bo - 1
	// 00000100 & k
	return bo & ~(bo - 1);
}

inline uint64_t PopFirstBit(uint64_t &boBits)
{
	uint64_t boFirst = FirstBit(boBits);
	boBits ^= boFirst;
	return boFirst;
}

inline uint64_t LastBit(const uint64_t &bo)
{
#if USE_INTRINSIC
	unsigned long index;
#ifdef _M_X64
	if (_BitScanReverse64(&index, bo)) return squares[index]; // index ? (1ULL << index) : 1;
#else
	if (_BitScanReverse(&index, bo >> 32)) return squares[index + 32];
	if (_BitScanReverse(&index, (uint32_t)bo)) return squares[index];
#endif
	return 0;
#else
	uint64_t k = bo;
	uint64_t boLast;
	for (boLast = 0ULL; k != 0ULL; boLast = PopFirstBit(k));
	return boLast;
#endif
}

inline uint64_t PopLastBit(uint64_t &boBits)
{
	uint64_t boLast = LastBit(boBits);
	boBits ^= boLast;
	return boLast;
}

#if USE_INTRINSIC
inline int GetSquareIndex(const uint64_t &boSquare)
{
	unsigned long idx;
#ifdef _M_X64
	return _BitScanForward64(&idx, boSquare) == 0 ? 0 : idx;
#else
	return _BitScanForward(&idx, boSquare >> 32) == 0 ? (_BitScanForward(&idx, uint32_t(boSquare)) == 0 ? 0 : idx) : idx + 32;
#endif
}
#else
// 3 fastest
#define QIDX 3
#if QIDX==1
inline int GetSquareIndex(const uint64_t &boSquare)
{
	/**
	* trailingZeroCount
	* @param bb bitboard to scan
	* @return index (0..63) of least significant one bit, 64 if bb is zero
	*/
	static const int lookup67[67 + 1] = {
		64, 0, 1, 39, 2, 15, 40, 23,
		3, 12, 16, 59, 41, 19, 24, 54,
		4, -1, 13, 10, 17, 62, 60, 28,
		42, 30, 20, 51, 25, 44, 55, 47,
		5, 32, -1, 38, 14, 22, 11, 58,
		18, 53, 63, 9, 61, 27, 29, 50,
		43, 46, 31, 37, 21, 57, 52, 8,
		26, 49, 45, 36, 56, 7, 48, 35,
		6, 34, 33, -1 };
	return lookup67[(boSquare & -boSquare) % 67];
}
#elif QIDX==2
#include <intrin.h>
#ifdef __INTRIN_H_x
inline int GetSquareIndex(const uint64_t &boSquare)
{
	//unsigned char _BitScanForward64(unsigned long * Index,  unsigned __int64 Mask);
	//unsigned char _BitScanReverse64(unsigned long * Index, unsigned __int64 Mask); 1
	unsigned long index /* = 0 */;
	/* if(boSquare!=0ull) */ _BitScanForward64(&index, boSquare);
	return index;
}
#endif
#elif QIDX==3
inline int GetSquareIndex(const uint64_t &boSquare)
{
	return GetSquareRow(boSquare) * 8 + GetSquareCol(boSquare);
}
#elif QIDX==4
inline int GetSquareIndex(const uint64_t &boSquare)
{
	const int deb[64] =
	{
		0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
		63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12,
	};
	return deb[((boSquare & -boSquare) * 0x022fdd63cc95386d) >> 58];
}
#elif QIDX==5
inline int GetSquareIndex(const uint64_t &boSquare)
{
	return boSquare != 0 ? log2((double)boSquare) : 0;
}
#endif
#endif // USE_INTRINSIC

Using intrinsic function is the fast way but, if you disable intrinsic, you can experiment with various solutions and choose the right one for you.

mar · Post by **mar** » Mon Jan 04, 2016 9:18 pm

mar wrote:_BitScanReverse is equivalent to builtin_clz (and so is the x86 bsr instruction)

Oops, Lucas was right with bsr.
bsr or _BitScanReverse indeed return most significat bit index relative to least significant bit.
So in fact bsr = n-1-clz, where n is operand size in bits.

Remove MSB

Re: Remove MSB

Re: Remove MSB

Re: Remove MSB