Fastest bitscan?

Discussion of chess software programming and technical issues.

Moderator: Ras

bhlangonijr
Posts: 482
Joined: Thu Oct 16, 2008 4:23 am
Location: Milky Way

Fastest bitscan?

Post by bhlangonijr »

Hi.

What is the best (fast) method to find the index of LS1B in a bitboard? Considering a multi-platform program (Ie, not use compiler intrinsics/inline assembly).

I found several implementations (google it, chess programming wiki, etc) but not sure which one is fastest.
bob
Posts: 20943
Joined: Mon Feb 27, 2006 7:30 pm
Location: Birmingham, AL

Re: Fastest bitscan?

Post by bob »

bhlangonijr wrote:Hi.

What is the best (fast) method to find the index of LS1B in a bitboard? Considering a multi-platform program (Ie, not use compiler intrinsics/inline assembly).

I found several implementations (google it, chess programming wiki, etc) but not sure which one is fastest.
Hardware BSF/BSR instructions are the best by far. Assuming X86. Other processors (most) have an equivalent. And the new Nehalem and beyond with SSE 4.2 even has a hardware popcnt instruction to count the number of 1 bits.
User avatar
Greg Strong
Posts: 388
Joined: Sun Dec 21, 2008 6:57 pm
Location: Washington, DC

Re: Fastest bitscan?

Post by Greg Strong »

I know you said no compiler intrinsics or inline assembly, so hardware bitscan would be out, but with a preprocessor #ifdef you can easily go both ways - use it if it's there without sacraficing portability. I think the following is about as fast as it gets in both scenarios:

Code: Select all

static const uint64 debruijn64 = 0x07EDD5E59A4E28C2ULL;

static const int32 index64[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5
};

class BitBoard64
{
  protected:
	uint64 data;

  public:
	uint32 GetLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			return rtn;
		#else
			return (uint32) index64[((data & -data) * debruijn64) >> 58];
		#endif
	}

	uint32 ExtractLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			_bittestandreset64( (__int64 *) &data, rtn );
			return rtn;
		#else
			int32 index = index64[((data & -data) * debruijn64) >> 58];
			data &= data - 1;
			return (uint32) index;
		#endif
	}
}
I'd be happy to provide the whole BitBoard64 class if you'd like.
bhlangonijr
Posts: 482
Joined: Thu Oct 16, 2008 4:23 am
Location: Milky Way

Re: Fastest bitscan?

Post by bhlangonijr »

I would like to see your bitboard code if you don't mind. :wink:

Thanks!
frankp
Posts: 233
Joined: Sun Mar 12, 2006 3:11 pm

Re: Fastest bitscan?

Post by frankp »

How do you access these in gcc ?

I am currently using __builtin_ctzll
User avatar
Greg Strong
Posts: 388
Joined: Sun Dec 21, 2008 6:57 pm
Location: Washington, DC

Re: Fastest bitscan?

Post by Greg Strong »

Sure, no problem! Here is the .h and .cpp files for the BitBoard64 class.

Note #1 - The class is called "BitBoard64" because there's also "BitBoard96" and "BitBoard128" classes. (I write "universal" chess programs - programs that are flexible enough to play chess variants. These other classes are to accomodate larger board sizes. BitBoard96, for example, will accomodate any board size with up to 96 squares. These classes are passed as template parameters to other classes so that each game uses the appropriate classes quickly and seemelessly.)

Note #2 - This code is Microsoft Visual C++ code, but should be portable, except possibly for the #pragma directives that suspend warning conditions that I know to be safe.

BitBoard64.h:

Code: Select all

#pragma once

#include <intrin.h>

#pragma warning( push )
#pragma warning( disable: 4146 )

static const uint64 debruijn64 = 0x07EDD5E59A4E28C2ULL;
extern const int32 index64[64];

class BitBoard64
{
  protected:
	uint64 data;

  public:
	BitBoard64( uint64 bits = 0ULL )
	{	data = bits;   }

	BitBoard64( BitBoard64 const &other )
	{	data = other.data;   }

	uint64 GetData() const
	{	return data;   }

	static BitBoard64 GetNullBitBoard()
	{	return BitBoard64();   }

	static BitBoard64 GetTrueBitBoard()
	{	return BitBoard64( 0xFFFFFFFFFFFFFFFFULL );   }

	static BitBoard64 GetPositionBitBoard( int nBit )
	{	return BitBoard64( 1ULL << nBit );   }

	void Clear()
	{	data = 0ULL;   }

	BitBoard64 operator ~()
	{	return BitBoard64( ~data );   }

	BitBoard64 operator &( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() & other.GetData() );   }

	BitBoard64 operator &( int intBitMask ) const
	{	return BitBoard64( GetData() & intBitMask );   }

	BitBoard64 operator |( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() | other.GetData() );   }

	BitBoard64 operator ^( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() ^ other.GetData() );   }

	bool operator &&( BitBoard64 const &other ) const
	{	return( (bool) (*this) && (bool) other );   }

	bool operator ||( BitBoard64 const &other ) const
	{	return( (bool) (*this) || (bool) other );   }

	BitBoard64 operator >>( int shift ) const
	{	return BitBoard64( GetData() >> shift );   }

	BitBoard64 operator >>( unsigned shift ) const
	{	return BitBoard64( GetData() >> shift );   }

	BitBoard64 operator <<( int shift ) const
	{	return BitBoard64( GetData() << shift );   }

	BitBoard64 operator <<( unsigned shift ) const
	{	return BitBoard64( GetData() << shift );   }

	BitBoard64 &operator=( BitBoard64 const &other )
	{	data = other.GetData();  return *this;   }

	BitBoard64 &operator=( uint64 new_data )
	{	data = new_data;  return *this;   }

	BitBoard64 &operator=( int new_data )
	{	data = (uint64) new_data;  return *this;   }

	BitBoard64 operator &=( BitBoard64 const &other ) 
	{	data &= other.GetData();  return *this;   }

	BitBoard64 operator |=( BitBoard64 const &other ) 
	{	data |= other.GetData();  return *this;   }

	BitBoard64 operator ^=( BitBoard64 const &other ) 
	{	data ^= other.GetData();  return *this;   }

	operator bool() const 
	{	return data != 0ULL;   }

	bool operator ==( uint64 value )
	{	return data == value;   }

	bool operator ==( int value )
	{	return data == (uint64) value;   }

	bool operator ==( BitBoard64 other )
	{	return data == other.data;   }

	operator uint64() const
	{	return data;   }

	bool operator[]( int nBit ) const
	{
		#ifdef __X64
			return _bittest64( &data, (__int64) nBit );
		#else
			return (data & (1ULL << nBit)) != 0ULL;   
		#endif
	}

	int32 GetBit( int nBit ) const
	{	return (int32) -((int64) ((data >> nBit) & 1ULL));   }

	void SetBit( int nBit )
	{	data = data | (1ULL << nBit);   }

	void ClearBit( int nBit )
	{	data = data & (0xFFFFFFFFFFFFFFFFULL ^ (1ULL << nBit));   }

	void ToggleBit( int nBit )
	{	data = data ^ (1ULL << nBit);   }

	uint32 GetLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			return rtn;
		#else
			return (uint32) index64[((data & -data) * debruijn64) >> 58];
		#endif
	}

	uint32 ExtractLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			_bittestandreset64( (__int64 *) &data, rtn );
			return rtn;
		#else
			int32 index = index64[((data & -data) * debruijn64) >> 58];
			data &= data - 1;
			return (uint32) index;
		#endif
	}

	uint32 BitCount()
	{
		#ifdef HAS_POPCOUNT_INSTRUCTION
			return __popcnt64( data );
		#else
			uint64 x = data;
			const uint64 k1 = 0x5555555555555555UL;
			const uint64 k2 = 0x3333333333333333UL;
			const uint64 k4 = 0x0f0f0f0f0f0f0f0fUL;
			const uint64 kf = 0x0101010101010101UL;
			x =  x       - ((x >> 1)  & k1); //put count of each 2 bits into those 2 bits
			x = (x & k2) + ((x >> 2)  & k2); //put count of each 4 bits into those 4 bits
			x = (x       +  (x >> 4)) & k4 ; //put count of each 8 bits into those 8 bits
			x = (x * kf) >> 56; //returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
			return (uint32) x;
		#endif
	}

	uint32 BitCountMax15()
	{
		#ifdef HAS_POPCOUNT_INSTRUCTION
			return __popcnt64( data );
		#else
			uint64 x = data;
			const uint64 k1 = 0x5555555555555555UL;
			const uint64 k2 = 0x3333333333333333UL;
			const uint64 kf = 0x1111111111111111UL;
			x =  x       - ((x >> 1)  & k1); //put count of each 2 bits into those 2 bits
			x = (x & k2) + ((x >> 2)  & k2); //put count of each 4 bits into those 4 bits
			x = (x * kf) >> 60;
			return (uint32) x;
		#endif
	}
};

#pragma warning( pop )
BitBoard64.cpp:

Code: Select all

#include "StdAfx.h"
#include "BitBoard64.h"

static const int index64[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5
};
From stdafx.h:

Code: Select all

typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int32 uint32;
typedef unsigned __int64 uint64;
typedef unsigned __int64 hashcode;
If you'd like any other related code, like move generation functions using this, just let me know.

Cheers!
Greg
bob
Posts: 20943
Joined: Mon Feb 27, 2006 7:30 pm
Location: Birmingham, AL

Re: Fastest bitscan?

Post by bob »

frankp wrote:How do you access these in gcc ?

I am currently using __builtin_ctzll
I have been using inline gcc. In Crafty, the files are "inline32.h" for 32 bit machines and "inline64.h" for 64 bit processors.

The intrinsic doesn't quite work for me because if no bit is set, I want 64, not an undefined value...
bhlangonijr
Posts: 482
Joined: Thu Oct 16, 2008 4:23 am
Location: Milky Way

Re: Fastest bitscan?

Post by bhlangonijr »

frankp wrote:How do you access these in gcc ?

I am currently using __builtin_ctzll
for gcc you have to write it with inline assembly, I guess.

I found this on chess programming wiki:

Code: Select all

//These processor instructions work only for 64-bit processors
#ifdef _MSC_VER
    #include <intrin.h>
    #ifdef _WIN64
        #pragma intrinsic(_BitScanForward64)
        #pragma intrinsic(_BitScanReverse64)
        #define USING_INTRINSICS
    #endif
#elif defined(__GNUC__) && defined(__LP64__)
    static INLINE unsigned char _BitScanForward64(unsigned int* const Index, const U64 Mask)
    {
        U64 Ret;
        __asm__
        (
            "bsfq %[Mask], %[Ret]"
            :[Ret] "=r" (Ret)
            :[Mask] "mr" (Mask)
        );
        *Index = (unsigned int)Ret;
        return Mask?1:0;
    }
    static INLINE unsigned char _BitScanReverse64(unsigned int* const Index, const U64 Mask)
    {
        U64 Ret;
        __asm__
        (
            "bsrq %[Mask], %[Ret]"
            :[Ret] "=r" (Ret)
            :[Mask] "mr" (Mask)
        );
        *Index = (unsigned int)Ret;
        return Mask?1:0;
    }
    #define USING_INTRINSICS
#endif
bhlangonijr
Posts: 482
Joined: Thu Oct 16, 2008 4:23 am
Location: Milky Way

Re: Fastest bitscan?

Post by bhlangonijr »

Greg Strong wrote:Sure, no problem! Here is the .h and .cpp files for the BitBoard64 class.

Note #1 - The class is called "BitBoard64" because there's also "BitBoard96" and "BitBoard128" classes. (I write "universal" chess programs - programs that are flexible enough to play chess variants. These other classes are to accomodate larger board sizes. BitBoard96, for example, will accomodate any board size with up to 96 squares. These classes are passed as template parameters to other classes so that each game uses the appropriate classes quickly and seemelessly.)

Note #2 - This code is Microsoft Visual C++ code, but should be portable, except possibly for the #pragma directives that suspend warning conditions that I know to be safe.

BitBoard64.h:

Code: Select all

#pragma once

#include <intrin.h>

#pragma warning( push )
#pragma warning( disable: 4146 )

static const uint64 debruijn64 = 0x07EDD5E59A4E28C2ULL;
extern const int32 index64[64];

class BitBoard64
{
  protected:
	uint64 data;

  public:
	BitBoard64( uint64 bits = 0ULL )
	{	data = bits;   }

	BitBoard64( BitBoard64 const &other )
	{	data = other.data;   }

	uint64 GetData() const
	{	return data;   }

	static BitBoard64 GetNullBitBoard()
	{	return BitBoard64();   }

	static BitBoard64 GetTrueBitBoard()
	{	return BitBoard64( 0xFFFFFFFFFFFFFFFFULL );   }

	static BitBoard64 GetPositionBitBoard( int nBit )
	{	return BitBoard64( 1ULL << nBit );   }

	void Clear()
	{	data = 0ULL;   }

	BitBoard64 operator ~()
	{	return BitBoard64( ~data );   }

	BitBoard64 operator &( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() & other.GetData() );   }

	BitBoard64 operator &( int intBitMask ) const
	{	return BitBoard64( GetData() & intBitMask );   }

	BitBoard64 operator |( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() | other.GetData() );   }

	BitBoard64 operator ^( BitBoard64 const &other ) const
	{	return BitBoard64( GetData() ^ other.GetData() );   }

	bool operator &&( BitBoard64 const &other ) const
	{	return( (bool) (*this) && (bool) other );   }

	bool operator ||( BitBoard64 const &other ) const
	{	return( (bool) (*this) || (bool) other );   }

	BitBoard64 operator >>( int shift ) const
	{	return BitBoard64( GetData() >> shift );   }

	BitBoard64 operator >>( unsigned shift ) const
	{	return BitBoard64( GetData() >> shift );   }

	BitBoard64 operator <<( int shift ) const
	{	return BitBoard64( GetData() << shift );   }

	BitBoard64 operator <<( unsigned shift ) const
	{	return BitBoard64( GetData() << shift );   }

	BitBoard64 &operator=( BitBoard64 const &other )
	{	data = other.GetData();  return *this;   }

	BitBoard64 &operator=( uint64 new_data )
	{	data = new_data;  return *this;   }

	BitBoard64 &operator=( int new_data )
	{	data = (uint64) new_data;  return *this;   }

	BitBoard64 operator &=( BitBoard64 const &other ) 
	{	data &= other.GetData();  return *this;   }

	BitBoard64 operator |=( BitBoard64 const &other ) 
	{	data |= other.GetData();  return *this;   }

	BitBoard64 operator ^=( BitBoard64 const &other ) 
	{	data ^= other.GetData();  return *this;   }

	operator bool() const 
	{	return data != 0ULL;   }

	bool operator ==( uint64 value )
	{	return data == value;   }

	bool operator ==( int value )
	{	return data == (uint64) value;   }

	bool operator ==( BitBoard64 other )
	{	return data == other.data;   }

	operator uint64() const
	{	return data;   }

	bool operator[]( int nBit ) const
	{
		#ifdef __X64
			return _bittest64( &data, (__int64) nBit );
		#else
			return (data & (1ULL << nBit)) != 0ULL;   
		#endif
	}

	int32 GetBit( int nBit ) const
	{	return (int32) -((int64) ((data >> nBit) & 1ULL));   }

	void SetBit( int nBit )
	{	data = data | (1ULL << nBit);   }

	void ClearBit( int nBit )
	{	data = data & (0xFFFFFFFFFFFFFFFFULL ^ (1ULL << nBit));   }

	void ToggleBit( int nBit )
	{	data = data ^ (1ULL << nBit);   }

	uint32 GetLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			return rtn;
		#else
			return (uint32) index64[((data & -data) * debruijn64) >> 58];
		#endif
	}

	uint32 ExtractLSB()
	{
		#ifdef __X64
			unsigned long rtn;
			_BitScanForward64( &rtn, (unsigned long) data );
			_bittestandreset64( (__int64 *) &data, rtn );
			return rtn;
		#else
			int32 index = index64[((data & -data) * debruijn64) >> 58];
			data &= data - 1;
			return (uint32) index;
		#endif
	}

	uint32 BitCount()
	{
		#ifdef HAS_POPCOUNT_INSTRUCTION
			return __popcnt64( data );
		#else
			uint64 x = data;
			const uint64 k1 = 0x5555555555555555UL;
			const uint64 k2 = 0x3333333333333333UL;
			const uint64 k4 = 0x0f0f0f0f0f0f0f0fUL;
			const uint64 kf = 0x0101010101010101UL;
			x =  x       - ((x >> 1)  & k1); //put count of each 2 bits into those 2 bits
			x = (x & k2) + ((x >> 2)  & k2); //put count of each 4 bits into those 4 bits
			x = (x       +  (x >> 4)) & k4 ; //put count of each 8 bits into those 8 bits
			x = (x * kf) >> 56; //returns 8 most significant bits of x + (x<<8) + (x<<16) + (x<<24) + ...
			return (uint32) x;
		#endif
	}

	uint32 BitCountMax15()
	{
		#ifdef HAS_POPCOUNT_INSTRUCTION
			return __popcnt64( data );
		#else
			uint64 x = data;
			const uint64 k1 = 0x5555555555555555UL;
			const uint64 k2 = 0x3333333333333333UL;
			const uint64 kf = 0x1111111111111111UL;
			x =  x       - ((x >> 1)  & k1); //put count of each 2 bits into those 2 bits
			x = (x & k2) + ((x >> 2)  & k2); //put count of each 4 bits into those 4 bits
			x = (x * kf) >> 60;
			return (uint32) x;
		#endif
	}
};

#pragma warning( pop )
BitBoard64.cpp:

Code: Select all

#include "StdAfx.h"
#include "BitBoard64.h"

static const int index64[64] = {
    63,  0, 58,  1, 59, 47, 53,  2,
    60, 39, 48, 27, 54, 33, 42,  3,
    61, 51, 37, 40, 49, 18, 28, 20,
    55, 30, 34, 11, 43, 14, 22,  4,
    62, 57, 46, 52, 38, 26, 32, 41,
    50, 36, 17, 19, 29, 10, 13, 21,
    56, 45, 25, 31, 35, 16,  9, 12,
    44, 24, 15,  8, 23,  7,  6,  5
};
From stdafx.h:

Code: Select all

typedef __int16 int16;
typedef __int32 int32;
typedef __int64 int64;
typedef unsigned __int32 uint32;
typedef unsigned __int64 uint64;
typedef unsigned __int64 hashcode;
If you'd like any other related code, like move generation functions using this, just let me know.

Cheers!
Greg
You are very nice! :)
For now it is good enough.

Thanks a lot.
User avatar
Greg Strong
Posts: 388
Joined: Sun Dec 21, 2008 6:57 pm
Location: Washington, DC

Re: Fastest bitscan?

Post by Greg Strong »

No worries. Just let me know if you've got any questions!