SquareAttackedBy

Discussion of chess software programming and technical issues.

Moderators: hgm, Rebel, chrisw

Gerd Isenberg
Posts: 2250
Joined: Wed Mar 08, 2006 8:47 pm
Location: Hattingen, Germany

Re: SquareAttackedBy

Post by Gerd Isenberg »

Desperado wrote:Hello again,

last time it was just curiosity, thinking about this topic.
Now i want to have a solution for implementation.

So i started today thinking about it again.

So here is a simple try, and i would be curious
if this is competetive with the _x88_Lookup_approach.

of course, as ever there is room for other ideas and improvements.

thoughts:

- assuming sq0!=sq1 we can simply return(tmp)
- but the _switchDirectionPart_ maybe improved
because there will always be only one direction...
- well, at least this is branchless code without any
lookups.So my hope is, it is good enough for practical
use(beside the zillion of other approaches :) )

Code: Select all

//--------inBetween---------------------
//--------------------------------------

BTB_T inBetween(SQR_T sq0,SQR_T sq1)
	{
	 const BTB_T m = (BTB_T) -1;

	 BTB_T tmp = 0;

	 //switchDirection
	 tmp = fileMask(sq0) & fileMask(sq1);
	 tmp|= rankMask(sq0) & rankMask(sq1);
	 tmp|= diagMask(sq0) & diagMask(sq1);
	 tmp|= antiMask(sq0) & antiMask(sq1);

	 //innerBits
	 tmp&= &#40;m<<sq0&#41; ^ &#40;m<<sq1&#41;;
	 tmp&= tmp - 1;

	 return&#40;sq0==sq1 ? 0 &#58; tmp&#41;;
	&#125;

Code: Select all

	mov	QWORD PTR &#91;rsp+8&#93;, rbx
	movzx	r11d, cl
	lea	rcx, OFFSET FLAT&#58;__ImageBase
	movzx	ebx, dl
	mov	r10, QWORD PTR aMask&#91;rcx+rbx*8&#93;
	mov	rax, QWORD PTR dMask&#91;rcx+rbx*8&#93;
	or	rdx, -1
	and	rax, QWORD PTR dMask&#91;rcx+r11*8&#93;
	and	r10, QWORD PTR aMask&#91;rcx+r11*8&#93;
	mov	r8, rdx
	or	r10, rax
	mov	rax, QWORD PTR rMask&#91;rcx+rbx*8&#93;
	and	rax, QWORD PTR rMask&#91;rcx+r11*8&#93;
	or	r10, rax
	mov	rax, QWORD PTR fMask&#91;rcx+rbx*8&#93;
	and	rax, QWORD PTR fMask&#91;rcx+r11*8&#93;
	movzx	ecx, r11b
	or	r10, rax
	shl	r8, cl
	movzx	ecx, bl
	shl	rdx, cl
	xor	eax, eax
	xor	r8, rdx
	and	r10, r8
	lea	rcx, QWORD PTR &#91;r10-1&#93;
	and	r10, rcx
	cmp	r11b, bl
	mov	rbx, QWORD PTR &#91;rsp+8&#93;
	cmove	r10, rax
	mov	rax, r10
	ret	0
Michael

PS: whishing everyone a happy new year :)
Nice try, but it has eight lookups and a bunch of instructions and even cmov. I don't think it is faster than a rotate of a lookup with 0x88 square differences:

Code: Select all

U64 obstructedBy0x88Diff&#91;240&#93;; // 1920 bytes, 2KByte - 128 Byte
 
int x88diff&#40;int f, int t&#41; &#123;
   return t - f + &#40;t|7&#41; - &#40;f|7&#41; + 120;
&#125;
 
U64 obstructed&#40;int from, int to&#41; &#123;
   return rotateLeft&#40;obstructedBy0x88Diff&#91;x88diff&#40;from,to&#41;&#93;, from&#41;;
&#125;
which roughly translates to

Code: Select all

; input ecx from, edx to
mov  eax, edx  ; to
mov  r8d, ecx  ; from  
or   eax, 7    ; t|7
or   r8d, 7    ; f|7
sub  eax, ecx
sub  eax, r8d
lea  rax, &#91;rax + rdx + 120&#93;
lea  rdx, OFFSET FLAT&#58;__ImageBase 
mov  rax, QWORD PTR obstructedBy0x88Diff&#91;rdx+8*rax&#93; 
rol  rax, cl
Cheers und guten Rutsch,
Gerd
User avatar
Zach Wegner
Posts: 1922
Joined: Thu Mar 09, 2006 12:51 am
Location: Earth

Re: SquareAttackedBy

Post by Zach Wegner »

Isn't the condition unneeded? Since if sq1==sq2, (m<<sq0) ^ (m<<sq1) is zero.
Gerd Isenberg
Posts: 2250
Joined: Wed Mar 08, 2006 8:47 pm
Location: Hattingen, Germany

Re: SquareAttackedBy

Post by Gerd Isenberg »

Zach Wegner wrote:Isn't the condition unneeded? Since if sq1==sq2, (m<<sq0) ^ (m<<sq1) is zero.
yep, if sq0 == sq2 ;-)
User avatar
Zach Wegner
Posts: 1922
Joined: Thu Mar 09, 2006 12:51 am
Location: Earth

Re: SquareAttackedBy

Post by Zach Wegner »

:lol:

Oops, my parser has a bug in it :)
User avatar
Desperado
Posts: 879
Joined: Mon Dec 15, 2008 11:45 am

Re: SquareAttackedBy

Post by Desperado »

ok, first impression was , "just another initRoutine" :lol:

But...

Code: Select all


//--------inBetween---------------------
//--------------------------------------

__inline BTB_T inBetween1&#40;SQR_T sq0,SQR_T sq1&#41;
	&#123;
	 static const BTB_T m  = &#40;BTB_T&#41; -1;

	 BTB_T tmp = 0;
	 
	 tmp = allDir&#91;sq0&#93; & allDir&#91;sq1&#93;;
	 tmp&= &#40;m<<sq0&#41;^&#40;m<<sq1&#41;;

	 return&#40;tmp&#41;;
	&#125;

Code: Select all

	movzx	r9d, cl
	or	r8, -1
	movzx	ecx, r9b
	mov	rax, r8
	shl	rax, cl
	movzx	ecx, dl
	shl	r8, cl
	movzx	ecx, dl
	xor	rax, r8
	lea	r8, OFFSET FLAT&#58;allDir
	and	rax, QWORD PTR &#91;r8+r9*8&#93;
	and	rax, QWORD PTR &#91;r8+rcx*8&#93;
	ret	0
a little bit tuned now 8-)

memory :
==========

512 against 1920 bytes

computation:
==========

no lookup in the sense of looking up the result,
just masks to compute the results.
The assembler looks competetive i think.

maybe some little optimizations can be done further.

let me know.

cheers
User avatar
Zach Wegner
Posts: 1922
Joined: Thu Mar 09, 2006 12:51 am
Location: Earth

Re: SquareAttackedBy

Post by Zach Wegner »

Desperado wrote:ok, first impression was , "just another initRoutine" :lol:

But...

Code: Select all


//--------inBetween---------------------
//--------------------------------------

__inline BTB_T inBetween1&#40;SQR_T sq0,SQR_T sq1&#41;
	&#123;
	 static const BTB_T m  = &#40;BTB_T&#41; -1;

	 BTB_T tmp = 0;
	 
	 tmp = allDir&#91;sq0&#93; & allDir&#91;sq1&#93;;
	 tmp&= &#40;m<<sq0&#41;^&#40;m<<sq1&#41;;

	 return&#40;tmp&#41;;
	&#125;

Code: Select all

	movzx	r9d, cl
	or	r8, -1
	movzx	ecx, r9b
	mov	rax, r8
	shl	rax, cl
	movzx	ecx, dl
	shl	r8, cl
	movzx	ecx, dl
	xor	rax, r8
	lea	r8, OFFSET FLAT&#58;allDir
	and	rax, QWORD PTR &#91;r8+r9*8&#93;
	and	rax, QWORD PTR &#91;r8+rcx*8&#93;
	ret	0
a little bit tuned now 8-)

memory :
==========

512 against 1920 bytes

computation:
==========

no lookup in the sense of looking up the result,
just masks to compute the results.
The assembler looks competetive i think.

maybe some little optimizations can be done further.

let me know.

cheers
Unfortunately that doesn't work. Pretty much any two squares are going to have some incorrect squares thrown in. Think about A1 and H8--the main diagonal is set, but also H1 and A8. E1 and D8 would have H4 and A5 set, etc. I spent some time trying to find a good in-between method, and really, the fastest I could find was a [64][64] lookup. Of course, that doesn't make it worthless to search for more.
Gerd Isenberg
Posts: 2250
Joined: Wed Mar 08, 2006 8:47 pm
Location: Hattingen, Germany

Re: SquareAttackedBy

Post by Gerd Isenberg »

Desperado wrote:ok, first impression was , "just another initRoutine" :lol:

But...

Code: Select all


//--------inBetween---------------------
//--------------------------------------

__inline BTB_T inBetween1&#40;SQR_T sq0,SQR_T sq1&#41;
	&#123;
	 static const BTB_T m  = &#40;BTB_T&#41; -1;

	 BTB_T tmp = 0;
	 
	 tmp = allDir&#91;sq0&#93; & allDir&#91;sq1&#93;;
	 tmp&= &#40;m<<sq0&#41;^&#40;m<<sq1&#41;;

	 return&#40;tmp&#41;;
	&#125;

Code: Select all

	movzx	r9d, cl
	or	r8, -1
	movzx	ecx, r9b
	mov	rax, r8
	shl	rax, cl
	movzx	ecx, dl
	shl	r8, cl
	movzx	ecx, dl
	xor	rax, r8
	lea	r8, OFFSET FLAT&#58;allDir
	and	rax, QWORD PTR &#91;r8+r9*8&#93;
	and	rax, QWORD PTR &#91;r8+rcx*8&#93;
	ret	0
a little bit tuned now 8-)

memory :
==========

512 against 1920 bytes

computation:
==========

no lookup in the sense of looking up the result,
just masks to compute the results.
The assembler looks competetive i think.

maybe some little optimizations can be done further.

let me know.

cheers
Does not work that way. Too many lines interact, f.i. inBetween(c3, f6):

Code: Select all

0x844424150EFB0E15  0xA870DF70A8242221  0x8040041008200201  
 . . 1 . . . . 1    . . . 1 . 1 . 1     . . . . . . . 1    
 . . 1 . . . 1 .    . . . . 1 1 1 .     . . . . . . 1 .    
 . . 1 . . 1 . .    1 1 1 1 1 . 1 1     . . 1 . . . . .    
 1 . 1 . 1 . . .  & . . . . 1 1 1 .  =  . . . . 1 . . .    
 . 1 1 1 . . . .    . . . 1 . 1 . 1     . . . 1 . . . .    
 1 1 . 1 1 1 1 1    . . 1 . . 1 . .     . . . . . 1 . .    
 . 1 1 1 . . . .    . 1 . . . 1 . .     . 1 . . . . . .    
 1 . 1 . 1 . . .    1 . . . . 1 . .     1 . . . . . . .    


0x8040041008200201  0x00001FFFFFFC0000  0x0000041008200000
. . . . . . . 1     . . . . . . . .     . . . . . . . .  
. . . . . . 1 .     . . . . . . . .     . . . . . . . .  
. . 1 . . . . .     1 1 1 1 1 . . .     . . 1 . . . . .  
. . . . 1 . . .     1 1 1 1 1 1 1 1     . . . . 1 . . .  
. . . 1 . . . .  &  1 1 1 1 1 1 1 1   = . . . 1 . . . .  
. . . . . 1 . .     . . 1 1 1 1 1 1     . . . . . 1 . .  
. 1 . . . . . .     . . . . . . . .     . . . . . . . .  
1 . . . . . . .     . . . . . . . .     . . . . . . . .  
User avatar
Desperado
Posts: 879
Joined: Mon Dec 15, 2008 11:45 am

Re: SquareAttackedBy

Post by Desperado »

:shock: :shock: :shock:

here is my bug...

Code: Select all


void initModulSquare&#40;void&#41;

	&#123;

	 for&#40;UI_08 i=0;i<64;i++)
		 for&#40;UI_08 j=0;j<64;j++)
			&#123;
			 if&#40;inBetween&#40;i,j&#41;!=inBetween1&#40;i,j&#41;)
				&#123;
				 printf&#40;"\nerror");getchar&#40;);
				&#125;
			 else
				&#123;
				 printSquare&#40;i&#41;;
				 printSquare&#40;j&#41;;
				 printBitboard&#40;inBetween1&#40;i,j&#41;);
				 getchar&#40;);
				&#125;
			&#125;

	 printf&#40;"done");getchar&#40;);

	 initMask&#40;);

	 return;
	&#125;

validating the routines _BEFORE_ maskinitialization!!! (stupidstupidstupid)
:shock:

...but some ideas are left...

thx
User avatar
Desperado
Posts: 879
Joined: Mon Dec 15, 2008 11:45 am

Re: SquareAttackedBy

Post by Desperado »

Code: Select all

//--------x88Difference--&#40;byLookup&#41;-----
//--------------------------------------

static __inline UI_08 x88Difference&#40;SQR_T sq0,SQR_T sq1&#41;

	&#123;return&#40;mailboX88&#91;sq0&#93;-mailboX88&#91;sq1&#93;);&#125;

//--------x88Difference-&#40;byComputation&#41;-
//--------------------------------------

static __inline int x88diff&#40;int f, int t&#41;

	&#123;return t - f + &#40;t|7&#41; - &#40;f|7&#41; + 120;&#125;
//SOME DATA(STRUCTURES):

Code: Select all


	//mailbox based on 16x16 layout
	const SQR_T mailboX88&#91;ID64&#93; = 
		&#123;
		 0x44,0x45,0x46,0x47,0x48,0x49,0x4A,0x4B,
		 0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0x5B,
		 0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,
		 0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,
		 0x84,0x85,0x86,0x87,0x88,0x89,0x8A,0x8B,
		 0x94,0x95,0x96,0x97,0x98,0x99,0x9A,0x9B,
		 0xA4,0xA5,0xA6,0xA7,0xA8,0xA9,0xAA,0xAB,
		 0xB4,0xB5,0xB6,0xB7,0xB8,0xB9,0xBA,0xBB
		&#125;;


	#define FILEDIR 0
	#define RANKDIR 1
	#define DIAGDIR 2
	#define ANTIDIR 3
	#define ALLDIR  4

	struct MASK_T
		&#123;
		 //file,rank,diag,anti
		 BTB_T line&#91;ALLDIR&#93;;
		&#125;;

	static MASK_T mask&#91;ID64&#93;;
	static UI_08  lineX88&#91;256&#93;;
INIT OF LINEX88[]
==============

Code: Select all


				  if&#40;mask&#91;i&#93;.line&#91;FILEDIR&#93;==mask&#91;j&#93;.line&#91;FILEDIR&#93;) lineX88&#91;x88diff&#40;i,j&#41;&#93;=FILEDIR;
			 else if&#40;mask&#91;i&#93;.line&#91;RANKDIR&#93;==mask&#91;j&#93;.line&#91;RANKDIR&#93;) lineX88&#91;x88diff&#40;i,j&#41;&#93;=RANKDIR;
			 else if&#40;mask&#91;i&#93;.line&#91;DIAGDIR&#93;==mask&#91;j&#93;.line&#91;DIAGDIR&#93;) lineX88&#91;x88diff&#40;i,j&#41;&#93;=DIAGDIR;
			 else if&#40;mask&#91;i&#93;.line&#91;ANTIDIR&#93;==mask&#91;j&#93;.line&#91;ANTIDIR&#93;) lineX88&#91;x88diff&#40;i,j&#41;&#93;=ANTIDIR;

ORIGINAL:
=========

Code: Select all

//--------inBetween---------------------
//--------------------------------------

extern __inline BTB_T inBetween&#40;SQR_T sq0,SQR_T sq1&#41;
	&#123;
	 const BTB_T m = &#40;BTB_T&#41; -1;

	 BTB_T tmp = 0;

	 //switchDirection
	 tmp = fileMask&#40;sq0&#41; & fileMask&#40;sq1&#41;;
	 tmp|= rankMask&#40;sq0&#41; & rankMask&#40;sq1&#41;;
	 tmp|= diagMask&#40;sq0&#41; & diagMask&#40;sq1&#41;;
	 tmp|= antiMask&#40;sq0&#41; & antiMask&#40;sq1&#41;;

	 //innerBits
	 tmp&= &#40;m<<sq0&#41; ^ &#40;m<<sq1&#41;;
	 tmp&= tmp - 1;

	 return&#40;tmp&#41;; // FIXED
	&#125;
RESULT:
=======

Code: Select all


extern __inline BTB_T inBetween2&#40;SQR_T sq0,SQR_T sq1&#41;
	&#123;
	 const BTB_T m = &#40;BTB_T&#41; -1;
	 
	 UI_08 lineId	= lineX88&#91;x88diff&#40;sq0,sq1&#41;&#93;;
	 BTB_T tmp		= 0;

    //switchDirection
	 tmp =	mask&#91;sq0&#93;.line&#91;lineId&#93;
		    & mask&#91;sq1&#93;.line&#91;lineId&#93;;

	 //innerBits
	 tmp&= &#40;m<<sq0&#41; ^ &#40;m<<sq1&#41;;
	 tmp&= tmp - 1;

	 return&#40;tmp&#41;;
	&#125;
Well, i believe that the x88_lookup_rotateLeft may still be better.
(although memory is 256/1920,and talking of _additional_ memory
beside the file/rank/diag/anti masks. lineX88[256]. ).
What i am not really sure of is, if it is not better to use X88_lookup
instead of x88_calculation...at least the assembler looks better.

Code: Select all


X88LOOKUP&#58;
============

	mov	QWORD PTR &#91;rsp+8&#93;, rbx
	lea	rbx, OFFSET FLAT&#58;__ImageBase
	movzx	r9d, dl
	movzx	r11d, cl
	movzx	eax, BYTE PTR mailboX88&#91;r11+rbx&#93;
	sub	al, BYTE PTR mailboX88&#91;r9+rbx&#93;
	movzx	r8d, al
	or	rax, -1
	movzx	edx, BYTE PTR lineX88&#91;r8+rbx&#93;
	mov	r8, rax
	shl	r8, cl
	movzx	ecx, r9b
	shl	rax, cl
	xor	r8, rax
	lea	rax, QWORD PTR &#91;rdx+r9*4&#93;
	and	r8, QWORD PTR mask&#91;rbx+rax*8&#93;
	lea	rax, QWORD PTR &#91;rdx+r11*4&#93;
	and	r8, QWORD PTR mask&#91;rbx+rax*8&#93;
	mov	rbx, QWORD PTR &#91;rsp+8&#93;
	lea	rax, QWORD PTR &#91;r8-1&#93;
	and	rax, r8

X88COMPUTATION&#58;
================

	mov	QWORD PTR &#91;rsp+8&#93;, rbx
	mov	QWORD PTR &#91;rsp+16&#93;, rdi
	movzx	ebx, cl
	movzx	edi, dl
	lea	r9, OFFSET FLAT&#58;__ImageBase
	mov	rcx, rbx
	mov	r10, rdi
	mov	rax, rbx
	or	rax, 7
	or	r10, 7
	sub	r10, rax
	or	rax, -1
	mov	r8, rax
	sub	r10, rbx
	shl	r8, cl
	mov	ecx, edi
	add	r10, rdi
	movzx	edx, BYTE PTR lineX88&#91;r10+r9+120&#93;
	shl	rax, cl
	xor	r8, rax
	lea	rcx, QWORD PTR &#91;rdx+rbx*4&#93;
	mov	rbx, QWORD PTR &#91;rsp+8&#93;
	and	r8, QWORD PTR mask&#91;r9+rcx*8&#93;
	lea	rcx, QWORD PTR &#91;rdx+rdi*4&#93;
	mov	rdi, QWORD PTR &#91;rsp+16&#93;
	and	r8, QWORD PTR mask&#91;r9+rcx*8&#93;
	lea	rax, QWORD PTR &#91;r8-1&#93;
	and	rax, r8
Michael