## Back To The Beginning

Discussion of chess software programming and technical issues.

Moderators: bob, hgm, Harvey Williamson

Forum rules
This textbox is used to restore diagrams posted with the [d] tag before the upgrade.
Michael Sherwin
Posts: 3082
Joined: Fri May 26, 2006 1:00 am
Location: WY, USA
Full name: Michael Sherwin

### Back To The Beginning

The first move generator I ever wrote was in a combination of C and 32 bit assembler. It was a GNUChess 4 style generator with improvements. I had to learn C and 32 bit assembler as it was written. Besides being in assembler one of the improvements was that the next square and next direction tables are compressed to save memory which was important at the time. It is now working in my current project.

Here is the initialization code. Don't expect much in the way of style as it was the first C code I ever wrote. And it works! Also I'll show both some C code and assembler code for how to use it.

Code: Select all

``````u08 bns[1808]; // bishop next square
u08 bnd[1808]; // bishop next direction
u08 rns[3704];
u08 rnd[3704];
u08 qns[3872];
u08 qnd[3872];
u08 nns[890];
u08 kns[809];
u08 wpns[360];
u08 bpns[360];

u32 bof[64]; // bishop offsets
u32 rof[64];
u32 qof[64];
u32 nof[64];
u32 kof[64];
u32 pof[64];

void InitMovGen() {
s08 lbrd[120] =
{ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1, 0, 1, 2, 3, 4, 5, 6, 7,-1,
-1, 8, 9,10,11,12,13,14,15,-1,
-1,16,17,18,19,20,21,22,23,-1,
-1,24,25,26,27,28,29,30,31,-1,
-1,32,33,34,35,36,37,38,39,-1,
-1,40,41,42,43,44,45,46,47,-1,
-1,48,49,50,51,52,53,54,55,-1,
-1,56,57,58,59,60,61,62,63,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 };

s08 pdir[7][8] =
{ {9,11,-9,-11,0,0,0,0},
{1,10,-1,-10,0,0,0,0},
{9,11,1,10,-9,-11,-1,-10},
{8,12,19,21,-8,-12,-19,-21},
{9,11,1,10,-9,-11,-1,-10},
{10,9,11,0,0,0,0,0},
{-10,-9,-11,0,0,0,0,0} };

u08 maxs[7] =
{ 6,6,6,0,0,0,0 };

u08 maxdir[7] =
{ 3,3,7,7,7,2,2 };

s08 min = 64, max = 0;
s08 ptyp;
s08 plfs, lds, lts;
s08 fs, ts, nds, pfs;
s08 dir, ndir, ppdir, s;
u08 * sbns = bns, * sbnd = bnd;
u32 * pop = bof;
s32 offset = 0;

for (ptyp = 0; ptyp < 6; ptyp++)
{
switch (ptyp)
{
case 0:
pop = bof;
break;
case 1:
pop = rof;
break;
case 2:
pop = qof;
break;
case 3:
pop = nof;
break;
case 4:
pop = kof;
break;
case 5:
pop = pof;
offset = 8;
}
for (plfs = 21; plfs < 99; plfs++)
if ((pfs = fs = lbrd[plfs]) >= 0)
{
if ((ptyp > 0 && ptyp < 5) || pfs % 2 == 0) min = max = fs;
for (dir = maxdir[ptyp]; dir >= 0; dir--)
if ((ts = lbrd[(lts = plfs + pdir[ptyp][dir])]) >= 0)
{
for (s = maxs[ptyp]; s >= 0; s--)
{
if (ts < min)min = ts;
if (ts > max)max = ts;
if ((ts = lbrd[(lts = lts + pdir[ptyp][dir])]) < 0 || s == 0)
break;
}
}
if (ptyp != 0 && ptyp < 5)
{
if (ptyp < 3)
{
*(pop + pfs) = (offset - min);
offset = offset + max - min + 1;
}
else
{
*(pop + pfs) = (offset - pfs);
if (ptyp == 3)offset = offset + 13;
else offset = offset + 11;
}
}
else
{
if (pfs % 2 != 0)
{
*(pop + pfs) = (offset - min);
*(pop + pfs - 1) = (offset - min);
offset = offset + max - min + 2;
}
}
}
offset = 0;
}

for (ptyp = 0; ptyp < 7; ptyp++)
for (plfs = 21; plfs < 99; plfs++)
if ((pfs = fs = lbrd[plfs]) >= 0)
{
switch (ptyp)
{
case 0:
sbns = bns + bof[pfs];
sbnd = bnd + bof[pfs];
break;
case 1:
sbns = rns + rof[pfs];
sbnd = rnd + rof[pfs];
break;
case 2:
sbns = qns + qof[pfs];
sbnd = qnd + qof[pfs];
break;
case 3:
sbns = nns + nof[pfs];
break;
case 4:
sbns = kns + kof[pfs];
break;
case 5:
sbns = wpns + pof[pfs];
break;
case 6:
sbns = bpns + pof[pfs];
}
for (dir = maxdir[ptyp]; dir >= 0; dir--)
if ((ts = lbrd[(lts = plfs + pdir[ptyp][dir])]) >= 0)
{
for (ndir = dir - 1; ndir >= 0; ndir--)
if ((nds = lbrd[(lds = plfs + pdir[ptyp][ndir])]) >= 0)break;
ppdir = 0;
for (s = maxs[ptyp]; s >= 0; s--)
{
if (ptyp < 5)* (sbns + fs) = ts;
if (ptyp == 5)
{
if (dir == 2 && ndir == 1)
{
*(sbns + fs) = ts;
*(sbns + ts) = nds;
ppdir = 2;
}
if ((dir == 2 && ndir == 0) || (dir == 1))
{
*(sbns + fs) = ts;
*(sbns + ts) = 65; if (pfs > 7 && pfs < 16)* (sbns + ts) = 66;
if (pfs > 31 && pfs < 40)* (sbns + ts) = 67;
if (pfs > 47 && pfs < 56)* (sbns + ts) = 68;
ppdir = 0;
}
if (dir == 1 && ppdir == 2)
{
*(sbns + ts) = 65; if (pfs > 7 && pfs < 16)* (sbns + ts) = 66;
if (pfs > 31 && pfs < 40)* (sbns + ts) = 67;
if (pfs > 47 && pfs < 56)* (sbns + ts) = 68;
}
}
if (ptyp == 6)
{
if (dir == 2 && ndir == 1)
{
*(sbns + fs) = ts;
*(sbns + ts) = nds;
ppdir = 2;
}
if ((dir == 2 && ndir == 0) || (dir == 1))
{
*(sbns + fs) = ts;
*(sbns + ts) = 65; if (pfs < 56 && pfs>47)* (sbns + ts) = 66;
if (pfs < 32 && pfs>23)* (sbns + ts) = 67;
if (pfs < 16 && pfs> 7)* (sbns + ts) = 68;
ppdir = 0;
}
if (dir == 1 && ppdir == 2)
{
*(sbns + ts) = 65; if (pfs < 56 && pfs>47)* (sbns + ts) = 66;
if (pfs < 32 && pfs>23)* (sbns + ts) = 67;
if (pfs < 16 && pfs> 7)* (sbns + ts) = 68;
}
}
if (ndir >= 0 && ptyp < 3)* (sbnd + ts) = nds;
else if (ptyp < 3)* (sbnd + ts) = 64;
fs = ts;
if ((ts = lbrd[(lts = lts + pdir[ptyp][dir])]) < 0 || s == 0)
{
if (ndir >= 0 && ptyp < 5)
{
*(sbns + fs) = nds;
if (ptyp < 3)* (sbnd + fs) = nds;
}
else
if (ptyp < 5)
{
*(sbns + fs) = 64;
if (ptyp < 3)* (sbnd + fs) = 64;
}
break;
}
}
}
}

}``````

Code: Select all

``````case BISHOP:
sbns=bns+bof[fs];
sbnd=bnd+bof[fs];
ts=*(sbns+fs);
while(ts<64)
{
switch(wtrgt[brd[ts]])
{
case VACANT:
ts=*(sbns+ts);
continue;
case FRIEND:
ts=*(sbnd+ts);
continue;
case ENEMY:
ts=*(sbnd+ts);
continue;
default:
return FALSE;
}
}
continue;``````

Code: Select all

``````.data

; Bishop Jump Table
; wbmf = white bishop move function
; wbmnd = white bishop move next direction
; wbmrm = white bishop move record move
; wbmrc =  white bishop move record capture
; wmcbki = white move captures black king illegally
; wnxtm = white next move
wbmf            dd          0
dd          wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd
dd          wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd,wbmnd
dd          0,0,0
dd          wbmrm
dd          0
dd          wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wbmrc
dd          wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wbmrc,wmcbki
dd          wnxtm

.code ;32 bit code

; white move generator
wmg:
push        ebp ; this gets the first piece in a list of linked pieces and jumps to the piece type
push        edi
push        esi
mov         ebp,[ply]
mov         eax,[first+ebp*4]
mov         [lis+ebp*4],eax
mov         edi,[nxt]
jmp         [ptmf+edi*4]

; white bishop move (gen)
wbm:
mov         ecx,[ps+edi*4]
mov         esi,[bol+ecx*4]
movsx       ebx,[bns+esi+ecx]
mov         edx,[brd+ebx*4]
jmp         [wbmf+edx*4]

wbmrm:
mov         [tree.fsq+eax*8],cl
mov         [tree.tsq+eax*8],bl
mov         [tree.typ+eax*8],BMOV
inc         eax
movsx       ebx,[bns+esi+ebx]
mov         edx,[brd+ebx*4]
jmp         [wbmf+edx*4]

wbmrc:
mov         [tree.fsq+eax*8],cl
mov         [tree.tsq+eax*8],dl
mov         [tree.typ+eax*8],BCAP
inc         eax
wbmnd:
movsx       ebx,[bnd+esi+ebx]
mov         edx,[brd+ebx*4]
jmp         [wbmf+edx*4]
``````
I hate if statements. Pawns demand if statements. Therefore I hate pawns.

Ras
Posts: 1171
Joined: Tue Aug 30, 2016 6:19 pm
Contact:

### Re: Back To The Beginning

I'd not use assembly because porting that needs separate code paths for each CPU architecture. E.g. how would you make an Android version? It's also not particularly useful because the move generator doesn't take a significant amount of time anyway.
Rasmus Althoff
https://www.ct800.net

Michael Sherwin
Posts: 3082
Joined: Fri May 26, 2006 1:00 am
Location: WY, USA
Full name: Michael Sherwin

### Re: Back To The Beginning

Ras wrote:
Sat Sep 07, 2019 9:10 am
I'd not use assembly because porting that needs separate code paths for each CPU architecture. E.g. how would you make an Android version? It's also not particularly useful because the move generator doesn't take a significant amount of time anyway.
Depends what it is being used for. My new chess engine has no evaluation function and therefore move generation speed becomes more important. Also bitboards lose to this even in generating captures only. Once the bishop code is entered these three instructions check every square on an empty board until it finds a capture or it runs out of squares.

Code: Select all

``````wbcns: ; white bishop capture next square
movsx       ebx,[bns+esi+ebx]
mov         edx,[brd+ebx*4]
jmp         [wbcf+edx*4]``````
And not everyone programs for multiple platforms. And maybe someone can use this code for whatever reason. Or maybe no one will care to uses it but might be interested for historic reasons. I don't understand why anyone would put this down just because it was posted? Strange, very strange!
I hate if statements. Pawns demand if statements. Therefore I hate pawns.

Ras
Posts: 1171
Joined: Tue Aug 30, 2016 6:19 pm
Contact:

### Re: Back To The Beginning

Michael Sherwin wrote:
Sat Sep 07, 2019 9:31 am
My new chess engine has no evaluation function and therefore move generation speed becomes more important.
Not having an evaluation function? So it just generates moves, searches through trees, and then doesn't evaluate them? Did you actually benchmark how much time the move generation takes?
I don't understand why anyone would put this down just because it was posted?
This is a forum. It is meant for discussion. If you don't want discussion, use a blog without comment function instead.
Rasmus Althoff
https://www.ct800.net

fabianVDW
Posts: 78
Joined: Fri Mar 15, 2019 7:46 pm
Location: Germany
Full name: Fabian von der Warth

### Re: Back To The Beginning

AFAIK, movegeneration speed is a good chunk of the runtime in modern engines. Atleast in FabChess, it is about 25-30% of the time used, being on par with the evaluation function (IIRC).
Author of FabChess: https://github.com/fabianvdW/FabChess
A UCI compliant chess engine written in Rust.
FabChessWiki: https://github.com/fabianvdW/FabChess/wiki
fabianvonderwarth@gmail.com

Michael Sherwin
Posts: 3082
Joined: Fri May 26, 2006 1:00 am
Location: WY, USA
Full name: Michael Sherwin

### Re: Back To The Beginning

Ras wrote:
Sat Sep 07, 2019 10:18 am
Michael Sherwin wrote:
Sat Sep 07, 2019 9:31 am
My new chess engine has no evaluation function and therefore move generation speed becomes more important.
Not having an evaluation function? So it just generates moves, searches through trees, and then doesn't evaluate them? Did you actually benchmark how much time the move generation takes?
I don't understand why anyone would put this down just because it was posted?
This is a forum. It is meant for discussion. If you don't want discussion, use a blog without comment function instead.
On an i7- 3930k overclocked to 4.4 GHz using only one thread in the opening position the assembler version generates 75 million positions per second. That includes all moves made and unmade and no hash counting. On an i9-9900k overclocked to 5GHz it would do double that.

My new engine has no evaluation function because it is a statistical driven engine. It is basically a material only searcher that accumulates statistics as it searches creating a winning percentage chance for each root move.
I hate if statements. Pawns demand if statements. Therefore I hate pawns.

Joost Buijs
Posts: 1004
Joined: Thu Jul 16, 2009 8:47 am
Location: Almere, The Netherlands

### Re: Back To The Beginning

fabianVDW wrote:
Sat Sep 07, 2019 10:21 am
AFAIK, movegeneration speed is a good chunk of the runtime in modern engines. Atleast in FabChess, it is about 25-30% of the time used, being on par with the evaluation function (IIRC).
25% to 30% of the time used by the move generation is quite a lot. Maybe it depends upon staged move generation or not and whether you execute the hash move with or without move generation, when I profile my engine move generation takes considerably less than 10% of the total time used.

mar
Posts: 2050
Joined: Fri Nov 26, 2010 1:00 pm
Location: Czech Republic
Full name: Martin Sedlak

### Re: Back To The Beginning

Ras wrote:
Sat Sep 07, 2019 10:18 am
Not having an evaluation function? So it just generates moves, searches through trees, and then doesn't evaluate them? Did you actually benchmark how much time the move generation takes?
Move generator really is essential part of any chess engine. Most of us had to actually debug it
(you don't need evaluation for perft to make sure your move generator works)

You don't start writing a chess program with evaluation function, you start with material only.
Only when everything is working as it should it makes sense to continue working on evaluation.
Martin Sedlak

zullil
Posts: 5824
Joined: Mon Jan 08, 2007 11:31 pm
Location: PA USA
Full name: Louis Zulli

### Re: Back To The Beginning

Michael Sherwin wrote:
Sat Sep 07, 2019 10:42 am

On an i7- 3930k overclocked to 4.4 GHz using only one thread in the opening position the assembler version generates 75 million positions per second. That includes all moves made and unmade and no hash counting. On an i9-9900k overclocked to 5GHz it would do double that.
Why would going from 4.4 GHz to 5.0 Ghz double the speed of the move generator? What else does the i9 provide?

Joost Buijs
Posts: 1004
Joined: Thu Jul 16, 2009 8:47 am
Location: Almere, The Netherlands

### Re: Back To The Beginning

I determined the time for a complete move-generation over a large number of random positions, for my engine it is on average 274 cycles on a Broadwell processor, at 4 GHz. this amounts to 68.5 ns. My evaluation function runs at approx. 680 cycles or ~170 ns. In practice though I use staged move generation, when the hash-move generates a beta cut-off (which happens often) I don't have to call the move generator at all, this is the reason that the efficiency of move generation has hardly any influence on the engine overall.