Tricky Optimization Question
Moderator: Ras
-
- Posts: 915
- Joined: Sun Dec 27, 2020 2:40 am
- Location: Bremen, Germany
- Full name: Thomas Jahn
Re: Tricky Optimization Question
I find your idea of going without movelists very interesting. I can also imagine how this will give you great perft speeds. But how are you going to approach the task of playing the moves in a sorted order? Like MVV-LVA or history sorting?
-
- Posts: 965
- Joined: Fri Aug 21, 2020 1:25 am
- Location: Planet Earth, Sol system
- Full name: Michael J Sherwin
Re: Tricky Optimization Question
I'll just show some source code from RomiChess.
Code: Select all
void AddMoves(trees *t, s32 depth) {
s32 id;
s32 cid;
u64 indexs;
u64 toSqs;
indexs = wtm ? wIndexs & ~WLEGALbit : bIndexs & ~BLEGALbit;
while(indexs) {
id = FirstBit(indexs);
indexs &= clrBit[id];
toSqs = h->moves[id];
while(toSqs) {
t->fs = piece[id].sq;
t->ts = FirstBit(toSqs);
t->typ = piece[id].typ;
toSqs &= clrBit[t->ts];
if(depth > 6) {
MakeMove((moves *)t);
t->score = Eval();
TakeBack();
if(((h-1)->attacked & setBit[t->ts]) && ((h-1)->ts != t->ts))
t->score -= (piece[id].val / 2);
} else {
t->score = piece[id].tbl[t->ts];
t->score -= piece[id].tbl[t->fs];
cid = board[t->ts];
t->score += piece[cid].val;
if(((h-1)->attacked & setBit[t->ts]) && ((h-1)->ts != t->ts))
t->score -= (piece[id].val / 2);
}
t++;
}
}
(h+1)->t = t;
}
s32 NextMove(s32 alpha, s32 beta, s32 depth, s32 extendBy)
{
s32 id;
s32 cid;
s32 fs;
s32 ts;
u64 pieces;
u64 enemy;
u64 attacks;
moves m;
trees *t;
trees *node;
switch(h->phase) {
case HASHMOVE1:
h->phase = PROMOTE;
if(h->p && h->p->typ != NONE) {
m.m = h->p->m;
switch(h->p->typ) {
case WCASKS:
id = FirstBit(WCASKbit);
if(h->moves[id] & WCASKbit) {
h->moves[id] = 0;
h->node->m = h->p->m;
(h+1)->t = h->node + 1;
return TRUE;
}
break;
case WCASQS:
id = FirstBit(WCASQbit);
if(h->moves[id] & WCASQbit) {
h->moves[id] = 0;
h->node->m = h->p->m;
(h+1)->t = h->node + 1;
return TRUE;
}
break;
case WPRANK7:
case BPRANK2:
goto promote;
case BCASKS:
id = FirstBit(BCASKbit);
if(h->moves[id] & BCASKbit) {
h->moves[id] = 0;
h->node->m = h->p->m;
(h+1)->t = h->node + 1;
return TRUE;
}
break;
case BCASQS:
id = FirstBit(BCASQbit);
if(h->moves[id] & BCASQbit) {
h->moves[id] = 0;
h->node->m = h->p->m;
(h+1)->t = h->node + 1;
return TRUE;
}
break;
default:
if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
id = board[m.fs];
if(h->moves[id] & setBit[m.ts]) {
h->moves[id] ^= setBit[m.ts];
h->node->m = m.m;
h->node->typ = piece[id].typ;
(h+1)->t = h->node + 1;
return TRUE;
}
}
}
}
case PROMOTE:
promote:
h->phase = NEXTPROMO;
t = h->node;
if(wtm) {
pieces = wPawns & (A7bit|B7bit|C7bit|D7bit|E7bit|F7bit|G7bit|H7bit);
} else {
pieces = bPawns & (A2bit|B2bit|C2bit|D2bit|E2bit|F2bit|G2bit|H2bit);
}
while(pieces) {
fs = FirstBit(pieces);
pieces &= clrBit[fs];
id = board[fs];
attacks = h->moves[id];
h->moves[id] = 0;
while(attacks) {
ts = FirstBit(attacks);
attacks &= clrBit[ts];
t->fs = fs;
t->ts = ts;
t->typ = piece[id].typ;
t->flag = QUEEN;
t++;
t->fs = fs;
t->ts = ts;
t->typ = piece[id].typ;
t->flag = KNIGHT;
t++;
}
}
if(t > h->node) {
(h+1)->t = t;
} else goto wincapts;
case NEXTPROMO:
if(h->node + 1 == (h+1)->t) {
h->phase = WINCAPTS;
return TRUE;
}
return TRUE;
case WINCAPTS:
wincapts:
h->phase = NEXTCAPT;
t = h->node;
if(wtm) {
pieces = wPieces;
enemy = bPieces;
} else {
pieces = bPieces;
enemy = wPieces;
}
while(pieces) {
fs = FirstBit(pieces);
pieces &= clrBit[fs];
id = board[fs];
attacks = h->moves[id] & enemy;
while(attacks) {
ts = FirstBit(attacks);
attacks &= clrBit[ts];
cid = board[ts];
t->score = piece[cid].val;
if((h-1)->attacked & setBit[ts] && (h-1)->ts != ts)
t->score -= piece[id].val;
if(t->score >= 0) {
t->fs = fs;
t->ts = ts;
t->typ = piece[id].typ;
t++;
h->moves[id] &= clrBit[ts];
}
}
}
if(t > h->node) {
(h+1)->t = t;
} else goto plykill1;
case NEXTCAPT:
if(h->node + 1 == (h+1)->t) {
h->phase = PLYKILL1;
return TRUE;
}
Sort(h->node, (h+1)->t);
return TRUE;
case PLYKILL1:
plykill1:
h->phase = PLYKILL2;
m.m = h->kill1.m;
if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
id = board[m.fs];
if(piece[id].typ == m.typ) {
if(h->moves[id] & setBit[m.ts]) {
h->moves[id] ^= setBit[m.ts];
h->node->m = m.m;
(h+1)->t = h->node + 1;
return TRUE;
}
}
}
case PLYKILL2:
h->phase = MOVEKILL;
m.m = h->kill2.m;
if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
id = board[m.fs];
if(piece[id].typ == m.typ) {
if(h->moves[id] & setBit[m.ts]) {
h->moves[id] ^= setBit[m.ts];
h->node->m = m.m;
(h+1)->t = h->node + 1;
return TRUE;
}
}
}
case MOVEKILL:
h->phase = ADDMOVES;
m.m = moveKill[piece[board[(h-1)->ts]].fig][(h-1)->fs][(h-1)->ts].m;
if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
id = board[m.fs];
if(piece[id].typ == m.typ) {
if(h->moves[id] & setBit[m.ts]) {
h->moves[id] ^= setBit[m.ts];
h->node->m = m.m;
(h+1)->t = h->node + 1;
return TRUE;
}
}
}
case ADDMOVES:
h->phase = NEXTMOVE;
AddMoves(h->node, depth);
if(h->node == (h+1)->t) return FALSE;
if(depth > 3) {
for(node = h->node; node < (h+1)->t; node++) {
Sort(node, (h+1)->t);
MakeMove((moves *)&node->m);
if(GetReps())
node->score = 0;
else {
inShort++;
node->score = -Search(-beta - 180, -alpha + 20, depth > 7 ? 3 : 1, extendBy);
inShort--;
}
ClrReps();
TakeBack();
}
}
case NEXTMOVE:
if(h->node + 1 == (h+1)->t) {
h->phase = NOMOVES;
return TRUE;
}
Sort(h->node, (h+1)->t);
}
return TRUE;
}

-
- Posts: 1062
- Joined: Tue Apr 28, 2020 10:03 pm
- Full name: Daniel Infuehr
Re: Tricky Optimization Question
But the most important code is behind these:
Code: Select all
Sort(node, (h+1)->t);
MakeMove((moves *)&node->m);
Looks to me like building a movelist with insertion sort.. but maybe I didnt read that right.
Worlds-fastest-Bitboard-Chess-Movegenerator
Daniel Inführ - Software Developer
Daniel Inführ - Software Developer
-
- Posts: 965
- Joined: Fri Aug 21, 2020 1:25 am
- Location: Planet Earth, Sol system
- Full name: Michael J Sherwin
Re: Tricky Optimization Question
That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.dangi12012 wrote: ↑Mon Mar 28, 2022 2:59 pmBut the most important code is behind these:What are those?Code: Select all
Sort(node, (h+1)->t); MakeMove((moves *)&node->m);
Looks to me like building a movelist with insertion sort.. but maybe I didnt read that right.
Code: Select all
case ADDMOVES:
h->phase = NEXTMOVE;
AddMoves(h->node, depth);
if(h->node == (h+1)->t) return FALSE;
if(depth > 3) {
for(node = h->node; node < (h+1)->t; node++) {
Sort(node, (h+1)->t);
MakeMove((moves *)&node->m);
if(GetReps())
node->score = 0;
else {
inShort++;
node->score = -Search(-beta - 180, -alpha + 20, depth > 7 ? 3 : 1, extendBy); // <-here
inShort--;
}
ClrReps();
TakeBack();
}
}
-
- Posts: 965
- Joined: Fri Aug 21, 2020 1:25 am
- Location: Planet Earth, Sol system
- Full name: Michael J Sherwin
Re: Tricky Optimization Question
I tested it in the debugger for both white and black pawns for all combinations of blockers and it works perfectly! Apparently it is a tricky optimization because no one could offer any solution meeting the criteria. I'm glad it was just not me this time.
I don't see how this can be made any faster for a single pawn on fs.


Code: Select all
case WP2:
genbb[ply][fs] = ((0x0000000000000100ull << fs) & empty);
genbb[ply][fs] |= ((genbb[ply][fs] << 8) & empty);
at = wPawnCapts[fs];
genbb[ply][fs] |= (at & enemy);
atkbb[ply] |= at;
break;
Code: Select all
case BP7:
genbb[ply][fs] = (1ull << (fs - 8)) & empty;
genbb[ply][fs] |= ((genbb[ply][fs] >> 8) & empty);
at = bPawnCapts[fs];
genbb[ply][fs] |= at & enemy;
atkbb[ply] |= at;
break;
-
- Posts: 1062
- Joined: Tue Apr 28, 2020 10:03 pm
- Full name: Daniel Infuehr
Re: Tricky Optimization Question
So to summarize and please correct me if I am wrong:Mike Sherwin wrote: ↑Mon Mar 28, 2022 5:01 pm That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.
You dont need sorting - because your movegenerator does spit out the moves in the correct way (taking moves first etc) - so taking moves are expanded right then and there:
Code: Select all
MakeMove((moves *)t);
t->score = Eval();
TakeBack();
Worlds-fastest-Bitboard-Chess-Movegenerator
Daniel Inführ - Software Developer
Daniel Inführ - Software Developer
-
- Posts: 965
- Joined: Fri Aug 21, 2020 1:25 am
- Location: Planet Earth, Sol system
- Full name: Michael J Sherwin
Re: Tricky Optimization Question
The "weaker" (remaining) moves are put into a list so they can be searched at a shallow depth and ordered appropriately to save time. But also the clearly losing moves are more consistently affected by LMR. However, most importantly due to all the times that phase is not reached due to a beta cut it saves having to spin the remaining moves into a list at all.dangi12012 wrote: ↑Mon Mar 28, 2022 11:19 pmSo to summarize and please correct me if I am wrong:Mike Sherwin wrote: ↑Mon Mar 28, 2022 5:01 pm That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.
You dont need sorting - because your movegenerator does spit out the moves in the correct way (taking moves first etc) - so taking moves are expanded right then and there:
But weaker moves are put into a list and expanded last.Code: Select all
MakeMove((moves *)t); t->score = Eval(); TakeBack();
Edit: Okay I see what the mini code segment is from. If depth > 6 then making a move, getting a score and taking the move back is very cheap. And then the score is modified in the next few lines. If depth is 6 or less a less time consuming method is done to evaluate and order the moves. I wrote this code in 2005 and I don't even remember writing it or what it does or how it does it. I have to take my precious little time I have to study it and learn it all over again.
-
- Posts: 1062
- Joined: Tue Apr 28, 2020 10:03 pm
- Full name: Daniel Infuehr
Re: Tricky Optimization Question
If you do this again I recommend going for C++ with constexpr and templates. If you do this then you can have recusive depth but all the "if depth < 6" statements compile into nothingness except for the leaves - which I have proven in another thread improves performance many fold.Mike Sherwin wrote: ↑Tue Mar 29, 2022 2:10 am Edit: Okay I see what the mini code segment is from. If depth > 6 then making a move, getting a score and taking the move back is very cheap. And then the score is modified in the next few lines. If depth is 6 or less a less time consuming method is done to evaluate and order the moves. I wrote this code in 2005 and I don't even remember writing it or what it does or how it does it. I have to take my precious little time I have to study it and learn it all over again.
Time to do my gigantua project again (but on the gpu). There are dozens of ideas that swarm around here and in my head that remain to be tested and implemented.
Worlds-fastest-Bitboard-Chess-Movegenerator
Daniel Inführ - Software Developer
Daniel Inführ - Software Developer