Tricky Optimization Question

lithander · Post by **lithander** » Mon Mar 28, 2022 10:28 am

I find your idea of going without movelists very interesting. I can also imagine how this will give you great perft speeds. But how are you going to approach the task of playing the moves in a sorted order? Like MVV-LVA or history sorting?

Mike Sherwin · Post by **Mike Sherwin** » Mon Mar 28, 2022 11:12 am

lithander wrote: ↑Mon Mar 28, 2022 10:28 am I find your idea of going without movelists very interesting. I can also imagine how this will give you great perft speeds. But how are you going to approach the task of playing the moves in a sorted order? Like MVV-LVA or history sorting?

I'll just show some source code from RomiChess.

Code: Select all

void AddMoves(trees *t, s32 depth) {
  s32 id;
  s32 cid;
  u64 indexs;
  u64 toSqs;
  
  indexs = wtm ? wIndexs & ~WLEGALbit : bIndexs & ~BLEGALbit;
  while(indexs) {
    id = FirstBit(indexs);
    indexs &= clrBit[id];
    toSqs = h->moves[id];
    while(toSqs) {
      t->fs = piece[id].sq;
      t->ts = FirstBit(toSqs);
      t->typ = piece[id].typ;
      toSqs &= clrBit[t->ts];
      if(depth > 6) {
        MakeMove((moves *)t);
        t->score = Eval();
        TakeBack();
        if(((h-1)->attacked & setBit[t->ts]) && ((h-1)->ts != t->ts))
          t->score -= (piece[id].val / 2);
      } else {
        t->score = piece[id].tbl[t->ts];
        t->score -= piece[id].tbl[t->fs];
        cid = board[t->ts];
        t->score += piece[cid].val;
        if(((h-1)->attacked & setBit[t->ts]) && ((h-1)->ts != t->ts))
          t->score -= (piece[id].val / 2);
      }
      t++;
    }
  }
  (h+1)->t = t;
}

s32 NextMove(s32 alpha, s32 beta, s32 depth, s32 extendBy)
{
  s32 id;
  s32 cid;
  s32 fs;
  s32 ts;
  u64 pieces;
  u64 enemy;
  u64 attacks;
  moves m;
  trees *t;
  trees *node;

  switch(h->phase) {
    case HASHMOVE1:
      h->phase = PROMOTE;
      if(h->p && h->p->typ != NONE) {
        m.m = h->p->m;
        switch(h->p->typ) {
          case WCASKS:
            id = FirstBit(WCASKbit);
            if(h->moves[id] & WCASKbit) {
              h->moves[id] = 0;
              h->node->m = h->p->m;
              (h+1)->t = h->node + 1;
              return TRUE;
            } 
            break;
          case WCASQS:
            id = FirstBit(WCASQbit);
            if(h->moves[id] & WCASQbit) {
              h->moves[id] = 0;
              h->node->m = h->p->m;
              (h+1)->t = h->node + 1;
              return TRUE;
            } 
            break;
          case WPRANK7:
          case BPRANK2:
            goto promote;
          case BCASKS:
            id = FirstBit(BCASKbit);
            if(h->moves[id] & BCASKbit) {
              h->moves[id] = 0;
              h->node->m = h->p->m;
              (h+1)->t = h->node + 1;
              return TRUE;
            } 
            break;
          case BCASQS:
            id = FirstBit(BCASQbit);
            if(h->moves[id] & BCASQbit) {
              h->moves[id] = 0;
              h->node->m = h->p->m;
              (h+1)->t = h->node + 1;
              return TRUE;
            } 
            break;
          default:
            if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
              id = board[m.fs];
              if(h->moves[id] & setBit[m.ts]) {
                h->moves[id] ^= setBit[m.ts];
                h->node->m = m.m;
                h->node->typ = piece[id].typ;
                (h+1)->t = h->node + 1;
                return TRUE;
              }  
            }
        }
      }
    case PROMOTE:
promote:
      h->phase = NEXTPROMO;
      t = h->node;
      if(wtm) {
        pieces = wPawns & (A7bit|B7bit|C7bit|D7bit|E7bit|F7bit|G7bit|H7bit);
      } else {
        pieces = bPawns & (A2bit|B2bit|C2bit|D2bit|E2bit|F2bit|G2bit|H2bit);
      }
      while(pieces) {
        fs = FirstBit(pieces);
        pieces &= clrBit[fs];
        id = board[fs];
        attacks = h->moves[id];
        h->moves[id] = 0;
        while(attacks) {
          ts = FirstBit(attacks);
          attacks &= clrBit[ts];
          t->fs = fs;
          t->ts = ts;
          t->typ = piece[id].typ;
          t->flag = QUEEN;
          t++;
          t->fs = fs;
          t->ts = ts;
          t->typ = piece[id].typ;
          t->flag = KNIGHT;
          t++;
        }
      }
      if(t > h->node) {
        (h+1)->t = t;
      } else goto wincapts;
    case NEXTPROMO:
      if(h->node + 1 == (h+1)->t) {
          h->phase = WINCAPTS;
        return TRUE;
      }
      return TRUE;
    case WINCAPTS:
wincapts:
      h->phase = NEXTCAPT;
      t = h->node;
      if(wtm) {
        pieces = wPieces;
        enemy = bPieces;
      } else {
        pieces = bPieces;
        enemy = wPieces;
      }
      while(pieces) {
        fs = FirstBit(pieces);
        pieces &= clrBit[fs];
        id = board[fs];
        attacks = h->moves[id] & enemy;
        while(attacks) {
          ts = FirstBit(attacks);
          attacks &= clrBit[ts];
          cid = board[ts];
          t->score = piece[cid].val;
          if((h-1)->attacked & setBit[ts] && (h-1)->ts != ts)
            t->score -= piece[id].val;
          if(t->score >= 0) {
            t->fs = fs;
            t->ts = ts;
            t->typ = piece[id].typ;
            t++;
            h->moves[id] &= clrBit[ts];
          }
        }
      }
      if(t > h->node) {
        (h+1)->t = t;
      } else goto plykill1;
    case NEXTCAPT:
      if(h->node + 1 == (h+1)->t) {
          h->phase = PLYKILL1;
        return TRUE;
      }
      Sort(h->node, (h+1)->t);
      return TRUE;
    case PLYKILL1:
plykill1:
      h->phase = PLYKILL2;
      m.m = h->kill1.m;
      if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
        id = board[m.fs];
        if(piece[id].typ == m.typ) {
          if(h->moves[id] & setBit[m.ts]) {
            h->moves[id] ^= setBit[m.ts];
            h->node->m = m.m;
            (h+1)->t = h->node + 1;
            return TRUE;
          }   
        }  
      }
    case PLYKILL2:
      h->phase = MOVEKILL;
      m.m = h->kill2.m;
      if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
        id = board[m.fs];
        if(piece[id].typ == m.typ) {
          if(h->moves[id] & setBit[m.ts]) {
            h->moves[id] ^= setBit[m.ts];
            h->node->m = m.m;
            (h+1)->t = h->node + 1;
            return TRUE;
          }   
        }  
      }
    case MOVEKILL:
      h->phase = ADDMOVES;
      m.m = moveKill[piece[board[(h-1)->ts]].fig][(h-1)->fs][(h-1)->ts].m;
      if(wtm ? wPieces & setBit[m.fs] : bPieces & setBit[m.fs]) {
        id = board[m.fs];
        if(piece[id].typ == m.typ) {
          if(h->moves[id] & setBit[m.ts]) {
            h->moves[id] ^= setBit[m.ts];
            h->node->m = m.m;
            (h+1)->t = h->node + 1;
            return TRUE;
          }   
        }  
      }
    case ADDMOVES:
      h->phase = NEXTMOVE;
      AddMoves(h->node, depth);
      if(h->node == (h+1)->t) return FALSE;
      if(depth > 3) {
        for(node = h->node; node < (h+1)->t; node++) {
          Sort(node, (h+1)->t);
          MakeMove((moves *)&node->m);
          if(GetReps())
            node->score = 0;
          else {
            inShort++;
            node->score = -Search(-beta - 180, -alpha + 20, depth > 7 ? 3 : 1, extendBy);
            inShort--;
          }
          ClrReps();
          TakeBack();
        }
      }        
    case NEXTMOVE:
      if(h->node + 1 == (h+1)->t) {
        h->phase = NOMOVES;
        return TRUE;
      }
      Sort(h->node, (h+1)->t);
  }
  return TRUE;
}

It is a bit crude because I was just learning how to program, how to program in C and how to program chess all at the same time. But hey, it worked!

I should be able to improve it this time around. Basically if one has bitboards of all the piece types then it is rather trivial to interrogate the genbb[ply][fs] array for anything that is needed and to remove the corresponding bit. And the bit being there is also confirmation that the move exist.

dangi12012 · Post by **dangi12012** » Mon Mar 28, 2022 2:59 pm

Mike Sherwin wrote: ↑Mon Mar 28, 2022 11:12 am I'll just show some source code from RomiChess.

But the most important code is behind these:

Code: Select all

Sort(node, (h+1)->t);
MakeMove((moves *)&node->m);

What are those?
Looks to me like building a movelist with insertion sort.. but maybe I didnt read that right.

Mike Sherwin · Post by **Mike Sherwin** » Mon Mar 28, 2022 5:01 pm

dangi12012 wrote: ↑Mon Mar 28, 2022 2:59 pm
Mike Sherwin wrote: ↑Mon Mar 28, 2022 11:12 am I'll just show some source code from RomiChess.
But the most important code is behind these:
Code: Select all
Sort(node, (h+1)->t);
MakeMove((moves *)&node->m);
What are those?
Looks to me like building a movelist with insertion sort.. but maybe I didnt read that right.

That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.

Code: Select all

    case ADDMOVES:
      h->phase = NEXTMOVE;
      AddMoves(h->node, depth);
      if(h->node == (h+1)->t) return FALSE;
      if(depth > 3) {
        for(node = h->node; node < (h+1)->t; node++) {
          Sort(node, (h+1)->t);
          MakeMove((moves *)&node->m);
          if(GetReps())
            node->score = 0;
          else {
            inShort++;
            node->score = -Search(-beta - 180, -alpha + 20, depth > 7 ? 3 : 1, extendBy); // <-here
            inShort--;
          }
          ClrReps();
          TakeBack();
        }
      }

It gives about another ply and a half of search depth do to the better move ordering and helps with LMR as well.

Mike Sherwin · Post by **Mike Sherwin** » Mon Mar 28, 2022 9:36 pm

I tested it in the debugger for both white and black pawns for all combinations of blockers and it works perfectly! Apparently it is a tricky optimization because no one could offer any solution meeting the criteria. I'm glad it was just not me this time.

Code: Select all

	case WP2:
	  genbb[ply][fs] = ((0x0000000000000100ull << fs) & empty);
	  genbb[ply][fs] |= ((genbb[ply][fs] << 8) & empty);
	  at = wPawnCapts[fs];
	  genbb[ply][fs] |= (at & enemy);
	  atkbb[ply] |= at;
	  break;

Code: Select all

	case BP7:
	  genbb[ply][fs] = (1ull << (fs - 8)) & empty;
	  genbb[ply][fs] |= ((genbb[ply][fs] >> 8) & empty);
	  at = bPawnCapts[fs];
	  genbb[ply][fs] |= at & enemy;
	  atkbb[ply] |= at;
	  break;

I don't see how this can be made any faster for a single pawn on fs.

dangi12012 · Post by **dangi12012** » Mon Mar 28, 2022 11:19 pm

Mike Sherwin wrote: ↑Mon Mar 28, 2022 5:01 pm That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.

So to summarize and please correct me if I am wrong:
You dont need sorting - because your movegenerator does spit out the moves in the correct way (taking moves first etc) - so taking moves are expanded right then and there:

Code: Select all

MakeMove((moves *)t);
t->score = Eval();
TakeBack();

But weaker moves are put into a list and expanded last.

Mike Sherwin · Post by **Mike Sherwin** » Tue Mar 29, 2022 2:10 am

dangi12012 wrote: ↑Mon Mar 28, 2022 11:19 pm
Mike Sherwin wrote: ↑Mon Mar 28, 2022 5:01 pm That is for the remaining moves after HASHMOVE, PROMOTIONS, WINCAPTS, KILLMOVE1, KILLMOVE2, MOVEKILL or whatever I called them. If I just played the remaining moves without ordering them then even that is unnecessary. I spin the remaining moves into a move list so I can do a shallow search with a widened window to get each move a relative score before playing them.
So to summarize and please correct me if I am wrong:
You dont need sorting - because your movegenerator does spit out the moves in the correct way (taking moves first etc) - so taking moves are expanded right then and there:
Code: Select all
MakeMove((moves *)t);
t->score = Eval();
TakeBack();
But weaker moves are put into a list and expanded last.

The "weaker" (remaining) moves are put into a list so they can be searched at a shallow depth and ordered appropriately to save time. But also the clearly losing moves are more consistently affected by LMR. However, most importantly due to all the times that phase is not reached due to a beta cut it saves having to spin the remaining moves into a list at all.

Edit: Okay I see what the mini code segment is from. If depth > 6 then making a move, getting a score and taking the move back is very cheap. And then the score is modified in the next few lines. If depth is 6 or less a less time consuming method is done to evaluate and order the moves. I wrote this code in 2005 and I don't even remember writing it or what it does or how it does it. I have to take my precious little time I have to study it and learn it all over again.

dangi12012 · Post by **dangi12012** » Tue Mar 29, 2022 12:51 pm

Mike Sherwin wrote: ↑Tue Mar 29, 2022 2:10 am Edit: Okay I see what the mini code segment is from. If depth > 6 then making a move, getting a score and taking the move back is very cheap. And then the score is modified in the next few lines. If depth is 6 or less a less time consuming method is done to evaluate and order the moves. I wrote this code in 2005 and I don't even remember writing it or what it does or how it does it. I have to take my precious little time I have to study it and learn it all over again.

If you do this again I recommend going for C++ with constexpr and templates. If you do this then you can have recusive depth but all the "if depth < 6" statements compile into nothingness except for the leaves - which I have proven in another thread improves performance many fold.

Time to do my gigantua project again (but on the gpu). There are dozens of ideas that swarm around here and in my head that remain to be tested and implemented.

Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question

Re: Tricky Optimization Question