I’m looking to do some try on B* Search. As I'm entering terra incognita some comment/advise are welcome.
Right now I have some code to mimic the point 3.6 A Search Example from the paper: “B* Probability Based Search” Hans Berliner, Chris McConnell 22 June 1995.
If somebody else is interested in, I publish this code “as is”. (Quick & dirty implementation) I think the tree code need complete rewrite...
If there is some interest in publish it as a companion code for the paper, say in CPW (Gerd?) feel free to point me the improvement/ correction /comment needed. I’ll rewrite it to get a smarter code for such an “official release”.
Code: Select all
// algebar.cpp :
// Companion code for "B* probability based Search" -> 3.6 A Search Example
// Hans Berliner
// Chris McDonnell
// 22 June 1995
//
#include "stdio.h"
#include "assert.h"
#include "memory.h"
// 0 optval 1 realval
int Datos[22][2] = {
{0,0},{100,20},{40,18},{25,10},{0,20},{40,60},
{-80,80},{76,34},{36,12},{32,16},{0,80},{0,60},
{0,50},{10,20},{15,30},{20,34},{0,55},{0,32},{0,20},
{4,82},{11,70},{16,40}
};
#define INFINITY 99999
#define UNDEFINED 999
#define MAXNODES 0x100
#define MAXMOVES 0x100
#define PLAYER 0
#define OPPONENT 1
#define ROOTNODE 0
double MinAct = 0.15;
int TargetVal;
int BestMoveAtRoot;
int RealValBest;
int OptVal2ndBest;
typedef struct _nodes {
int depth; // depth of this node related to root 0 = root ...
int OptVal; //optimistic value of the node for the side-on-move
int RealVal; //best estimate of the true value of the node
int PessVal; //optimistic value for the side-not-on-move, backed up from its subtree
double OptPrb; //probability that a certain target value can be achieved
// int toMoves; //
int ParentNode;
int toNext;
int MoveCount;
} NODE;
NODE nodes[MAXNODES];
int next[MAXMOVES]; // parent child1,child2..parent, child1,child2...
int FreeNode = 1;
int FreeMove = 1;
int TimeLimit = 1000; // Time limit if needed
// rutinas que acceden a los datos
int GetOptVal(int i)
{
assert(i > 0);
assert(i < 22);
// DoNullMove();
// PVS(DEPTHEVAL,...);
// UnDoNullMove();
return Datos[i][0];
}
int GetRealVal(int i)
{
assert(i > 0);
assert(i < 22);
// PVS(DEPTHEVAL,...);
return Datos[i][1];
}
void Dumptree()
{
int i;
printf("Node Depth OptVal RealVal PessVal TargetVal OptPrb\n");
printf("==================================================\n");
for(i =1;i < FreeNode;i++)
{
printf("%c %1d/%c %3d %3d %3d %3d %.3lf\n",
'A'+i-1,nodes[i].depth,
nodes[i].ParentNode == 0 ? ' ': 'A'+ nodes[i].ParentNode-1,
nodes[i].OptVal,nodes[i].RealVal,nodes[i].PessVal,
TargetVal, nodes[i].OptPrb
);
}
printf("==================================================\n");
getchar();
}
// best for opponent -> minimize
int GetRealValBest(int n)
{
int MaxValue,NumMoves,i,StartPos,pos,B;
NumMoves = nodes[n].MoveCount;
B = StartPos = nodes[n].toNext;
MaxValue = INFINITY;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].RealVal < MaxValue)
{
MaxValue = nodes[pos].RealVal;
B = StartPos+i;
}
}
return B;
}
// Best for player -> maximize
int GetRealValBestVerify(int n)
{
int MaxValue,NumMoves,i,StartPos,pos,B;
NumMoves = nodes[n].MoveCount;
B = StartPos = nodes[n].toNext;
MaxValue = -INFINITY;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].RealVal > MaxValue)
{
MaxValue = nodes[pos].RealVal;
B = StartPos+i;
}
}
return B;
}
// max.
int GetRealValBestAtRoot()
{
int MaxValue = -INFINITY;
int NumMoves,i,StartPos,pos;
StartPos = nodes[ROOTNODE].toNext;
NumMoves = nodes[ROOTNODE].MoveCount;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].RealVal > MaxValue)
{
MaxValue = nodes[pos].RealVal;
BestMoveAtRoot = StartPos+i;
}
}
RealValBest = MaxValue;
return MaxValue;
}
// Best OptVal but not BestAtRoot
int OptValAnyOtherMoveAtRoot()
{
int MaxValue = -INFINITY;
int NumMoves,i,StartPos,pos;
NumMoves = nodes[ROOTNODE].MoveCount;
StartPos = nodes[ROOTNODE].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].OptVal > MaxValue && StartPos+i != BestMoveAtRoot)
{
MaxValue = nodes[pos].OptVal;
}
}
OptVal2ndBest = MaxValue;
return MaxValue;
}
void RecalProb(int pos)
{
nodes[pos].OptPrb = (nodes[pos].OptVal - TargetVal)*1.0/(nodes[pos].OptVal-nodes[pos].RealVal);
if(nodes[pos].OptPrb > 1.0)
nodes[pos].OptPrb = 1.0;
if(nodes[pos].OptPrb < 0.0)
nodes[pos].OptPrb = 0;
}
int GetBestOptPrb(int parent)
{
int B;
double MaxValue = 0.0;
int NumMoves,i,StartPos,pos;
NumMoves = nodes[parent].MoveCount;
B = StartPos = nodes[parent].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
RecalProb(pos);
if(nodes[pos].OptPrb > MaxValue)
{
MaxValue = nodes[pos].OptPrb;
B = StartPos+i;
}
}
return B;
}
double GetBestChildOptPrb(NODE &parent)
{
double MaxValue = 0.0;
int NumMoves,i,StartPos,pos;
NumMoves = parent.MoveCount;
StartPos = parent.toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].OptPrb > MaxValue)
{
MaxValue = nodes[pos].OptPrb;
}
}
return MaxValue;
}
int GetBestOptPrbVerify(int parent)
{
double MaxValue = 0.0;
int NumMoves,i,StartPos,pos,B;
NumMoves = nodes[parent].MoveCount;
B = StartPos = nodes[parent].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].OptPrb > MaxValue)
{
MaxValue = nodes[pos].OptPrb;
B = StartPos+i;
}
}
return B;
}
double SecondRootOptPrb()
{
double MaxValue = 0.0;
int NumMoves,i,StartPos,pos;
NumMoves = nodes[ROOTNODE].MoveCount;
StartPos = nodes[ROOTNODE].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
RecalProb(pos);
if(nodes[pos].OptPrb > MaxValue && pos != BestMoveAtRoot)
{
MaxValue = nodes[pos].OptPrb;
}
}
return MaxValue ;
}
// Trace down the child subtree selecting
// For Player-to-Move nodes, child with largest OptPrb
// For Opponent-to-Move nodes, child with best RealVal
// Until arriving at a leaf node;
int TraceDown(int a)
{
int SelectedNode,m;
SelectedNode = next[a];
while( nodes[SelectedNode].toNext != 0)
{
if((nodes[SelectedNode].depth%2) == 1)
{
m = GetBestOptPrb(SelectedNode);
SelectedNode = next[m];
}
else
{
m = GetRealValBest(SelectedNode);
SelectedNode = next[m];
}
}
return SelectedNode;
}
// Trace down the child subtree selecting
// For Opponent-to-Move nodes, child with largest OptPrb
// For Player-to-Move nodes, child with best RealVal
// Until arriving at a leaf node;
int TraceDownVerify(int a)
{
int SelectedNode,m;
SelectedNode = next[a];
while( nodes[SelectedNode].toNext != 0)
{
if((nodes[SelectedNode].depth%2) == 0)
{
m = GetBestOptPrbVerify(SelectedNode);
SelectedNode = next[m];
}
else
{
m = GetRealValBestVerify(SelectedNode);
SelectedNode = next[m];
}
}
return SelectedNode;
}
void CalcVerifPrb(NODE &nodo)
{
if(nodo.depth == 0)
{
nodo.OptPrb = 0.999;
return;
}
if(TargetVal >= nodo.RealVal)
nodo.OptPrb = 1.0;
else
{
if(nodo.OptVal > TargetVal && nodo.RealVal >nodo.OptVal)
nodo.OptPrb = 0.0;
else
{
if(nodo.RealVal >nodo.OptVal)
{
nodo.OptPrb = (1.0*(TargetVal - nodo.OptVal ))/(nodo.RealVal - nodo.OptVal);
}
else
{
nodo.OptPrb = (1.0*(TargetVal - nodo.PessVal ))/(nodo.RealVal - nodo.PessVal);
}
}
}
if(nodo.OptPrb > 1.0)
nodo.OptPrb = 1.0;
if(nodo.OptPrb < 0.0)
nodo.OptPrb = 0.0;
}
// Get RealVal for each Child Node of this leaf;
// If it is a Player-to-Move node get OptVals for each Child;
void Expand(int SelectedNode,int modo)
{
int nodo,EvalOpt;
int NumMoves = 3;//= T.GeneraJugadas(pMoves);
nodes[SelectedNode].MoveCount = NumMoves;
nodes[SelectedNode].toNext = FreeMove;
EvalOpt = ((nodes[SelectedNode].depth+1) %2 ) == modo;
for(int i = 0;i < NumMoves;i++)
{
// DoMove();
// reservamos un nodo por cada Jugada
// Reserve a node for each move
nodo = next[FreeMove+i] = FreeNode++;
// Calculamos el valor real y el optimista.
// Get Real and optimistic value.
nodes[nodo].RealVal = GetRealVal(nodo);
nodes[nodo].depth = nodes[SelectedNode].depth + 1;
nodes[nodo].OptVal = UNDEFINED;
nodes[nodo].PessVal = nodes[SelectedNode].OptVal;
nodes[nodo].ParentNode = SelectedNode;
if(EvalOpt)
nodes[nodo].OptVal = GetOptVal(nodo);
else
if(modo == OPPONENT)
nodes[nodo].OptVal = nodes[SelectedNode].PessVal ;
}
FreeMove += NumMoves;
}
void CalcVerifPrbP(int parent)
{
int NumMoves,StartPos,pos,i;
NumMoves = nodes[parent].MoveCount;
StartPos = nodes[parent].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
CalcVerifPrb(nodes[pos]);
}
}
double Product(NODE &parent,int modo)
{
double product = 1.0;
// calculate OptPrb for child nodes
int NumMoves,StartPos,pos,i;
NumMoves = parent.MoveCount;
StartPos = parent.toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(modo == 0)
nodes[pos].OptPrb = (nodes[pos].PessVal - TargetVal)*1.0/(nodes[pos].PessVal -nodes[pos].RealVal);
else
if(nodes[pos].toNext == 0)
CalcVerifPrb(nodes[pos]);
if(nodes[pos].OptPrb > 1.0)
nodes[pos].OptPrb = 1.0;
if(nodes[pos].OptPrb < 0.0)
nodes[pos].OptPrb = 0;
product *= nodes[pos].OptPrb;
}
return product;
}
void Propagate1(NODE &nodo,NODE &parent,int mode)
{
if(mode == PLAYER)
{
if((nodo.depth % 2) == 1)
{
parent.OptVal = nodo.PessVal;
parent.OptPrb = Product(parent,0);
}
else
{
parent.PessVal = nodo.OptVal;
parent.OptPrb = GetBestChildOptPrb(parent);
}
parent.RealVal = nodo.RealVal;
}
else
{
if((nodo.depth % 2) == 0)
{
parent.OptVal = nodo.PessVal;
parent.OptPrb = Product(parent,1);
}
else
{
if(nodo.OptPrb == 0)
{ // calcular OptPrb
CalcVerifPrbP(nodo.ParentNode);
} // Get the best Prb.
parent.PessVal = nodo.OptVal;
parent.OptPrb = GetBestChildOptPrb(parent);
}
}
}
void PropagatePrb1(NODE &nodo,NODE &parent)
{
if((nodo.depth % 2) == 1)
parent.OptPrb = Product(parent,0);
else
parent.OptPrb = GetBestChildOptPrb(parent);
}
void Backup(int mode,int node)
{
while(nodes[node].ParentNode)
{
Propagate1(nodes[node],nodes[nodes[node].ParentNode],mode);
node = nodes[node].ParentNode;
}
}
void BackupPrb(int node)
{
while(nodes[node].ParentNode)
{
PropagatePrb1(nodes[node],nodes[nodes[node].ParentNode]);
node = nodes[node].ParentNode;
}
}
int RealVal2ndBstMoveAtRoot()
{
int Val2Max = -INFINITY;
int NumMoves,i,StartPos,pos;
NumMoves = nodes[ROOTNODE].MoveCount;
StartPos = nodes[ROOTNODE].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].RealVal > Val2Max && pos != BestMoveAtRoot)
Val2Max = nodes[pos].RealVal;
}
return Val2Max;
}
void PropagateDown(int parent)
{
int NumMoves,i,StartPos,pos;
NumMoves = nodes[parent].MoveCount;
StartPos = nodes[parent].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
nodes[pos].PessVal = nodes[parent].OptVal;
}
}
// Compute OptVal for all Opp nodes and calc OptPrb
void VerifyInit()
{
int i;
for(i = 1; i < FreeNode;i++)
{
if((nodes[i].depth %2) == 1)
{
nodes[i].OptVal = GetOptVal(i);
// propagate to children Press
PropagateDown(i);
}
CalcVerifPrb(nodes[i]);
}
}
int IsOver(int a)
{
int NumMoves,i,StartPos,pos;
// see if all children < MinAct
int SelectedNode = next[a];
NumMoves = nodes[SelectedNode].MoveCount;
StartPos = nodes[SelectedNode].toNext;
for(i = 0; i < NumMoves;i++)
{
pos = next[StartPos+i];
if(nodes[pos].OptPrb > MinAct)
return 0; // still thing to look at
}
return 1; // game over
}
long GetMicroTime(){ return 0; } // time do not elapse on this demo ;-) ...
void BStar()
{
int a;
long ini = GetMicroTime();
for(;;)
{
printf("SELECT Step\n");
while (GetRealValBestAtRoot() < OptValAnyOtherMoveAtRoot())
{
TargetVal = (OptVal2ndBest+RealValBest)/2;
// Select Root Node with greatest OptPrb;
a = GetBestOptPrb(ROOTNODE);
// Trace down the child subtree selecting
// For Player-to-Move nodes, child with largest OptPrb
// For Opponent-to-Move nodes, child with best RealVal
// Until arriving at a leaf node;
int SelectedNode = TraceDown(a);
Dumptree();
if(SecondRootOptPrb() < MinAct)
break;
printf("Selected node %c\n",'A'+SelectedNode-1);
// Get RealVal for each Child Node of this leaf;
// If it is a Player-to-Move node get OptVals for each Child;
Expand(SelectedNode,PLAYER);
// search path down to new expanded
a = TraceDown(a);
// Back up Values;
Backup(PLAYER,a);
// calculate new TargetVal
TargetVal = (nodes[BestMoveAtRoot].RealVal +OptValAnyOtherMoveAtRoot())/2;
// BackUp OptPrb
BackupPrb(a);
if ((GetMicroTime()-ini) >= TimeLimit) break; // EffortLimitsExceeded
}
printf("End od Select step\n");
TargetVal = RealVal2ndBstMoveAtRoot() - 1;
printf("VERIFY Step\n");
VerifyInit();
while(GetRealValBestAtRoot() >= TargetVal)
{
// Select Reply Node with greatest OptPrb;
// a = GetBestOptPrb(ROOTNODE,OPPONENT);
// Select reply Node with Greatest RealVal
a = BestMoveAtRoot;
// Trace down the child subtree selecting
// For Opponent-to-Move nodes, child with largest OptPrb
// For Player-to-Move nodes, child with best RealVal
// Until arriving at a leaf node;
int SelectedNode = TraceDownVerify(a);
Dumptree();
if(IsOver(a))
return; // Solved
printf("Selected node %c\n",'A'+SelectedNode-1);
// Get RealVal for each Child Node of this leaf;
// If it is an Opponent-to-Move node get OptVals for each Child;
Expand(SelectedNode,OPPONENT);
a = TraceDownVerify(a); // Search leaf node
Backup(OPPONENT,a); // and Back up Values;
if ((GetMicroTime()-ini) >= TimeLimit) return; // EffortLimitsExceeded
}
}
}
void LaunchBStar()
{
printf("expand root\n");
nodes[ROOTNODE].depth = -1;
Expand(ROOTNODE,PLAYER);
BStar();
}
int main(int argc, char* argv[])
{
LaunchBStar();
return 0;
}