I want experts to look at my c-source for hashtables

Discussion of chess software programming and technical issues.

Moderators: hgm, Rebel, chrisw

JensBNielsen

I want experts to look at my c-source for hashtables

Post by JensBNielsen »

My program Dabbaba (rating ca. 1800) is not working properly with hashtables.
I am not really a C programmer, and I have got some of the code for the hashtables from Jim Ablett.
It is the code for defining, reading from, and writing to these areas.
See some parts of my program below.

I do not understand all the lines; perhaps there are some obvious errors?

I hope some of you experts out there will take a look.

The most recent version of my program can be found here:
http://homepages.tesco.net/henry.ablett/jims.html
The C source (one big file) is included.

If you want to run Dabbaba with hashtables, set the use of hashtables to 1 in dabbaba.ini.

Best, Jens Bæk Nielsen



typedef struct {
    unsigned long int hash_id;
    unsigned long int hash_idx;
    signed char       hash_fr;
    signed char       hash_to;
    signed int        hash_score;
    signed char       hash_score_final; // 0=not final 1=final 2=vurdering()
    signed char       hash_depth;
    signed char       hash_ply;
    unsigned char     hash_mpc;
} hash_struct;

#define SIZEOFHASH 20      // hard-coded entry size; sizeof(hash_struct) is safer and cannot drift
#define HASHELEM 256*256*8 // must be a power of 2 - should be 256*256*16


hash_struct compa[HASHELEM];
hash_struct hash;
int hashint[8];





#define far                // 'far' is a 16-bit DOS-era keyword; defined away for modern compilers
int far *farp;
int far *wfarp;
unsigned long int uli_id, uli_idx, uli_id_index, hz, uli_sletigen;
unsigned long int hash_maxindex=HASHELEM, hash_totbytes, uli_sletigenx;

unsigned long int randi_id [h8+1][12];
unsigned long int randi_idx[h8+1][12];



hash_totbytes=(hash_maxindex)*sizeof(hash);
printf("hash_maxindex*sizeof(hash) = %lu bytes\n", hash_totbytes); // sizeof(hash) is 20

hash_totbytes=HASHELEM*SIZEOFHASH*2; // the *2 doubles the allocation; HASHELEM*sizeof(hash) is what is actually needed

farp = malloc((unsigned)hash_totbytes); // note: the (unsigned) cast can truncate the size,
                                        // and the result should be checked for NULL

printf("allocated %lu bytes for hashtables\n", hash_totbytes);


hash_maxindex-=1; // now usable as an AND-mask, because HASHELEM is a power of 2






for (x=0; x<=h8; ++x) {
  for (y=0; y<=11; ++y) {
    randi_id [x][y]=random()*256*256+random();
    randi_idx[x][y]=random()*256*256+random();
  }
}

randomize(); // note: seeding AFTER filling the tables means the tables above are
             // always the same; if that is not intended, call randomize() first





for (qqq=0; qqq<99; qqq+=1) // for (qqq=0; qqq<hash_maxindex; qqq+=23)
{
  hash.hash_id=qqq+1;       hash.hash_idx=qqq;
  hash.hash_fr=qqq % 13;    hash.hash_to=qqq % 17;
  hash.hash_depth=qqq % 12; hash.hash_score=qqq % 22222;
  hash.hash_score_final=qqq % 2;
  hash.hash_mpc=qqq % 53;   hash.hash_ply=qqq % 61;
  write_hash(qqq);
}
for (qqq=0; qqq<99; qqq+=1)
{
  read_hash(qqq);
  if (hash.hash_id!=qqq+1 or hash.hash_idx!=qqq or
      hash.hash_fr!=qqq % 13 or hash.hash_to!=qqq % 17 or
      hash.hash_depth!=qqq % 12 or hash.hash_score!=qqq % 22222 or
      hash.hash_score_final!=qqq % 2 or
      hash.hash_mpc!=qqq % 53 or hash.hash_ply!=qqq % 61)
    printf("Error hash ");
}
printf("\n End of check-hashtables\n"); // works ok - no errors

init_hash();



void init_hash()
{
  long unsigned int qqq;
  hash.hash_id=0;      hash.hash_fr=0;          hash.hash_to=0;
  hash.hash_depth=-99; hash.hash_score_final=0; tv_hash_used_elem=0;
  hash.hash_mpc=0;     hash.hash_ply=99;        hash.hash_score=0;

  for (qqq=0; qqq<=hash_maxindex; ++qqq)
  {
    hash.hash_idx=qqq; write_hash(qqq);
  }
}





void read_hash(unsigned long int key)
{
  hash=compa[key]; // full struct copy from the array...

  // ...then the first 16 bytes are overwritten again from farp.
  // Note: sizeof(hash_struct) is 20 here, so the last fields survive only
  // because compa[] holds a second full copy - the two stores are redundant.
  for (hz=0; hz<=7; ++hz) *(hashint+hz)=*(farp+key*8+hz);
  memcpy(&hash, hashint, 16);
}

void write_hash(unsigned long int key)
{
  if (key<=hash_maxindex and swhash>0)
  {
    compa[key]=hash;            // copy 1: the whole entry into the array
    memcpy(hashint, &hash, 16); // copy 2: only 16 of the 20 bytes go through farp
    for (hz=0; hz<=7; ++hz) *(farp+key*8+hz)=*(hashint+hz);
  }
  else {++tv_hash_error; happens[11]='A';}
}

void calculate_hashkey()
{
  int x; uli_id=uli_idx=0;
  for (x=a1; x<=h8; ++x)
  {
    // note: this 'or' only excludes values strictly between b_p and w_p
    // (presumably the empty-square code) - check that the encoding agrees
    if (sq[x]>=w_p or sq[x]<=b_p)
    {
      uli_idx=uli_idx exor randi_idx[x][bw_index(sq[x])];
      uli_id =uli_id  exor randi_id [x][bw_index(sq[x])];
    }
  }
  if (wh_bl equal black)
  {
    uli_idx=uli_idx exor randi_idx[0][0];
    uli_id =uli_id  exor randi_id [0][0];
  }

  if (p[ply].e_p!=not_allowed)
  {
    uli_idx=uli_idx exor randi_idx[0][1];
    uli_id =uli_id  exor randi_id [0][1];
  }
}


void opdat_hash(signed char fr, signed char to, signed int score, signed char ifinal)
{
  signed char final, surprise, zz;
  if (swhash>0 and p[ply].swnullhash==0
      and score<=(int_max) and score>=(int_min) // was 'score>>(int_min)': '>>' is a right-shift, not a comparison
     )
  {
    happens[1]='1';
    if (move_fr [ply_1][move_pointer[ply_1]+1] equal stop) final=1;
    else final=0;
    if (ifinal equal 2) final=2;
    uli_id_index=p[ply_1].hash_idx & hash_maxindex;
    read_hash(uli_id_index); // destroys adjusting of mate-scores
    if (hash.hash_fr equal 0) {++tv_hash_used_elem; goto opdat;}
    if (ifinal equal 2) goto ej;

    if (mpc>hash.hash_mpc or (iteration-ply_1)>=hash.hash_depth)
      goto opdat;

  ej:
    ++tv_hash_ejskriv; goto efter_skriv;
  opdat:
    hash.hash_depth=iteration-ply_1;
    hash.hash_score=score;
    if (score>(int_max-100)) hash.hash_score=score+ply_1;
    if (score<(int_min+100)) hash.hash_score=score-ply_1;
    hash.hash_score_final=final;
    hash.hash_fr=fr; hash.hash_to=to;

    hash.hash_mpc=mpc; hash.hash_ply=ply_1;
    hash.hash_id=p[ply_1].hash_id;
    hash.hash_idx=p[ply_1].hash_idx;

    uli_id_index=hash.hash_idx & hash_maxindex;
    if (fr==0 or fr==stop or to==0) ++tv_hash_error;
    if (disctrace>0 and ply<=disctrace) print_hash_write();
    write_hash(uli_id_index); ++tv_hash_skriv;
  efter_skriv:
    ;
  }
}
Dann Corbit
Posts: 12540
Joined: Wed Mar 08, 2006 8:57 pm
Location: Redmond, WA USA

Re: I want experts to look at my c-source for hashtables

Post by Dann Corbit »

A good strategy to debug hash tables is to have two hash table functions:
1. Incrementally build the hash like normal.
2. Rebuild the entire hash from scratch if _DEBUG is defined

Then, in debug mode, your program should always perform both operations, and any time the board is updated in any way you can compare the two hashes and find out immediately where the problem is. If everything is correct, both hash values will agree; if anything is wrong, they will not.

The most likely place to find the problem is the unmake-move step. I would guess that at least half of all hashing errors stem from something being left out of unmake().

Often, it is some side point like failure to update for castle rights or e.p. status.
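Dann's debug strategy can be sketched as follows. This is a minimal, self-contained toy, not Dabbaba's code: the table seeding, the `zobrist`/`board` names and the single-move `make_move()` are all illustrative assumptions. The key is updated incrementally in make_move(), and an assert compares it against a from-scratch rebuild after every update.

```c
#include <assert.h>
#include <stdint.h>

#define SQUARES 64
#define PIECES  12                 /* piece codes 1..12; 0 = empty square */

static uint64_t zobrist[SQUARES][PIECES + 1];
static int      board[SQUARES];
static uint64_t key;               /* the incrementally maintained key */

/* Fill the tables with fixed pseudo-random 64-bit numbers (xorshift64). */
static void init_zobrist(void)
{
    uint64_t s = 0x9E3779B97F4A7C15ULL;
    for (int sq = 0; sq < SQUARES; ++sq)
        for (int pc = 1; pc <= PIECES; ++pc) {
            s ^= s << 13; s ^= s >> 7; s ^= s << 17;
            zobrist[sq][pc] = s;
        }
}

/* Slow reference version: rebuild the whole key from the board. */
static uint64_t full_key(void)
{
    uint64_t k = 0;
    for (int sq = 0; sq < SQUARES; ++sq)
        if (board[sq]) k ^= zobrist[sq][board[sq]];
    return k;
}

/* Incremental update for a (possibly capturing) move. */
static void make_move(int from, int to)
{
    key ^= zobrist[from][board[from]];   /* piece leaves 'from'        */
    if (board[to])
        key ^= zobrist[to][board[to]];   /* captured piece disappears  */
    key ^= zobrist[to][board[from]];     /* piece lands on 'to'        */
    board[to] = board[from];
    board[from] = 0;
    assert(key == full_key());           /* Dann's debug-mode check    */
}
```

In a real engine the assert would be compiled in only under _DEBUG; the first move that leaves key != full_key() pinpoints exactly which update was forgotten.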
Dann Corbit
Posts: 12540
Joined: Wed Mar 08, 2006 8:57 pm
Location: Redmond, WA USA

Re: I want experts to look at my c-source for hashtables

Post by Dann Corbit »

If you want an instant working hash, just do the following:
Every time you change anything on the board, feed the entire board and game state to the UMAC hash function. Mask off however many bits you need for your hash table index and it should work fine (make your hash table size a power of 2).
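A sketch of that idea, with FNV-1a standing in for UMAC (the real UMAC needs a key and a nonce; FNV-1a keeps the example self-contained). The position struct and its field names are made up for illustration; note the memset before filling it, so any padding bytes are deterministic when we hash the raw struct bytes.

```c
#include <stdint.h>
#include <string.h>

/* Illustrative game state - field names are not Dabbaba's.
   memset it to zero before filling, because we hash its raw bytes. */
typedef struct {
    unsigned char board[64];       /* piece code per square */
    unsigned char side_to_move;
    unsigned char castle_rights;
    signed char   ep_square;       /* -1 = none */
} position_t;

/* FNV-1a over the raw bytes - a simple stand-in for UMAC. */
static uint64_t hash_position(const position_t *p)
{
    const unsigned char *b = (const unsigned char *)p;
    uint64_t h = 0xCBF29CE484222325ULL;
    for (size_t i = 0; i < sizeof *p; ++i) {
        h ^= b[i];
        h *= 0x100000001B3ULL;
    }
    return h;
}

/* Power-of-2 table: the index is just the low bits of the hash. */
#define TABLE_BITS 20
#define TABLE_MASK ((1UL << TABLE_BITS) - 1)

static unsigned long table_index(const position_t *p)
{
    return (unsigned long)(hash_position(p) & TABLE_MASK);
}
```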

At some point, you will want to improve it.

A Zobrist hash is the best hash to use for game playing, because undo is so easy with it.
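The reason undo is easy: XOR is its own inverse, so unmaking a move is just repeating the same XORs. A tiny illustration (the constants below are arbitrary placeholders, not real Zobrist tables):

```c
#include <stdint.h>

/* Applying the same Zobrist terms twice restores the key exactly,
   so the same call both makes and unmakes the move's key update. */
static uint64_t zobrist_toggle(uint64_t key, uint64_t from_term, uint64_t to_term)
{
    return key ^ from_term ^ to_term;
}
```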
MattieShoes
Posts: 718
Joined: Fri Mar 20, 2009 8:59 pm

Re: I want experts to look at my c-source for hashtables

Post by MattieShoes »

Interesting -- In my engine, I just store the hash key at each position and when I unmake a move, I just grab the old hash key, so I never have errors in unmake. Is there some reason why this is a bad idea?
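That scheme can be sketched like this (the names are illustrative, not from any particular engine): push the current key before the move, pop it on unmake, so no reverse updates are ever needed.

```c
#include <stdint.h>

#define MAX_PLY 64

static uint64_t key_stack[MAX_PLY];  /* one saved key per ply */
static uint64_t key;                 /* current position key  */
static int      ply;

/* 'delta' stands for the XOR of all Zobrist terms of the move. */
static void make_move_key(uint64_t delta)
{
    key_stack[ply++] = key;          /* remember the key before the move */
    key ^= delta;
}

static void unmake_move_key(void)
{
    key = key_stack[--ply];          /* restore: nothing to get wrong */
}
```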
ffao

Re: I want experts to look at my c-source for hashtables

Post by ffao »

JensBNielsen wrote: I do not understand all the lines
Am I the only one who finds this a big red flag?

It seems to me (from reading open-source engines) that a lot of chess programmers have the horrible habit of shortening variable names so that they type less, which definitely doesn't help the cause of anyone trying to understand what's going on (what's qqq? hz? uli?)

This should be your obvious first step: you can't have parts of your program which you don't understand, especially when they don't work.

Make an effort to see what's going on. If you absolutely can't do it, it might be helpful to at least try to write something similar, because then you will get the basic principles which don't change from implementation to implementation.

Felipe
Mincho Georgiev
Posts: 454
Joined: Sat Apr 04, 2009 6:44 pm
Location: Bulgaria

Re: I want experts to look at my c-source for hashtables

Post by Mincho Georgiev »

MattieShoes wrote:Interesting -- In my engine, I just store the hash key at each position and when I unmake a move, I just grab the old hash key, so I never have errors in unmake. Is there some reason why this is a bad idea?
I do it the same way, and I really think that if the hash function(s) is working correctly this is even better: you get another benefit that way, a natural debugging of the previous key. If you do an incremental update on the key both in make and again in unmake, the chance of bugs is twice as high. I would like to read other opinions, though.
Edmund
Posts: 670
Joined: Mon Dec 03, 2007 3:01 pm
Location: Barcelona, Spain

Re: I want experts to look at my c-source for hashtables

Post by Edmund »

xcomponent wrote:
MattieShoes wrote:Interesting -- In my engine, I just store the hash key at each position and when I unmake a move, I just grab the old hash key, so I never have errors in unmake. Is there some reason why this is a bad idea?
I do it the same way, and I really think that if the hash function(s) is working correctly this is even better: you get another benefit that way, a natural debugging of the previous key. If you do an incremental update on the key both in make and again in unmake, the chance of bugs is twice as high. I would like to read other opinions, though.
With the _DEBUG flag I do both and compare the two keys at the end. This way I spot almost all illegal moves made on the board. In release mode I just copy back the old key.
Still, I am not sure which way is faster. One extreme is to store the whole board, castling flags, en-passant flag etc., so that unmake is just copying the variables back. The other extreme is to undo everything incrementally, including the hash key. The reason no one stores the whole board is its size: at large remaining depths those variables will no longer sit in the processor cache, and probing them from RAM would definitely take longer than the incremental version. Hash keys are much smaller, but even so, in cases where a lot of calculation happens between make and unmake, the incremental version would probably help if it can be done efficiently enough.
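The "store everything" extreme looks roughly like this (a toy position struct, invented for the example): snapshot the whole state per ply, and unmake by copying it back.

```c
#include <string.h>

#define MAX_PLY 64

/* Toy position - in a real engine this struct is what makes the copy big. */
typedef struct {
    unsigned char board[64];
    unsigned char castle_rights;
    signed char   ep_square;
} position_t;

static position_t pos;                 /* the live position      */
static position_t undo_stack[MAX_PLY];
static int        sp;

static void save_position(void)        /* call before the move   */
{
    undo_stack[sp++] = pos;            /* one whole-struct copy  */
}

static void restore_position(void)     /* unmake = copy it back  */
{
    pos = undo_stack[--sp];
}
```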
JensBNielsen

Re: I want experts to look at my c-source for hashtables

Post by JensBNielsen »

You are right - good variable names make it easier to read.

But what I meant was that I am not an experienced C programmer (Dabbaba is my only real C program), and I find this strange:


hash=compa[key];
{for (hz=0; hz<=7; ++hz) *(hashint+hz)=*(farp+key*8+hz);
memcpy(&hash, hashint, 16);
}

Do you really have to use a loop like this to copy a piece of memory?
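No. A single memcpy of the whole struct (or a plain struct assignment) does the job, and it also avoids the suspicious hard-coded 16: the loop-plus-memcpy path above moves only 16 bytes, while sizeof(hash_struct) appears to be 20 on the poster's compiler, so the last fields would be lost if compa[] did not hold a second full copy. A sketch with a struct shaped like the one in the first post:

```c
#include <string.h>

/* Same shape as the struct in the first post. */
typedef struct {
    unsigned long hash_id;
    unsigned long hash_idx;
    signed char   hash_fr;
    signed char   hash_to;
    signed int    hash_score;
    signed char   hash_score_final;
    signed char   hash_depth;
    signed char   hash_ply;
    unsigned char hash_mpc;
} hash_struct;

#define HASHELEM (256*256*8)
static hash_struct table[HASHELEM];

/* One full-struct copy each way - no loop, no magic byte count. */
static void write_entry(unsigned long key, const hash_struct *e)
{
    memcpy(&table[key], e, sizeof *e);   /* or simply: table[key] = *e; */
}

static void read_entry(unsigned long key, hash_struct *e)
{
    memcpy(e, &table[key], sizeof *e);   /* or simply: *e = table[key]; */
}
```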
JensBNielsen

Re: I want experts to look at my c-source for hashtables

Post by JensBNielsen »

I also do that.

It only requires a little memory: one hash key for each ply of your maximum depth.
MattieShoes
Posts: 718
Joined: Fri Mar 20, 2009 8:59 pm

Re: I want experts to look at my c-source for hashtables

Post by MattieShoes »

Codeman wrote:
xcomponent wrote:
MattieShoes wrote:Interesting -- In my engine, I just store the hash key at each position and when I unmake a move, I just grab the old hash key, so I never have errors in unmake. Is there some reason why this is a bad idea?
I do it the same way, and I really think that if the hash function(s) is working correctly this is even better: you get another benefit that way, a natural debugging of the previous key. If you do an incremental update on the key both in make and again in unmake, the chance of bugs is twice as high. I would like to read other opinions, though.
With the _DEBUG flag I do both and compare the two keys at the end. This way I spot almost all illegal moves made on the board. In release mode I just copy back the old key.
Still, I am not sure which way is faster. One extreme is to store the whole board, castling flags, en-passant flag etc., so that unmake is just copying the variables back. The other extreme is to undo everything incrementally, including the hash key. The reason no one stores the whole board is its size: at large remaining depths those variables will no longer sit in the processor cache, and probing them from RAM would definitely take longer than the incremental version. Hash keys are much smaller, but even so, in cases where a lot of calculation happens between make and unmake, the incremental version would probably help if it can be done efficiently enough.
I store the move made, the captured piece, the fifty-move counter, the en-passant variable, the hash key, and the castling permissions when I make a move. When I unmake, I replace the piece manually and correct the piece tables, but just copy the other values back. It makes for a very simple takeback function at least. I assumed whatever overhead comes from fetching and restoring the values would be covered by the branching involved in actually updating the variables in reverse, but I can't say I've actually tested that assumption. Unless the difference is stark, I'll go with the simpler code :-)
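That takeback record could look roughly like this (field names and encodings are illustrative, not from any specific engine):

```c
#include <stdint.h>

#define MAX_PLY 64

/* Everything saved when a move is made, restored verbatim on unmake. */
typedef struct {
    int      move;            /* the move itself (encoding up to you) */
    int      captured_piece;  /* 0 if the move was not a capture      */
    int      fifty_move;      /* halfmove clock before the move       */
    int      ep_square;       /* en-passant square before the move    */
    uint64_t hash_key;        /* position key before the move         */
    int      castle_rights;   /* castling permissions before the move */
} undo_t;

static undo_t undo_stack[MAX_PLY];
static int    undo_sp;

static void push_undo(const undo_t *u) { undo_stack[undo_sp++] = *u; }
static undo_t pop_undo(void)           { return undo_stack[--undo_sp]; }
```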