Code: Select all
/* Super Fast 'Looking for Magics' by Tord Romstad
* Made 'Super-Fast' by Syed
Fahad, and Matthew Brades
Description: This program is an improvement
over Tord Romstad's Feeding in Randoms to generate magics. I (Syed Fahad)
started this working on this program when I ran Tord's program on my
andriod and it couldn't generate the numbers. It would always fail. So, I
began to speedup his program. Then, when I was stuck fighting a bug that
freezed the computation somewhere when generating Bishop Magics, I showed
my code to my close friend, Matthew Brades, who fixed the code and and then
further speeded up the code by Hardware Instructions. * * Results: *
BEFORE: This program took 2 seconds for generating all magics on Core Duo
2.8 GHz. * AFTER: This program generates all magics almost instantly w/o
compiler optimization on my 1 GHz Micromax Canvas Doodle 3 with 512 MB of
RAM using GCC on C4Driod. * Huge improvement, you see. * * There are a lot
of possible speed ups still, so... * Version: 1.3 */
// Changes in version 1.3
// 1. Prints RShifts and BShifts too.
// 2. Cleaned and commented the code a lot
// 3. Generates different magics in every run due
// to scrambling of random number seeds at
// startup.
// 4. No more prints comma at the end magic.
// Takes ~0.15 secs on my Micromax Canvas
// Doodle 3 with 512 MB of RAM and 1Ghz processor
#include <stdio.h>
#include <stdlib.h>
#include<time.h>
#define USE_32_BIT_MULTIPLICATIONS
typedef unsigned long long uint64;
// A faster alternative to (1ULL << square)
const uint64 BitMask[64] = {
0x1ULL, 0x2ULL, 0x4ULL, 0x8ULL, 0x10ULL, 0x20ULL, 0x40ULL, 0x80ULL,
0x100ULL, 0x200ULL, 0x400ULL, 0x800ULL, 0x1000ULL, 0x2000ULL,
0x4000ULL, 0x8000ULL, 0x10000ULL, 0x20000ULL, 0x40000ULL, 0x80000ULL,
0x100000ULL, 0x200000ULL, 0x400000ULL, 0x800000ULL, 0x1000000ULL,
0x2000000ULL, 0x4000000ULL, 0x8000000ULL, 0x10000000ULL, 0x20000000ULL,
0x40000000ULL, 0x80000000ULL, 0x100000000ULL, 0x200000000ULL,
0x400000000ULL, 0x800000000ULL, 0x1000000000ULL, 0x2000000000ULL,
0x4000000000ULL, 0x8000000000ULL, 0x10000000000ULL, 0x20000000000ULL,
0x40000000000ULL, 0x80000000000ULL, 0x100000000000ULL,
0x200000000000ULL, 0x400000000000ULL, 0x800000000000ULL,
0x1000000000000ULL, 0x2000000000000ULL, 0x4000000000000ULL,
0x8000000000000ULL, 0x10000000000000ULL, 0x20000000000000ULL,
0x40000000000000ULL, 0x80000000000000ULL, 0x100000000000000ULL,
0x200000000000000ULL, 0x400000000000000ULL, 0x800000000000000ULL,
0x1000000000000000ULL, 0x2000000000000000ULL, 0x4000000000000000ULL,
0x8000000000000000ULL
};
// Bishop and Rook masks
uint64 RMask[64];
uint64 BMask[64];
// Random number generator stuff
uint64 a, b, c, d;
#define ROTATE_L(x, k) ((x << k) | (x >> (64 - k)))
uint64 random_uint64()
{
uint64 e = a - ROTATE_L(b, 7);
a = b ^ ROTATE_L(c, 13);
b = c + ROTATE_L(d, 37);
c = d + e;
return d = e + a;
}
// Returns a randomnumber with only a few bits set
#define random_uint64_fewbits() (random_uint64() & random_uint64() & random_uint64())
// Counts the number of bits set
int count_1s(uint64 b)
{
// Non-GCC users
/* unsigned w = (unsigned)b >> 32, v = (unsigned)b; v -= (v >> 1) &
0x55555555; // 0-2 in 2 bits w -= (w >> 1) & 0x55555555; v = ((v >> 2)
& 0x33333333) + (v & 0x33333333); // 0-4 in 4 bits w = ((w >> 2) &
0x33333333) + (w & 0x33333333); v = ((v >> 4) + v + (w >> 4) + w) &
0x0F0F0F0F; return (int)((v * 0x01010101) >> 24); */
return __builtin_popcountll(b);
}
/*
const int BitTable[64] =
{ 63, 30, 3, 32, 25, 41, 22, 33, 15, 50, 42, 13, 11, 53, 19, 34, 61, 29, 2,
51, 21, 43, 45, 10, 18, 47, 1, 54, 9, 57, 0, 35, 62, 31, 40, 4, 49, 5, 52,
26, 60, 6, 23,
44, 46, 27, 56, 16, 7, 39, 48, 24, 59, 14, 12, 55, 38, 28, 58, 20, 37, 17,
36, 8
};
*/
// Clears and returns the index of the first bit (lsb)
int pop_1st_bit(uint64 * bb)
{
// Non-GCC:
/*
uint64 b = *bb ^ (*bb - 1); unsigned int fold = (unsigned)((b &
0xffffffff) ^ (b >> 32)); *bb &= (*bb - 1); return BitTable[(fold *
0x783a9b23) >> 26]; */
int lsb = __builtin_ctz(*bb);
*bb &= *bb - 1;
return lsb;
}
uint64 index_to_uint64(int index, int bits, uint64 m)
{
int i, j, k;
uint64 result = 0ULL;
for (i = 0; i < bits; i++)
{
j = pop_1st_bit(&m);
if ((index & (BitMask[i])) != 0)
result |= (BitMask[j]);
}
return result;
}
uint64 rmask(int sq)
{
return RMask[sq];
}
uint64 bmask(int sq)
{
return BMask[sq];
}
uint64 rmask_gen(int sq)
{
uint64 result = 0ULL;
int rk = sq % 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 6; r++)
result |= (BitMask[(fl + r * 8)]);
for (r = rk - 1; r >= 1; r--)
result |= (BitMask[(fl + r * 8)]);
for (f = fl + 1; f <= 6; f++)
result |= (BitMask[(f + rk * 8)]);
for (f = fl - 1; f >= 1; f--)
result |= (BitMask[(f + rk * 8)]);
return result;
}
uint64 bmask_gen(int sq)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 6 && f <= 6; r++, f++)
result |= (BitMask[(f + r * 8)]);
for (r = rk + 1, f = fl - 1; r <= 6 && f >= 1; r++, f--)
result |= (BitMask[(f + r * 8)]);
for (r = rk - 1, f = fl + 1; r >= 1 && f <= 6; r--, f++)
result |= (BitMask[(f + r * 8)]);
for (r = rk - 1, f = fl - 1; r >= 1 && f >= 1; r--, f--)
result |= (BitMask[(f + r * 8)]);
return result;
}
uint64 ratt(int sq, uint64 block)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1; r <= 7; r++)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (r = rk - 1; r >= 0; r--)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (f = fl + 1; f <= 7; f++)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
for (f = fl - 1; f >= 0; f--)
{
result |= (BitMask[(fl + r * 8)]);
if ((block & (BitMask[(fl + r * 8)])) != 0)
break;
}
return result;
}
uint64 batt(int sq, uint64 block)
{
uint64 result = 0ULL;
int rk = sq / 8, fl = sq % 8, r, f;
for (r = rk + 1, f = fl + 1; r <= 7 && f <= 7; r++, f++)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk + 1, f = fl - 1; r <= 7 && f >= 0; r++, f--)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk - 1, f = fl + 1; r >= 0 && f <= 7; r--, f++)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
for (r = rk - 1, f = fl - 1; r >= 0 && f >= 0; r--, f--)
{
result |= (BitMask[(f + r * 8)]);
if ((block & (BitMask[(f + r * 8)])) != 0)
break;
}
return result;
}
int transform(uint64 b, uint64 magic, int bits)
{
#if defined(USE_32_BIT_MULTIPLICATIONS)
return (unsigned)((int)b * (int)magic ^ (int)(b >> 32) * (int)(magic >> 32)) >> (32 - bits);
#else
return (int)((b * magic) >> (64 - bits));
#endif
}
uint64 find_magic(int sq, int m, int bishop)
{
uint64 mask, b[4096], a[4096], used[4096], magic;
int i, j, k, n, mbits, fail;
int c = 0;
mask = bishop ? bmask(sq) : rmask(sq);
n = count_1s(mask);
for (i = 0; i < (BitMask[n]); i++)
{
b[i] = index_to_uint64(i, n, mask);
a[i] = bishop ? batt(sq, b[i]) : ratt(sq, b[i]);
}
for (k = 0; k < 100000000; ++k)
{
magic = random_uint64_fewbits();
if (count_1s((mask * magic) & 0xFF00000000000000ULL) < 6)
continue;
for (i = 0; i < 4096; i++)
used[i] = 0ULL;
for (i = 0, fail = 0; (fail == 0) && i < (BitMask[n]); i++)
{
j = transform(b[i], magic, m);
if (used[j] == 0ULL)
used[j] = a[i];
else if (used[j] != a[i])
fail = 1;
}
if (fail == 0)
return magic;
}
printf("***Failed***\n");
return 0ULL;
}
const int RBits[64] = { 12, 11, 11, 11, 11, 11, 11, 12, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10,
10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10,
10, 10,
11, 11, 10, 10, 10, 10, 10, 10, 11, 11, 10, 10, 10, 10, 10, 10, 11, 12, 11,
11, 11,
11, 11, 11, 12
};
const int BBits[64] = { 6, 5, 5, 5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5,
5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 9, 9, 7, 5, 5, 5, 5, 7, 7, 7, 7, 5, 5, 5,
5, 5, 5,
5, 5, 5, 5, 6, 5, 5, 5, 5, 5, 5, 6
};
int main()
{
// Seeds for random number generator
a = 0xF1EA5EED, b = c = d = 0xD4E12C77;
int square;
uint64 RookMagics[64];
uint64 BishMagics[64];
// Generate rook and bishop masks
for (square = 0; square < 64; ++square)
{
RMask[square] = rmask_gen(square);
BMask[square] = bmask_gen(square);
}
// Scramble around
double scramble = clock();
while (scramble--)
random_uint64();
double startTime, endTime;
// Record the start time
startTime = clock();
// Generate Magics for each Square
for (square = 0; square < 64; square++)
{
RookMagics[square] = find_magic(square, RBits[square], 0);
BishMagics[square] = find_magic(square, BBits[square], 1);
}
// Record the end time
endTime = clock();
// Print the Rook Magics
printf("const uint64 RMagic[64] = {\n");
for (square = 0; square < 64; square++)
{
(square != 63) ? printf(" 0x%llxULL,\n", RookMagics[square]) :
printf(" 0x%llxULL\n};\n\n", RookMagics[63]);
}
// Print the Bishop Magics
printf("const uint64 BMagic[64] = {\n");
for (square = 0; square < 64; square++)
{
(square != 63) ? printf(" 0x%llxULL,\n", BishMagics[square]) :
printf(" 0x%llxULL\n};\n\n", BishMagics[63]);
}
// Print Rook Shifts. Shift = 64 - Bit of that square
printf("const uint64 RShifts[64] = {\n");
for (square = 0; square < 64; square++)
{
if (square && !(square % 8))
printf("\n");
(square != 63) ? printf(" %d,", 64 - RBits[square]) :
printf(" %d\n};\n\n", 64 - RBits[63]);
}
printf("const uint64 BShifts[64] = {\n");
for (square = 0; square < 64; square++)
{
if (square && !(square % 8))
printf("\n");
(square != 63) ? printf(" %d,", 64 - BBits[square]) :
printf(" %d\n};\n\n", 64 - BBits[63]);
}
// Print the time taken
printf("Took %f seconds", (endTime - startTime) / (double)CLOCKS_PER_SEC);
// fflush(stdout);
return 0;
}
Just a thought:
What about comparing a set of magics with another?