In lots of testing games, the number of raw moves per position was never more than 93, so the highest useful Ciura number is 57.
Here is a benchmark for x86 command line; I'm using Cygwin under Windows, so it should work as well under Linux. I'm using GCC with the "likely/unlikely" defines copied from the Linux kernel. The "dummy" output is just to keep the compiler from optimising away the benchmark.
I've included the "regular" shellsort implementation so that comparing the differences to the jump table implementation is easy. In fact, the inner two loops are copy-paste.
Code: Select all
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#define LIKELY(x) __builtin_expect(!!(x),1)
#define UNLIKELY(x) __builtin_expect(!!(x),0)
#define MV_PER_POS 60l
#define NUM_POS 8000000l
#define GAP_4 57
#define GAP_3 23
#define GAP_2 10
#define GAP_1 4
#define GAP_0 1
struct mvdata {
uint8_t flag;
uint8_t from;
uint8_t to;
int8_t mvv_lva;
};
typedef union {
struct mvdata m;
uint32_t u;
} MOVE;
double timediff_sec(struct timeval t0, struct timeval t1)
{
return ((t1.tv_sec - t0.tv_sec) + (t1.tv_usec - t0.tv_usec) / 1000000.0);
}
static void Shellsort_Opt(MOVE *restrict movelist, int N)
{
/*note: the jump table does not exactly match the shell sort gaps.
it does not make sense e.g. with N=11 to run through the loop
for gap=10 because the loop will run only once, that is not worth
the overhead.*/
static void *jump_table[] = {
/*N = 0..1*/
&&shellsort_no_action, &&shellsort_no_action,
/*N = 2..6*/
&&shellsort_gap_0, &&shellsort_gap_0, &&shellsort_gap_0,
&&shellsort_gap_0, &&shellsort_gap_0,
/*N = 7..12*/
&&shellsort_gap_1, &&shellsort_gap_1, &&shellsort_gap_1,
&&shellsort_gap_1, &&shellsort_gap_1, &&shellsort_gap_1,
/*N = 13 .. 25*/
&&shellsort_gap_2, &&shellsort_gap_2, &&shellsort_gap_2,
&&shellsort_gap_2, &&shellsort_gap_2, &&shellsort_gap_2,
&&shellsort_gap_2, &&shellsort_gap_2, &&shellsort_gap_2,
&&shellsort_gap_2, &&shellsort_gap_2, &&shellsort_gap_2,
&&shellsort_gap_2,
/*N = 26 .. 59*/
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3, &&shellsort_gap_3,
&&shellsort_gap_3, &&shellsort_gap_3,
/*N = 60*/
&&shellsort_gap_4
};
/*clip N to 60 for the jump, that's enough
because the maximum sequence gap is 57.*/
goto *jump_table[((N < 61) ? N : 60)];
shellsort_gap_4:
for (int i = GAP_4; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - GAP_4; (LIKELY(j >= 0)); j -= GAP_4)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+GAP_4].u = current_move.u;
}
movelist[j+GAP_4].u = tmp_move;
}
shellsort_gap_3:
for (int i = GAP_3; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - GAP_3; (LIKELY(j >= 0)); j -= GAP_3)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+GAP_3].u = current_move.u;
}
movelist[j+GAP_3].u = tmp_move;
}
shellsort_gap_2:
for (int i = GAP_2; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - GAP_2; (LIKELY(j >= 0)); j -= GAP_2)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+GAP_2].u = current_move.u;
}
movelist[j+GAP_2].u = tmp_move;
}
shellsort_gap_1:
for (int i = GAP_1; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - GAP_1; (LIKELY(j >= 0)); j -= GAP_1)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+GAP_1].u = current_move.u;
}
movelist[j+GAP_1].u = tmp_move;
}
shellsort_gap_0:
for (int i = GAP_0; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - GAP_0; (LIKELY(j >= 0)); j -= GAP_0)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+GAP_0].u = current_move.u;
}
movelist[j+GAP_0].u = tmp_move;
}
shellsort_no_action: /*for N=0 and N=1*/
return;
}
static void Shellsort(MOVE *restrict movelist, int N)
{
static int shell_sort_gaps[] = {1, 4, 10, 23, 57 /*, 132, 301, 701*/};
int sizeIndex;
for (sizeIndex = sizeof(shell_sort_gaps)/sizeof(shell_sort_gaps[0]) - 1; (LIKELY(sizeIndex >= 0)); sizeIndex--)
{
int i;
int gap = shell_sort_gaps[sizeIndex];
for (i = gap; (LIKELY(i < N)); i++)
{
int value = movelist[i].m.mvv_lva;
uint32_t tmp_move = movelist[i].u;
int j;
for (j = i - gap; (LIKELY(j >= 0)); j -= gap)
{
MOVE current_move;
current_move.u = movelist[j].u;
if (UNLIKELY(current_move.m.mvv_lva >= value))
break;
movelist[j+gap].u = current_move.u;
}
movelist[j+gap].u = tmp_move;
}
}
}
int MOVE_cmp_by_mvv_lva(const void *a, const void *b)
{
MOVE *ia = (MOVE *)a;
MOVE *ib = (MOVE *)b;
return (int)ib->m.mvv_lva - (int)ia->m.mvv_lva;
}
void Init_List(MOVE *restrict mv_list, size_t len)
{
size_t i;
MOVE amove;
amove.u = 0;
srand(0);
for (i = 0; i < len; i++)
{
amove.m.mvv_lva = (rand() & 0xff) - 128;
mv_list[i].u = amove.u;
}
}
int main(void)
{
size_t i;
MOVE *move_list;
struct timeval start_time, stop_time;
move_list = (MOVE *) calloc(MV_PER_POS*NUM_POS, sizeof(MOVE));
if (move_list == NULL)
{
printf("insufficient memory.\r\n");
return(1);
}
Init_List(move_list, MV_PER_POS*NUM_POS);
gettimeofday(&start_time, 0);
for (i = 0; i < MV_PER_POS*NUM_POS; i += MV_PER_POS)
Shellsort(move_list + i, MV_PER_POS);
gettimeofday(&stop_time, 0);
printf("shellsort: %.2lf seconds // dummy: %d\r\n", timediff_sec(start_time, stop_time), move_list[MV_PER_POS*NUM_POS - 1].m.mvv_lva);
Init_List(move_list, MV_PER_POS*NUM_POS);
gettimeofday(&start_time, 0);
for (i = 0; i < MV_PER_POS*NUM_POS; i += MV_PER_POS)
Shellsort_Opt(move_list + i, MV_PER_POS);
gettimeofday(&stop_time, 0);
printf("shellsort opt: %.2lf seconds // dummy: %d\r\n", timediff_sec(start_time, stop_time), move_list[MV_PER_POS*NUM_POS - 1].m.mvv_lva);
Init_List(move_list, MV_PER_POS*NUM_POS);
gettimeofday(&start_time, 0);
for (i = 0; i < MV_PER_POS*NUM_POS; i += MV_PER_POS)
qsort(move_list + i, MV_PER_POS, sizeof(MOVE), MOVE_cmp_by_mvv_lva);
gettimeofday(&stop_time, 0);
printf("quicksort: %.2lf seconds // dummy: %d\r\n", timediff_sec(start_time, stop_time), move_list[MV_PER_POS*NUM_POS - 1].m.mvv_lva);
free(move_list);
return 0;
}