I have no more time to investigate this, but I want to summarize what I found:
I'm compiling stockfish under OS X 10.6 on a Nehalem Mac Pro using gcc-4.8:
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/opt/local/libexec/gcc/x86_64-apple-darwin10/4.8.1/lto-wrapper
Target: x86_64-apple-darwin10
Configured with: ../gcc-4.8.1/configure --prefix=/opt/local --build=x86_64-apple-darwin10 --enable-languages=c,c++,objc,obj-c++,lto,fortran,java --libdir=/opt/local/lib/gcc48 --includedir=/opt/local/include/gcc48 --infodir=/opt/local/share/info --mandir=/opt/local/share/man --datarootdir=/opt/local/share/gcc-4.8 --with-local-prefix=/opt/local --with-system-zlib --disable-nls --program-suffix=-mp-4.8 --with-gxx-include-dir=/opt/local/include/gcc48/c++/ --with-gmp=/opt/local --with-mpfr=/opt/local --with-mpc=/opt/local --with-ppl=/opt/local --with-cloog=/opt/local --enable-cloog-backend=isl --disable-cloog-version-check --enable-stage1-checking --disable-multilib --enable-lto --enable-libstdcxx-time --with-as=/opt/local/bin/as --with-ld=/opt/local/bin/ld --with-ar=/opt/local/bin/ar --with-bugurl=https://trac.macports.org/newticket --with-pkgversion='MacPorts gcc48 4.8.1_1'
Thread model: posix
gcc version 4.8.1 (MacPorts gcc48 4.8.1_1)
I've replaced each popcount call in the Stockfish source with gcc's builtin popcount (the exact replacement snippet is missing from this copy of the post).
I've included -mpopcnt in the makefile, which instructs gcc to replace the builtin popcount with the hardware popcnt instruction.
I make a non-pgo build:
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] make build ARCH=osx-x86-64
make ARCH=osx-x86-64 COMP=gcc config-sanity
Config:
debug: 'no'
optimize: 'yes'
arch: 'x86_64'
os: 'osx'
bits: '64'
prefetch: 'yes'
bsfq: 'yes'
popcnt: 'yes'
sse: 'yes'
Flags:
CXX: g++
CXXFLAGS: -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
LDFLAGS: -lpthread -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
Testing config sanity. If this fails, try 'make help' ...
make ARCH=osx-x86-64 COMP=gcc all
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o benchmark.o benchmark.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitbase.o bitbase.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitboard.o bitboard.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o book.o book.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o endgame.o endgame.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o evaluate.o evaluate.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o main.o main.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o material.o material.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o misc.o misc.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movegen.o movegen.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movepick.o movepick.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o notation.o notation.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o pawns.o pawns.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o position.o position.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o search.o search.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o thread.o thread.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o timeman.o timeman.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o tt.o tt.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o uci.o uci.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o ucioption.o ucioption.cpp
g++ -o stockfish benchmark.o bitbase.o bitboard.o book.o endgame.o evaluate.o main.o material.o misc.o movegen.o movepick.o notation.o pawns.o position.o search.o thread.o timeman.o tt.o uci.o ucioption.o -lpthread -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
I run Stockfish's bench command; the signature is correct for the source I'm using:
Code: Select all
===========================
Total time (ms) : 3663
Nodes searched : 4558173
Nodes/second : 1244382
The executable uses the popcnt instruction, as confirmed by otool (and a hex editor too):
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] otool -tvQ ./stockfish | grep popcnt
0000000100003c75 popcnt %r13,%rax
0000000100003e32 popcnt %rax,%rax
0000000100004155 popcnt %r13,%rax
0000000100004312 popcnt %rax,%rax
0000000100006992 popcnt 0x00000400(%rax,%rdi,8),%r8
000000010000b6e7 popcnt %rsi,%rsi
000000010000c1bb popcnt %rax,%rdx
000000010000c2de popcnt %rbp,%rbp
000000010000c2f0 popcnt %rdx,%rdx
000000010000c4e1 popcnt %rcx,%rcx
000000010000c4f4 popcnt %rdx,%rdx
000000010000c7dd popcnt %rcx,%rcx
000000010000c7fc popcnt %r10,%r10
000000010000c84b popcnt %r15,%r15
000000010000ca52 popcnt %rcx,%rcx
000000010000ca69 popcnt %r11,%r11
000000010000cacb popcnt %rcx,%rcx
000000010000cbfb popcnt %r11,%r11
000000010000cc0f popcnt %rax,%rax
000000010000cde0 popcnt %r11,%r11
000000010000cdf9 popcnt %rcx,%rcx
000000010000d09c popcnt %rcx,%rcx
000000010000d0b8 popcnt %r10,%r10
000000010000d0fe popcnt %r13,%rcx
000000010000d2e2 popcnt %rcx,%rcx
000000010000d2f7 popcnt %r11,%r11
000000010000d351 popcnt %rsi,%rcx
000000010000d519 popcnt %rax,%rcx
000000010000d692 popcnt %rdi,%rcx
000000010000d6a9 popcnt %r8,%r8
000000010000d6c9 popcnt %rax,%rax
000000010000d6ee popcnt %r9,%r9
000000010000d86c popcnt %rcx,%rdi
000000010000d9dc popcnt %rdi,%rdi
000000010000d9f3 popcnt %r9,%r9
000000010000da13 popcnt %rcx,%rcx
000000010000da38 popcnt %rsi,%rsi
000000010000e340 popcnt %rcx,%rsi
000000010000e385 popcnt %rcx,%rcx
000000010000e4c7 popcnt %rax,%rdx
000000010000e820 popcnt %rcx,%rcx
000000010000e850 popcnt %rax,%rax
000000010000e880 popcnt %rsi,%rsi
000000010000e8b0 popcnt %rdi,%rdi
000000010000f068 popcnt %rax,%rdx
000000010000f17d popcnt %r13,%r13
000000010000f18f popcnt %rdx,%rdx
000000010000f380 popcnt %rcx,%rcx
000000010000f393 popcnt %rdx,%rdx
000000010000f68c popcnt %rcx,%rcx
000000010000f6ab popcnt %r10,%r10
000000010000f6fa popcnt %r15,%r15
000000010000f8f2 popcnt %rcx,%rcx
000000010000f909 popcnt %r11,%r11
000000010000f96b popcnt %rcx,%rcx
000000010000fb2b popcnt %r13,%r13
000000010000fb3d popcnt %rax,%rax
000000010000fd10 popcnt %r11,%r11
000000010000fd27 popcnt %rcx,%rcx
000000010000ffcd popcnt %rcx,%rcx
000000010000ffef popcnt %r10,%r10
0000000100010034 popcnt %rax,%rcx
0000000100010222 popcnt %rcx,%rcx
000000010001023a popcnt %r11,%r11
0000000100010290 popcnt %rdi,%rcx
00000001000104aa popcnt %rax,%rcx
0000000100010620 popcnt %rdi,%rcx
0000000100010637 popcnt %r8,%r8
0000000100010657 popcnt %rax,%rax
000000010001067c popcnt %r9,%r9
00000001000107f7 popcnt %rcx,%rdi
0000000100010971 popcnt %r11,%rdi
0000000100010988 popcnt %r9,%r9
00000001000109a8 popcnt %rcx,%rcx
00000001000109cd popcnt %rsi,%rsi
0000000100011323 popcnt %rdx,%rdx
000000010001135e popcnt %rsi,%rsi
0000000100011535 popcnt %rdx,%rdx
0000000100011585 popcnt %rsi,%rax
0000000100011837 popcnt %rax,%rdx
0000000100011d10 popcnt %rcx,%rcx
0000000100011d40 popcnt %rdi,%rdi
0000000100011d70 popcnt %rax,%rax
0000000100011da0 popcnt %rsi,%rsi
000000010001d323 popcnt %rcx,%rcx
000000010001d455 popcnt %rsi,%rdx
000000010001d462 popcnt %r12,%r12
000000010001d5a4 popcnt %rcx,%rcx
000000010001d6f9 popcnt %rsi,%rdx
000000010001d702 popcnt %r14,%r14
Now clean and make a profile-build, using the same source and makefile:
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] make clean
rm -f stockfish stockfish.exe *.o .depend *~ core bench.txt *.gcda
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] make profile-build ARCH=osx-x86-64
make ARCH=osx-x86-64 COMP=gcc config-sanity
Config:
debug: 'no'
optimize: 'yes'
arch: 'x86_64'
os: 'osx'
bits: '64'
prefetch: 'yes'
bsfq: 'yes'
popcnt: 'yes'
sse: 'yes'
Flags:
CXX: g++
CXXFLAGS: -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
LDFLAGS: -lpthread -Wall -Wcast-qual -fno-exceptions -fno-rtti -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
Testing config sanity. If this fails, try 'make help' ...
Step 0/4. Preparing for profile build.
make ARCH=osx-x86-64 COMP=gcc gcc-profile-prepare
make ARCH=osx-x86-64 COMP=gcc gcc-profile-clean
Step 1/4. Building executable for benchmark ...
make ARCH=osx-x86-64 COMP=gcc gcc-profile-make
make ARCH=osx-x86-64 COMP=gcc \
EXTRACXXFLAGS='-fprofile-generate' \
EXTRALDFLAGS='-lgcov' \
all
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o benchmark.o benchmark.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitbase.o bitbase.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitboard.o bitboard.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o book.o book.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o endgame.o endgame.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o evaluate.o evaluate.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o main.o main.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o material.o material.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o misc.o misc.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movegen.o movegen.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movepick.o movepick.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o notation.o notation.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o pawns.o pawns.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o position.o position.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o search.o search.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o thread.o thread.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o timeman.o timeman.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o tt.o tt.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o uci.o uci.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o ucioption.o ucioption.cpp
g++ -o stockfish benchmark.o bitbase.o bitboard.o book.o endgame.o evaluate.o main.o material.o misc.o movegen.o movepick.o notation.o pawns.o position.o search.o thread.o timeman.o tt.o uci.o ucioption.o -lgcov -lpthread -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-generate -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
Step 2/4. Running benchmark for pgo-build ...
Position: 1/16
Position: 2/16
Position: 3/16
Position: 4/16
Position: 5/16
Position: 6/16
Position: 7/16
Position: 8/16
Position: 9/16
Position: 10/16
Position: 11/16
Position: 12/16
Position: 13/16
Position: 14/16
Position: 15/16
Position: 16/16
===========================
Total time (ms) : 1603
Nodes searched : 1358900
Nodes/second : 847723
Step 3/4. Building final executable ...
make ARCH=osx-x86-64 COMP=gcc gcc-profile-use
make ARCH=osx-x86-64 COMP=gcc \
EXTRACXXFLAGS='-fprofile-use' \
EXTRALDFLAGS='-lgcov' \
all
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o benchmark.o benchmark.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitbase.o bitbase.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o bitboard.o bitboard.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o book.o book.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o endgame.o endgame.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o evaluate.o evaluate.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o main.o main.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o material.o material.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o misc.o misc.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movegen.o movegen.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o movepick.o movepick.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o notation.o notation.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o pawns.o pawns.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o position.o position.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o search.o search.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o thread.o thread.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o timeman.o timeman.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o tt.o tt.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o uci.o uci.cpp
g++ -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT -c -o ucioption.o ucioption.cpp
g++ -o stockfish benchmark.o bitbase.o bitboard.o book.o endgame.o evaluate.o main.o material.o misc.o movegen.o movepick.o notation.o pawns.o position.o search.o thread.o timeman.o tt.o uci.o ucioption.o -lgcov -lpthread -Wall -Wcast-qual -fno-exceptions -fno-rtti -fprofile-use -ansi -pedantic -Wno-long-long -Wextra -Wshadow -DNDEBUG -O3 -fno-tree-pre -DIS_64BIT -msse -DUSE_BSFQ -mpopcnt -DUSE_POPCNT
Step 4/4. Deleting profile data ...
make ARCH=osx-x86-64 COMP=gcc gcc-profile-clean
Again, the executable produces the correct bench signature:
Code: Select all
===========================
Total time (ms) : 3584
Nodes searched : 4558173
Nodes/second : 1271811
But no indication of a popcnt instruction, according to otool (and a hex editor too):
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] otool -tvQ ./stockfish | grep popcnt
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src]
What is especially mystifying is that repeating the entire process with gcc-4.6 yields a PGO binary that includes the popcnt instruction, and is clearly faster than the PGO binary produced by gcc-4.8:
Code: Select all
===========================
Total time (ms) : 3480
Nodes searched : 4558173
Nodes/second : 1309819
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] otool -tvQ ./stockfish | grep popcnt
0000000100002973 popcnt %r8,%rsi
0000000100002b71 popcnt %r10,%rax
00000001000051f9 popcnt 0x00000400(%rax,%r11,8),%r9
0000000100007afc popcnt %rax,%rax
0000000100007d33 popcnt %r10,%r10
0000000100008195 popcnt %rax,%rax
00000001000084c1 popcnt %rax,%rax
000000010000864c popcnt %rdx,%r15
000000010000886b popcnt %r8,%r8
0000000100008ba6 popcnt %rdx,%r15
0000000100008daa popcnt %rdx,%r15
0000000100008f57 popcnt %r9,%r9
000000010000971a popcnt %r15,%r13
0000000100009723 popcnt %r8,%rdx
0000000100009bef popcnt %r13,%rsi
0000000100009d1b popcnt %rsi,%r8
0000000100009d3b popcnt %rcx,%rcx
0000000100009df9 popcnt %rcx,%rcx
0000000100009ec9 popcnt %rcx,%r10
000000010000a159 popcnt %rcx,%r14
000000010000a1e9 popcnt %rsi,%r14
000000010000a2a3 popcnt %rcx,%r12
000000010000a4e9 popcnt %r9,%r14
000000010000a52d popcnt %rcx,%rax
000000010000a7c9 popcnt %rcx,%rcx
000000010000a80d popcnt %rcx,%rax
000000010000aac5 popcnt %r8,%r8
000000010000aad5 popcnt %rax,%r8
000000010000aba5 popcnt %r10,%rdx
000000010000ac23 popcnt %r14,%rdx
000000010000acd6 popcnt %rcx,%r14
000000010000acef popcnt %r9,%r9
000000010000ad66 popcnt %rcx,%r10
000000010000ad7d popcnt %r8,%r8
000000010000add0 popcnt %rsi,%r11
000000010000ae57 popcnt %rdx,%r10
000000010000af94 popcnt %r10,%r10
000000010000b054 popcnt %rdx,%r14
000000010000b120 popcnt %rcx,%rcx
000000010000b191 popcnt %rdx,%rdx
000000010000b1cb popcnt %rcx,%rcx
000000010000b1f4 popcnt %rdi,%rdi
000000010000b2e9 popcnt %rcx,%rcx
000000010000b313 popcnt %r9,%r8
000000010000b325 popcnt %r10,%r10
000000010000b3cf popcnt %rcx,%rsi
000000010000b483 popcnt %rcx,%rcx
000000010000ba29 popcnt %rax,%rcx
000000010000bac2 popcnt %rax,%rcx
000000010000bb93 popcnt %rsi,%r8
000000010000bbaa popcnt %rax,%r10
000000010000bd6d popcnt %rcx,%r15
000000010000bd86 popcnt %rdx,%r15
000000010000c08c popcnt %rcx,%rcx
000000010000c09e popcnt %r11,%r11
000000010000c11f popcnt %r15,%rax
000000010000c28a popcnt %rcx,%r9
000000010000c2a1 popcnt %rax,%r11
000000010000c30e popcnt %rsi,%rax
000000010000c423 popcnt %rcx,%rcx
000000010000c435 popcnt %rdx,%rsi
000000010000c5d3 popcnt %rcx,%r15
000000010000c5ec popcnt %rdx,%rdx
000000010000c86c popcnt %rcx,%r13
000000010000c88b popcnt %r11,%r11
000000010000c901 popcnt %r13,%rax
000000010000ca77 popcnt %rcx,%r8
000000010000ca95 popcnt %rax,%rax
000000010000caf3 popcnt %rsi,%r11
000000010000cc47 popcnt %rsi,%r9
000000010000ccbd popcnt %rax,%rdi
000000010000cd2f popcnt %rcx,%r12
000000010000cdeb popcnt %rsi,%r14
000000010000ce06 popcnt %rdi,%rdi
000000010000ce1e popcnt %rcx,%r9
000000010000ce44 popcnt %rdx,%rdx
000000010000cf96 popcnt %rcx,%r9
000000010000d007 popcnt %rdx,%r11
000000010000d073 popcnt %rcx,%rcx
000000010000d139 popcnt %r9,%r11
000000010000d154 popcnt %r10,%r10
000000010000d16e popcnt %rcx,%rcx
000000010000d193 popcnt %rdx,%rsi
00000001000163ee popcnt %r12,%r11
0000000100016684 popcnt %r15,%r12
000000010001694f popcnt %r15,%r15
0000000100016961 popcnt %rbp,%r13
00000001000169ac popcnt %r13,%rbp
00000001000169b4 popcnt %rbx,%r10
Here's the makefile, for reference:
Code: Select all
LZsMacPro-OSX6: ~/Documents/Chess/Test/Stockfish/src] more Makefile
# Stockfish, a UCI chess playing engine derived from Glaurung 2.1
# Copyright (C) 2004-2008 Tord Romstad (Glaurung author)
# Copyright (C) 2008-2013 Marco Costalba, Joona Kiiski, Tord Romstad
#
# Stockfish is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Stockfish is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
### ==========================================================================
### Section 1. General Configuration
### ==========================================================================
### Establish the operating system name. Simply expanded (:=) so that `uname`
### runs once while the makefile is parsed, not on every reference to UNAME.
UNAME := $(shell uname)
### Name of the executable
EXE = stockfish
### Installation directories. Haiku has a non-standard filesystem layout,
### so it gets its own prefix; everything else uses /usr/local.
ifeq "$(UNAME)" "Haiku"
  PREFIX = /boot/common
else
  PREFIX = /usr/local
endif
BINDIR = $(PREFIX)/bin
### Built-in benchmark command lines, used for pgo-builds and for the signature
SIGNBENCH = ./$(EXE) bench
PGOBENCH = ./$(EXE) bench 32 1 10 default depth
### Object files, generated from the list of base names
OBJS = $(addsuffix .o,benchmark bitbase bitboard book endgame evaluate main \
         material misc movegen movepick notation pawns position \
         search thread timeman tt uci ucioption)
### ==========================================================================
### Section 2. High-level Configuration
### ==========================================================================
#
# flag --- Comp switch --- Description
# ----------------------------------------------------------------------------
#
# debug = yes/no --- -DNDEBUG --- Enable/Disable debug mode
# optimize = yes/no --- (-O3/-fast etc.) --- Enable/Disable optimizations
# arch = (name) --- (-arch) --- Target architecture
# os = (name) --- --- Target operating system
# bits = 64/32 --- -DIS_64BIT --- 64-/32-bit operating system
# prefetch = yes/no --- -DNO_PREFETCH --- Use prefetch x86 asm-instruction
# bsfq = yes/no --- -DUSE_BSFQ --- Use bsfq x86_64 asm-instruction (only
# with GCC and ICC 64-bit)
# popcnt = yes/no --- -DUSE_POPCNT --- Use popcnt x86_64 asm-instruction
# sse = yes/no --- -msse --- Use Intel Streaming SIMD Extensions
#
# Note that Makefile is space sensitive, so when adding new architectures
# or modifying existing flags, you have to make sure there are no extra spaces
# at the end of the line for flag values.
### 2.1. General
optimize = yes
debug = no
### 2.2 Architecture specific
# General-section: every optional feature (prefetch, bsfq, popcnt, sse) is off
ifeq ($(ARCH),general-64)
  arch = any
  os = any
  bits = 64
  prefetch = no
  bsfq = no
  popcnt = no
  sse = no
else ifeq ($(ARCH),general-32)
  arch = any
  os = any
  bits = 32
  prefetch = no
  bsfq = no
  popcnt = no
  sse = no
endif
# x86-section: 64-bit with and without popcnt, then 32-bit with and without
# prefetch/sse
ifeq ($(ARCH),x86-64)
  arch = x86_64
  os = any
  bits = 64
  prefetch = yes
  bsfq = yes
  popcnt = no
  sse = yes
else ifeq ($(ARCH),x86-64-modern)
  arch = x86_64
  os = any
  bits = 64
  prefetch = yes
  bsfq = yes
  popcnt = yes
  sse = yes
else ifeq ($(ARCH),x86-32)
  arch = i386
  os = any
  bits = 32
  prefetch = yes
  bsfq = no
  popcnt = no
  sse = yes
else ifeq ($(ARCH),x86-32-old)
  arch = i386
  os = any
  bits = 32
  prefetch = no
  bsfq = no
  popcnt = no
  sse = no
endif
# arm section: 32-bit ARMv7 with prefetch and bsfq, without popcnt and sse
ifeq "$(ARCH)" "armv7"
  arch = armv7
  os = any
  bits = 32
  prefetch = yes
  bsfq = yes
  popcnt = no
  sse = no
endif
# osx-section: PowerPC and x86 Macs, each in a 64-bit and a 32-bit flavour
ifeq ($(ARCH),osx-ppc-64)
  arch = ppc64
  os = osx
  bits = 64
  prefetch = no
  bsfq = no
  popcnt = no
  sse = no
else ifeq ($(ARCH),osx-ppc-32)
  arch = ppc
  os = osx
  bits = 32
  prefetch = no
  bsfq = no
  popcnt = no
  sse = no
else ifeq ($(ARCH),osx-x86-64)
  arch = x86_64
  os = osx
  bits = 64
  prefetch = yes
  bsfq = yes
  popcnt = yes
  sse = yes
else ifeq ($(ARCH),osx-x86-32)
  arch = i386
  os = osx
  bits = 32
  prefetch = yes
  bsfq = no
  popcnt = no
  sse = yes
endif
### ==========================================================================
### Section 3. Low-level configuration
### ==========================================================================
### 3.1 Selecting compiler (default = gcc)
ifeq "$(COMP)" ""
  COMP = gcc
endif
# Map COMP to the internal compiler id (comp), the C++ driver (CXX) and the
# names of the profile-build helper targets. mingw and clang reuse the gcc
# profile targets; icc has its own set.
ifeq ($(COMP),mingw)
  comp = mingw
  CXX = g++
  profile_prepare = gcc-profile-prepare
  profile_make = gcc-profile-make
  profile_use = gcc-profile-use
  profile_clean = gcc-profile-clean
else ifeq ($(COMP),gcc)
  comp = gcc
  CXX = g++
  profile_prepare = gcc-profile-prepare
  profile_make = gcc-profile-make
  profile_use = gcc-profile-use
  profile_clean = gcc-profile-clean
else ifeq ($(COMP),icc)
  comp = icc
  CXX = icpc
  profile_prepare = icc-profile-prepare
  profile_make = icc-profile-make
  profile_use = icc-profile-use
  profile_clean = icc-profile-clean
else ifeq ($(COMP),clang)
  comp = clang
  CXX = clang++
  profile_prepare = gcc-profile-prepare
  profile_make = gcc-profile-make
  profile_use = gcc-profile-use
  profile_clean = gcc-profile-clean
endif
### 3.2 General compiler settings
# Flags shared by all compilers, followed by whatever the caller passes in
# EXTRACXXFLAGS; the per-compiler warning options are appended afterwards.
CXXFLAGS = -Wall -Wcast-qual -fno-exceptions -fno-rtti $(EXTRACXXFLAGS)
ifeq ($(comp),gcc)
  CXXFLAGS += -ansi -pedantic -Wno-long-long -Wextra -Wshadow
else ifeq ($(comp),mingw)
  CXXFLAGS += -Wextra -Wshadow
else ifeq ($(comp),icc)
  CXXFLAGS += -wd383,981,1418,1419,1476,10187,10188,11505,11503 -Wcheck -Wabi -Wdeprecated -strict-ansi
else ifeq ($(comp),clang)
  CXXFLAGS += -ansi -pedantic -Wno-long-long -Wextra -Wshadow
endif
# Placeholder for OS X specific compiler flags (currently none)
ifeq ($(os),osx)
  CXXFLAGS +=
endif
### 3.3 General linker settings
LDFLAGS = $(EXTRALDFLAGS)
ifeq ($(comp),mingw)
  ### mingw uses Windows threads and links the C++/gcc runtimes statically
  LDFLAGS += -static-libstdc++ -static-libgcc
else
  ### Everything else uses POSIX threads. Haiku has pthreads in its libroot,
  ### so the library is only linked in on other platforms.
  ifneq ($(UNAME),Haiku)
    LDFLAGS += -lpthread
  endif
endif
# Placeholder for OS X specific linker flags (currently none)
ifeq ($(os),osx)
  LDFLAGS +=
endif
### 3.4 Debugging
# Any value of 'debug' other than "no" requests debug info; "no" defines
# NDEBUG instead.
ifneq ($(debug),no)
  CXXFLAGS += -g
else
  CXXFLAGS += -DNDEBUG
endif
### 3.5 Optimization
# Only active when optimize=yes; each compiler gets its own set of flags.
ifeq ($(optimize),yes)
  ifeq ($(comp),gcc)
    CXXFLAGS += -O3 -fno-tree-pre
    ifeq ($(os),osx)
      ifeq ($(arch),i386)
        CXXFLAGS += -mdynamic-no-pic
      else ifeq ($(arch),x86_64)
        CXXFLAGS +=
      endif
    endif
    ifeq ($(arch),armv7)
      CXXFLAGS += -fno-gcse
    endif
  else ifeq ($(comp),mingw)
    CXXFLAGS += -O3
  else ifeq ($(comp),icc)
    ifneq ($(os),osx)
      CXXFLAGS += -O3
    else
      CXXFLAGS += -fast -mdynamic-no-pic
    endif
  else ifeq ($(comp),clang)
    ### -O4 requires a linker that supports LLVM's LTO
    CXXFLAGS += -O3
    ifeq ($(os),osx)
      ifeq ($(arch),i386)
        CXXFLAGS += -mdynamic-no-pic
      else ifeq ($(arch),x86_64)
        CXXFLAGS += -mdynamic-no-pic
      endif
    endif
  endif
endif
### 3.6. Bits
ifeq "$(bits)" "64"
  CXXFLAGS += -DIS_64BIT
endif
### 3.7 prefetch
# Without prefetch the code is told so via NO_PREFETCH; with prefetch, -msse
# is passed to the compile and dependency steps when sse is enabled as well.
ifneq ($(prefetch),yes)
  CXXFLAGS += -DNO_PREFETCH
else ifeq ($(sse),yes)
  CXXFLAGS += -msse
  DEPENDFLAGS += -msse
endif
### 3.8 bsfq
ifeq "$(bsfq)" "yes"
  CXXFLAGS += -DUSE_BSFQ
endif
### 3.9 popcnt
ifeq "$(popcnt)" "yes"
  CXXFLAGS += -mpopcnt -DUSE_POPCNT
endif
### 3.10 Link Time Optimization, it works since gcc 4.5 but not on mingw.
### This is a mix of compile and link time options because the lto link phase
### needs access to the optimization flags.
# Only evaluated for comp=gcc with optimize=yes and debug=no.
ifeq ($(comp),gcc)
ifeq ($(optimize),yes)
ifeq ($(debug),no)
# These two variables hold backquoted command strings, not numbers: make
# stores the text verbatim, and the shell started by $(shell expr ...) below
# performs the command substitution.
GCC_MAJOR := `$(CXX) -dumpversion | cut -f1 -d.`
GCC_MINOR := `$(CXX) -dumpversion | cut -f2 -d.`
# expr prints 1 when the compiler version is 4.5 or newer
# (major > 4, or major = 4 and minor >= 5).
ifeq (1,$(shell expr \( $(GCC_MAJOR) \> 4 \) \| \( $(GCC_MAJOR) \= 4 \& $(GCC_MINOR) \>= 5 \)))
# The right-hand side is empty here, so no extra compile flag is added ...
CXXFLAGS +=
# ... but the link step still receives every compile flag through LDFLAGS.
LDFLAGS += $(CXXFLAGS)
endif
endif
endif
endif
### ==========================================================================
### Section 4. Public targets
### ==========================================================================
### Print the usage text: how to invoke make, followed by the supported
### targets, archs and compilers. 'help' does not name a file, so it is
### declared phony; every recipe line starts with a TAB as make requires.
.PHONY: help
help:
	@echo ""
	@echo "To compile stockfish, type: "
	@echo ""
	@echo "make target ARCH=arch [COMP=comp]"
	@echo ""
	@echo "Supported targets:"
	@echo ""
	@echo "build > Standard build"
	@echo "signature-build > Standard build with embedded signature"
	@echo "profile-build > PGO build"
	@echo "signature-profile-build > PGO build with embedded signature"
	@echo "strip > Strip executable"
	@echo "install > Install executable"
	@echo "clean > Clean up"
	@echo ""
	@echo "Supported archs:"
	@echo ""
	@echo "x86-64 > x86 64-bit"
	@echo "x86-64-modern > x86 64-bit with popcnt support"
	@echo "x86-32 > x86 32-bit with SSE support"
	@echo "x86-32-old > x86 32-bit fall back for old hardware"
	@echo "osx-ppc-64 > PPC-Mac OS X 64 bit"
	@echo "osx-ppc-32 > PPC-Mac OS X 32 bit"
	@echo "osx-x86-64 > x86-Mac OS X 64 bit"
	@echo "osx-x86-32 > x86-Mac OS X 32 bit"
	@echo "armv7 > ARMv7 32 bit"
	@echo "general-64 > unspecified 64-bit"
	@echo "general-32 > unspecified 32-bit"
	@echo ""
	@echo "Supported compilers:"
	@echo ""
	@echo "gcc > Gnu compiler (default)"
	@echo "mingw > Gnu compiler with MinGW under Windows"
	@echo "clang > LLVM Clang compiler"
	@echo "icc > Intel compiler"
	@echo ""
	@echo "Non-standard targets:"
	@echo ""
	@echo "make hpux > Compile for HP-UX. Compiler = aCC"
	@echo ""
	@echo "Examples. If you don't know what to do, you likely want to run: "
	@echo ""
	@echo "make build ARCH=x86-64 (This is for 64-bit systems)"
	@echo "make build ARCH=x86-32 (This is for 32-bit systems)"
	@echo ""
### These targets are commands, not files. Declaring all of them phony keeps
### them working even if a file with the same name (e.g. 'clean', 'install',
### 'help') happens to exist in the source directory.
.PHONY: help build profile-build embed-signature \
        signature-build signature-profile-build \
        strip install clean default all config-sanity \
        gcc-profile-prepare gcc-profile-make gcc-profile-use gcc-profile-clean \
        icc-profile-prepare icc-profile-make icc-profile-use icc-profile-clean \
        hpux
### Standard build: first print and validate the configuration, then build
### everything. Both steps run in a sub-make with the same ARCH/COMP.
build:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all
### PGO build. After the configuration check it walks through the steps
### announced by the echo lines: prepare, instrumented build, benchmark run,
### final build, profile data removal.
### The sub-make goals come from $(profile_prepare), $(profile_make),
### $(profile_use) and $(profile_clean), which are defined outside this
### section (presumably one set per compiler -- verify).
### The 'touch' calls bump the source timestamps so that the following
### sub-make recompiles them.
profile-build:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) config-sanity
	@echo ""
	@echo "Step 0/4. Preparing for profile build."
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_prepare)
	@echo ""
	@echo "Step 1/4. Building executable for benchmark ..."
	@touch *.cpp *.h
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_make)
	@echo ""
	@echo "Step 2/4. Running benchmark for pgo-build ..."
	@$(PGOBENCH) > /dev/null
	@echo ""
	@echo "Step 3/4. Building final executable ..."
	@touch *.cpp
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_use)
	@echo ""
	@echo "Step 4/4. Deleting profile data ..."
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) $(profile_clean)
### Embed the benchmark signature into the version string in misc.cpp.
###  1. Run $(SIGNBENCH) and keep the text following "Nodes searched : ".
###  2. Wrap that text into a sed substitute command that targets the
###     'static const string Version' line and prefixes the quoted value
###     with "sig-".
###  3. Apply the generated sed script to misc.cpp.
### Files are rewritten through temporaries instead of 'sed -i': in-place
### editing is not part of POSIX sed, and BSD / OS X sed expects a backup
### suffix argument after -i, so the GNU-style 'sed -i -e ...' and
### 'sed -i -f ...' invocations do not work there.
embed-signature:
	@echo "Running benchmark for getting the signature ..."
	@$(SIGNBENCH) 2>&1 | sed -n 's/Nodes searched : \(.*\)/\1/p' > sign.txt
	@sed -e 's,^,/static const string Version/s/"\\(.*\\)"/"sig-,1' -e 's,$$,"/1,1' sign.txt > sign.sed
	@sed -f sign.sed misc.cpp > misc.cpp.tmp && mv -f misc.cpp.tmp misc.cpp
	@rm -f sign.txt sign.sed
### Standard build with embedded signature: build, embed the signature, then
### run 'all' once more in a sub-make.
signature-build: build embed-signature
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) all

### PGO build with embedded signature; all work is done by the prerequisites.
### NOTE(review): this relies on the prerequisites being processed left to
### right, which is not guaranteed under 'make -j' -- confirm.
signature-profile-build: build embed-signature profile-build
### Run strip(1) on the built executable in place.
strip:
	strip $(EXE)
### Copy the executable into $(BINDIR) (created with mode 755 if needed) and
### strip the installed copy. Every step carries a leading '-', so a failing
### step is reported but does not abort the target.
install:
	-mkdir -p -m 755 $(BINDIR)
	-cp $(EXE) $(BINDIR)
	-strip $(BINDIR)/$(EXE)
### Remove the executable (with and without .exe suffix), object files, the
### generated dependency file, editor backups, core dumps, bench.txt and
### gcov profile data.
clean:
	$(RM) $(EXE) $(EXE).exe *.o .depend *~ core bench.txt *.gcda
### 'make default' shows the usage text. 'help' has to be a prerequisite:
### written as a recipe line it is handed to the shell as a command named
### 'help' instead of running the make target of that name.
default: help
### ==========================================================================
### Section 5. Private targets
### ==========================================================================
### Private aggregate target used by the public build targets: builds the
### executable and the generated dependency file.
all: $(EXE) .depend
### Print the effective configuration and flags, then validate every setting.
### Each 'test' line fails (and thereby aborts make) when the corresponding
### variable holds a value outside its accepted set.
config-sanity:
	@echo ""
	@echo "Config:"
	@echo "debug: '$(debug)'"
	@echo "optimize: '$(optimize)'"
	@echo "arch: '$(arch)'"
	@echo "os: '$(os)'"
	@echo "bits: '$(bits)'"
	@echo "prefetch: '$(prefetch)'"
	@echo "bsfq: '$(bsfq)'"
	@echo "popcnt: '$(popcnt)'"
	@echo "sse: '$(sse)'"
	@echo ""
	@echo "Flags:"
	@echo "CXX: $(CXX)"
	@echo "CXXFLAGS: $(CXXFLAGS)"
	@echo "LDFLAGS: $(LDFLAGS)"
	@echo ""
	@echo "Testing config sanity. If this fails, try 'make help' ..."
	@echo ""
	@test "$(debug)" = "yes" || test "$(debug)" = "no"
	@test "$(optimize)" = "yes" || test "$(optimize)" = "no"
	@test "$(arch)" = "any" || test "$(arch)" = "x86_64" || test "$(arch)" = "i386" || \
	test "$(arch)" = "ppc64" || test "$(arch)" = "ppc" || test "$(arch)" = "armv7"
	@test "$(os)" = "any" || test "$(os)" = "osx"
	@test "$(bits)" = "32" || test "$(bits)" = "64"
	@test "$(prefetch)" = "yes" || test "$(prefetch)" = "no"
	@test "$(bsfq)" = "yes" || test "$(bsfq)" = "no"
	@test "$(popcnt)" = "yes" || test "$(popcnt)" = "no"
	@test "$(sse)" = "yes" || test "$(sse)" = "no"
	@test "$(comp)" = "gcc" || test "$(comp)" = "icc" || test "$(comp)" = "mingw" || test "$(comp)" = "clang"
### Link step: the executable is produced from all object files. The linker
### flags go last on the command line, after the objects.
$(EXE): $(OBJS)
	$(CXX) -o $@ $(OBJS) $(LDFLAGS)
### gcc PGO helpers. EXTRACXXFLAGS / EXTRALDFLAGS are handed to the sub-make
### on its command line (presumably merged into CXXFLAGS / LDFLAGS elsewhere
### in this file -- verify).

### Start from a clean state: drop old profile data.
gcc-profile-prepare:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) gcc-profile-clean

### Instrumented build that records profile data when run.
gcc-profile-make:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	  EXTRACXXFLAGS='-fprofile-generate' \
	  EXTRALDFLAGS='-lgcov' \
	  all

### Final build that consumes the recorded profile data.
gcc-profile-use:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	  EXTRACXXFLAGS='-fprofile-use' \
	  EXTRALDFLAGS='-lgcov' \
	  all

### Remove gcov data/notes files and the benchmark output.
gcc-profile-clean:
	@rm -rf *.gcda *.gcno bench.txt
### icc PGO helpers. Profile data is kept in ./profdir. EXTRACXXFLAGS is
### handed to the sub-make on its command line (presumably merged into
### CXXFLAGS elsewhere in this file -- verify).

### Remove old profile data, then create a fresh profile directory.
icc-profile-prepare:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) icc-profile-clean
	@mkdir profdir

### Instrumented build writing its profile into ./profdir.
icc-profile-make:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	  EXTRACXXFLAGS='-prof-gen=srcpos -prof_dir ./profdir' \
	  all

### Final build that reads the profile from ./profdir.
icc-profile-use:
	$(MAKE) ARCH=$(ARCH) COMP=$(COMP) \
	  EXTRACXXFLAGS='-prof_use -prof_dir ./profdir' \
	  all

### Remove the profile directory and the benchmark output.
icc-profile-clean:
	@rm -rf profdir bench.txt
### Generate the dependency file by running the compiler with -MM over the
### .cpp file matching each object. The leading '-' ignores a failing
### compiler run, '@' hides the command, and compiler diagnostics are
### discarded.
.depend:
	-@$(CXX) $(DEPENDFLAGS) -MM $(OBJS:.o=.cpp) > $@ 2> /dev/null

### Pull in the generated dependencies; no error if the file is missing.
-include .depend
### ==========================================================================
### Section 6. Non-standard targets
### ==========================================================================
### HP-UX build with the aCC compiler. All compiler options are folded into
### CXX itself, and CXXFLAGS / LDFLAGS are overridden to be empty for the
### sub-make that builds 'all'.
hpux:
	$(MAKE) \
	  CXX='/opt/aCC/bin/aCC -AA +hpxstd98 -mt +O3 -DNDEBUG -DNO_PREFETCH' \
	  CXXFLAGS="" \
	  LDFLAGS="" \
	  all