I didn't forget to mention this; I actually knew nothing about it. (In reply to lucasart, who wrote: "But what you forget to mention is that they are counter-productive in the case of HT.")

Moderator: Ras
I didn't forget to mention this; I actually knew nothing about it. (In reply to lucasart, who wrote: "But what you forget to mention is that they are counter-productive in the case of HT.")
Code: Select all
#include <atomic>
// Minimal busy-waiting lock built on a single atomic counter.
// The counter is 1 while the lock is free and <= 0 while it is held.
class Spinlock {
    std::atomic_int lock;

public:
    // Init here to workaround a bug with MSVC 2013
    Spinlock() { lock = 1; }

    // Blocks (busy-waits) until the calling thread owns the lock.
    void acquire() {
        for (;;) {
            // Atomic decrement; a previous value of 1 means the lock was free
            // and now belongs to us.
            if (lock.fetch_sub(1, std::memory_order_acquire) == 1)
                return;

            // Somebody else holds it: poll with relaxed loads until the
            // counter turns positive again, then retry the decrement.
            while (lock.load(std::memory_order_relaxed) <= 0) {
            }
        }
    }

    // Marks the lock as free again by resetting the counter to 1.
    void release() {
        lock.store(1, std::memory_order_release);
    }
};
// Tiny wrapper used to inspect the code generated for Spinlock::acquire():
// acquires the lock it is given and always returns 0.
// NOTE(review): the parameter is taken by value here, while the assembly
// listings in this post carry the mangled name _Z9spin_lockP8Spinlock
// (a Spinlock* parameter) -- the original was presumably
// `int spin_lock(Spinlock *s)` with `s->acquire()`; confirm before reuse.
int spin_lock(Spinlock s) {
    s.acquire();
    return 0;
}
Code: Select all
# spin_lock(Spinlock*) -- variant where the atomic decrement is compiled as a
# compare-and-swap retry loop. The lock word is addressed through %rdi.
_Z9spin_lockP8Spinlock:
.L2:
# Read the current lock value into %eax.
movl (%rdi), %eax
.L4:
# %edx = value - 1 (the value to store); %ecx remembers the value we started from.
leal -1(%rax), %edx
movl %eax, %ecx
# Store %edx only if the lock word still equals %eax; on a mismatch %eax is
# reloaded with the current value and the decrement is retried.
lock cmpxchgl %edx, (%rdi)
jne .L4
# The value before the decrement was 1: the lock is ours.
cmpl $1, %ecx
je .L12
.L7:
# Wait loop: plain loads until the value is positive, then retry the decrement.
movl (%rdi), %eax
testl %eax, %eax
jg .L2
jmp .L7
.L12:
# Return 0.
xorl %eax, %eax
ret
Code: Select all
# spin_lock(Spinlock*) -- variant where the atomic decrement is a single
# lock xadd. The lock word is addressed through %rdi.
_Z9spin_lockP8Spinlock:
.L2:
# Atomically add -1 to the lock word; %eax receives the previous value.
movl $-1, %eax
lock xaddl %eax, (%rdi)
# Previous value 1 means the lock was free and is now ours.
cmpl $1, %eax
je .L7
.L5:
# Wait loop: keep loading while the value is <= 0, then retry the decrement.
movl (%rdi), %eax
testl %eax, %eax
jle .L5
jmp .L2
.L7:
# %eax is 1 on this path, so clearing its low byte yields the return value 0.
xorb %al, %al
ret
Code: Select all
# 32-bit pthread_spin_lock listing, shown for comparison with the C++ Spinlock.
pthread_spin_lock:
# Load the argument at 4(%esp) -- presumably the pointer to the lock word -- into %eax.
mov 4(%esp), %eax
# Atomically decrement the lock word.
1: LOCK
decl 0(%eax)
# Non-zero result: the lock was not free, go wait at 2.
jne 2f
# Result was zero: lock acquired, return 0.
xor %eax, %eax
ret
.align 16
# Wait loop; "rep nop" is the encoding of the PAUSE instruction.
2: rep
nop
# Retry the decrement once the lock word is positive again, otherwise keep waiting.
cmpl $0, 0(%eax)
jg 1b
jmp 2b
Code: Select all
#include <immintrin.h>
...
void acquire() {
while (lock.fetch_sub(1, std::memory_order_acquire) != 1)
while (lock.load(std::memory_order_relaxed) <= 0) { _mm_pause(); }
Code: Select all
# spin_lock(Spinlock*) -- lock xadd variant with a pause in the wait loop.
# The lock word is addressed through %rdi.
_Z9spin_lockP8Spinlock:
.L2:
# Atomically add -1 to the lock word; %eax receives the previous value.
movl $-1, %eax
lock xaddl %eax, (%rdi)
# Previous value 1 means the lock was free and is now ours.
cmpl $1, %eax
je .L7
.L5:
# Wait loop: retry the decrement as soon as the value is positive.
movl (%rdi), %eax
testl %eax, %eax
jg .L2
# "rep nop" is the encoding of PAUSE; it corresponds to the _mm_pause() call
# in the C++ wait loop.
rep nop
jmp .L5
.L7:
# %eax is 1 on this path, so clearing its low byte yields the return value 0.
xorb %al, %al
ret