blob: ac02b17f5db0f672596a88d2ce8d93d21d907efe [file] [log] [blame]
[email protected]91d91fa2011-04-29 20:45:221// Copyright (c) 2011 The Chromium Authors. All rights reserved.
license.botbf09a502008-08-24 00:55:552// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
[email protected]611dbe02008-08-05 09:57:364
5// This file is an internal atomic implementation, use base/atomicops.h instead.
6
7#ifndef BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
8#define BASE_ATOMICOPS_INTERNALS_X86_GCC_H_
9
[email protected]0bea7252011-08-05 15:34:0010#include "base/base_export.h"
[email protected]91d91fa2011-04-29 20:45:2211
[email protected]611dbe02008-08-05 09:57:3612// This struct is not part of the public API of this module; clients may not
[email protected]0bea7252011-08-05 15:34:0013// use it. (However, it's exported via BASE_EXPORT because clients implicitly
[email protected]91d91fa2011-04-29 20:45:2214// do use it at link time by inlining these functions.)
[email protected]611dbe02008-08-05 09:57:3615// Features of this x86. Values may not be correct before main() is run,
16// but are set conservatively.
17struct AtomicOps_x86CPUFeatureStruct {
18 bool has_amd_lock_mb_bug; // Processor has AMD memory-barrier bug; do lfence
19 // after acquire compare-and-swap.
20 bool has_sse2; // Processor has SSE2.
21};
[email protected]0bea7252011-08-05 15:34:0022BASE_EXPORT extern struct AtomicOps_x86CPUFeatureStruct
[email protected]91d91fa2011-04-29 20:45:2223 AtomicOps_Internalx86CPUFeatures;
[email protected]611dbe02008-08-05 09:57:3624
// Compiler-level barrier: an empty asm statement carrying a "memory" clobber.
// It is #undef'd again at the end of this header.
#define ATOMICOPS_COMPILER_BARRIER() \
  __asm__ __volatile__("" : : : "memory")
26
27namespace base {
28namespace subtle {
29
30// 32-bit low-level operations on any platform.
31
32inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
33 Atomic32 old_value,
34 Atomic32 new_value) {
35 Atomic32 prev;
36 __asm__ __volatile__("lock; cmpxchgl %1,%2"
37 : "=a" (prev)
38 : "q" (new_value), "m" (*ptr), "0" (old_value)
39 : "memory");
40 return prev;
41}
42
43inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
44 Atomic32 new_value) {
45 __asm__ __volatile__("xchgl %1,%0" // The lock prefix is implicit for xchg.
46 : "=r" (new_value)
47 : "m" (*ptr), "0" (new_value)
48 : "memory");
49 return new_value; // Now it's the previous value.
50}
51
52inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
53 Atomic32 increment) {
54 Atomic32 temp = increment;
55 __asm__ __volatile__("lock; xaddl %0,%1"
56 : "+r" (temp), "+m" (*ptr)
57 : : "memory");
58 // temp now holds the old value of *ptr
59 return temp + increment;
60}
61
62inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
63 Atomic32 increment) {
64 Atomic32 temp = increment;
65 __asm__ __volatile__("lock; xaddl %0,%1"
66 : "+r" (temp), "+m" (*ptr)
67 : : "memory");
68 // temp now holds the old value of *ptr
69 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
70 __asm__ __volatile__("lfence" : : : "memory");
71 }
72 return temp + increment;
73}
74
75inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
76 Atomic32 old_value,
77 Atomic32 new_value) {
78 Atomic32 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
79 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
80 __asm__ __volatile__("lfence" : : : "memory");
81 }
82 return x;
83}
84
85inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
86 Atomic32 old_value,
87 Atomic32 new_value) {
88 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
89}
90
91inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
92 *ptr = value;
93}
94
95#if defined(__x86_64__)
96
// The 64-bit implementation of the memory barrier can be simpler, because
// "mfence" is guaranteed to exist.
inline void MemoryBarrier() {
  __asm__ __volatile__("mfence"
                       :
                       :
                       : "memory");
}
102
103inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
104 *ptr = value;
105 MemoryBarrier();
106}
107
108#else
109
110inline void MemoryBarrier() {
111 if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
112 __asm__ __volatile__("mfence" : : : "memory");
113 } else { // mfence is faster but not present on PIII
114 Atomic32 x = 0;
115 NoBarrier_AtomicExchange(&x, 0); // acts as a barrier on PIII
116 }
117}
118
119inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
120 if (AtomicOps_Internalx86CPUFeatures.has_sse2) {
121 *ptr = value;
122 __asm__ __volatile__("mfence" : : : "memory");
123 } else {
124 NoBarrier_AtomicExchange(ptr, value);
125 // acts as a barrier on PIII
126 }
127}
128#endif
129
130inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
131 ATOMICOPS_COMPILER_BARRIER();
132 *ptr = value; // An x86 store acts as a release barrier.
133 // See comments in Atomic64 version of Release_Store(), below.
134}
135
136inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) {
137 return *ptr;
138}
139
140inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
141 Atomic32 value = *ptr; // An x86 load acts as a acquire barrier.
142 // See comments in Atomic64 version of Release_Store(), below.
143 ATOMICOPS_COMPILER_BARRIER();
144 return value;
145}
146
147inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
148 MemoryBarrier();
149 return *ptr;
150}
151
152#if defined(__x86_64__)
153
154// 64-bit low-level operations on 64-bit platform.
155
156inline Atomic64 NoBarrier_CompareAndSwap(volatile Atomic64* ptr,
157 Atomic64 old_value,
158 Atomic64 new_value) {
159 Atomic64 prev;
160 __asm__ __volatile__("lock; cmpxchgq %1,%2"
161 : "=a" (prev)
162 : "q" (new_value), "m" (*ptr), "0" (old_value)
163 : "memory");
164 return prev;
165}
166
167inline Atomic64 NoBarrier_AtomicExchange(volatile Atomic64* ptr,
168 Atomic64 new_value) {
169 __asm__ __volatile__("xchgq %1,%0" // The lock prefix is implicit for xchg.
170 : "=r" (new_value)
171 : "m" (*ptr), "0" (new_value)
172 : "memory");
173 return new_value; // Now it's the previous value.
174}
175
176inline Atomic64 NoBarrier_AtomicIncrement(volatile Atomic64* ptr,
177 Atomic64 increment) {
178 Atomic64 temp = increment;
179 __asm__ __volatile__("lock; xaddq %0,%1"
180 : "+r" (temp), "+m" (*ptr)
181 : : "memory");
182 // temp now contains the previous value of *ptr
183 return temp + increment;
184}
185
186inline Atomic64 Barrier_AtomicIncrement(volatile Atomic64* ptr,
187 Atomic64 increment) {
188 Atomic64 temp = increment;
189 __asm__ __volatile__("lock; xaddq %0,%1"
190 : "+r" (temp), "+m" (*ptr)
191 : : "memory");
192 // temp now contains the previous value of *ptr
193 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
194 __asm__ __volatile__("lfence" : : : "memory");
195 }
196 return temp + increment;
197}
198
199inline void NoBarrier_Store(volatile Atomic64* ptr, Atomic64 value) {
200 *ptr = value;
201}
202
203inline void Acquire_Store(volatile Atomic64* ptr, Atomic64 value) {
204 *ptr = value;
205 MemoryBarrier();
206}
207
208inline void Release_Store(volatile Atomic64* ptr, Atomic64 value) {
209 ATOMICOPS_COMPILER_BARRIER();
210
211 *ptr = value; // An x86 store acts as a release barrier
212 // for current AMD/Intel chips as of Jan 2008.
213 // See also Acquire_Load(), below.
214
215 // When new chips come out, check:
216 // IA-32 Intel Architecture Software Developer's Manual, Volume 3:
217 // System Programming Guide, Chatper 7: Multiple-processor management,
218 // Section 7.2, Memory Ordering.
219 // Last seen at:
220 // http://developer.intel.com/design/pentium4/manuals/index_new.htm
221 //
222 // x86 stores/loads fail to act as barriers for a few instructions (clflush
223 // maskmovdqu maskmovq movntdq movnti movntpd movntps movntq) but these are
224 // not generated by the compiler, and are rare. Users of these instructions
225 // need to know about cache behaviour in any case since all of these involve
226 // either flushing cache lines or non-temporal cache hints.
227}
228
229inline Atomic64 NoBarrier_Load(volatile const Atomic64* ptr) {
230 return *ptr;
231}
232
233inline Atomic64 Acquire_Load(volatile const Atomic64* ptr) {
234 Atomic64 value = *ptr; // An x86 load acts as a acquire barrier,
235 // for current AMD/Intel chips as of Jan 2008.
236 // See also Release_Store(), above.
237 ATOMICOPS_COMPILER_BARRIER();
238 return value;
239}
240
241inline Atomic64 Release_Load(volatile const Atomic64* ptr) {
242 MemoryBarrier();
243 return *ptr;
244}
[email protected]616f9a12009-07-27 21:17:23245
246inline Atomic64 Acquire_CompareAndSwap(volatile Atomic64* ptr,
247 Atomic64 old_value,
248 Atomic64 new_value) {
249 Atomic64 x = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
250 if (AtomicOps_Internalx86CPUFeatures.has_amd_lock_mb_bug) {
251 __asm__ __volatile__("lfence" : : : "memory");
252 }
253 return x;
254}
255
256inline Atomic64 Release_CompareAndSwap(volatile Atomic64* ptr,
257 Atomic64 old_value,
258 Atomic64 new_value) {
259 return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
260}
261
[email protected]611dbe02008-08-05 09:57:36262#endif // defined(__x86_64__)
263
264} // namespace base::subtle
265} // namespace base
266
267#undef ATOMICOPS_COMPILER_BARRIER
268
269#endif // BASE_ATOMICOPS_INTERNALS_X86_GCC_H_