// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// This file is an internal atomic implementation, use base/atomicops.h instead.
//
// LinuxKernelCmpxchg and Barrier_AtomicIncrement are from Google Gears.

#ifndef BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_
#define BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_

namespace base {
namespace subtle {

// Memory barriers on ARM are funky, but the kernel is here to help:
//
// * ARMv5 didn't support SMP; there is no memory barrier instruction at
//   all on this architecture, or when targeting its machine code.
//
// * Some ARMv6 CPUs support SMP. A full memory barrier can be produced by
//   writing a random value to a very specific coprocessor register.
//
// * On ARMv7, the "dmb" instruction is used to perform a full memory
//   barrier (though writing to the co-processor will still work).
//   However, on single core devices (e.g. Nexus One, or Nexus S),
//   this instruction will take up to 200 ns, which is huge, even though
//   it's completely un-needed on these devices.
//
// * There is no easy way to determine at runtime if the device is
//   single or multi-core. However, the kernel provides a useful helper
//   function at a fixed memory address (0xffff0fa0), which will always
//   perform a memory barrier in the most efficient way. I.e. on single
//   core devices, this is an empty function that exits immediately.
//   On multi-core devices, it implements a full memory barrier.
//
// * This source could be compiled to ARMv5 machine code that runs on a
//   multi-core ARMv6 or ARMv7 device. In this case, memory barriers
//   are needed for correct execution. Always call the kernel helper, even
//   when targeting ARMv5TE.
//

inline void MemoryBarrier() {
  // Note: This is a function call, which is also an implicit compiler
  // barrier.
  typedef void (*KernelMemoryBarrierFunc)();
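  // 0xffff0fa0 is the fixed address of the kernel's "kuser_memory_barrier"
  // helper, which Linux on ARM exposes to user space in the vector page
  // (see Documentation/arm/kernel_user_helpers.txt in the kernel tree).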
  ((KernelMemoryBarrierFunc)0xffff0fa0)();
}

// An ARM toolchain would only define one of these depending on which
// variant of the target architecture is being used. This tests against
// any known ARMv6 or ARMv7 variant, where it is possible to directly
// use ldrex/strex instructions to implement fast atomic operations.
#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || \
    defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || \
    defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6KZ__) || defined(__ARM_ARCH_6T2__)

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  int reloop;
  do {
    // The following is equivalent to:
    //
    //   prev_value = LDREX(ptr)
    //   reloop = 0
    //   if (prev_value == old_value)
    //     reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    mov %1, #0\n"
                         "    cmp %0, %4\n"
#ifdef __thumb2__
                         "    it eq\n"
#endif
                         "    strexeq %1, %5, [%3]\n"
                         : "=&r"(prev_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(old_value), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return prev_value;
}
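
// Illustrative usage sketch, not part of this header's API: callers can
// build other read-modify-write operations on top of the CAS loop above.
// A hypothetical atomic bitwise-OR would look like:
//
//   Atomic32 AtomicOr(volatile Atomic32* ptr, Atomic32 bits) {
//     Atomic32 old_value;
//     do {
//       old_value = *ptr;
//     } while (NoBarrier_CompareAndSwap(ptr, old_value, old_value | bits) !=
//              old_value);  // A different return value means another
//                           // thread raced us; reload and retry.
//     return old_value | bits;
//   }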

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 result = NoBarrier_CompareAndSwap(ptr, old_value, new_value);
  MemoryBarrier();
  return result;
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  MemoryBarrier();
  return NoBarrier_CompareAndSwap(ptr, old_value, new_value);
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  Atomic32 value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   value = LDREX(ptr)
    //   value += increment
    //   reloop = STREX(ptr, value)
    //
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    add %0, %0, %4\n"
                         "    strex %1, %0, [%3]\n"
                         : "=&r"(value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(increment)
                         : "cc", "memory");
  } while (reloop);
  return value;
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  // TODO(digit): Investigate if it's possible to implement this with
  // a single MemoryBarrier() operation between the LDREX and STREX.
  // See http://crbug.com/246514
  MemoryBarrier();
  Atomic32 result = NoBarrier_AtomicIncrement(ptr, increment);
  MemoryBarrier();
  return result;
}
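
// Illustrative only: the Barrier_ flavor is what a reference count needs,
// since the final decrement must observe all prior writes to the object.
// A sketch (DeleteObject is a hypothetical function, not part of this file):
//
//   void RefCountRelease(volatile Atomic32* refcount) {
//     if (Barrier_AtomicIncrement(refcount, -1) == 0)
//       DeleteObject();  // Prior writes are visible before deletion.
//   }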

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  int reloop;
  do {
    // Equivalent to:
    //
    //   old_value = LDREX(ptr)
    //   reloop = STREX(ptr, new_value)
    __asm__ __volatile__("    ldrex %0, [%3]\n"
                         "    strex %1, %4, [%3]\n"
                         : "=&r"(old_value), "=&r"(reloop), "+m"(*ptr)
                         : "r"(ptr), "r"(new_value)
                         : "cc", "memory");
  } while (reloop != 0);
  return old_value;
}
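
// Illustrative usage sketch (hypothetical helpers, not part of this file):
// a test-and-set spinlock can be built from NoBarrier_AtomicExchange() plus
// explicit barriers:
//
//   void SpinLockAcquire(volatile Atomic32* lock) {
//     while (NoBarrier_AtomicExchange(lock, 1) != 0) {}
//     MemoryBarrier();  // Acquire: reads/writes in the critical section
//                       // must not be reordered before we own the lock.
//   }
//
//   void SpinLockRelease(volatile Atomic32* lock) {
//     MemoryBarrier();  // Release: flush critical-section writes first.
//     NoBarrier_Store(lock, 0);
//   }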

// This tests against any known ARMv5 variant.
#elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) || \
    defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)

// The kernel also provides a helper function to perform an atomic
// compare-and-swap operation at the hard-wired address 0xffff0fc0.
// On ARMv5, this is implemented by a special code path that the kernel
// detects and treats specially when thread pre-emption happens.
// On ARMv6 and higher, it uses LDREX/STREX instructions instead.
//
// Note that this always performs a full memory barrier; there is no
// need to add calls to MemoryBarrier() before or after it. It also
// returns 0 on success, and nonzero on failure.
//
// Available and reliable since Linux 2.6.24. Both Android and ChromeOS
// use newer kernel revisions, so this should not be a concern.
namespace {

inline int LinuxKernelCmpxchg(Atomic32 old_value,
                              Atomic32 new_value,
                              volatile Atomic32* ptr) {
  typedef int (*KernelCmpxchgFunc)(Atomic32, Atomic32, volatile Atomic32*);
  return ((KernelCmpxchgFunc)0xffff0fc0)(old_value, new_value, ptr);
}

}  // namespace

inline Atomic32 NoBarrier_CompareAndSwap(volatile Atomic32* ptr,
                                         Atomic32 old_value,
                                         Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value)
      return prev_value;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 NoBarrier_AtomicExchange(volatile Atomic32* ptr,
                                         Atomic32 new_value) {
  Atomic32 old_value;
  do {
    old_value = *ptr;
  } while (LinuxKernelCmpxchg(old_value, new_value, ptr));
  return old_value;
}

inline Atomic32 NoBarrier_AtomicIncrement(volatile Atomic32* ptr,
                                          Atomic32 increment) {
  return Barrier_AtomicIncrement(ptr, increment);
}

inline Atomic32 Barrier_AtomicIncrement(volatile Atomic32* ptr,
                                        Atomic32 increment) {
  for (;;) {
    // Atomically exchange the old value with an incremented one.
    Atomic32 old_value = *ptr;
    Atomic32 new_value = old_value + increment;
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr)) {
      // The exchange took place as expected.
      return new_value;
    }
    // Otherwise, *ptr changed mid-loop and we need to retry.
  }
}

inline Atomic32 Acquire_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  Atomic32 prev_value;
  for (;;) {
    prev_value = *ptr;
    if (prev_value != old_value) {
      // Always ensure acquire semantics.
      MemoryBarrier();
      return prev_value;
    }
    if (!LinuxKernelCmpxchg(old_value, new_value, ptr))
      return old_value;
  }
}

inline Atomic32 Release_CompareAndSwap(volatile Atomic32* ptr,
                                       Atomic32 old_value,
                                       Atomic32 new_value) {
  // This could be implemented as:
  //
  //    MemoryBarrier();
  //    return NoBarrier_CompareAndSwap();
  //
  // But that would use 3 barriers per successful CAS. To improve
  // performance, use Acquire_CompareAndSwap(). Its implementation
  // guarantees that:
  // - A successful swap uses only 2 barriers (in the kernel helper).
  // - An early return due to (prev_value != old_value) performs
  //   a memory barrier with no store, which is equivalent to the
  //   generic implementation above.
  return Acquire_CompareAndSwap(ptr, old_value, new_value);
}

#else
# error "Your CPU's ARM architecture is not supported yet"
#endif

// NOTE: Atomicity of the following load and store operations is only
// guaranteed when |ptr| values are 32-bit aligned.
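//
// For example (illustrative only), a layout like the following would break
// that guarantee:
//
//   #pragma pack(1)
//   struct Misaligned {
//     char c;
//     Atomic32 value;  // Offset 1: loads/stores may no longer be atomic.
//   };
//   #pragma pack()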

inline void NoBarrier_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
}

inline void Acquire_Store(volatile Atomic32* ptr, Atomic32 value) {
  *ptr = value;
  MemoryBarrier();
}

inline void Release_Store(volatile Atomic32* ptr, Atomic32 value) {
  MemoryBarrier();
  *ptr = value;
}

inline Atomic32 NoBarrier_Load(volatile const Atomic32* ptr) { return *ptr; }

inline Atomic32 Acquire_Load(volatile const Atomic32* ptr) {
  Atomic32 value = *ptr;
  MemoryBarrier();
  return value;
}

inline Atomic32 Release_Load(volatile const Atomic32* ptr) {
  MemoryBarrier();
  return *ptr;
}
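
// Illustrative usage sketch (g_ready and g_payload are hypothetical
// globals, not part of this file): Release_Store() and Acquire_Load() pair
// up for simple message passing between two threads:
//
//   // Producer:
//   NoBarrier_Store(&g_payload, 42);  // Publish the data first...
//   Release_Store(&g_ready, 1);       // ...then raise the flag.
//
//   // Consumer:
//   while (Acquire_Load(&g_ready) == 0) {}
//   Atomic32 v = NoBarrier_Load(&g_payload);  // Guaranteed to observe 42.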

}  // namespace base::subtle
}  // namespace base

#endif  // BASE_ATOMICOPS_INTERNALS_ARM_GCC_H_