Instruction_level_parallelism

  • Post author:
  • Post category:其他




Instruction_level_parallelism and False_sharing

// Times two loops that perform the same number of increments and prints the
// elapsed wall-clock seconds of each (after first printing the step count).
//
//   Loop 1 increments two different counters per iteration, so the two
//   increments do not depend on each other.
//   Loop 2 increments the same counter twice per iteration, so the second
//   increment has to wait for the first.
//
// NOTE(review): the difference is only observable when the loops are really
// executed; an optimising compiler may collapse or remove them, so build
// without optimisation when running this demo.
void Instruction_level_parallelism()
{
	const unsigned int steps = 1 * 1024 * 1024 * 1024u - 1;
	// std::endl is kept on purpose: the flush makes the step count visible
	// before the long-running loops start.
	std::cout << steps << std::endl;

	// Value-initialised (zeroed) *unsigned* counters. Reading uninitialised
	// ints is undefined, and a[0] receives roughly 3 * 2^30 increments over
	// both loops, which would overflow a signed int (also undefined).
	unsigned int *a = new unsigned int[2]();

	// Loop 1: two independent counters.
	time_t begin = time(nullptr);
	for (unsigned int i = 0; i < steps; i++)
	{
		a[0]++;
		a[1]++;
	}
	time_t end = time(nullptr);
	std::cout << end - begin << std::endl;

	// Loop 2: the same counter twice, forming a dependency chain.
	begin = time(nullptr);
	for (unsigned int i = 0; i < steps; i++)
	{
		a[0]++;
		a[0]++;
	}
	end = time(nullptr);
	std::cout << end - begin << std::endl;

	// Release the counters; the original leaked this allocation.
	delete[] a;
}
// Cache-line size constants used by the alignas() specifiers below.
// Prefer the standard library's values when it provides them (the feature-test
// macro is only defined once <new> or <version> has been included).
#ifdef __cpp_lib_hardware_interference_size
	using std::hardware_constructive_interference_size;
	using std::hardware_destructive_interference_size;
#else
	// Fallback: 64 bytes, the cache-line size on x86-64 and most mainstream
	// CPUs (cf. L1_CACHE_BYTES, L1_CACHE_SHIFT, __cacheline_aligned, ...).
	// The previous guess, 2 * sizeof(std::max_align_t), is unrelated to the
	// cache-line size and yields only 16 or 32 on ABIs with a small max_align_t.
	constexpr std::size_t hardware_constructive_interference_size = 64;
	constexpr std::size_t hardware_destructive_interference_size = 64;
#endif

	// Small record made of one atomic byte and one plain byte. It is embedded
	// in DataPack as the member that gets cache-line alignment.
	struct Data
	{
		std::atomic<std::uint8_t> lock; // atomic byte (presumably a lock flag guarding `data` - confirm with usage)
		std::uint8_t data;              // plain payload byte stored right after `lock`
	};

	// Layout demo for constructive interference: `pack` is aligned to
	// hardware_constructive_interference_size, so it starts on such a boundary
	// and the compiler inserts padding after `port` to get there.
	struct DataPack
	{
		std::uint8_t port; // first member, offset 0
		alignas(hardware_constructive_interference_size) Data pack; // begins at the next aligned boundary
		std::uint8_t wait; // placed directly after `pack`; it carries no alignment request of its own
	};

	// Layout demo for destructive interference: `ice` and `fire` are each
	// aligned to hardware_destructive_interference_size, so their addresses
	// differ by at least that many bytes.
	struct KeepApart
	{
		std::uint8_t air; // first member, offset 0
		alignas(hardware_destructive_interference_size) std::atomic<std::uint8_t> ice;  // starts on its own aligned boundary
		alignas(hardware_destructive_interference_size) std::atomic<std::uint8_t> fire; // starts on the following aligned boundary
		std::uint8_t soil; // no alignment request: it follows `fire` directly
	};

	// Prints the alignment/size of std::max_align_t, the two interference-size
	// constants, and the member offsets and total sizes of DataPack and
	// KeepApart, so the effect of the alignas() specifiers can be inspected.
	void test_false_share()
	{
		std::ostream &out = std::cout;

		// Fundamental alignment information.
		const std::size_t max_align_alignment = alignof(std::max_align_t);
		const std::size_t max_align_size = sizeof(std::max_align_t);
		out << "alignof(std::max_align_t) == " << max_align_alignment << '\n';
		out << "sizeof( std::max_align_t ) == " << max_align_size << "\n\n";

		// The constants the structs are aligned with.
		out << "hardware_destructive_interference_size == ";
		out << hardware_destructive_interference_size << '\n';
		out << "hardware_constructive_interference_size == ";
		out << hardware_constructive_interference_size << "\n\n";

		// DataPack layout.
		const std::size_t port_offset = offsetof(DataPack, port);
		const std::size_t pack_offset = offsetof(DataPack, pack);
		const std::size_t wait_offset = offsetof(DataPack, wait);
		out << "offsetof( DataPack::port ) : " << port_offset << '\n';
		out << "offsetof( DataPack::pack ) : " << pack_offset << '\n';
		out << "offsetof( DataPack::wait ) : " << wait_offset << '\n';
		out << "sizeof  ( DataPack )       : " << sizeof(DataPack) << '\n';
		out << '\n';

		// KeepApart layout.
		const std::size_t air_offset = offsetof(KeepApart, air);
		const std::size_t ice_offset = offsetof(KeepApart, ice);
		const std::size_t fire_offset = offsetof(KeepApart, fire);
		const std::size_t soil_offset = offsetof(KeepApart, soil);
		out << "offsetof( KeepApart::air  ) : " << air_offset << '\n';
		out << "offsetof( KeepApart::ice  ) ! " << ice_offset << '\n';
		out << "offsetof( KeepApart::fire ) ! " << fire_offset << '\n';
		out << "offsetof( KeepApart::soil ) : " << soil_offset << '\n';
		out << "sizeof  ( KeepApart )       : " << sizeof(KeepApart) << '\n';
		out << '\n';
	}


Output:
alignof(std::max_align_t) == 16
sizeof( std::max_align_t ) == 32

hardware_destructive_interference_size == 64
hardware_constructive_interference_size == 64

offsetof( DataPack::port ) : 0
offsetof( DataPack::pack ) : 64
offsetof( DataPack::wait ) : 66
sizeof  ( DataPack )       : 128

offsetof( KeepApart::air  ) : 0
offsetof( KeepApart::ice  ) ! 64
offsetof( KeepApart::fire ) ! 128
offsetof( KeepApart::soil ) : 129
sizeof  ( KeepApart )       : 192
  • false sharing(伪共享):当不同线程(运行在不同核心上)各自访问的两个变量恰好位于同一个 cache line 时,其中一个变量被修改会使其他核心上的这条 cache line 失效,访问另一个变量的线程只能重新加载,也就没法享受 cache 带来的好处了。所以在多线程频繁写入的场景下要避免 false sharing。

  • true sharing,如果一个结构体的 sizeof 小于或等于 cache line 大小,并且按该大小对齐(不跨越 cache line 边界),那么它的所有成员都落在同一个 cache line 中,访问其中一个成员时其余成员也一并被载入缓存,这个结构体的成员间就存在 true sharing

  • Destructive interference size: a number that’s suitable as an offset between two objects to likely avoid false-sharing due to different runtime access patterns from different threads.

  • Constructive interference size: a number that’s suitable as a limit on two objects’ combined memory footprint size and base alignment to likely promote true-sharing between them.