32 #include <tbb/task_scheduler_init.h>
54 unsigned int count = 0)
56 __asm__ __volatile__ (
"cpuid"
65 static inline unsigned int cpuid_max(
unsigned int extended = 0)
75 cpuid(regs, 0x80000000);
86 #define XL_BIT(register, cpuid_bit, lift_bit) \
87 if (regs.register & x86_64::CPUID_BIT_ ##cpuid_bit) \
88 ret.vector_extensions |= x86_64::lift_bit
114 cpuid(regs, 0x80000001);
127 #define C(desc, level, type, total_size, associativity, line_size) \
129 ret.caches.push_back({cpu_cache::type, level, associativity, total_size * 1024, line_size}); \
132 C(0x06, 1, instruction, 8, 4, 32);
133 C(0x08, 1, instruction, 16, 4, 32);
134 C(0x09, 1, instruction, 32, 4, 64);
135 C(0x0a, 1, data, 8, 2, 32);
136 C(0x0c, 1, data, 16, 4, 32);
137 C(0x0d, 1, data, 16, 4, 64);
138 C(0x0e, 1, data, 24, 6, 64);
139 C(0x1d, 2, unified, 128, 2, 64);
140 C(0x21, 2, unified, 256, 8, 64);
141 C(0x22, 3, unified, 512, 4, 64);
142 C(0x23, 3, unified, 1024, 8, 64);
143 C(0x24, 2, unified, 1024, 16, 64);
144 C(0x25, 3, unified, 2048, 8, 64);
145 C(0x29, 3, unified, 4096, 8, 64);
146 C(0x2c, 1, data, 32, 8, 64);
147 C(0x30, 1, instruction, 32, 8, 64);
148 C(0x41, 2, unified, 128, 4, 32);
149 C(0x42, 2, unified, 256, 4, 32);
150 C(0x43, 2, unified, 512, 4, 32);
151 C(0x44, 2, unified, 1024, 4, 32);
152 C(0x45, 2, unified, 2048, 4, 32);
153 C(0x46, 3, unified, 4096, 4, 64);
154 C(0x47, 3, unified, 8192, 8, 64);
155 C(0x48, 2, unified, 3072, 12, 64);
163 uint8 model = (regs.
eax & 0xf0) >> 4;
164 uint8 family = (regs.
eax & 0xf00) >> 8;
166 if (family == 0x0f && model == 0x06)
176 C(0x4a, 3, unified, 6144, 12, 64);
177 C(0x4b, 3, unified, 8192, 16, 64);
178 C(0x4c, 3, unified, 12288, 12, 64);
179 C(0x4d, 3, unified, 16 * 1024, 16, 64);
180 C(0x4e, 2, unified, 6 * 1024, 24, 64);
181 C(0x60, 1, data, 16, 8, 64);
182 C(0x66, 1, data, 8, 4, 64);
183 C(0x67, 1, data, 16, 4, 64);
184 C(0x68, 1, data, 32, 4, 64);
185 C(0x78, 2, unified, 1024, 4, 64);
186 C(0x79, 2, unified, 128, 8, 64);
187 C(0x7a, 2, unified, 256, 8, 64);
188 C(0x7b, 2, unified, 512, 8, 64);
189 C(0x7c, 2, unified, 1024, 8, 64);
190 C(0x7d, 2, unified, 2048, 8, 64);
191 C(0x7f, 2, unified, 512, 2, 64);
192 C(0x80, 2, unified, 512, 8, 64);
193 C(0x82, 2, unified, 256, 8, 32);
194 C(0x83, 2, unified, 512, 8, 32);
195 C(0x84, 2, unified, 1024, 8, 32);
196 C(0x85, 2, unified, 2048, 8, 32);
197 C(0x86, 2, unified, 512, 4, 64);
198 C(0x87, 2, unified, 1024, 8, 64);
199 C(0xd0, 3, unified, 512, 4, 64);
200 C(0xd1, 3, unified, 1024, 4, 64);
201 C(0xd2, 3, unified, 2048, 4, 64);
202 C(0xd6, 3, unified, 1024, 8, 64);
203 C(0xd7, 3, unified, 2048, 8, 64);
204 C(0xd8, 3, unified, 4096, 8, 64);
205 C(0xdc, 3, unified, 1536, 12, 64);
206 C(0xdd, 3, unified, 3072, 12, 64);
207 C(0xde, 3, unified, 6144, 12, 64);
208 C(0xe2, 3, unified, 2048, 16, 64);
209 C(0xe3, 3, unified, 4096, 16, 64);
210 C(0xe4, 3, unified, 8192, 16, 64);
211 C(0xea, 3, unified, 12288, 24, 64);
212 C(0xeb, 3, unified, 18432, 24, 64);
213 C(0xec, 3, unified, 24576, 24, 64);
221 for(
uint32 in_ecx = 0; ; in_ecx++)
223 cpuid(regs, 4, in_ecx);
225 if ((regs.
eax & 0xf) == 0)
230 switch(regs.
eax & 0xf)
245 unsigned int level = ((regs.
eax >> 5) & 0x3);
247 unsigned int ways = ((regs.
ebx >> 22) & 0xff) + 1;
248 unsigned int partitions = ((regs.
ebx >> 12) & 0xff) + 1;
249 unsigned int line_size = (regs.
ebx & 0x3ff) + 1;
250 unsigned int sets = regs.
ecx + 1;
252 unsigned int total_size = ways * partitions * line_size * sets;
254 cpu_cache cache = { cache_type, level, ways, total_size, line_size };
255 ret.
caches.push_back(cache);
266 if ((regs.
eax & (1u << 31)) == 0)
273 if ((regs.
ebx & (1u << 31)) == 0)
281 if ((regs.
ecx & (1u << 31)) == 0)
289 if ((regs.
edx & (1u << 31)) == 0)
302 char brand_string[
sizeof(
unsigned int) * 4 * 4 + 1] = { 0 };
306 cpuid(regs, 0x80000002);
307 memcpy(&brand_string[ 0], &regs.
eax,
sizeof(regs.
eax));
308 memcpy(&brand_string[ 4], &regs.
ebx,
sizeof(regs.
ebx));
309 memcpy(&brand_string[ 8], &regs.
ecx,
sizeof(regs.
ecx));
310 memcpy(&brand_string[12], &regs.
edx,
sizeof(regs.
edx));
312 cpuid(regs, 0x80000003);
313 memcpy(&brand_string[16], &regs.
eax,
sizeof(regs.
eax));
314 memcpy(&brand_string[20], &regs.
ebx,
sizeof(regs.
ebx));
315 memcpy(&brand_string[24], &regs.
ecx,
sizeof(regs.
ecx));
316 memcpy(&brand_string[28], &regs.
edx,
sizeof(regs.
edx));
318 cpuid(regs, 0x80000004);
319 memcpy(&brand_string[32], &regs.
eax,
sizeof(regs.
eax));
320 memcpy(&brand_string[36], &regs.
ebx,
sizeof(regs.
ebx));
321 memcpy(&brand_string[40], &regs.
ecx,
sizeof(regs.
ecx));
322 memcpy(&brand_string[44], &regs.
edx,
sizeof(regs.
edx));
324 std::string name = brand_string;
327 auto first_non_space = name.find_first_not_of(
" ");
328 auto last_non_space = name.find_last_not_of(
" ");
330 ret.
name = name.substr(first_non_space, last_non_space - first_non_space + 1);
335 namespace __internal {
340 unsigned int max_level;
342 ret.
num_threads = tbb::task_scheduler_init::default_num_threads();
static constexpr uint32 SSE4_1
static constexpr uint32 AVX2
static void identify_vector_extensions(cpu_config &ret)
static unsigned int cpuid_max_extended(void)
static constexpr uint32 AVX
cpu_config identify_host_cpu(void)
static constexpr uint32 SSE_FMA4
#define XL_BIT(register, cpuid_bit, lift_bit)
static void scan_leaf4_cache_info(cpu_config &ret)
std::vector< cpu_cache > caches
static constexpr uint32 SSE
static constexpr uint32 SSE3_S
static void cpuid(cpuid_regs &output, unsigned int code, unsigned int count=0)
static constexpr uint32 SSE4_2
static void decode_cache_descriptor(cpu_config &ret, uint8 desc)
static constexpr uint32 SSE_FMA3
static void identify_caches(cpu_config &ret)
static void get_cpu_brand_string(cpu_config &ret)
static constexpr uint32 SSE2
static constexpr uint32 SSE_XOP
static constexpr uint32 SSE4_a
static constexpr uint32 SSE_F16C
#define C(desc, level, type, total_size, associativity, line_size)
static constexpr uint32 SSE3
static unsigned int cpuid_max(unsigned int extended=0)