// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2007, 2011
 */

#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/workqueue.h>
#include <linux/memblock.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>

#define PTF_HORIZONTAL	(0UL)
#define PTF_VERTICAL	(1UL)
#define PTF_CHECK	(2UL)

enum {
	TOPOLOGY_MODE_HW,
	TOPOLOGY_MODE_SINGLE,
	TOPOLOGY_MODE_PACKAGE,
	TOPOLOGY_MODE_UNINITIALIZED
};

struct mask_info {
	struct mask_info *next;
	unsigned char id;
	cpumask_t mask;
};

static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;

static DECLARE_WORK(topology_work, topology_work_fn);

/*
 * Socket/Book linked lists and cpu_topology updates are
 * protected by "sched_domains_mutex".
 */
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;

struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
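
/*
 * Return in *dst the cpumask of all set-up CPUs that share the topology
 * container (socket, book or drawer chain passed via @info) with @cpu.
 * In TOPOLOGY_MODE_PACKAGE all present CPUs form one group; in
 * TOPOLOGY_MODE_SINGLE each CPU is a group of its own.
 */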
static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
	static cpumask_t mask;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	switch (topology_mode) {
	case TOPOLOGY_MODE_HW:
		while (info) {
			if (cpumask_test_cpu(cpu, &info->mask)) {
				cpumask_copy(&mask, &info->mask);
				break;
			}
			info = info->next;
		}
		break;
	case TOPOLOGY_MODE_PACKAGE:
		cpumask_copy(&mask, cpu_present_mask);
		break;
	default:
		fallthrough;
	case TOPOLOGY_MODE_SINGLE:
		break;
	}
	cpumask_and(&mask, &mask, &cpu_setup_mask);
out:
	cpumask_copy(dst, &mask);
}
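
/*
 * Return in *dst the cpumask of all set-up CPUs that are hardware threads
 * of the same core as @cpu. If the hardware topology is not used, the
 * result is just @cpu itself.
 */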
static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
	static cpumask_t mask;
	unsigned int max_cpu;

	cpumask_clear(&mask);
	if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
		goto out;
	cpumask_set_cpu(cpu, &mask);
	if (topology_mode != TOPOLOGY_MODE_HW)
		goto out;
	cpu -= cpu % (smp_cpu_mtid + 1);
	max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
	for (; cpu <= max_cpu; cpu++) {
		if (cpumask_test_cpu(cpu, &cpu_setup_mask))
			cpumask_set_cpu(cpu, &mask);
	}
out:
	cpumask_copy(dst, &mask);
}

#define TOPOLOGY_CORE_BITS	64
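
/*
 * For every core set in the topology-list CPU entry @tl_core, look up the
 * logical CPUs backing it and record their drawer/book/socket/core/thread
 * IDs, the dedication flag and the polarization, and add them to the
 * container masks they belong to.
 */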
static void add_cpus_to_mask(struct topology_core *tl_core,
			     struct mask_info *drawer,
			     struct mask_info *book,
			     struct mask_info *socket)
{
	struct cpu_topology_s390 *topo;
	unsigned int core;

	for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
		unsigned int max_cpu, rcore;
		int cpu;

		rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
		cpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
		if (cpu < 0)
			continue;
		max_cpu = min(cpu + smp_cpu_mtid, nr_cpu_ids - 1);
		for (; cpu <= max_cpu; cpu++) {
			topo = &cpu_topology[cpu];
			topo->drawer_id = drawer->id;
			topo->book_id = book->id;
			topo->socket_id = socket->id;
			topo->core_id = rcore;
			topo->thread_id = cpu;
			topo->dedicated = tl_core->d;
			cpumask_set_cpu(cpu, &drawer->mask);
			cpumask_set_cpu(cpu, &book->mask);
			cpumask_set_cpu(cpu, &socket->mask);
			smp_cpu_set_polarization(cpu, tl_core->pp);
		}
	}
}

static void clear_masks(void)
{
	struct mask_info *info;

	info = &socket_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &book_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
	info = &drawer_info;
	while (info) {
		cpumask_clear(&info->mask);
		info = info->next;
	}
}
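
/*
 * Topology list entries have different sizes: nesting level 0 entries
 * describe cores (struct topology_core), all higher levels are container
 * entries (struct topology_container). Advance by the correct size.
 */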
static union topology_entry *next_tle(union topology_entry *tle)
{
	if (!tle->nl)
		return (union topology_entry *)((struct topology_core *)tle + 1);
	return (union topology_entry *)((struct topology_container *)tle + 1);
}
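
/*
 * Walk the SYSIB 15.1.x topology list and convert it into the socket,
 * book and drawer mask_info chains: a nesting level 3 entry starts a new
 * drawer, level 2 a new book, level 1 a new socket, and level 0 entries
 * describe the cores contained in the current socket/book/drawer.
 */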
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
	struct mask_info *socket = &socket_info;
	struct mask_info *book = &book_info;
	struct mask_info *drawer = &drawer_info;
	union topology_entry *tle, *end;

	clear_masks();
	tle = info->tle;
	end = (union topology_entry *)((unsigned long)info + info->length);
	while (tle < end) {
		switch (tle->nl) {
		case 3:
			drawer = drawer->next;
			drawer->id = tle->container.id;
			break;
		case 2:
			book = book->next;
			book->id = tle->container.id;
			break;
		case 1:
			socket = socket->next;
			socket->id = tle->container.id;
			break;
		case 0:
			add_cpus_to_mask(&tle->cpu, drawer, book, socket);
			break;
		default:
			clear_masks();
			return;
		}
		tle = next_tle(tle);
	}
}

static void topology_update_polarization_simple(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
}
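
/*
 * Issue the PTF (perform topology function) instruction with function
 * code @fc and return its condition code. PTF_CHECK reports whether a
 * topology change is pending; PTF_HORIZONTAL and PTF_VERTICAL request
 * the corresponding polarization.
 */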
static int ptf(unsigned long fc)
{
	int rc;

	asm volatile(
		"	.insn	rre,0xb9a20000,%1,%1\n"
		"	ipm	%0\n"
		"	srl	%0,28\n"
		: "=d" (rc)
		: "d" (fc) : "cc");
	return rc;
}

int topology_set_cpu_management(int fc)
{
	int cpu, rc;

	if (!MACHINE_HAS_TOPOLOGY)
		return -EOPNOTSUPP;
	if (fc)
		rc = ptf(PTF_VERTICAL);
	else
		rc = ptf(PTF_HORIZONTAL);
	if (rc)
		return -EBUSY;
	for_each_possible_cpu(cpu)
		smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
	return rc;
}
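
/*
 * Refresh the per-CPU thread/core/book/drawer masks and IDs from the
 * current mask_info chains, then recount how many cores are booted in
 * each package.
 */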
void update_cpu_masks(void)
{
	struct cpu_topology_s390 *topo, *topo_package, *topo_sibling;
	int cpu, sibling, pkg_first, smt_first, id;

	for_each_possible_cpu(cpu) {
		topo = &cpu_topology[cpu];
		cpu_thread_map(&topo->thread_mask, cpu);
		cpu_group_map(&topo->core_mask, &socket_info, cpu);
		cpu_group_map(&topo->book_mask, &book_info, cpu);
		cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
		topo->booted_cores = 0;
		if (topology_mode != TOPOLOGY_MODE_HW) {
			id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
			topo->thread_id = cpu;
			topo->core_id = cpu;
			topo->socket_id = id;
			topo->book_id = id;
			topo->drawer_id = id;
		}
	}
	for_each_online_cpu(cpu) {
		topo = &cpu_topology[cpu];
		pkg_first = cpumask_first(&topo->core_mask);
		topo_package = &cpu_topology[pkg_first];
		if (cpu == pkg_first) {
			for_each_cpu(sibling, &topo->core_mask) {
				topo_sibling = &cpu_topology[sibling];
				smt_first = cpumask_first(&topo_sibling->thread_mask);
				if (sibling == smt_first)
					topo_package->booted_cores++;
			}
		} else {
			topo->booted_cores = topo_package->booted_cores;
		}
	}
}

void store_topology(struct sysinfo_15_1_x *info)
{
	stsi(info, 15, 1, topology_mnest_limit());
}

static void __arch_update_dedicated_flag(void *arg)
{
	if (topology_cpu_dedicated(smp_processor_id()))
		set_cpu_flag(CIF_DEDICATED_CPU);
	else
		clear_cpu_flag(CIF_DEDICATED_CPU);
}

static int __arch_update_cpu_topology(void)
{
	struct sysinfo_15_1_x *info = tl_info;
	int rc = 0;

	mutex_lock(&smp_cpu_state_mutex);
	if (MACHINE_HAS_TOPOLOGY) {
		rc = 1;
		store_topology(info);
		tl_to_masks(info);
	}
	update_cpu_masks();
	if (!MACHINE_HAS_TOPOLOGY)
		topology_update_polarization_simple();
	mutex_unlock(&smp_cpu_state_mutex);
	return rc;
}
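
/*
 * Scheduler hook: re-read the machine topology, refresh the per-CPU
 * dedicated flags on all CPUs and send a change uevent for each online
 * CPU device. Returns nonzero if topology information was read from the
 * machine, i.e. the cpumasks may have changed.
 */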
int arch_update_cpu_topology(void)
{
	struct device *dev;
	int cpu, rc;

	rc = __arch_update_cpu_topology();
	on_each_cpu(__arch_update_dedicated_flag, NULL, 0);
	for_each_online_cpu(cpu) {
		dev = get_cpu_device(cpu);
		if (dev)
			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
	}
	return rc;
}

static void topology_work_fn(struct work_struct *work)
{
	rebuild_sched_domains();
}

void topology_schedule_update(void)
{
	schedule_work(&topology_work);
}

static void topology_flush_work(void)
{
	flush_work(&topology_work);
}
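
/*
 * Poll for a pending topology change with PTF_CHECK and schedule a
 * scheduling-domain rebuild if one is reported. The timer normally fires
 * every 60 seconds; while a change is expected it fires every 100 ms
 * (see topology_expect_change() below).
 */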
static void topology_timer_fn(struct timer_list *unused)
{
	if (ptf(PTF_CHECK))
		topology_schedule_update();
	set_topology_timer();
}

static struct timer_list topology_timer;

static atomic_t topology_poll = ATOMIC_INIT(0);

static void set_topology_timer(void)
{
	if (atomic_add_unless(&topology_poll, -1, 0))
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(100));
	else
		mod_timer(&topology_timer, jiffies + msecs_to_jiffies(60 * MSEC_PER_SEC));
}

void topology_expect_change(void)
{
	if (!MACHINE_HAS_TOPOLOGY)
		return;
	/* This is racy, but it doesn't matter since it is just a heuristic.
	 * Worst case is that we poll in a higher frequency for a bit longer.
	 */
	if (atomic_read(&topology_poll) > 60)
		return;
	atomic_add(60, &topology_poll);
	set_topology_timer();
}
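
/*
 * The "dispatching" sysfs attribute toggles CPU management: writing 1
 * requests vertical polarization, writing 0 switches back to horizontal
 * polarization.
 */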
static int cpu_management;

static ssize_t dispatching_show(struct device *dev,
				struct device_attribute *attr,
				char *buf)
{
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", cpu_management);
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}

static ssize_t dispatching_store(struct device *dev,
				 struct device_attribute *attr,
				 const char *buf,
				 size_t count)
{
	int val, rc;
	char delim;

	if (sscanf(buf, "%d %c", &val, &delim) != 1)
		return -EINVAL;
	if (val != 0 && val != 1)
		return -EINVAL;
	rc = 0;
	cpus_read_lock();
	mutex_lock(&smp_cpu_state_mutex);
	if (cpu_management == val)
		goto out;
	rc = topology_set_cpu_management(val);
	if (rc)
		goto out;
	cpu_management = val;
	topology_expect_change();
out:
	mutex_unlock(&smp_cpu_state_mutex);
	cpus_read_unlock();
	return rc ? rc : count;
}
static DEVICE_ATTR_RW(dispatching);

static ssize_t cpu_polarization_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	switch (smp_cpu_get_polarization(cpu)) {
	case POLARIZATION_HRZ:
		count = sprintf(buf, "horizontal\n");
		break;
	case POLARIZATION_VL:
		count = sprintf(buf, "vertical:low\n");
		break;
	case POLARIZATION_VM:
		count = sprintf(buf, "vertical:medium\n");
		break;
	case POLARIZATION_VH:
		count = sprintf(buf, "vertical:high\n");
		break;
	default:
		count = sprintf(buf, "unknown\n");
		break;
	}
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);

static struct attribute *topology_cpu_attrs[] = {
	&dev_attr_polarization.attr,
	NULL,
};

static struct attribute_group topology_cpu_attr_group = {
	.attrs = topology_cpu_attrs,
};

static ssize_t cpu_dedicated_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	int cpu = dev->id;
	ssize_t count;

	mutex_lock(&smp_cpu_state_mutex);
	count = sprintf(buf, "%d\n", topology_cpu_dedicated(cpu));
	mutex_unlock(&smp_cpu_state_mutex);
	return count;
}
static DEVICE_ATTR(dedicated, 0444, cpu_dedicated_show, NULL);

static struct attribute *topology_extra_cpu_attrs[] = {
	&dev_attr_dedicated.attr,
	NULL,
};

static struct attribute_group topology_extra_cpu_attr_group = {
	.attrs = topology_extra_cpu_attrs,
};

int topology_cpu_init(struct cpu *cpu)
{
	int rc;

	rc = sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	if (rc || !MACHINE_HAS_TOPOLOGY)
		return rc;
	rc = sysfs_create_group(&cpu->dev.kobj, &topology_extra_cpu_attr_group);
	if (rc)
		sysfs_remove_group(&cpu->dev.kobj, &topology_cpu_attr_group);
	return rc;
}

static const struct cpumask *cpu_thread_mask(int cpu)
{
	return &cpu_topology[cpu].thread_mask;
}

const struct cpumask *cpu_coregroup_mask(int cpu)
{
	return &cpu_topology[cpu].core_mask;
}

static const struct cpumask *cpu_book_mask(int cpu)
{
	return &cpu_topology[cpu].book_mask;
}

static const struct cpumask *cpu_drawer_mask(int cpu)
{
	return &cpu_topology[cpu].drawer_mask;
}

static struct sched_domain_topology_level s390_topology[] = {
	{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
	{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};
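
/*
 * Allocate one mask_info per possible container at the given nesting
 * level; the count is derived from the SYSIB 15.1.x magnitude (mag[])
 * values above that level.
 */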
static void __init alloc_masks(struct sysinfo_15_1_x *info,
			       struct mask_info *mask, int offset)
{
	int i, nr_masks;

	nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
	for (i = 0; i < info->mnest - offset; i++)
		nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
	nr_masks = max(nr_masks, 1);
	for (i = 0; i < nr_masks; i++) {
		mask->next = memblock_alloc(sizeof(*mask->next), 8);
		if (!mask->next)
			panic("%s: Failed to allocate %zu bytes align=0x%x\n",
			      __func__, sizeof(*mask->next), 8);
		mask = mask->next;
	}
}

void __init topology_init_early(void)
{
	struct sysinfo_15_1_x *info;

	set_sched_topology(s390_topology);
	if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
		if (MACHINE_HAS_TOPOLOGY)
			topology_mode = TOPOLOGY_MODE_HW;
		else
			topology_mode = TOPOLOGY_MODE_SINGLE;
	}
	if (!MACHINE_HAS_TOPOLOGY)
		goto out;
	tl_info = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
	if (!tl_info)
		panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
		      __func__, PAGE_SIZE, PAGE_SIZE);
	info = tl_info;
	store_topology(info);
	pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
		info->mag[0], info->mag[1], info->mag[2], info->mag[3],
		info->mag[4], info->mag[5], info->mnest);
	alloc_masks(info, &socket_info, 1);
	alloc_masks(info, &book_info, 2);
	alloc_masks(info, &drawer_info, 3);
out:
	cpumask_set_cpu(0, &cpu_setup_mask);
	__arch_update_cpu_topology();
	__arch_update_dedicated_flag(NULL);
}

static inline int topology_get_mode(int enabled)
{
	if (!enabled)
		return TOPOLOGY_MODE_SINGLE;
	return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}

static inline int topology_is_enabled(void)
{
	return topology_mode != TOPOLOGY_MODE_SINGLE;
}

static int __init topology_setup(char *str)
{
	bool enabled;
	int rc;

	rc = kstrtobool(str, &enabled);
	if (rc)
		return rc;
	topology_mode = topology_get_mode(enabled);
	return 0;
}
early_param("topology", topology_setup);
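
/*
 * Sysctl handler for /proc/sys/s390/topology: reading returns whether
 * topology is enabled, writing 0 or 1 switches the topology mode and
 * schedules (and waits for) a scheduling-domain rebuild.
 */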
static int topology_ctl_handler(struct ctl_table *ctl, int write,
				void *buffer, size_t *lenp, loff_t *ppos)
{
	int enabled = topology_is_enabled();
	int new_mode;
	int rc;
	struct ctl_table ctl_entry = {
		.procname = ctl->procname,
		.data = &enabled,
		.maxlen = sizeof(int),
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	};

	rc = proc_douintvec_minmax(&ctl_entry, write, buffer, lenp, ppos);
	if (rc < 0 || !write)
		return rc;

	mutex_lock(&smp_cpu_state_mutex);
	new_mode = topology_get_mode(enabled);
	if (topology_mode != new_mode) {
		topology_mode = new_mode;
		topology_schedule_update();
	}
	mutex_unlock(&smp_cpu_state_mutex);
	topology_flush_work();

	return rc;
}

static struct ctl_table topology_ctl_table[] = {
	{
		.procname = "topology",
		.mode = 0644,
		.proc_handler = topology_ctl_handler,
	},
	{ },
};

static struct ctl_table topology_dir_table[] = {
	{
		.procname = "s390",
		.maxlen = 0,
		.mode = 0555,
		.child = topology_ctl_table,
	},
	{ },
};

static int __init topology_init(void)
{
	timer_setup(&topology_timer, topology_timer_fn, TIMER_DEFERRABLE);
	if (MACHINE_HAS_TOPOLOGY)
		set_topology_timer();
	else
		topology_update_polarization_simple();
	register_sysctl_table(topology_dir_table);
	return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);