rdtgroup.c

  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * User interface for Resource Allocation in Resource Director Technology (RDT)
  4. *
  5. * Copyright (C) 2016 Intel Corporation
  6. *
  7. * Author: Fenghua Yu <[email protected]>
  8. *
  9. * More information about RDT can be found in the Intel (R) x86 Architecture
  10. * Software Developer Manual.
  11. */
  12. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13. #include <linux/cacheinfo.h>
  14. #include <linux/cpu.h>
  15. #include <linux/debugfs.h>
  16. #include <linux/fs.h>
  17. #include <linux/fs_parser.h>
  18. #include <linux/sysfs.h>
  19. #include <linux/kernfs.h>
  20. #include <linux/seq_buf.h>
  21. #include <linux/seq_file.h>
  22. #include <linux/sched/signal.h>
  23. #include <linux/sched/task.h>
  24. #include <linux/slab.h>
  25. #include <linux/task_work.h>
  26. #include <linux/user_namespace.h>
  27. #include <uapi/linux/magic.h>
  28. #include <asm/resctrl.h>
  29. #include "internal.h"
  30. DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
  31. DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
  32. DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
  33. static struct kernfs_root *rdt_root;
  34. struct rdtgroup rdtgroup_default;
  35. LIST_HEAD(rdt_all_groups);
  36. /* list of entries for the schemata file */
  37. LIST_HEAD(resctrl_schema_all);
  38. /* Kernel fs node for "info" directory under root */
  39. static struct kernfs_node *kn_info;
  40. /* Kernel fs node for "mon_groups" directory under root */
  41. static struct kernfs_node *kn_mongrp;
  42. /* Kernel fs node for "mon_data" directory under root */
  43. static struct kernfs_node *kn_mondata;
  44. static struct seq_buf last_cmd_status;
  45. static char last_cmd_status_buf[512];
  46. struct dentry *debugfs_resctrl;
  47. void rdt_last_cmd_clear(void)
  48. {
  49. lockdep_assert_held(&rdtgroup_mutex);
  50. seq_buf_clear(&last_cmd_status);
  51. }
  52. void rdt_last_cmd_puts(const char *s)
  53. {
  54. lockdep_assert_held(&rdtgroup_mutex);
  55. seq_buf_puts(&last_cmd_status, s);
  56. }
  57. void rdt_last_cmd_printf(const char *fmt, ...)
  58. {
  59. va_list ap;
  60. va_start(ap, fmt);
  61. lockdep_assert_held(&rdtgroup_mutex);
  62. seq_buf_vprintf(&last_cmd_status, fmt, ap);
  63. va_end(ap);
  64. }
  65. void rdt_staged_configs_clear(void)
  66. {
  67. struct rdt_resource *r;
  68. struct rdt_domain *dom;
  69. lockdep_assert_held(&rdtgroup_mutex);
  70. for_each_alloc_capable_rdt_resource(r) {
  71. list_for_each_entry(dom, &r->domains, list)
  72. memset(dom->staged_config, 0, sizeof(dom->staged_config));
  73. }
  74. }
  75. /*
  76. * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  77. * we can keep a bitmap of free CLOSIDs in a single integer.
  78. *
  79. * Using a global CLOSID across all resources has some advantages and
  80. * some drawbacks:
  81. * + We can simply set "current->closid" to assign a task to a resource
  82. * group.
  83. * + Context switch code can avoid extra memory references deciding which
  84. * CLOSID to load into the PQR_ASSOC MSR
  85. * - We give up some options in configuring resource groups across multi-socket
  86. * systems.
  87. * - Our choices on how to configure each resource become progressively more
  88. * limited as the number of resources grows.
  89. */
  90. static int closid_free_map;
  91. static int closid_free_map_len;
  92. int closids_supported(void)
  93. {
  94. return closid_free_map_len;
  95. }
  96. static void closid_init(void)
  97. {
  98. struct resctrl_schema *s;
  99. u32 rdt_min_closid = 32;
  100. /* Compute rdt_min_closid across all resources */
  101. list_for_each_entry(s, &resctrl_schema_all, list)
  102. rdt_min_closid = min(rdt_min_closid, s->num_closid);
  103. closid_free_map = BIT_MASK(rdt_min_closid) - 1;
  104. /* CLOSID 0 is always reserved for the default group */
  105. closid_free_map &= ~1;
  106. closid_free_map_len = rdt_min_closid;
  107. }
  108. static int closid_alloc(void)
  109. {
  110. u32 closid = ffs(closid_free_map);
  111. if (closid == 0)
  112. return -ENOSPC;
  113. closid--;
  114. closid_free_map &= ~(1 << closid);
  115. return closid;
  116. }
  117. void closid_free(int closid)
  118. {
  119. closid_free_map |= 1 << closid;
  120. }
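/*
 * Worked example of the trivial bitmap allocator above (the CLOSID count
 * of four is an assumed value): with rdt_min_closid = 4, closid_init()
 * sets closid_free_map = 0b1110 (bit 0 cleared because CLOSID 0 is
 * reserved for the default group). Successive closid_alloc() calls then
 * return 1, 2 and 3, after which the map is empty and a fourth call
 * returns -ENOSPC. closid_free(2) sets bit 2 again, making CLOSID 2
 * available for reallocation.
 */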
  121. /**
  122. * closid_allocated - test if provided closid is in use
  123. * @closid: closid to be tested
  124. *
  125. * Return: true if @closid is currently associated with a resource group,
  126. * false if @closid is free
  127. */
  128. static bool closid_allocated(unsigned int closid)
  129. {
  130. return (closid_free_map & (1 << closid)) == 0;
  131. }
  132. /**
  133. * rdtgroup_mode_by_closid - Return mode of resource group with closid
  134. * @closid: closid of the resource group
  135. *
  136. * Each resource group is associated with a @closid. Here the mode
  137. * of a resource group can be queried by searching for it using its closid.
  138. *
  139. * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
  140. */
  141. enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
  142. {
  143. struct rdtgroup *rdtgrp;
  144. list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
  145. if (rdtgrp->closid == closid)
  146. return rdtgrp->mode;
  147. }
  148. return RDT_NUM_MODES;
  149. }
  150. static const char * const rdt_mode_str[] = {
  151. [RDT_MODE_SHAREABLE] = "shareable",
  152. [RDT_MODE_EXCLUSIVE] = "exclusive",
  153. [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
  154. [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
  155. };
  156. /**
  157. * rdtgroup_mode_str - Return the string representation of mode
  158. * @mode: the resource group mode as &enum rdtgroup_mode
  159. *
  160. * Return: string representation of valid mode, "unknown" otherwise
  161. */
  162. static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
  163. {
  164. if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
  165. return "unknown";
  166. return rdt_mode_str[mode];
  167. }
  168. /* set uid and gid of rdtgroup dirs and files to that of the creator */
  169. static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
  170. {
  171. struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
  172. .ia_uid = current_fsuid(),
  173. .ia_gid = current_fsgid(), };
  174. if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
  175. gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
  176. return 0;
  177. return kernfs_setattr(kn, &iattr);
  178. }
  179. static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
  180. {
  181. struct kernfs_node *kn;
  182. int ret;
  183. kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
  184. GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
  185. 0, rft->kf_ops, rft, NULL, NULL);
  186. if (IS_ERR(kn))
  187. return PTR_ERR(kn);
  188. ret = rdtgroup_kn_set_ugid(kn);
  189. if (ret) {
  190. kernfs_remove(kn);
  191. return ret;
  192. }
  193. return 0;
  194. }
  195. static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
  196. {
  197. struct kernfs_open_file *of = m->private;
  198. struct rftype *rft = of->kn->priv;
  199. if (rft->seq_show)
  200. return rft->seq_show(of, m, arg);
  201. return 0;
  202. }
  203. static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
  204. size_t nbytes, loff_t off)
  205. {
  206. struct rftype *rft = of->kn->priv;
  207. if (rft->write)
  208. return rft->write(of, buf, nbytes, off);
  209. return -EINVAL;
  210. }
  211. static const struct kernfs_ops rdtgroup_kf_single_ops = {
  212. .atomic_write_len = PAGE_SIZE,
  213. .write = rdtgroup_file_write,
  214. .seq_show = rdtgroup_seqfile_show,
  215. };
  216. static const struct kernfs_ops kf_mondata_ops = {
  217. .atomic_write_len = PAGE_SIZE,
  218. .seq_show = rdtgroup_mondata_show,
  219. };
  220. static bool is_cpu_list(struct kernfs_open_file *of)
  221. {
  222. struct rftype *rft = of->kn->priv;
  223. return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
  224. }
  225. static int rdtgroup_cpus_show(struct kernfs_open_file *of,
  226. struct seq_file *s, void *v)
  227. {
  228. struct rdtgroup *rdtgrp;
  229. struct cpumask *mask;
  230. int ret = 0;
  231. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  232. if (rdtgrp) {
  233. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  234. if (!rdtgrp->plr->d) {
  235. rdt_last_cmd_clear();
  236. rdt_last_cmd_puts("Cache domain offline\n");
  237. ret = -ENODEV;
  238. } else {
  239. mask = &rdtgrp->plr->d->cpu_mask;
  240. seq_printf(s, is_cpu_list(of) ?
  241. "%*pbl\n" : "%*pb\n",
  242. cpumask_pr_args(mask));
  243. }
  244. } else {
  245. seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
  246. cpumask_pr_args(&rdtgrp->cpu_mask));
  247. }
  248. } else {
  249. ret = -ENOENT;
  250. }
  251. rdtgroup_kn_unlock(of->kn);
  252. return ret;
  253. }
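/*
 * Illustration of the two output formats above (the CPU numbers are
 * assumed values): for a group owning CPUs 0-3 and 8, the "cpus_list"
 * file ("%*pbl") shows "0-3,8" while the "cpus" file ("%*pb") shows the
 * same set as the hexadecimal mask 10f.
 */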
  254. /*
  255. * This is safe against resctrl_sched_in() called from __switch_to()
  256. * because __switch_to() is executed with interrupts disabled. A local call
  257. * from update_closid_rmid() is protected against __switch_to() because
  258. * preemption is disabled.
  259. */
  260. static void update_cpu_closid_rmid(void *info)
  261. {
  262. struct rdtgroup *r = info;
  263. if (r) {
  264. this_cpu_write(pqr_state.default_closid, r->closid);
  265. this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
  266. }
  267. /*
  268. * We cannot unconditionally write the MSR because the current
  269. * executing task might have its own closid selected. Just reuse
  270. * the context switch code.
  271. */
  272. resctrl_sched_in(current);
  273. }
  274. /*
  275. * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
  276. *
  277. * Per task closids/rmids must have been set up before calling this function.
  278. */
  279. static void
  280. update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
  281. {
  282. int cpu = get_cpu();
  283. if (cpumask_test_cpu(cpu, cpu_mask))
  284. update_cpu_closid_rmid(r);
  285. smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
  286. put_cpu();
  287. }
  288. static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
  289. cpumask_var_t tmpmask)
  290. {
  291. struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
  292. struct list_head *head;
  293. /* Check whether cpus belong to parent ctrl group */
  294. cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
  295. if (!cpumask_empty(tmpmask)) {
  296. rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
  297. return -EINVAL;
  298. }
  299. /* Check whether cpus are dropped from this group */
  300. cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
  301. if (!cpumask_empty(tmpmask)) {
  302. /* Give any dropped cpus to parent rdtgroup */
  303. cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
  304. update_closid_rmid(tmpmask, prgrp);
  305. }
  306. /*
  307. * If we added cpus, remove them from previous group that owned them
  308. * and update per-cpu rmid
  309. */
  310. cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
  311. if (!cpumask_empty(tmpmask)) {
  312. head = &prgrp->mon.crdtgrp_list;
  313. list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  314. if (crgrp == rdtgrp)
  315. continue;
  316. cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
  317. tmpmask);
  318. }
  319. update_closid_rmid(tmpmask, rdtgrp);
  320. }
  321. /* Done pushing/pulling - update this group with new mask */
  322. cpumask_copy(&rdtgrp->cpu_mask, newmask);
  323. return 0;
  324. }
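/*
 * Example of the rules enforced above (group names are assumed): a
 * monitor group "mon0" may only be given CPUs that its parent control
 * group already owns; CPUs dropped from "mon0" fall back to the parent,
 * and CPUs added to "mon0" are pulled out of any sibling monitor group
 * that previously owned them.
 */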
  325. static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
  326. {
  327. struct rdtgroup *crgrp;
  328. cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
  329. /* update the child mon group masks as well */
  330. list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
  331. cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
  332. }
  333. static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
  334. cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
  335. {
  336. struct rdtgroup *r, *crgrp;
  337. struct list_head *head;
  338. /* Check whether cpus are dropped from this group */
  339. cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
  340. if (!cpumask_empty(tmpmask)) {
  341. /* Can't drop from default group */
  342. if (rdtgrp == &rdtgroup_default) {
  343. rdt_last_cmd_puts("Can't drop CPUs from default group\n");
  344. return -EINVAL;
  345. }
  346. /* Give any dropped cpus to rdtgroup_default */
  347. cpumask_or(&rdtgroup_default.cpu_mask,
  348. &rdtgroup_default.cpu_mask, tmpmask);
  349. update_closid_rmid(tmpmask, &rdtgroup_default);
  350. }
  351. /*
  352. * If we added cpus, remove them from previous group and
  353. * the prev group's child groups that owned them
  354. * and update per-cpu closid/rmid.
  355. */
  356. cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
  357. if (!cpumask_empty(tmpmask)) {
  358. list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
  359. if (r == rdtgrp)
  360. continue;
  361. cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
  362. if (!cpumask_empty(tmpmask1))
  363. cpumask_rdtgrp_clear(r, tmpmask1);
  364. }
  365. update_closid_rmid(tmpmask, rdtgrp);
  366. }
  367. /* Done pushing/pulling - update this group with new mask */
  368. cpumask_copy(&rdtgrp->cpu_mask, newmask);
  369. /*
  370. * Clear child mon group masks since there is a new parent mask
  371. * now and update the rmid for the cpus the child lost.
  372. */
  373. head = &rdtgrp->mon.crdtgrp_list;
  374. list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  375. cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
  376. update_closid_rmid(tmpmask, rdtgrp);
  377. cpumask_clear(&crgrp->cpu_mask);
  378. }
  379. return 0;
  380. }
  381. static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
  382. char *buf, size_t nbytes, loff_t off)
  383. {
  384. cpumask_var_t tmpmask, newmask, tmpmask1;
  385. struct rdtgroup *rdtgrp;
  386. int ret;
  387. if (!buf)
  388. return -EINVAL;
  389. if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
  390. return -ENOMEM;
  391. if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
  392. free_cpumask_var(tmpmask);
  393. return -ENOMEM;
  394. }
  395. if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
  396. free_cpumask_var(tmpmask);
  397. free_cpumask_var(newmask);
  398. return -ENOMEM;
  399. }
  400. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  401. if (!rdtgrp) {
  402. ret = -ENOENT;
  403. goto unlock;
  404. }
  405. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
  406. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  407. ret = -EINVAL;
  408. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  409. goto unlock;
  410. }
  411. if (is_cpu_list(of))
  412. ret = cpulist_parse(buf, newmask);
  413. else
  414. ret = cpumask_parse(buf, newmask);
  415. if (ret) {
  416. rdt_last_cmd_puts("Bad CPU list/mask\n");
  417. goto unlock;
  418. }
  419. /* check that user didn't specify any offline cpus */
  420. cpumask_andnot(tmpmask, newmask, cpu_online_mask);
  421. if (!cpumask_empty(tmpmask)) {
  422. ret = -EINVAL;
  423. rdt_last_cmd_puts("Can only assign online CPUs\n");
  424. goto unlock;
  425. }
  426. if (rdtgrp->type == RDTCTRL_GROUP)
  427. ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
  428. else if (rdtgrp->type == RDTMON_GROUP)
  429. ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
  430. else
  431. ret = -EINVAL;
  432. unlock:
  433. rdtgroup_kn_unlock(of->kn);
  434. free_cpumask_var(tmpmask);
  435. free_cpumask_var(newmask);
  436. free_cpumask_var(tmpmask1);
  437. return ret ?: nbytes;
  438. }
  439. /**
  440. * rdtgroup_remove - the helper to remove resource group safely
  441. * @rdtgrp: resource group to remove
  442. *
  443. * On resource group creation via a mkdir, an extra kernfs_node reference is
  444. * taken to ensure that the rdtgroup structure remains accessible for the
  445. * rdtgroup_kn_unlock() calls where it is removed.
  446. *
  447. * Drop the extra reference here, then free the rdtgroup structure.
  448. *
  449. * Return: void
  450. */
  451. static void rdtgroup_remove(struct rdtgroup *rdtgrp)
  452. {
  453. kernfs_put(rdtgrp->kn);
  454. kfree(rdtgrp);
  455. }
  456. static void _update_task_closid_rmid(void *task)
  457. {
  458. /*
  459. * If the task is still current on this CPU, update PQR_ASSOC MSR.
  460. * Otherwise, the MSR is updated when the task is scheduled in.
  461. */
  462. if (task == current)
  463. resctrl_sched_in(task);
  464. }
  465. static void update_task_closid_rmid(struct task_struct *t)
  466. {
  467. if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
  468. smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
  469. else
  470. _update_task_closid_rmid(t);
  471. }
  472. static int __rdtgroup_move_task(struct task_struct *tsk,
  473. struct rdtgroup *rdtgrp)
  474. {
  475. /* If the task is already in rdtgrp, no need to move the task. */
  476. if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
  477. tsk->rmid == rdtgrp->mon.rmid) ||
  478. (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
  479. tsk->closid == rdtgrp->mon.parent->closid))
  480. return 0;
  481. /*
  482. * Set the task's closid/rmid before the PQR_ASSOC MSR can be
  483. * updated by them.
  484. *
  485. * For ctrl_mon groups, move both closid and rmid.
  486. * For monitor groups, tasks can only be moved from
  487. * their parent CTRL group.
  488. */
  489. if (rdtgrp->type == RDTCTRL_GROUP) {
  490. WRITE_ONCE(tsk->closid, rdtgrp->closid);
  491. WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
  492. } else if (rdtgrp->type == RDTMON_GROUP) {
  493. if (rdtgrp->mon.parent->closid == tsk->closid) {
  494. WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
  495. } else {
  496. rdt_last_cmd_puts("Can't move task to different control group\n");
  497. return -EINVAL;
  498. }
  499. }
  500. /*
  501. * Ensure the task's closid and rmid are written before determining if
  502. * the task is current, which decides whether it needs to be interrupted.
  503. * This pairs with the full barrier between the rq->curr update and
  504. * resctrl_sched_in() during context switch.
  505. */
  506. smp_mb();
  507. /*
  508. * By now, the task's closid and rmid are set. If the task is current
  509. * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
  510. * group go into effect. If the task is not current, the MSR will be
  511. * updated when the task is scheduled in.
  512. */
  513. update_task_closid_rmid(tsk);
  514. return 0;
  515. }
  516. static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
  517. {
  518. return (rdt_alloc_capable &&
  519. (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
  520. }
  521. static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
  522. {
  523. return (rdt_mon_capable &&
  524. (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
  525. }
  526. /**
  527. * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
  528. * @r: Resource group
  529. *
  530. * Return: 1 if tasks have been assigned to @r, 0 otherwise
  531. */
  532. int rdtgroup_tasks_assigned(struct rdtgroup *r)
  533. {
  534. struct task_struct *p, *t;
  535. int ret = 0;
  536. lockdep_assert_held(&rdtgroup_mutex);
  537. rcu_read_lock();
  538. for_each_process_thread(p, t) {
  539. if (is_closid_match(t, r) || is_rmid_match(t, r)) {
  540. ret = 1;
  541. break;
  542. }
  543. }
  544. rcu_read_unlock();
  545. return ret;
  546. }
  547. static int rdtgroup_task_write_permission(struct task_struct *task,
  548. struct kernfs_open_file *of)
  549. {
  550. const struct cred *tcred = get_task_cred(task);
  551. const struct cred *cred = current_cred();
  552. int ret = 0;
  553. /*
  554. * Even if we're attaching all tasks in the thread group, we only
  555. * need to check permissions on one of them.
  556. */
  557. if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
  558. !uid_eq(cred->euid, tcred->uid) &&
  559. !uid_eq(cred->euid, tcred->suid)) {
  560. rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
  561. ret = -EPERM;
  562. }
  563. put_cred(tcred);
  564. return ret;
  565. }
  566. static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
  567. struct kernfs_open_file *of)
  568. {
  569. struct task_struct *tsk;
  570. int ret;
  571. rcu_read_lock();
  572. if (pid) {
  573. tsk = find_task_by_vpid(pid);
  574. if (!tsk) {
  575. rcu_read_unlock();
  576. rdt_last_cmd_printf("No task %d\n", pid);
  577. return -ESRCH;
  578. }
  579. } else {
  580. tsk = current;
  581. }
  582. get_task_struct(tsk);
  583. rcu_read_unlock();
  584. ret = rdtgroup_task_write_permission(tsk, of);
  585. if (!ret)
  586. ret = __rdtgroup_move_task(tsk, rdtgrp);
  587. put_task_struct(tsk);
  588. return ret;
  589. }
  590. static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
  591. char *buf, size_t nbytes, loff_t off)
  592. {
  593. struct rdtgroup *rdtgrp;
  594. int ret = 0;
  595. pid_t pid;
  596. if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
  597. return -EINVAL;
  598. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  599. if (!rdtgrp) {
  600. rdtgroup_kn_unlock(of->kn);
  601. return -ENOENT;
  602. }
  603. rdt_last_cmd_clear();
  604. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
  605. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  606. ret = -EINVAL;
  607. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  608. goto unlock;
  609. }
  610. ret = rdtgroup_move_task(pid, rdtgrp, of);
  611. unlock:
  612. rdtgroup_kn_unlock(of->kn);
  613. return ret ?: nbytes;
  614. }
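/*
 * Usage sketch (the path and PID are assumed values): writing a PID to a
 * group's "tasks" file, e.g. "echo 1234 > /sys/fs/resctrl/grp0/tasks",
 * reaches this handler and moves that single task into the group;
 * writing 0 moves the writing task itself.
 */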
  615. static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
  616. {
  617. struct task_struct *p, *t;
  618. pid_t pid;
  619. rcu_read_lock();
  620. for_each_process_thread(p, t) {
  621. if (is_closid_match(t, r) || is_rmid_match(t, r)) {
  622. pid = task_pid_vnr(t);
  623. if (pid)
  624. seq_printf(s, "%d\n", pid);
  625. }
  626. }
  627. rcu_read_unlock();
  628. }
  629. static int rdtgroup_tasks_show(struct kernfs_open_file *of,
  630. struct seq_file *s, void *v)
  631. {
  632. struct rdtgroup *rdtgrp;
  633. int ret = 0;
  634. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  635. if (rdtgrp)
  636. show_rdt_tasks(rdtgrp, s);
  637. else
  638. ret = -ENOENT;
  639. rdtgroup_kn_unlock(of->kn);
  640. return ret;
  641. }
  642. #ifdef CONFIG_PROC_CPU_RESCTRL
  643. /*
  644. * A task can only be part of one resctrl control group and of one monitor
  645. * group which is associated with that control group.
  646. *
  647. * 1) res:
  648. * mon:
  649. *
  650. * resctrl is not available.
  651. *
  652. * 2) res:/
  653. * mon:
  654. *
  655. * Task is part of the root resctrl control group, and it is not associated
  656. * to any monitor group.
  657. *
  658. * 3) res:/
  659. * mon:mon0
  660. *
  661. * Task is part of the root resctrl control group and monitor group mon0.
  662. *
  663. * 4) res:group0
  664. * mon:
  665. *
  666. * Task is part of resctrl control group group0, and it is not associated
  667. * to any monitor group.
  668. *
  669. * 5) res:group0
  670. * mon:mon1
  671. *
  672. * Task is part of resctrl control group group0 and monitor group mon1.
  673. */
  674. int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
  675. struct pid *pid, struct task_struct *tsk)
  676. {
  677. struct rdtgroup *rdtg;
  678. int ret = 0;
  679. mutex_lock(&rdtgroup_mutex);
  680. /* Return empty if resctrl has not been mounted. */
  681. if (!static_branch_unlikely(&rdt_enable_key)) {
  682. seq_puts(s, "res:\nmon:\n");
  683. goto unlock;
  684. }
  685. list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
  686. struct rdtgroup *crg;
  687. /*
  688. * Task information is only relevant for shareable
  689. * and exclusive groups.
  690. */
  691. if (rdtg->mode != RDT_MODE_SHAREABLE &&
  692. rdtg->mode != RDT_MODE_EXCLUSIVE)
  693. continue;
  694. if (rdtg->closid != tsk->closid)
  695. continue;
  696. seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
  697. rdtg->kn->name);
  698. seq_puts(s, "mon:");
  699. list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
  700. mon.crdtgrp_list) {
  701. if (tsk->rmid != crg->mon.rmid)
  702. continue;
  703. seq_printf(s, "%s", crg->kn->name);
  704. break;
  705. }
  706. seq_putc(s, '\n');
  707. goto unlock;
  708. }
  709. /*
  710. * The above search should succeed. Otherwise return
  711. * with an error.
  712. */
  713. ret = -ENOENT;
  714. unlock:
  715. mutex_unlock(&rdtgroup_mutex);
  716. return ret;
  717. }
  718. #endif
  719. static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
  720. struct seq_file *seq, void *v)
  721. {
  722. int len;
  723. mutex_lock(&rdtgroup_mutex);
  724. len = seq_buf_used(&last_cmd_status);
  725. if (len)
  726. seq_printf(seq, "%.*s", len, last_cmd_status_buf);
  727. else
  728. seq_puts(seq, "ok\n");
  729. mutex_unlock(&rdtgroup_mutex);
  730. return 0;
  731. }
  732. static int rdt_num_closids_show(struct kernfs_open_file *of,
  733. struct seq_file *seq, void *v)
  734. {
  735. struct resctrl_schema *s = of->kn->parent->priv;
  736. seq_printf(seq, "%u\n", s->num_closid);
  737. return 0;
  738. }
  739. static int rdt_default_ctrl_show(struct kernfs_open_file *of,
  740. struct seq_file *seq, void *v)
  741. {
  742. struct resctrl_schema *s = of->kn->parent->priv;
  743. struct rdt_resource *r = s->res;
  744. seq_printf(seq, "%x\n", r->default_ctrl);
  745. return 0;
  746. }
  747. static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
  748. struct seq_file *seq, void *v)
  749. {
  750. struct resctrl_schema *s = of->kn->parent->priv;
  751. struct rdt_resource *r = s->res;
  752. seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
  753. return 0;
  754. }
  755. static int rdt_shareable_bits_show(struct kernfs_open_file *of,
  756. struct seq_file *seq, void *v)
  757. {
  758. struct resctrl_schema *s = of->kn->parent->priv;
  759. struct rdt_resource *r = s->res;
  760. seq_printf(seq, "%x\n", r->cache.shareable_bits);
  761. return 0;
  762. }
  763. /**
  764. * rdt_bit_usage_show - Display current usage of resources
  765. *
  766. * A domain is a shared resource that can now be allocated differently. Here
  767. * we display the current regions of the domain as an annotated bitmask.
  768. * For each domain of this resource its allocation bitmask
  769. * is annotated as below to indicate the current usage of the corresponding bit:
  770. * 0 - currently unused
  771. * X - currently available for sharing and used by software and hardware
  772. * H - currently used by hardware only but available for software use
  773. * S - currently used and shareable by software only
  774. * E - currently used exclusively by one resource group
  775. * P - currently pseudo-locked by one resource group
  776. */
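/*
 * Example "bit_usage" line for one domain (all values assumed): with a
 * 12 bit CBM, hardware-shareable bits 0xf00, a shareable group using
 * 0xff0 and an exclusive group using 0x00f, the domain is printed as
 * "0=XXXXSSSSEEEE" (bit 11 leftmost, bit 0 rightmost).
 */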
  777. static int rdt_bit_usage_show(struct kernfs_open_file *of,
  778. struct seq_file *seq, void *v)
  779. {
  780. struct resctrl_schema *s = of->kn->parent->priv;
  781. /*
  782. * Use unsigned long even though only 32 bits are used to ensure
  783. * test_bit() is used safely.
  784. */
  785. unsigned long sw_shareable = 0, hw_shareable = 0;
  786. unsigned long exclusive = 0, pseudo_locked = 0;
  787. struct rdt_resource *r = s->res;
  788. struct rdt_domain *dom;
  789. int i, hwb, swb, excl, psl;
  790. enum rdtgrp_mode mode;
  791. bool sep = false;
  792. u32 ctrl_val;
  793. mutex_lock(&rdtgroup_mutex);
  794. hw_shareable = r->cache.shareable_bits;
  795. list_for_each_entry(dom, &r->domains, list) {
  796. if (sep)
  797. seq_putc(seq, ';');
  798. sw_shareable = 0;
  799. exclusive = 0;
  800. seq_printf(seq, "%d=", dom->id);
  801. for (i = 0; i < closids_supported(); i++) {
  802. if (!closid_allocated(i))
  803. continue;
  804. ctrl_val = resctrl_arch_get_config(r, dom, i,
  805. s->conf_type);
  806. mode = rdtgroup_mode_by_closid(i);
  807. switch (mode) {
  808. case RDT_MODE_SHAREABLE:
  809. sw_shareable |= ctrl_val;
  810. break;
  811. case RDT_MODE_EXCLUSIVE:
  812. exclusive |= ctrl_val;
  813. break;
  814. case RDT_MODE_PSEUDO_LOCKSETUP:
  815. /*
  816. * RDT_MODE_PSEUDO_LOCKSETUP is possible
  817. * here but not included since the CBM
  818. * associated with this CLOSID in this mode
  819. * is not initialized and no task or cpu can be
  820. * assigned this CLOSID.
  821. */
  822. break;
  823. case RDT_MODE_PSEUDO_LOCKED:
  824. case RDT_NUM_MODES:
  825. WARN(1,
  826. "invalid mode for closid %d\n", i);
  827. break;
  828. }
  829. }
  830. for (i = r->cache.cbm_len - 1; i >= 0; i--) {
  831. pseudo_locked = dom->plr ? dom->plr->cbm : 0;
  832. hwb = test_bit(i, &hw_shareable);
  833. swb = test_bit(i, &sw_shareable);
  834. excl = test_bit(i, &exclusive);
  835. psl = test_bit(i, &pseudo_locked);
  836. if (hwb && swb)
  837. seq_putc(seq, 'X');
  838. else if (hwb && !swb)
  839. seq_putc(seq, 'H');
  840. else if (!hwb && swb)
  841. seq_putc(seq, 'S');
  842. else if (excl)
  843. seq_putc(seq, 'E');
  844. else if (psl)
  845. seq_putc(seq, 'P');
  846. else /* Unused bits remain */
  847. seq_putc(seq, '0');
  848. }
  849. sep = true;
  850. }
  851. seq_putc(seq, '\n');
  852. mutex_unlock(&rdtgroup_mutex);
  853. return 0;
  854. }
  855. static int rdt_min_bw_show(struct kernfs_open_file *of,
  856. struct seq_file *seq, void *v)
  857. {
  858. struct resctrl_schema *s = of->kn->parent->priv;
  859. struct rdt_resource *r = s->res;
  860. seq_printf(seq, "%u\n", r->membw.min_bw);
  861. return 0;
  862. }
  863. static int rdt_num_rmids_show(struct kernfs_open_file *of,
  864. struct seq_file *seq, void *v)
  865. {
  866. struct rdt_resource *r = of->kn->parent->priv;
  867. seq_printf(seq, "%d\n", r->num_rmid);
  868. return 0;
  869. }
  870. static int rdt_mon_features_show(struct kernfs_open_file *of,
  871. struct seq_file *seq, void *v)
  872. {
  873. struct rdt_resource *r = of->kn->parent->priv;
  874. struct mon_evt *mevt;
  875. list_for_each_entry(mevt, &r->evt_list, list)
  876. seq_printf(seq, "%s\n", mevt->name);
  877. return 0;
  878. }
  879. static int rdt_bw_gran_show(struct kernfs_open_file *of,
  880. struct seq_file *seq, void *v)
  881. {
  882. struct resctrl_schema *s = of->kn->parent->priv;
  883. struct rdt_resource *r = s->res;
  884. seq_printf(seq, "%u\n", r->membw.bw_gran);
  885. return 0;
  886. }
  887. static int rdt_delay_linear_show(struct kernfs_open_file *of,
  888. struct seq_file *seq, void *v)
  889. {
  890. struct resctrl_schema *s = of->kn->parent->priv;
  891. struct rdt_resource *r = s->res;
  892. seq_printf(seq, "%u\n", r->membw.delay_linear);
  893. return 0;
  894. }
  895. static int max_threshold_occ_show(struct kernfs_open_file *of,
  896. struct seq_file *seq, void *v)
  897. {
  898. seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
  899. return 0;
  900. }
  901. static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
  902. struct seq_file *seq, void *v)
  903. {
  904. struct resctrl_schema *s = of->kn->parent->priv;
  905. struct rdt_resource *r = s->res;
  906. if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
  907. seq_puts(seq, "per-thread\n");
  908. else
  909. seq_puts(seq, "max\n");
  910. return 0;
  911. }
  912. static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
  913. char *buf, size_t nbytes, loff_t off)
  914. {
  915. unsigned int bytes;
  916. int ret;
  917. ret = kstrtouint(buf, 0, &bytes);
  918. if (ret)
  919. return ret;
  920. if (bytes > resctrl_rmid_realloc_limit)
  921. return -EINVAL;
  922. resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
  923. return nbytes;
  924. }
  925. /*
  926. * rdtgroup_mode_show - Display mode of this resource group
  927. */
  928. static int rdtgroup_mode_show(struct kernfs_open_file *of,
  929. struct seq_file *s, void *v)
  930. {
  931. struct rdtgroup *rdtgrp;
  932. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  933. if (!rdtgrp) {
  934. rdtgroup_kn_unlock(of->kn);
  935. return -ENOENT;
  936. }
  937. seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
  938. rdtgroup_kn_unlock(of->kn);
  939. return 0;
  940. }
  941. static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
  942. {
  943. switch (my_type) {
  944. case CDP_CODE:
  945. return CDP_DATA;
  946. case CDP_DATA:
  947. return CDP_CODE;
  948. default:
  949. case CDP_NONE:
  950. return CDP_NONE;
  951. }
  952. }
  953. /**
  954. * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
  955. * @r: Resource to which domain instance @d belongs.
  956. * @d: The domain instance for which @closid is being tested.
  957. * @cbm: Capacity bitmask being tested.
  958. * @closid: Intended closid for @cbm.
  959. * @exclusive: Only check if overlaps with exclusive resource groups
  960. *
  961. * Checks if provided @cbm intended to be used for @closid on domain
  962. * @d overlaps with any other closids or other hardware usage associated
  963. * with this domain. If @exclusive is true then only overlaps with
  964. * resource groups in exclusive mode will be considered. If @exclusive
  965. * is false then overlaps with any resource group or hardware entities
  966. * will be considered.
  967. *
  968. * @cbm is unsigned long, even if only 32 bits are used, to make the
  969. * bitmap functions work correctly.
  970. *
  971. * Return: false if CBM does not overlap, true if it does.
  972. */
  973. static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
  974. unsigned long cbm, int closid,
  975. enum resctrl_conf_type type, bool exclusive)
  976. {
  977. enum rdtgrp_mode mode;
  978. unsigned long ctrl_b;
  979. int i;
  980. /* Check for any overlap with regions used by hardware directly */
  981. if (!exclusive) {
  982. ctrl_b = r->cache.shareable_bits;
  983. if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
  984. return true;
  985. }
  986. /* Check for overlap with other resource groups */
  987. for (i = 0; i < closids_supported(); i++) {
  988. ctrl_b = resctrl_arch_get_config(r, d, i, type);
  989. mode = rdtgroup_mode_by_closid(i);
  990. if (closid_allocated(i) && i != closid &&
  991. mode != RDT_MODE_PSEUDO_LOCKSETUP) {
  992. if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
  993. if (exclusive) {
  994. if (mode == RDT_MODE_EXCLUSIVE)
  995. return true;
  996. continue;
  997. }
  998. return true;
  999. }
  1000. }
  1001. }
  1002. return false;
  1003. }
  1004. /**
  1005. * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
  1006. * @s: Schema for the resource to which domain instance @d belongs.
  1007. * @d: The domain instance for which @closid is being tested.
  1008. * @cbm: Capacity bitmask being tested.
  1009. * @closid: Intended closid for @cbm.
  1010. * @exclusive: Only check if overlaps with exclusive resource groups
  1011. *
  1012. * Resources that can be allocated using a CBM can use the CBM to control
  1013. * the overlap of these allocations. rdtgroup_cbm_overlaps() is the test
  1014. * for overlap. Overlap test is not limited to the specific resource for
  1015. * which the CBM is intended though - when dealing with CDP resources that
  1016. * share the underlying hardware the overlap check should be performed on
  1017. * the CDP resource sharing the hardware also.
  1018. *
  1019. * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
  1020. * overlap test.
  1021. *
  1022. * Return: true if CBM overlap detected, false if there is no overlap
  1023. */
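/*
 * Example of the CDP case handled below (resource names assumed): with
 * CDP enabled on L3, a CBM checked for an L3CODE schema is also tested
 * against every L3DATA configuration, since code and data allocations of
 * the same cache occupy the same physical ways.
 */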
  1024. bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
  1025. unsigned long cbm, int closid, bool exclusive)
  1026. {
  1027. enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
  1028. struct rdt_resource *r = s->res;
  1029. if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
  1030. exclusive))
  1031. return true;
  1032. if (!resctrl_arch_get_cdp_enabled(r->rid))
  1033. return false;
  1034. return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
  1035. }
  1036. /**
  1037. * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
  1038. *
  1039. * An exclusive resource group implies that there should be no sharing of
  1040. * its allocated resources. At the time this group is considered to be
  1041. * exclusive this test can determine if its current schemata supports this
  1042. * setting by testing for overlap with all other resource groups.
  1043. *
  1044. * Return: true if resource group can be exclusive, false if there is overlap
  1045. * with allocations of other resource groups and thus this resource group
  1046. * cannot be exclusive.
  1047. */
  1048. static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
  1049. {
  1050. int closid = rdtgrp->closid;
  1051. struct resctrl_schema *s;
  1052. struct rdt_resource *r;
  1053. bool has_cache = false;
  1054. struct rdt_domain *d;
  1055. u32 ctrl;
  1056. list_for_each_entry(s, &resctrl_schema_all, list) {
  1057. r = s->res;
  1058. if (r->rid == RDT_RESOURCE_MBA)
  1059. continue;
  1060. has_cache = true;
  1061. list_for_each_entry(d, &r->domains, list) {
  1062. ctrl = resctrl_arch_get_config(r, d, closid,
  1063. s->conf_type);
  1064. if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
  1065. rdt_last_cmd_puts("Schemata overlaps\n");
  1066. return false;
  1067. }
  1068. }
  1069. }
  1070. if (!has_cache) {
  1071. rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
  1072. return false;
  1073. }
  1074. return true;
  1075. }
  1076. /**
  1077. * rdtgroup_mode_write - Modify the resource group's mode
  1078. *
  1079. */
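/*
 * Values accepted by the "mode" file handled below are "shareable",
 * "exclusive" and "pseudo-locksetup". "pseudo-locked" cannot be requested
 * directly; a group enters that mode by writing its schemata while in
 * pseudo-locksetup mode.
 */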
  1080. static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
  1081. char *buf, size_t nbytes, loff_t off)
  1082. {
  1083. struct rdtgroup *rdtgrp;
  1084. enum rdtgrp_mode mode;
  1085. int ret = 0;
  1086. /* Valid input requires a trailing newline */
  1087. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1088. return -EINVAL;
  1089. buf[nbytes - 1] = '\0';
  1090. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1091. if (!rdtgrp) {
  1092. rdtgroup_kn_unlock(of->kn);
  1093. return -ENOENT;
  1094. }
  1095. rdt_last_cmd_clear();
  1096. mode = rdtgrp->mode;
  1097. if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
  1098. (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
  1099. (!strcmp(buf, "pseudo-locksetup") &&
  1100. mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
  1101. (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
  1102. goto out;
  1103. if (mode == RDT_MODE_PSEUDO_LOCKED) {
  1104. rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
  1105. ret = -EINVAL;
  1106. goto out;
  1107. }
  1108. if (!strcmp(buf, "shareable")) {
  1109. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  1110. ret = rdtgroup_locksetup_exit(rdtgrp);
  1111. if (ret)
  1112. goto out;
  1113. }
  1114. rdtgrp->mode = RDT_MODE_SHAREABLE;
  1115. } else if (!strcmp(buf, "exclusive")) {
  1116. if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
  1117. ret = -EINVAL;
  1118. goto out;
  1119. }
  1120. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  1121. ret = rdtgroup_locksetup_exit(rdtgrp);
  1122. if (ret)
  1123. goto out;
  1124. }
  1125. rdtgrp->mode = RDT_MODE_EXCLUSIVE;
  1126. } else if (!strcmp(buf, "pseudo-locksetup")) {
  1127. ret = rdtgroup_locksetup_enter(rdtgrp);
  1128. if (ret)
  1129. goto out;
  1130. rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
  1131. } else {
  1132. rdt_last_cmd_puts("Unknown or unsupported mode\n");
  1133. ret = -EINVAL;
  1134. }
  1135. out:
  1136. rdtgroup_kn_unlock(of->kn);
  1137. return ret ?: nbytes;
  1138. }
  1139. /**
  1140. * rdtgroup_cbm_to_size - Translate CBM to size in bytes
  1141. * @r: RDT resource to which @d belongs.
  1142. * @d: RDT domain instance.
  1143. * @cbm: bitmask for which the size should be computed.
  1144. *
  1145. * The bitmask provided associated with the RDT domain instance @d will be
  1146. * translated into how many bytes it represents. The size in bytes is
  1147. * computed by first dividing the total cache size by the CBM length to
  1148. * determine how many bytes each bit in the bitmask represents. The result
  1149. * is multiplied with the number of bits set in the bitmask.
  1150. *
  1151. * @cbm is unsigned long, even if only 32 bits are used, to make the
  1152. * bitmap functions work correctly.
  1153. */
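/*
 * Worked example (cache size and CBM are assumed values): for a 20 MB
 * cache with a 20 bit CBM, each bit represents 20971520 / 20 = 1048576
 * bytes; a CBM of 0x3f has six bits set, so the reported size is
 * 6 * 1048576 = 6291456 bytes.
 */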
  1154. unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
  1155. struct rdt_domain *d, unsigned long cbm)
  1156. {
  1157. struct cpu_cacheinfo *ci;
  1158. unsigned int size = 0;
  1159. int num_b, i;
  1160. num_b = bitmap_weight(&cbm, r->cache.cbm_len);
  1161. ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
  1162. for (i = 0; i < ci->num_leaves; i++) {
  1163. if (ci->info_list[i].level == r->cache_level) {
  1164. size = ci->info_list[i].size / r->cache.cbm_len * num_b;
  1165. break;
  1166. }
  1167. }
  1168. return size;
  1169. }
  1170. /**
  1171. * rdtgroup_size_show - Display size in bytes of allocated regions
  1172. *
  1173. * The "size" file mirrors the layout of the "schemata" file, printing the
  1174. * size in bytes of each region instead of the capacity bitmask.
  1175. *
  1176. */
  1177. static int rdtgroup_size_show(struct kernfs_open_file *of,
  1178. struct seq_file *s, void *v)
  1179. {
  1180. struct resctrl_schema *schema;
  1181. enum resctrl_conf_type type;
  1182. struct rdtgroup *rdtgrp;
  1183. struct rdt_resource *r;
  1184. struct rdt_domain *d;
  1185. unsigned int size;
  1186. int ret = 0;
  1187. u32 closid;
  1188. bool sep;
  1189. u32 ctrl;
  1190. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1191. if (!rdtgrp) {
  1192. rdtgroup_kn_unlock(of->kn);
  1193. return -ENOENT;
  1194. }
  1195. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  1196. if (!rdtgrp->plr->d) {
  1197. rdt_last_cmd_clear();
  1198. rdt_last_cmd_puts("Cache domain offline\n");
  1199. ret = -ENODEV;
  1200. } else {
  1201. seq_printf(s, "%*s:", max_name_width,
  1202. rdtgrp->plr->s->name);
  1203. size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
  1204. rdtgrp->plr->d,
  1205. rdtgrp->plr->cbm);
  1206. seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
  1207. }
  1208. goto out;
  1209. }
  1210. closid = rdtgrp->closid;
  1211. list_for_each_entry(schema, &resctrl_schema_all, list) {
  1212. r = schema->res;
  1213. type = schema->conf_type;
  1214. sep = false;
  1215. seq_printf(s, "%*s:", max_name_width, schema->name);
  1216. list_for_each_entry(d, &r->domains, list) {
  1217. if (sep)
  1218. seq_putc(s, ';');
  1219. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  1220. size = 0;
  1221. } else {
  1222. if (is_mba_sc(r))
  1223. ctrl = d->mbps_val[closid];
  1224. else
  1225. ctrl = resctrl_arch_get_config(r, d,
  1226. closid,
  1227. type);
  1228. if (r->rid == RDT_RESOURCE_MBA)
  1229. size = ctrl;
  1230. else
  1231. size = rdtgroup_cbm_to_size(r, d, ctrl);
  1232. }
  1233. seq_printf(s, "%d=%u", d->id, size);
  1234. sep = true;
  1235. }
  1236. seq_putc(s, '\n');
  1237. }
  1238. out:
  1239. rdtgroup_kn_unlock(of->kn);
  1240. return ret;
  1241. }
  1242. /* rdtgroup information files for one cache resource. */
  1243. static struct rftype res_common_files[] = {
  1244. {
  1245. .name = "last_cmd_status",
  1246. .mode = 0444,
  1247. .kf_ops = &rdtgroup_kf_single_ops,
  1248. .seq_show = rdt_last_cmd_status_show,
  1249. .fflags = RF_TOP_INFO,
  1250. },
  1251. {
  1252. .name = "num_closids",
  1253. .mode = 0444,
  1254. .kf_ops = &rdtgroup_kf_single_ops,
  1255. .seq_show = rdt_num_closids_show,
  1256. .fflags = RF_CTRL_INFO,
  1257. },
  1258. {
  1259. .name = "mon_features",
  1260. .mode = 0444,
  1261. .kf_ops = &rdtgroup_kf_single_ops,
  1262. .seq_show = rdt_mon_features_show,
  1263. .fflags = RF_MON_INFO,
  1264. },
  1265. {
  1266. .name = "num_rmids",
  1267. .mode = 0444,
  1268. .kf_ops = &rdtgroup_kf_single_ops,
  1269. .seq_show = rdt_num_rmids_show,
  1270. .fflags = RF_MON_INFO,
  1271. },
  1272. {
  1273. .name = "cbm_mask",
  1274. .mode = 0444,
  1275. .kf_ops = &rdtgroup_kf_single_ops,
  1276. .seq_show = rdt_default_ctrl_show,
  1277. .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
  1278. },
  1279. {
  1280. .name = "min_cbm_bits",
  1281. .mode = 0444,
  1282. .kf_ops = &rdtgroup_kf_single_ops,
  1283. .seq_show = rdt_min_cbm_bits_show,
  1284. .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
  1285. },
  1286. {
  1287. .name = "shareable_bits",
  1288. .mode = 0444,
  1289. .kf_ops = &rdtgroup_kf_single_ops,
  1290. .seq_show = rdt_shareable_bits_show,
  1291. .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
  1292. },
  1293. {
  1294. .name = "bit_usage",
  1295. .mode = 0444,
  1296. .kf_ops = &rdtgroup_kf_single_ops,
  1297. .seq_show = rdt_bit_usage_show,
  1298. .fflags = RF_CTRL_INFO | RFTYPE_RES_CACHE,
  1299. },
  1300. {
  1301. .name = "min_bandwidth",
  1302. .mode = 0444,
  1303. .kf_ops = &rdtgroup_kf_single_ops,
  1304. .seq_show = rdt_min_bw_show,
  1305. .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
  1306. },
  1307. {
  1308. .name = "bandwidth_gran",
  1309. .mode = 0444,
  1310. .kf_ops = &rdtgroup_kf_single_ops,
  1311. .seq_show = rdt_bw_gran_show,
  1312. .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
  1313. },
  1314. {
  1315. .name = "delay_linear",
  1316. .mode = 0444,
  1317. .kf_ops = &rdtgroup_kf_single_ops,
  1318. .seq_show = rdt_delay_linear_show,
  1319. .fflags = RF_CTRL_INFO | RFTYPE_RES_MB,
  1320. },
1321. /*
1322. * Whether the thread_throttle_mode file is provided is platform
1323. * specific. Defer "fflags" initialization to platform discovery
1324. * (see thread_throttle_mode_init()).
1325. */
  1326. {
  1327. .name = "thread_throttle_mode",
  1328. .mode = 0444,
  1329. .kf_ops = &rdtgroup_kf_single_ops,
  1330. .seq_show = rdt_thread_throttle_mode_show,
  1331. },
  1332. {
  1333. .name = "max_threshold_occupancy",
  1334. .mode = 0644,
  1335. .kf_ops = &rdtgroup_kf_single_ops,
  1336. .write = max_threshold_occ_write,
  1337. .seq_show = max_threshold_occ_show,
  1338. .fflags = RF_MON_INFO | RFTYPE_RES_CACHE,
  1339. },
  1340. {
  1341. .name = "cpus",
  1342. .mode = 0644,
  1343. .kf_ops = &rdtgroup_kf_single_ops,
  1344. .write = rdtgroup_cpus_write,
  1345. .seq_show = rdtgroup_cpus_show,
  1346. .fflags = RFTYPE_BASE,
  1347. },
  1348. {
  1349. .name = "cpus_list",
  1350. .mode = 0644,
  1351. .kf_ops = &rdtgroup_kf_single_ops,
  1352. .write = rdtgroup_cpus_write,
  1353. .seq_show = rdtgroup_cpus_show,
  1354. .flags = RFTYPE_FLAGS_CPUS_LIST,
  1355. .fflags = RFTYPE_BASE,
  1356. },
  1357. {
  1358. .name = "tasks",
  1359. .mode = 0644,
  1360. .kf_ops = &rdtgroup_kf_single_ops,
  1361. .write = rdtgroup_tasks_write,
  1362. .seq_show = rdtgroup_tasks_show,
  1363. .fflags = RFTYPE_BASE,
  1364. },
  1365. {
  1366. .name = "schemata",
  1367. .mode = 0644,
  1368. .kf_ops = &rdtgroup_kf_single_ops,
  1369. .write = rdtgroup_schemata_write,
  1370. .seq_show = rdtgroup_schemata_show,
  1371. .fflags = RF_CTRL_BASE,
  1372. },
  1373. {
  1374. .name = "mode",
  1375. .mode = 0644,
  1376. .kf_ops = &rdtgroup_kf_single_ops,
  1377. .write = rdtgroup_mode_write,
  1378. .seq_show = rdtgroup_mode_show,
  1379. .fflags = RF_CTRL_BASE,
  1380. },
  1381. {
  1382. .name = "size",
  1383. .mode = 0444,
  1384. .kf_ops = &rdtgroup_kf_single_ops,
  1385. .seq_show = rdtgroup_size_show,
  1386. .fflags = RF_CTRL_BASE,
  1387. },
  1388. };
  1389. static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
  1390. {
  1391. struct rftype *rfts, *rft;
  1392. int ret, len;
  1393. rfts = res_common_files;
  1394. len = ARRAY_SIZE(res_common_files);
  1395. lockdep_assert_held(&rdtgroup_mutex);
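/*
 * Only files whose every required flag bit is present in @fflags are
 * created. For example (based on the table above), "cbm_mask" is tagged
 * RF_CTRL_INFO | RFTYPE_RES_CACHE and is therefore only added to info
 * directories whose fflags include both bits, i.e. cache resources.
 */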
  1396. for (rft = rfts; rft < rfts + len; rft++) {
  1397. if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
  1398. ret = rdtgroup_add_file(kn, rft);
  1399. if (ret)
  1400. goto error;
  1401. }
  1402. }
  1403. return 0;
  1404. error:
  1405. pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
  1406. while (--rft >= rfts) {
  1407. if ((fflags & rft->fflags) == rft->fflags)
  1408. kernfs_remove_by_name(kn, rft->name);
  1409. }
  1410. return ret;
  1411. }
  1412. static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
  1413. {
  1414. struct rftype *rfts, *rft;
  1415. int len;
  1416. rfts = res_common_files;
  1417. len = ARRAY_SIZE(res_common_files);
  1418. for (rft = rfts; rft < rfts + len; rft++) {
  1419. if (!strcmp(rft->name, name))
  1420. return rft;
  1421. }
  1422. return NULL;
  1423. }
  1424. void __init thread_throttle_mode_init(void)
  1425. {
  1426. struct rftype *rft;
  1427. rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
  1428. if (!rft)
  1429. return;
  1430. rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
  1431. }
  1432. /**
  1433. * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
  1434. * @r: The resource group with which the file is associated.
  1435. * @name: Name of the file
  1436. *
1437. * The permissions of the named resctrl file, directory, or link are
1438. * modified to not allow read, write, or execute by any user.
  1439. *
  1440. * WARNING: This function is intended to communicate to the user that the
  1441. * resctrl file has been locked down - that it is not relevant to the
  1442. * particular state the system finds itself in. It should not be relied
  1443. * on to protect from user access because after the file's permissions
  1444. * are restricted the user can still change the permissions using chmod
  1445. * from the command line.
  1446. *
  1447. * Return: 0 on success, <0 on failure.
  1448. */
  1449. int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
  1450. {
  1451. struct iattr iattr = {.ia_valid = ATTR_MODE,};
  1452. struct kernfs_node *kn;
  1453. int ret = 0;
  1454. kn = kernfs_find_and_get_ns(r->kn, name, NULL);
  1455. if (!kn)
  1456. return -ENOENT;
  1457. switch (kernfs_type(kn)) {
  1458. case KERNFS_DIR:
  1459. iattr.ia_mode = S_IFDIR;
  1460. break;
  1461. case KERNFS_FILE:
  1462. iattr.ia_mode = S_IFREG;
  1463. break;
  1464. case KERNFS_LINK:
  1465. iattr.ia_mode = S_IFLNK;
  1466. break;
  1467. }
  1468. ret = kernfs_setattr(kn, &iattr);
  1469. kernfs_put(kn);
  1470. return ret;
  1471. }
  1472. /**
  1473. * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
  1474. * @r: The resource group with which the file is associated.
  1475. * @name: Name of the file
  1476. * @mask: Mask of permissions that should be restored
  1477. *
  1478. * Restore the permissions of the named file. If @name is a directory the
  1479. * permissions of its parent will be used.
  1480. *
  1481. * Return: 0 on success, <0 on failure.
  1482. */
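/*
 * For example (hypothetical call), rdtgroup_kn_mode_restore(rdtgrp,
 * "tasks", 0777) puts back whatever permissions res_common_files declares
 * for "tasks", limited by the 0777 mask.
 */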
  1483. int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
  1484. umode_t mask)
  1485. {
  1486. struct iattr iattr = {.ia_valid = ATTR_MODE,};
  1487. struct kernfs_node *kn, *parent;
  1488. struct rftype *rfts, *rft;
  1489. int ret, len;
  1490. rfts = res_common_files;
  1491. len = ARRAY_SIZE(res_common_files);
  1492. for (rft = rfts; rft < rfts + len; rft++) {
  1493. if (!strcmp(rft->name, name))
  1494. iattr.ia_mode = rft->mode & mask;
  1495. }
  1496. kn = kernfs_find_and_get_ns(r->kn, name, NULL);
  1497. if (!kn)
  1498. return -ENOENT;
  1499. switch (kernfs_type(kn)) {
  1500. case KERNFS_DIR:
  1501. parent = kernfs_get_parent(kn);
  1502. if (parent) {
  1503. iattr.ia_mode |= parent->mode;
  1504. kernfs_put(parent);
  1505. }
  1506. iattr.ia_mode |= S_IFDIR;
  1507. break;
  1508. case KERNFS_FILE:
  1509. iattr.ia_mode |= S_IFREG;
  1510. break;
  1511. case KERNFS_LINK:
  1512. iattr.ia_mode |= S_IFLNK;
  1513. break;
  1514. }
  1515. ret = kernfs_setattr(kn, &iattr);
  1516. kernfs_put(kn);
  1517. return ret;
  1518. }
  1519. static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
  1520. unsigned long fflags)
  1521. {
  1522. struct kernfs_node *kn_subdir;
  1523. int ret;
  1524. kn_subdir = kernfs_create_dir(kn_info, name,
  1525. kn_info->mode, priv);
  1526. if (IS_ERR(kn_subdir))
  1527. return PTR_ERR(kn_subdir);
  1528. ret = rdtgroup_kn_set_ugid(kn_subdir);
  1529. if (ret)
  1530. return ret;
  1531. ret = rdtgroup_add_files(kn_subdir, fflags);
  1532. if (!ret)
  1533. kernfs_activate(kn_subdir);
  1534. return ret;
  1535. }
  1536. static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
  1537. {
  1538. struct resctrl_schema *s;
  1539. struct rdt_resource *r;
  1540. unsigned long fflags;
  1541. char name[32];
  1542. int ret;
  1543. /* create the directory */
  1544. kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
  1545. if (IS_ERR(kn_info))
  1546. return PTR_ERR(kn_info);
  1547. ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
  1548. if (ret)
  1549. goto out_destroy;
1550. /* Loop over enabled controls; these are all alloc_capable. */
  1551. list_for_each_entry(s, &resctrl_schema_all, list) {
  1552. r = s->res;
  1553. fflags = r->fflags | RF_CTRL_INFO;
  1554. ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
  1555. if (ret)
  1556. goto out_destroy;
  1557. }
  1558. for_each_mon_capable_rdt_resource(r) {
  1559. fflags = r->fflags | RF_MON_INFO;
  1560. sprintf(name, "%s_MON", r->name);
  1561. ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
  1562. if (ret)
  1563. goto out_destroy;
  1564. }
  1565. ret = rdtgroup_kn_set_ugid(kn_info);
  1566. if (ret)
  1567. goto out_destroy;
  1568. kernfs_activate(kn_info);
  1569. return 0;
  1570. out_destroy:
  1571. kernfs_remove(kn_info);
  1572. return ret;
  1573. }
  1574. static int
  1575. mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
  1576. char *name, struct kernfs_node **dest_kn)
  1577. {
  1578. struct kernfs_node *kn;
  1579. int ret;
  1580. /* create the directory */
  1581. kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
  1582. if (IS_ERR(kn))
  1583. return PTR_ERR(kn);
  1584. if (dest_kn)
  1585. *dest_kn = kn;
  1586. ret = rdtgroup_kn_set_ugid(kn);
  1587. if (ret)
  1588. goto out_destroy;
  1589. kernfs_activate(kn);
  1590. return 0;
  1591. out_destroy:
  1592. kernfs_remove(kn);
  1593. return ret;
  1594. }
  1595. static void l3_qos_cfg_update(void *arg)
  1596. {
  1597. bool *enable = arg;
  1598. wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
  1599. }
  1600. static void l2_qos_cfg_update(void *arg)
  1601. {
  1602. bool *enable = arg;
  1603. wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
  1604. }
  1605. static inline bool is_mba_linear(void)
  1606. {
  1607. return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
  1608. }
  1609. static int set_cache_qos_cfg(int level, bool enable)
  1610. {
  1611. void (*update)(void *arg);
  1612. struct rdt_resource *r_l;
  1613. cpumask_var_t cpu_mask;
  1614. struct rdt_domain *d;
  1615. int cpu;
  1616. if (level == RDT_RESOURCE_L3)
  1617. update = l3_qos_cfg_update;
  1618. else if (level == RDT_RESOURCE_L2)
  1619. update = l2_qos_cfg_update;
  1620. else
  1621. return -EINVAL;
  1622. if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
  1623. return -ENOMEM;
  1624. r_l = &rdt_resources_all[level].r_resctrl;
  1625. list_for_each_entry(d, &r_l->domains, list) {
  1626. if (r_l->cache.arch_has_per_cpu_cfg)
  1627. /* Pick all the CPUs in the domain instance */
  1628. for_each_cpu(cpu, &d->cpu_mask)
  1629. cpumask_set_cpu(cpu, cpu_mask);
  1630. else
  1631. /* Pick one CPU from each domain instance to update MSR */
  1632. cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
  1633. }
  1634. cpu = get_cpu();
  1635. /* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
  1636. if (cpumask_test_cpu(cpu, cpu_mask))
  1637. update(&enable);
  1638. /* Update QOS_CFG MSR on all other cpus in cpu_mask. */
  1639. smp_call_function_many(cpu_mask, update, &enable, 1);
  1640. put_cpu();
  1641. free_cpumask_var(cpu_mask);
  1642. return 0;
  1643. }
  1644. /* Restore the qos cfg state when a domain comes online */
  1645. void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
  1646. {
  1647. struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
  1648. if (!r->cdp_capable)
  1649. return;
  1650. if (r->rid == RDT_RESOURCE_L2)
  1651. l2_qos_cfg_update(&hw_res->cdp_enabled);
  1652. if (r->rid == RDT_RESOURCE_L3)
  1653. l3_qos_cfg_update(&hw_res->cdp_enabled);
  1654. }
  1655. static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
  1656. {
  1657. u32 num_closid = resctrl_arch_get_num_closid(r);
  1658. int cpu = cpumask_any(&d->cpu_mask);
  1659. int i;
  1660. d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
  1661. GFP_KERNEL, cpu_to_node(cpu));
  1662. if (!d->mbps_val)
  1663. return -ENOMEM;
  1664. for (i = 0; i < num_closid; i++)
  1665. d->mbps_val[i] = MBA_MAX_MBPS;
  1666. return 0;
  1667. }
  1668. static void mba_sc_domain_destroy(struct rdt_resource *r,
  1669. struct rdt_domain *d)
  1670. {
  1671. kfree(d->mbps_val);
  1672. d->mbps_val = NULL;
  1673. }
  1674. /*
  1675. * MBA software controller is supported only if
  1676. * MBM is supported and MBA is in linear scale.
  1677. */
  1678. static bool supports_mba_mbps(void)
  1679. {
  1680. struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
  1681. return (is_mbm_local_enabled() &&
  1682. r->alloc_capable && is_mba_linear());
  1683. }
  1684. /*
  1685. * Enable or disable the MBA software controller
  1686. * which helps user specify bandwidth in MBps.
  1687. */
  1688. static int set_mba_sc(bool mba_sc)
  1689. {
  1690. struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
  1691. u32 num_closid = resctrl_arch_get_num_closid(r);
  1692. struct rdt_domain *d;
  1693. int i;
  1694. if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
  1695. return -EINVAL;
  1696. r->membw.mba_sc = mba_sc;
  1697. list_for_each_entry(d, &r->domains, list) {
  1698. for (i = 0; i < num_closid; i++)
  1699. d->mbps_val[i] = MBA_MAX_MBPS;
  1700. }
  1701. return 0;
  1702. }
  1703. static int cdp_enable(int level)
  1704. {
  1705. struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
  1706. int ret;
  1707. if (!r_l->alloc_capable)
  1708. return -EINVAL;
  1709. ret = set_cache_qos_cfg(level, true);
  1710. if (!ret)
  1711. rdt_resources_all[level].cdp_enabled = true;
  1712. return ret;
  1713. }
  1714. static void cdp_disable(int level)
  1715. {
  1716. struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
  1717. if (r_hw->cdp_enabled) {
  1718. set_cache_qos_cfg(level, false);
  1719. r_hw->cdp_enabled = false;
  1720. }
  1721. }
  1722. int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
  1723. {
  1724. struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
  1725. if (!hw_res->r_resctrl.cdp_capable)
  1726. return -EINVAL;
  1727. if (enable)
  1728. return cdp_enable(l);
  1729. cdp_disable(l);
  1730. return 0;
  1731. }
  1732. static void cdp_disable_all(void)
  1733. {
  1734. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
  1735. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
  1736. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
  1737. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
  1738. }
  1739. /*
1740. * We don't allow rdtgroup directories to be created anywhere
1741. * except the root directory. Thus when looking for the rdtgroup
1742. * structure for a kernfs node we are either looking at a directory,
1743. * in which case the rdtgroup structure is pointed at by the "priv"
1744. * field, or at a file, in which case we need only look to the parent
1745. * directory to find the rdtgroup.
  1746. */
  1747. static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
  1748. {
  1749. if (kernfs_type(kn) == KERNFS_DIR) {
  1750. /*
  1751. * All the resource directories use "kn->priv"
  1752. * to point to the "struct rdtgroup" for the
  1753. * resource. "info" and its subdirectories don't
  1754. * have rdtgroup structures, so return NULL here.
  1755. */
  1756. if (kn == kn_info || kn->parent == kn_info)
  1757. return NULL;
  1758. else
  1759. return kn->priv;
  1760. } else {
  1761. return kn->parent->priv;
  1762. }
  1763. }
  1764. struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
  1765. {
  1766. struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
  1767. if (!rdtgrp)
  1768. return NULL;
  1769. atomic_inc(&rdtgrp->waitcount);
  1770. kernfs_break_active_protection(kn);
  1771. mutex_lock(&rdtgroup_mutex);
  1772. /* Was this group deleted while we waited? */
  1773. if (rdtgrp->flags & RDT_DELETED)
  1774. return NULL;
  1775. return rdtgrp;
  1776. }
  1777. void rdtgroup_kn_unlock(struct kernfs_node *kn)
  1778. {
  1779. struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
  1780. if (!rdtgrp)
  1781. return;
  1782. mutex_unlock(&rdtgroup_mutex);
  1783. if (atomic_dec_and_test(&rdtgrp->waitcount) &&
  1784. (rdtgrp->flags & RDT_DELETED)) {
  1785. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  1786. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
  1787. rdtgroup_pseudo_lock_remove(rdtgrp);
  1788. kernfs_unbreak_active_protection(kn);
  1789. rdtgroup_remove(rdtgrp);
  1790. } else {
  1791. kernfs_unbreak_active_protection(kn);
  1792. }
  1793. }
  1794. static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  1795. struct rdtgroup *prgrp,
  1796. struct kernfs_node **mon_data_kn);
  1797. static int rdt_enable_ctx(struct rdt_fs_context *ctx)
  1798. {
  1799. int ret = 0;
  1800. if (ctx->enable_cdpl2)
  1801. ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
  1802. if (!ret && ctx->enable_cdpl3)
  1803. ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
  1804. if (!ret && ctx->enable_mba_mbps)
  1805. ret = set_mba_sc(true);
  1806. return ret;
  1807. }
  1808. static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
  1809. {
  1810. struct resctrl_schema *s;
  1811. const char *suffix = "";
  1812. int ret, cl;
  1813. s = kzalloc(sizeof(*s), GFP_KERNEL);
  1814. if (!s)
  1815. return -ENOMEM;
  1816. s->res = r;
  1817. s->num_closid = resctrl_arch_get_num_closid(r);
  1818. if (resctrl_arch_get_cdp_enabled(r->rid))
  1819. s->num_closid /= 2;
  1820. s->conf_type = type;
  1821. switch (type) {
  1822. case CDP_CODE:
  1823. suffix = "CODE";
  1824. break;
  1825. case CDP_DATA:
  1826. suffix = "DATA";
  1827. break;
  1828. case CDP_NONE:
  1829. suffix = "";
  1830. break;
  1831. }
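/* e.g. "L3CODE"/"L3DATA" when CDP is enabled, plain "L3" or "MB" otherwise. */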
  1832. ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
  1833. if (ret >= sizeof(s->name)) {
  1834. kfree(s);
  1835. return -EINVAL;
  1836. }
  1837. cl = strlen(s->name);
  1838. /*
  1839. * If CDP is supported by this resource, but not enabled,
  1840. * include the suffix. This ensures the tabular format of the
  1841. * schemata file does not change between mounts of the filesystem.
  1842. */
  1843. if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
  1844. cl += 4;
  1845. if (cl > max_name_width)
  1846. max_name_width = cl;
  1847. INIT_LIST_HEAD(&s->list);
  1848. list_add(&s->list, &resctrl_schema_all);
  1849. return 0;
  1850. }
  1851. static int schemata_list_create(void)
  1852. {
  1853. struct rdt_resource *r;
  1854. int ret = 0;
  1855. for_each_alloc_capable_rdt_resource(r) {
  1856. if (resctrl_arch_get_cdp_enabled(r->rid)) {
  1857. ret = schemata_list_add(r, CDP_CODE);
  1858. if (ret)
  1859. break;
  1860. ret = schemata_list_add(r, CDP_DATA);
  1861. } else {
  1862. ret = schemata_list_add(r, CDP_NONE);
  1863. }
  1864. if (ret)
  1865. break;
  1866. }
  1867. return ret;
  1868. }
  1869. static void schemata_list_destroy(void)
  1870. {
  1871. struct resctrl_schema *s, *tmp;
  1872. list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
  1873. list_del(&s->list);
  1874. kfree(s);
  1875. }
  1876. }
  1877. static int rdt_get_tree(struct fs_context *fc)
  1878. {
  1879. struct rdt_fs_context *ctx = rdt_fc2context(fc);
  1880. struct rdt_domain *dom;
  1881. struct rdt_resource *r;
  1882. int ret;
  1883. cpus_read_lock();
  1884. mutex_lock(&rdtgroup_mutex);
  1885. /*
  1886. * resctrl file system can only be mounted once.
  1887. */
  1888. if (static_branch_unlikely(&rdt_enable_key)) {
  1889. ret = -EBUSY;
  1890. goto out;
  1891. }
  1892. ret = rdt_enable_ctx(ctx);
  1893. if (ret < 0)
  1894. goto out_cdp;
  1895. ret = schemata_list_create();
  1896. if (ret) {
  1897. schemata_list_destroy();
  1898. goto out_mba;
  1899. }
  1900. closid_init();
  1901. ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
  1902. if (ret < 0)
  1903. goto out_schemata_free;
  1904. if (rdt_mon_capable) {
  1905. ret = mongroup_create_dir(rdtgroup_default.kn,
  1906. &rdtgroup_default, "mon_groups",
  1907. &kn_mongrp);
  1908. if (ret < 0)
  1909. goto out_info;
  1910. ret = mkdir_mondata_all(rdtgroup_default.kn,
  1911. &rdtgroup_default, &kn_mondata);
  1912. if (ret < 0)
  1913. goto out_mongrp;
  1914. rdtgroup_default.mon.mon_data_kn = kn_mondata;
  1915. }
  1916. ret = rdt_pseudo_lock_init();
  1917. if (ret)
  1918. goto out_mondata;
  1919. ret = kernfs_get_tree(fc);
  1920. if (ret < 0)
  1921. goto out_psl;
  1922. if (rdt_alloc_capable)
  1923. static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
  1924. if (rdt_mon_capable)
  1925. static_branch_enable_cpuslocked(&rdt_mon_enable_key);
  1926. if (rdt_alloc_capable || rdt_mon_capable)
  1927. static_branch_enable_cpuslocked(&rdt_enable_key);
  1928. if (is_mbm_enabled()) {
  1929. r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
  1930. list_for_each_entry(dom, &r->domains, list)
  1931. mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
  1932. }
  1933. goto out;
  1934. out_psl:
  1935. rdt_pseudo_lock_release();
  1936. out_mondata:
  1937. if (rdt_mon_capable)
  1938. kernfs_remove(kn_mondata);
  1939. out_mongrp:
  1940. if (rdt_mon_capable)
  1941. kernfs_remove(kn_mongrp);
  1942. out_info:
  1943. kernfs_remove(kn_info);
  1944. out_schemata_free:
  1945. schemata_list_destroy();
  1946. out_mba:
  1947. if (ctx->enable_mba_mbps)
  1948. set_mba_sc(false);
  1949. out_cdp:
  1950. cdp_disable_all();
  1951. out:
  1952. rdt_last_cmd_clear();
  1953. mutex_unlock(&rdtgroup_mutex);
  1954. cpus_read_unlock();
  1955. return ret;
  1956. }
  1957. enum rdt_param {
  1958. Opt_cdp,
  1959. Opt_cdpl2,
  1960. Opt_mba_mbps,
  1961. nr__rdt_params
  1962. };
  1963. static const struct fs_parameter_spec rdt_fs_parameters[] = {
  1964. fsparam_flag("cdp", Opt_cdp),
  1965. fsparam_flag("cdpl2", Opt_cdpl2),
  1966. fsparam_flag("mba_MBps", Opt_mba_mbps),
  1967. {}
  1968. };
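/*
 * These flags are passed as mount options, e.g. (typical usage, the mount
 * point may differ):
 *     # mount -t resctrl resctrl -o cdp,mba_MBps /sys/fs/resctrl
 */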
  1969. static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
  1970. {
  1971. struct rdt_fs_context *ctx = rdt_fc2context(fc);
  1972. struct fs_parse_result result;
  1973. int opt;
  1974. opt = fs_parse(fc, rdt_fs_parameters, param, &result);
  1975. if (opt < 0)
  1976. return opt;
  1977. switch (opt) {
  1978. case Opt_cdp:
  1979. ctx->enable_cdpl3 = true;
  1980. return 0;
  1981. case Opt_cdpl2:
  1982. ctx->enable_cdpl2 = true;
  1983. return 0;
  1984. case Opt_mba_mbps:
  1985. if (!supports_mba_mbps())
  1986. return -EINVAL;
  1987. ctx->enable_mba_mbps = true;
  1988. return 0;
  1989. }
  1990. return -EINVAL;
  1991. }
  1992. static void rdt_fs_context_free(struct fs_context *fc)
  1993. {
  1994. struct rdt_fs_context *ctx = rdt_fc2context(fc);
  1995. kernfs_free_fs_context(fc);
  1996. kfree(ctx);
  1997. }
  1998. static const struct fs_context_operations rdt_fs_context_ops = {
  1999. .free = rdt_fs_context_free,
  2000. .parse_param = rdt_parse_param,
  2001. .get_tree = rdt_get_tree,
  2002. };
  2003. static int rdt_init_fs_context(struct fs_context *fc)
  2004. {
  2005. struct rdt_fs_context *ctx;
  2006. ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
  2007. if (!ctx)
  2008. return -ENOMEM;
  2009. ctx->kfc.root = rdt_root;
  2010. ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
  2011. fc->fs_private = &ctx->kfc;
  2012. fc->ops = &rdt_fs_context_ops;
  2013. put_user_ns(fc->user_ns);
  2014. fc->user_ns = get_user_ns(&init_user_ns);
  2015. fc->global = true;
  2016. return 0;
  2017. }
  2018. static int reset_all_ctrls(struct rdt_resource *r)
  2019. {
  2020. struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
  2021. struct rdt_hw_domain *hw_dom;
  2022. struct msr_param msr_param;
  2023. cpumask_var_t cpu_mask;
  2024. struct rdt_domain *d;
  2025. int i, cpu;
  2026. if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
  2027. return -ENOMEM;
  2028. msr_param.res = r;
  2029. msr_param.low = 0;
  2030. msr_param.high = hw_res->num_closid;
  2031. /*
  2032. * Disable resource control for this resource by setting all
  2033. * CBMs in all domains to the maximum mask value. Pick one CPU
  2034. * from each domain to update the MSRs below.
  2035. */
  2036. list_for_each_entry(d, &r->domains, list) {
  2037. hw_dom = resctrl_to_arch_dom(d);
  2038. cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
  2039. for (i = 0; i < hw_res->num_closid; i++)
  2040. hw_dom->ctrl_val[i] = r->default_ctrl;
  2041. }
  2042. cpu = get_cpu();
  2043. /* Update CBM on this cpu if it's in cpu_mask. */
  2044. if (cpumask_test_cpu(cpu, cpu_mask))
  2045. rdt_ctrl_update(&msr_param);
  2046. /* Update CBM on all other cpus in cpu_mask. */
  2047. smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
  2048. put_cpu();
  2049. free_cpumask_var(cpu_mask);
  2050. return 0;
  2051. }
  2052. /*
  2053. * Move tasks from one to the other group. If @from is NULL, then all tasks
2054. * in the system are moved unconditionally (used for teardown).
  2055. *
  2056. * If @mask is not NULL the cpus on which moved tasks are running are set
  2057. * in that mask so the update smp function call is restricted to affected
  2058. * cpus.
  2059. */
  2060. static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
  2061. struct cpumask *mask)
  2062. {
  2063. struct task_struct *p, *t;
  2064. read_lock(&tasklist_lock);
  2065. for_each_process_thread(p, t) {
  2066. if (!from || is_closid_match(t, from) ||
  2067. is_rmid_match(t, from)) {
  2068. WRITE_ONCE(t->closid, to->closid);
  2069. WRITE_ONCE(t->rmid, to->mon.rmid);
  2070. /*
  2071. * Order the closid/rmid stores above before the loads
  2072. * in task_curr(). This pairs with the full barrier
  2073. * between the rq->curr update and resctrl_sched_in()
  2074. * during context switch.
  2075. */
  2076. smp_mb();
  2077. /*
  2078. * If the task is on a CPU, set the CPU in the mask.
  2079. * The detection is inaccurate as tasks might move or
  2080. * schedule before the smp function call takes place.
  2081. * In such a case the function call is pointless, but
  2082. * there is no other side effect.
  2083. */
  2084. if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
  2085. cpumask_set_cpu(task_cpu(t), mask);
  2086. }
  2087. }
  2088. read_unlock(&tasklist_lock);
  2089. }
  2090. static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
  2091. {
  2092. struct rdtgroup *sentry, *stmp;
  2093. struct list_head *head;
  2094. head = &rdtgrp->mon.crdtgrp_list;
  2095. list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
  2096. free_rmid(sentry->mon.rmid);
  2097. list_del(&sentry->mon.crdtgrp_list);
  2098. if (atomic_read(&sentry->waitcount) != 0)
  2099. sentry->flags = RDT_DELETED;
  2100. else
  2101. rdtgroup_remove(sentry);
  2102. }
  2103. }
2104. /*
2105. * Forcibly remove all subdirectories under root.
2106. */
  2107. static void rmdir_all_sub(void)
  2108. {
  2109. struct rdtgroup *rdtgrp, *tmp;
  2110. /* Move all tasks to the default resource group */
  2111. rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
  2112. list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
  2113. /* Free any child rmids */
  2114. free_all_child_rdtgrp(rdtgrp);
  2115. /* Remove each rdtgroup other than root */
  2116. if (rdtgrp == &rdtgroup_default)
  2117. continue;
  2118. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  2119. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
  2120. rdtgroup_pseudo_lock_remove(rdtgrp);
  2121. /*
  2122. * Give any CPUs back to the default group. We cannot copy
  2123. * cpu_online_mask because a CPU might have executed the
  2124. * offline callback already, but is still marked online.
  2125. */
  2126. cpumask_or(&rdtgroup_default.cpu_mask,
  2127. &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
  2128. free_rmid(rdtgrp->mon.rmid);
  2129. kernfs_remove(rdtgrp->kn);
  2130. list_del(&rdtgrp->rdtgroup_list);
  2131. if (atomic_read(&rdtgrp->waitcount) != 0)
  2132. rdtgrp->flags = RDT_DELETED;
  2133. else
  2134. rdtgroup_remove(rdtgrp);
  2135. }
  2136. /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
  2137. update_closid_rmid(cpu_online_mask, &rdtgroup_default);
  2138. kernfs_remove(kn_info);
  2139. kernfs_remove(kn_mongrp);
  2140. kernfs_remove(kn_mondata);
  2141. }
  2142. static void rdt_kill_sb(struct super_block *sb)
  2143. {
  2144. struct rdt_resource *r;
  2145. cpus_read_lock();
  2146. mutex_lock(&rdtgroup_mutex);
  2147. set_mba_sc(false);
2148. /* Put everything back to default values. */
  2149. for_each_alloc_capable_rdt_resource(r)
  2150. reset_all_ctrls(r);
  2151. cdp_disable_all();
  2152. rmdir_all_sub();
  2153. rdt_pseudo_lock_release();
  2154. rdtgroup_default.mode = RDT_MODE_SHAREABLE;
  2155. schemata_list_destroy();
  2156. static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
  2157. static_branch_disable_cpuslocked(&rdt_mon_enable_key);
  2158. static_branch_disable_cpuslocked(&rdt_enable_key);
  2159. kernfs_kill_sb(sb);
  2160. mutex_unlock(&rdtgroup_mutex);
  2161. cpus_read_unlock();
  2162. }
  2163. static struct file_system_type rdt_fs_type = {
  2164. .name = "resctrl",
  2165. .init_fs_context = rdt_init_fs_context,
  2166. .parameters = rdt_fs_parameters,
  2167. .kill_sb = rdt_kill_sb,
  2168. };
  2169. static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
  2170. void *priv)
  2171. {
  2172. struct kernfs_node *kn;
  2173. int ret = 0;
  2174. kn = __kernfs_create_file(parent_kn, name, 0444,
  2175. GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
  2176. &kf_mondata_ops, priv, NULL, NULL);
  2177. if (IS_ERR(kn))
  2178. return PTR_ERR(kn);
  2179. ret = rdtgroup_kn_set_ugid(kn);
  2180. if (ret) {
  2181. kernfs_remove(kn);
  2182. return ret;
  2183. }
  2184. return ret;
  2185. }
  2186. /*
  2187. * Remove all subdirectories of mon_data of ctrl_mon groups
2188. * and monitor groups with the given domain id.
  2189. */
  2190. static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
  2191. unsigned int dom_id)
  2192. {
  2193. struct rdtgroup *prgrp, *crgrp;
  2194. char name[32];
  2195. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  2196. sprintf(name, "mon_%s_%02d", r->name, dom_id);
  2197. kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
  2198. list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
  2199. kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
  2200. }
  2201. }
  2202. static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
  2203. struct rdt_domain *d,
  2204. struct rdt_resource *r, struct rdtgroup *prgrp)
  2205. {
  2206. union mon_data_bits priv;
  2207. struct kernfs_node *kn;
  2208. struct mon_evt *mevt;
  2209. struct rmid_read rr;
  2210. char name[32];
  2211. int ret;
  2212. sprintf(name, "mon_%s_%02d", r->name, d->id);
  2213. /* create the directory */
  2214. kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
  2215. if (IS_ERR(kn))
  2216. return PTR_ERR(kn);
  2217. ret = rdtgroup_kn_set_ugid(kn);
  2218. if (ret)
  2219. goto out_destroy;
  2220. if (WARN_ON(list_empty(&r->evt_list))) {
  2221. ret = -EPERM;
  2222. goto out_destroy;
  2223. }
  2224. priv.u.rid = r->rid;
  2225. priv.u.domid = d->id;
  2226. list_for_each_entry(mevt, &r->evt_list, list) {
  2227. priv.u.evtid = mevt->evtid;
  2228. ret = mon_addfile(kn, mevt->name, priv.priv);
  2229. if (ret)
  2230. goto out_destroy;
  2231. if (is_mbm_event(mevt->evtid))
  2232. mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
  2233. }
  2234. kernfs_activate(kn);
  2235. return 0;
  2236. out_destroy:
  2237. kernfs_remove(kn);
  2238. return ret;
  2239. }
  2240. /*
  2241. * Add all subdirectories of mon_data for "ctrl_mon" groups
  2242. * and "monitor" groups with given domain id.
  2243. */
  2244. static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
  2245. struct rdt_domain *d)
  2246. {
  2247. struct kernfs_node *parent_kn;
  2248. struct rdtgroup *prgrp, *crgrp;
  2249. struct list_head *head;
  2250. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  2251. parent_kn = prgrp->mon.mon_data_kn;
  2252. mkdir_mondata_subdir(parent_kn, d, r, prgrp);
  2253. head = &prgrp->mon.crdtgrp_list;
  2254. list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  2255. parent_kn = crgrp->mon.mon_data_kn;
  2256. mkdir_mondata_subdir(parent_kn, d, r, crgrp);
  2257. }
  2258. }
  2259. }
  2260. static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
  2261. struct rdt_resource *r,
  2262. struct rdtgroup *prgrp)
  2263. {
  2264. struct rdt_domain *dom;
  2265. int ret;
  2266. list_for_each_entry(dom, &r->domains, list) {
  2267. ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
  2268. if (ret)
  2269. return ret;
  2270. }
  2271. return 0;
  2272. }
  2273. /*
  2274. * This creates a directory mon_data which contains the monitored data.
  2275. *
2276. * mon_data has one directory for each domain, named in the format
2277. * mon_<domain_name>_<domain_id>. For example, mon_data on a system
2278. * with an L3 domain looks as below:
  2279. * ./mon_data:
  2280. * mon_L3_00
  2281. * mon_L3_01
  2282. * mon_L3_02
  2283. * ...
  2284. *
  2285. * Each domain directory has one file per event:
  2286. * ./mon_L3_00/:
  2287. * llc_occupancy
  2288. *
  2289. */
  2290. static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  2291. struct rdtgroup *prgrp,
  2292. struct kernfs_node **dest_kn)
  2293. {
  2294. struct rdt_resource *r;
  2295. struct kernfs_node *kn;
  2296. int ret;
  2297. /*
  2298. * Create the mon_data directory first.
  2299. */
  2300. ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
  2301. if (ret)
  2302. return ret;
  2303. if (dest_kn)
  2304. *dest_kn = kn;
  2305. /*
  2306. * Create the subdirectories for each domain. Note that all events
  2307. * in a domain like L3 are grouped into a resource whose domain is L3
  2308. */
  2309. for_each_mon_capable_rdt_resource(r) {
  2310. ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
  2311. if (ret)
  2312. goto out_destroy;
  2313. }
  2314. return 0;
  2315. out_destroy:
  2316. kernfs_remove(kn);
  2317. return ret;
  2318. }
  2319. /**
  2320. * cbm_ensure_valid - Enforce validity on provided CBM
  2321. * @_val: Candidate CBM
  2322. * @r: RDT resource to which the CBM belongs
  2323. *
  2324. * The provided CBM represents all cache portions available for use. This
  2325. * may be represented by a bitmap that does not consist of contiguous ones
  2326. * and thus be an invalid CBM.
  2327. * Here the provided CBM is forced to be a valid CBM by only considering
2328. * the first set of contiguous bits as valid and clearing all remaining bits.
  2329. * The intention here is to provide a valid default CBM with which a new
  2330. * resource group is initialized. The user can follow this with a
  2331. * modification to the CBM if the default does not satisfy the
  2332. * requirements.
  2333. */
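/*
 * Illustrative example: with cbm_len = 8, a candidate CBM of 0b00111011
 * keeps only the first contiguous run of set bits starting from the
 * lowest one and becomes 0b00000011.
 */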
  2334. static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
  2335. {
  2336. unsigned int cbm_len = r->cache.cbm_len;
  2337. unsigned long first_bit, zero_bit;
  2338. unsigned long val = _val;
  2339. if (!val)
  2340. return 0;
  2341. first_bit = find_first_bit(&val, cbm_len);
  2342. zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
  2343. /* Clear any remaining bits to ensure contiguous region */
  2344. bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
  2345. return (u32)val;
  2346. }
  2347. /*
  2348. * Initialize cache resources per RDT domain
  2349. *
  2350. * Set the RDT domain up to start off with all usable allocations. That is,
  2351. * all shareable and unused bits. All-zero CBM is invalid.
  2352. */
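/*
 * Illustrative example with a 12-bit CBM: shareable_bits = 0x3 and one
 * exclusive group using 0x0f0 leaves 0xf0c unused, so the new group is
 * staged with 0x3 | 0xf0c = 0xf0f, which cbm_ensure_valid() trims to the
 * contiguous 0x00f.
 */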
  2353. static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
  2354. u32 closid)
  2355. {
  2356. enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
  2357. enum resctrl_conf_type t = s->conf_type;
  2358. struct resctrl_staged_config *cfg;
  2359. struct rdt_resource *r = s->res;
  2360. u32 used_b = 0, unused_b = 0;
  2361. unsigned long tmp_cbm;
  2362. enum rdtgrp_mode mode;
  2363. u32 peer_ctl, ctrl_val;
  2364. int i;
  2365. cfg = &d->staged_config[t];
  2366. cfg->have_new_ctrl = false;
  2367. cfg->new_ctrl = r->cache.shareable_bits;
  2368. used_b = r->cache.shareable_bits;
  2369. for (i = 0; i < closids_supported(); i++) {
  2370. if (closid_allocated(i) && i != closid) {
  2371. mode = rdtgroup_mode_by_closid(i);
  2372. if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
  2373. /*
  2374. * ctrl values for locksetup aren't relevant
  2375. * until the schemata is written, and the mode
  2376. * becomes RDT_MODE_PSEUDO_LOCKED.
  2377. */
  2378. continue;
  2379. /*
  2380. * If CDP is active include peer domain's
  2381. * usage to ensure there is no overlap
  2382. * with an exclusive group.
  2383. */
  2384. if (resctrl_arch_get_cdp_enabled(r->rid))
  2385. peer_ctl = resctrl_arch_get_config(r, d, i,
  2386. peer_type);
  2387. else
  2388. peer_ctl = 0;
  2389. ctrl_val = resctrl_arch_get_config(r, d, i,
  2390. s->conf_type);
  2391. used_b |= ctrl_val | peer_ctl;
  2392. if (mode == RDT_MODE_SHAREABLE)
  2393. cfg->new_ctrl |= ctrl_val | peer_ctl;
  2394. }
  2395. }
  2396. if (d->plr && d->plr->cbm > 0)
  2397. used_b |= d->plr->cbm;
  2398. unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
  2399. unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
  2400. cfg->new_ctrl |= unused_b;
  2401. /*
  2402. * Force the initial CBM to be valid, user can
  2403. * modify the CBM based on system availability.
  2404. */
  2405. cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
  2406. /*
  2407. * Assign the u32 CBM to an unsigned long to ensure that
  2408. * bitmap_weight() does not access out-of-bound memory.
  2409. */
  2410. tmp_cbm = cfg->new_ctrl;
  2411. if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
  2412. rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
  2413. return -ENOSPC;
  2414. }
  2415. cfg->have_new_ctrl = true;
  2416. return 0;
  2417. }
  2418. /*
  2419. * Initialize cache resources with default values.
  2420. *
  2421. * A new RDT group is being created on an allocation capable (CAT)
  2422. * supporting system. Set this group up to start off with all usable
  2423. * allocations.
  2424. *
  2425. * If there are no more shareable bits available on any domain then
  2426. * the entire allocation will fail.
  2427. */
  2428. static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
  2429. {
  2430. struct rdt_domain *d;
  2431. int ret;
  2432. list_for_each_entry(d, &s->res->domains, list) {
  2433. ret = __init_one_rdt_domain(d, s, closid);
  2434. if (ret < 0)
  2435. return ret;
  2436. }
  2437. return 0;
  2438. }
  2439. /* Initialize MBA resource with default values. */
  2440. static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
  2441. {
  2442. struct resctrl_staged_config *cfg;
  2443. struct rdt_domain *d;
  2444. list_for_each_entry(d, &r->domains, list) {
  2445. if (is_mba_sc(r)) {
  2446. d->mbps_val[closid] = MBA_MAX_MBPS;
  2447. continue;
  2448. }
  2449. cfg = &d->staged_config[CDP_NONE];
  2450. cfg->new_ctrl = r->default_ctrl;
  2451. cfg->have_new_ctrl = true;
  2452. }
  2453. }
  2454. /* Initialize the RDT group's allocations. */
  2455. static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
  2456. {
  2457. struct resctrl_schema *s;
  2458. struct rdt_resource *r;
  2459. int ret = 0;
  2460. rdt_staged_configs_clear();
  2461. list_for_each_entry(s, &resctrl_schema_all, list) {
  2462. r = s->res;
  2463. if (r->rid == RDT_RESOURCE_MBA) {
  2464. rdtgroup_init_mba(r, rdtgrp->closid);
  2465. if (is_mba_sc(r))
  2466. continue;
  2467. } else {
  2468. ret = rdtgroup_init_cat(s, rdtgrp->closid);
  2469. if (ret < 0)
  2470. goto out;
  2471. }
  2472. ret = resctrl_arch_update_domains(r, rdtgrp->closid);
  2473. if (ret < 0) {
  2474. rdt_last_cmd_puts("Failed to initialize allocations\n");
  2475. goto out;
  2476. }
  2477. }
  2478. rdtgrp->mode = RDT_MODE_SHAREABLE;
  2479. out:
  2480. rdt_staged_configs_clear();
  2481. return ret;
  2482. }
  2483. static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
  2484. const char *name, umode_t mode,
  2485. enum rdt_group_type rtype, struct rdtgroup **r)
  2486. {
  2487. struct rdtgroup *prdtgrp, *rdtgrp;
  2488. struct kernfs_node *kn;
  2489. uint files = 0;
  2490. int ret;
  2491. prdtgrp = rdtgroup_kn_lock_live(parent_kn);
  2492. if (!prdtgrp) {
  2493. ret = -ENODEV;
  2494. goto out_unlock;
  2495. }
  2496. if (rtype == RDTMON_GROUP &&
  2497. (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  2498. prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
  2499. ret = -EINVAL;
  2500. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  2501. goto out_unlock;
  2502. }
  2503. /* allocate the rdtgroup. */
  2504. rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
  2505. if (!rdtgrp) {
  2506. ret = -ENOSPC;
  2507. rdt_last_cmd_puts("Kernel out of memory\n");
  2508. goto out_unlock;
  2509. }
  2510. *r = rdtgrp;
  2511. rdtgrp->mon.parent = prdtgrp;
  2512. rdtgrp->type = rtype;
  2513. INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
  2514. /* kernfs creates the directory for rdtgrp */
  2515. kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
  2516. if (IS_ERR(kn)) {
  2517. ret = PTR_ERR(kn);
  2518. rdt_last_cmd_puts("kernfs create error\n");
  2519. goto out_free_rgrp;
  2520. }
  2521. rdtgrp->kn = kn;
  2522. /*
  2523. * kernfs_remove() will drop the reference count on "kn" which
  2524. * will free it. But we still need it to stick around for the
  2525. * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
  2526. * which will be dropped by kernfs_put() in rdtgroup_remove().
  2527. */
  2528. kernfs_get(kn);
  2529. ret = rdtgroup_kn_set_ugid(kn);
  2530. if (ret) {
  2531. rdt_last_cmd_puts("kernfs perm error\n");
  2532. goto out_destroy;
  2533. }
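/*
 * Select the base files plus the control- or monitor-specific files,
 * depending on whether a ctrl_mon or a mon group is being created.
 */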
  2534. files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
  2535. ret = rdtgroup_add_files(kn, files);
  2536. if (ret) {
  2537. rdt_last_cmd_puts("kernfs fill error\n");
  2538. goto out_destroy;
  2539. }
  2540. if (rdt_mon_capable) {
  2541. ret = alloc_rmid();
  2542. if (ret < 0) {
  2543. rdt_last_cmd_puts("Out of RMIDs\n");
  2544. goto out_destroy;
  2545. }
  2546. rdtgrp->mon.rmid = ret;
  2547. ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
  2548. if (ret) {
  2549. rdt_last_cmd_puts("kernfs subdir error\n");
  2550. goto out_idfree;
  2551. }
  2552. }
  2553. kernfs_activate(kn);
  2554. /*
  2555. * The caller unlocks the parent_kn upon success.
  2556. */
  2557. return 0;
  2558. out_idfree:
  2559. free_rmid(rdtgrp->mon.rmid);
  2560. out_destroy:
  2561. kernfs_put(rdtgrp->kn);
  2562. kernfs_remove(rdtgrp->kn);
  2563. out_free_rgrp:
  2564. kfree(rdtgrp);
  2565. out_unlock:
  2566. rdtgroup_kn_unlock(parent_kn);
  2567. return ret;
  2568. }
  2569. static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
  2570. {
  2571. kernfs_remove(rgrp->kn);
  2572. free_rmid(rgrp->mon.rmid);
  2573. rdtgroup_remove(rgrp);
  2574. }
2575. /*
2576. * Create a monitor group under the "mon_groups" directory of a control
2577. * and monitor group (ctrl_mon). This is a resource group
2578. * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2579. */
  2580. static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
  2581. const char *name, umode_t mode)
  2582. {
  2583. struct rdtgroup *rdtgrp, *prgrp;
  2584. int ret;
  2585. ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
  2586. if (ret)
  2587. return ret;
  2588. prgrp = rdtgrp->mon.parent;
  2589. rdtgrp->closid = prgrp->closid;
  2590. /*
  2591. * Add the rdtgrp to the list of rdtgrps the parent
  2592. * ctrl_mon group has to track.
  2593. */
  2594. list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
  2595. rdtgroup_kn_unlock(parent_kn);
  2596. return ret;
  2597. }
2598. /*
2599. * These are rdtgroups created under the root directory. They can be
2600. * used to allocate and monitor resources.
2601. */
  2602. static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
  2603. const char *name, umode_t mode)
  2604. {
  2605. struct rdtgroup *rdtgrp;
  2606. struct kernfs_node *kn;
  2607. u32 closid;
  2608. int ret;
  2609. ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
  2610. if (ret)
  2611. return ret;
  2612. kn = rdtgrp->kn;
  2613. ret = closid_alloc();
  2614. if (ret < 0) {
  2615. rdt_last_cmd_puts("Out of CLOSIDs\n");
  2616. goto out_common_fail;
  2617. }
  2618. closid = ret;
  2619. ret = 0;
  2620. rdtgrp->closid = closid;
  2621. ret = rdtgroup_init_alloc(rdtgrp);
  2622. if (ret < 0)
  2623. goto out_id_free;
  2624. list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
  2625. if (rdt_mon_capable) {
  2626. /*
  2627. * Create an empty mon_groups directory to hold the subset
  2628. * of tasks and cpus to monitor.
  2629. */
  2630. ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
  2631. if (ret) {
  2632. rdt_last_cmd_puts("kernfs subdir error\n");
  2633. goto out_del_list;
  2634. }
  2635. }
  2636. goto out_unlock;
  2637. out_del_list:
  2638. list_del(&rdtgrp->rdtgroup_list);
  2639. out_id_free:
  2640. closid_free(closid);
  2641. out_common_fail:
  2642. mkdir_rdt_prepare_clean(rdtgrp);
  2643. out_unlock:
  2644. rdtgroup_kn_unlock(parent_kn);
  2645. return ret;
  2646. }
  2647. /*
2648. * We allow creating mon groups only within a directory called "mon_groups",
2649. * which is present in every ctrl_mon group. Check if this is a valid
2650. * "mon_groups" directory.
  2651. *
  2652. * 1. The directory should be named "mon_groups".
  2653. * 2. The mon group itself should "not" be named "mon_groups".
  2654. * This makes sure "mon_groups" directory always has a ctrl_mon group
  2655. * as parent.
  2656. */
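/*
 * For example (assuming resctrl is mounted at /sys/fs/resctrl):
 * "mkdir /sys/fs/resctrl/grp1/mon_groups/m1" passes both checks, while
 * "mkdir /sys/fs/resctrl/grp1/mon_groups/mon_groups" fails the second.
 */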
  2657. static bool is_mon_groups(struct kernfs_node *kn, const char *name)
  2658. {
  2659. return (!strcmp(kn->name, "mon_groups") &&
  2660. strcmp(name, "mon_groups"));
  2661. }
  2662. static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
  2663. umode_t mode)
  2664. {
  2665. /* Do not accept '\n' to avoid unparsable situation. */
  2666. if (strchr(name, '\n'))
  2667. return -EINVAL;
  2668. /*
  2669. * If the parent directory is the root directory and RDT
  2670. * allocation is supported, add a control and monitoring
  2671. * subdirectory
  2672. */
  2673. if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
  2674. return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
  2675. /*
  2676. * If RDT monitoring is supported and the parent directory is a valid
  2677. * "mon_groups" directory, add a monitoring subdirectory.
  2678. */
  2679. if (rdt_mon_capable && is_mon_groups(parent_kn, name))
  2680. return rdtgroup_mkdir_mon(parent_kn, name, mode);
  2681. return -EPERM;
  2682. }
  2683. static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
  2684. {
  2685. struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
  2686. int cpu;
  2687. /* Give any tasks back to the parent group */
  2688. rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
  2689. /* Update per cpu rmid of the moved CPUs first */
  2690. for_each_cpu(cpu, &rdtgrp->cpu_mask)
  2691. per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
  2692. /*
  2693. * Update the MSR on moved CPUs and CPUs which have moved
  2694. * task running on them.
  2695. */
  2696. cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
  2697. update_closid_rmid(tmpmask, NULL);
  2698. rdtgrp->flags = RDT_DELETED;
  2699. free_rmid(rdtgrp->mon.rmid);
  2700. /*
  2701. * Remove the rdtgrp from the parent ctrl_mon group's list
  2702. */
  2703. WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
  2704. list_del(&rdtgrp->mon.crdtgrp_list);
  2705. kernfs_remove(rdtgrp->kn);
  2706. return 0;
  2707. }
  2708. static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
  2709. {
  2710. rdtgrp->flags = RDT_DELETED;
  2711. list_del(&rdtgrp->rdtgroup_list);
  2712. kernfs_remove(rdtgrp->kn);
  2713. return 0;
  2714. }
  2715. static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
  2716. {
  2717. int cpu;
  2718. /* Give any tasks back to the default group */
  2719. rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
  2720. /* Give any CPUs back to the default group */
  2721. cpumask_or(&rdtgroup_default.cpu_mask,
  2722. &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
  2723. /* Update per cpu closid and rmid of the moved CPUs first */
  2724. for_each_cpu(cpu, &rdtgrp->cpu_mask) {
  2725. per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
  2726. per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
  2727. }
  2728. /*
  2729. * Update the MSR on moved CPUs and CPUs which have moved
  2730. * task running on them.
  2731. */
  2732. cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
  2733. update_closid_rmid(tmpmask, NULL);
  2734. closid_free(rdtgrp->closid);
  2735. free_rmid(rdtgrp->mon.rmid);
  2736. rdtgroup_ctrl_remove(rdtgrp);
  2737. /*
  2738. * Free all the child monitor group rmids.
  2739. */
  2740. free_all_child_rdtgrp(rdtgrp);
  2741. return 0;
  2742. }
  2743. static int rdtgroup_rmdir(struct kernfs_node *kn)
  2744. {
  2745. struct kernfs_node *parent_kn = kn->parent;
  2746. struct rdtgroup *rdtgrp;
  2747. cpumask_var_t tmpmask;
  2748. int ret = 0;
  2749. if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
  2750. return -ENOMEM;
  2751. rdtgrp = rdtgroup_kn_lock_live(kn);
  2752. if (!rdtgrp) {
  2753. ret = -EPERM;
  2754. goto out;
  2755. }
  2756. /*
  2757. * If the rdtgroup is a ctrl_mon group and parent directory
  2758. * is the root directory, remove the ctrl_mon group.
  2759. *
  2760. * If the rdtgroup is a mon group and parent directory
  2761. * is a valid "mon_groups" directory, remove the mon group.
  2762. */
  2763. if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
  2764. rdtgrp != &rdtgroup_default) {
  2765. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  2766. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  2767. ret = rdtgroup_ctrl_remove(rdtgrp);
  2768. } else {
  2769. ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
  2770. }
  2771. } else if (rdtgrp->type == RDTMON_GROUP &&
  2772. is_mon_groups(parent_kn, kn->name)) {
  2773. ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
  2774. } else {
  2775. ret = -EPERM;
  2776. }
  2777. out:
  2778. rdtgroup_kn_unlock(kn);
  2779. free_cpumask_var(tmpmask);
  2780. return ret;
  2781. }
  2782. static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
  2783. {
  2784. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
  2785. seq_puts(seq, ",cdp");
  2786. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
  2787. seq_puts(seq, ",cdpl2");
  2788. if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
  2789. seq_puts(seq, ",mba_MBps");
  2790. return 0;
  2791. }
  2792. static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
  2793. .mkdir = rdtgroup_mkdir,
  2794. .rmdir = rdtgroup_rmdir,
  2795. .show_options = rdtgroup_show_options,
  2796. };
  2797. static int __init rdtgroup_setup_root(void)
  2798. {
  2799. int ret;
  2800. rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
  2801. KERNFS_ROOT_CREATE_DEACTIVATED |
  2802. KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
  2803. &rdtgroup_default);
  2804. if (IS_ERR(rdt_root))
  2805. return PTR_ERR(rdt_root);
  2806. mutex_lock(&rdtgroup_mutex);
  2807. rdtgroup_default.closid = 0;
  2808. rdtgroup_default.mon.rmid = 0;
  2809. rdtgroup_default.type = RDTCTRL_GROUP;
  2810. INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
  2811. list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
  2812. ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE);
  2813. if (ret) {
  2814. kernfs_destroy_root(rdt_root);
  2815. goto out;
  2816. }
  2817. rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
  2818. kernfs_activate(rdtgroup_default.kn);
  2819. out:
  2820. mutex_unlock(&rdtgroup_mutex);
  2821. return ret;
  2822. }
  2823. static void domain_destroy_mon_state(struct rdt_domain *d)
  2824. {
  2825. bitmap_free(d->rmid_busy_llc);
  2826. kfree(d->mbm_total);
  2827. kfree(d->mbm_local);
  2828. }
  2829. void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
  2830. {
  2831. lockdep_assert_held(&rdtgroup_mutex);
  2832. if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
  2833. mba_sc_domain_destroy(r, d);
  2834. if (!r->mon_capable)
  2835. return;
  2836. /*
  2837. * If resctrl is mounted, remove all the
  2838. * per domain monitor data directories.
  2839. */
  2840. if (static_branch_unlikely(&rdt_mon_enable_key))
  2841. rmdir_mondata_subdir_allrdtgrp(r, d->id);
  2842. if (is_mbm_enabled())
  2843. cancel_delayed_work(&d->mbm_over);
  2844. if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
  2845. /*
  2846. * When a package is going down, forcefully
  2847. * decrement rmid->ebusy. There is no way to know
  2848. * that the L3 was flushed and hence may lead to
  2849. * incorrect counts in rare scenarios, but leaving
  2850. * the RMID as busy creates RMID leaks if the
  2851. * package never comes back.
  2852. */
  2853. __check_limbo(d, true);
  2854. cancel_delayed_work(&d->cqm_limbo);
  2855. }
  2856. domain_destroy_mon_state(d);
  2857. }
  2858. static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
  2859. {
  2860. size_t tsize;
  2861. if (is_llc_occupancy_enabled()) {
  2862. d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
  2863. if (!d->rmid_busy_llc)
  2864. return -ENOMEM;
  2865. }
  2866. if (is_mbm_total_enabled()) {
  2867. tsize = sizeof(*d->mbm_total);
  2868. d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
  2869. if (!d->mbm_total) {
  2870. bitmap_free(d->rmid_busy_llc);
  2871. return -ENOMEM;
  2872. }
  2873. }
  2874. if (is_mbm_local_enabled()) {
  2875. tsize = sizeof(*d->mbm_local);
  2876. d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
  2877. if (!d->mbm_local) {
  2878. bitmap_free(d->rmid_busy_llc);
  2879. kfree(d->mbm_total);
  2880. return -ENOMEM;
  2881. }
  2882. }
  2883. return 0;
  2884. }
  2885. int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
  2886. {
  2887. int err;
  2888. lockdep_assert_held(&rdtgroup_mutex);
  2889. if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
  2890. /* RDT_RESOURCE_MBA is never mon_capable */
  2891. return mba_sc_domain_allocate(r, d);
  2892. if (!r->mon_capable)
  2893. return 0;
  2894. err = domain_setup_mon_state(r, d);
  2895. if (err)
  2896. return err;
  2897. if (is_mbm_enabled()) {
  2898. INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
  2899. mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
  2900. }
  2901. if (is_llc_occupancy_enabled())
  2902. INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
  2903. /* If resctrl is mounted, add per domain monitor data directories. */
  2904. if (static_branch_unlikely(&rdt_mon_enable_key))
  2905. mkdir_mondata_subdir_allrdtgrp(r, d);
  2906. return 0;
  2907. }
  2908. /*
  2909. * rdtgroup_init - rdtgroup initialization
  2910. *
  2911. * Setup resctrl file system including set up root, create mount point,
  2912. * register rdtgroup filesystem, and initialize files under root directory.
  2913. *
  2914. * Return: 0 on success or -errno
  2915. */
  2916. int __init rdtgroup_init(void)
  2917. {
  2918. int ret = 0;
  2919. seq_buf_init(&last_cmd_status, last_cmd_status_buf,
  2920. sizeof(last_cmd_status_buf));
  2921. ret = rdtgroup_setup_root();
  2922. if (ret)
  2923. return ret;
  2924. ret = sysfs_create_mount_point(fs_kobj, "resctrl");
  2925. if (ret)
  2926. goto cleanup_root;
  2927. ret = register_filesystem(&rdt_fs_type);
  2928. if (ret)
  2929. goto cleanup_mountpoint;
  2930. /*
  2931. * Adding the resctrl debugfs directory here may not be ideal since
  2932. * it would let the resctrl debugfs directory appear on the debugfs
  2933. * filesystem before the resctrl filesystem is mounted.
  2934. * It may also be ok since that would enable debugging of RDT before
  2935. * resctrl is mounted.
  2936. * The reason why the debugfs directory is created here and not in
  2937. * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
  2938. * during the debugfs directory creation also &sb->s_type->i_mutex_key
  2939. * (the lockdep class of inode->i_rwsem). Other filesystem
  2940. * interactions (eg. SyS_getdents) have the lock ordering:
  2941. * &sb->s_type->i_mutex_key --> &mm->mmap_lock
  2942. * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
  2943. * is taken, thus creating dependency:
  2944. * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
  2945. * issues considering the other two lock dependencies.
  2946. * By creating the debugfs directory here we avoid a dependency
  2947. * that may cause deadlock (even though file operations cannot
  2948. * occur until the filesystem is mounted, but I do not know how to
  2949. * tell lockdep that).
  2950. */
  2951. debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
  2952. return 0;
  2953. cleanup_mountpoint:
  2954. sysfs_remove_mount_point(fs_kobj, "resctrl");
  2955. cleanup_root:
  2956. kernfs_destroy_root(rdt_root);
  2957. return ret;
  2958. }
  2959. void __exit rdtgroup_exit(void)
  2960. {
  2961. debugfs_remove_recursive(debugfs_resctrl);
  2962. unregister_filesystem(&rdt_fs_type);
  2963. sysfs_remove_mount_point(fs_kobj, "resctrl");
  2964. kernfs_destroy_root(rdt_root);
  2965. }