// SPDX-License-Identifier: GPL-2.0-or-later
/*
   drbd_state.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <[email protected]>.
   Copyright (C) 2002-2008, Lars Ellenberg <[email protected]>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.
 */

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_state_change.h"

struct after_state_chg_work {
	struct drbd_work w;
	struct drbd_device *device;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
	struct drbd_state_change *state_change;
};

enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};

static void count_objects(struct drbd_resource *resource,
			  unsigned int *n_devices,
			  unsigned int *n_connections)
{
	struct drbd_device *device;
	struct drbd_connection *connection;
	int vnr;

	*n_devices = 0;
	*n_connections = 0;

	idr_for_each_entry(&resource->devices, device, vnr)
		(*n_devices)++;
	for_each_connection(connection, resource)
		(*n_connections)++;
}

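/*
 * A single kmalloc() below serves the drbd_state_change header and the
 * three trailing arrays; the pointer members are then fixed up to point
 * into that one block.  A rough sketch of the intended layout:
 *
 *	[drbd_state_change][devices[n_devices]][connections[n_connections]]
 *	                   [peer_devices[n_devices * n_connections]]
 */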
static struct drbd_state_change *alloc_state_change(unsigned int n_devices, unsigned int n_connections, gfp_t gfp)
{
	struct drbd_state_change *state_change;
	unsigned int size, n;

	size = sizeof(struct drbd_state_change) +
	       n_devices * sizeof(struct drbd_device_state_change) +
	       n_connections * sizeof(struct drbd_connection_state_change) +
	       n_devices * n_connections * sizeof(struct drbd_peer_device_state_change);
	state_change = kmalloc(size, gfp);
	if (!state_change)
		return NULL;
	state_change->n_devices = n_devices;
	state_change->n_connections = n_connections;
	state_change->devices = (void *)(state_change + 1);
	state_change->connections = (void *)&state_change->devices[n_devices];
	state_change->peer_devices = (void *)&state_change->connections[n_connections];
	state_change->resource->resource = NULL;
	for (n = 0; n < n_devices; n++)
		state_change->devices[n].device = NULL;
	for (n = 0; n < n_connections; n++)
		state_change->connections[n].connection = NULL;
	return state_change;
}

struct drbd_state_change *remember_old_state(struct drbd_resource *resource, gfp_t gfp)
{
	struct drbd_state_change *state_change;
	struct drbd_device *device;
	unsigned int n_devices;
	struct drbd_connection *connection;
	unsigned int n_connections;
	int vnr;
	struct drbd_device_state_change *device_state_change;
	struct drbd_peer_device_state_change *peer_device_state_change;
	struct drbd_connection_state_change *connection_state_change;

	/* Caller holds req_lock spinlock.
	 * No state, no device IDR, no connections lists can change. */
	count_objects(resource, &n_devices, &n_connections);
	state_change = alloc_state_change(n_devices, n_connections, gfp);
	if (!state_change)
		return NULL;

	kref_get(&resource->kref);
	state_change->resource->resource = resource;
	state_change->resource->role[OLD] =
		conn_highest_role(first_connection(resource));
	state_change->resource->susp[OLD] = resource->susp;
	state_change->resource->susp_nod[OLD] = resource->susp_nod;
	state_change->resource->susp_fen[OLD] = resource->susp_fen;

	connection_state_change = state_change->connections;
	for_each_connection(connection, resource) {
		kref_get(&connection->kref);
		connection_state_change->connection = connection;
		connection_state_change->cstate[OLD] =
			connection->cstate;
		connection_state_change->peer_role[OLD] =
			conn_highest_peer(connection);
		connection_state_change++;
	}

	device_state_change = state_change->devices;
	peer_device_state_change = state_change->peer_devices;
	idr_for_each_entry(&resource->devices, device, vnr) {
		kref_get(&device->kref);
		device_state_change->device = device;
		device_state_change->disk_state[OLD] = device->state.disk;

		/* The peer_devices for each device have to be enumerated in
		   the order of the connections. We may not use
		   for_each_peer_device() here. */
		for_each_connection(connection, resource) {
			struct drbd_peer_device *peer_device;

			peer_device = conn_peer_device(connection, device->vnr);
			peer_device_state_change->peer_device = peer_device;
			peer_device_state_change->disk_state[OLD] =
				device->state.pdsk;
			peer_device_state_change->repl_state[OLD] =
				max_t(enum drbd_conns,
				      C_WF_REPORT_PARAMS, device->state.conn);
			peer_device_state_change->resync_susp_user[OLD] =
				device->state.user_isp;
			peer_device_state_change->resync_susp_peer[OLD] =
				device->state.peer_isp;
			peer_device_state_change->resync_susp_dependency[OLD] =
				device->state.aftr_isp;
			peer_device_state_change++;
		}
		device_state_change++;
	}

	return state_change;
}

static void remember_new_state(struct drbd_state_change *state_change)
{
	struct drbd_resource_state_change *resource_state_change;
	struct drbd_resource *resource;
	unsigned int n;

	if (!state_change)
		return;

	resource_state_change = &state_change->resource[0];
	resource = resource_state_change->resource;

	resource_state_change->role[NEW] =
		conn_highest_role(first_connection(resource));
	resource_state_change->susp[NEW] = resource->susp;
	resource_state_change->susp_nod[NEW] = resource->susp_nod;
	resource_state_change->susp_fen[NEW] = resource->susp_fen;

	for (n = 0; n < state_change->n_devices; n++) {
		struct drbd_device_state_change *device_state_change =
			&state_change->devices[n];
		struct drbd_device *device = device_state_change->device;

		device_state_change->disk_state[NEW] = device->state.disk;
	}

	for (n = 0; n < state_change->n_connections; n++) {
		struct drbd_connection_state_change *connection_state_change =
			&state_change->connections[n];
		struct drbd_connection *connection =
			connection_state_change->connection;

		connection_state_change->cstate[NEW] = connection->cstate;
		connection_state_change->peer_role[NEW] =
			conn_highest_peer(connection);
	}

	for (n = 0; n < state_change->n_devices * state_change->n_connections; n++) {
		struct drbd_peer_device_state_change *peer_device_state_change =
			&state_change->peer_devices[n];
		struct drbd_device *device =
			peer_device_state_change->peer_device->device;
		union drbd_dev_state state = device->state;

		peer_device_state_change->disk_state[NEW] = state.pdsk;
		peer_device_state_change->repl_state[NEW] =
			max_t(enum drbd_conns, C_WF_REPORT_PARAMS, state.conn);
		peer_device_state_change->resync_susp_user[NEW] =
			state.user_isp;
		peer_device_state_change->resync_susp_peer[NEW] =
			state.peer_isp;
		peer_device_state_change->resync_susp_dependency[NEW] =
			state.aftr_isp;
	}
}

void copy_old_to_new_state_change(struct drbd_state_change *state_change)
{
	struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;

#define OLD_TO_NEW(x) \
	(x[NEW] = x[OLD])

	OLD_TO_NEW(resource_state_change->role);
	OLD_TO_NEW(resource_state_change->susp);
	OLD_TO_NEW(resource_state_change->susp_nod);
	OLD_TO_NEW(resource_state_change->susp_fen);

	for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
		struct drbd_connection_state_change *connection_state_change =
			&state_change->connections[n_connection];

		OLD_TO_NEW(connection_state_change->peer_role);
		OLD_TO_NEW(connection_state_change->cstate);
	}

	for (n_device = 0; n_device < state_change->n_devices; n_device++) {
		struct drbd_device_state_change *device_state_change =
			&state_change->devices[n_device];

		OLD_TO_NEW(device_state_change->disk_state);
	}

	n_peer_devices = state_change->n_devices * state_change->n_connections;
	for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
		struct drbd_peer_device_state_change *p =
			&state_change->peer_devices[n_peer_device];

		OLD_TO_NEW(p->disk_state);
		OLD_TO_NEW(p->repl_state);
		OLD_TO_NEW(p->resync_susp_user);
		OLD_TO_NEW(p->resync_susp_peer);
		OLD_TO_NEW(p->resync_susp_dependency);
	}

#undef OLD_TO_NEW
}

void forget_state_change(struct drbd_state_change *state_change)
{
	unsigned int n;

	if (!state_change)
		return;

	if (state_change->resource->resource)
		kref_put(&state_change->resource->resource->kref, drbd_destroy_resource);
	for (n = 0; n < state_change->n_devices; n++) {
		struct drbd_device *device = state_change->devices[n].device;

		if (device)
			kref_put(&device->kref, drbd_destroy_device);
	}
	for (n = 0; n < state_change->n_connections; n++) {
		struct drbd_connection *connection =
			state_change->connections[n].connection;

		if (connection)
			kref_put(&connection->kref, drbd_destroy_connection);
	}
	kfree(state_change);
}

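/*
 * Rough lifecycle of such a snapshot, as _drbd_set_state() further down
 * uses it (illustrative sketch only, not an additional API contract):
 *
 *	state_change = remember_old_state(resource, GFP_ATOMIC);
 *	... commit the new state under req_lock ...
 *	remember_new_state(state_change);
 *	... broadcast from the after-state-change work ...
 *	forget_state_change(state_change);  // drops the krefs taken above
 */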
static int w_after_state_ch(struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_device *device, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags,
			   struct drbd_state_change *);
static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state);
static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *);
static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn);

static inline bool is_susp(union drbd_state s)
{
	return s.susp || s.susp_nod || s.susp_fen;
}

bool conn_all_vols_unconf(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	bool rv = true;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		if (device->state.disk != D_DISKLESS ||
		    device->state.conn != C_STANDALONE ||
		    device->state.role != R_SECONDARY) {
			rv = false;
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}

/* Unfortunately the states were not correctly ordered when they were
   defined, therefore we cannot use max_t() here. */
static enum drbd_role max_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_PRIMARY || role2 == R_PRIMARY)
		return R_PRIMARY;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_UNKNOWN;
}

static enum drbd_role min_role(enum drbd_role role1, enum drbd_role role2)
{
	if (role1 == R_UNKNOWN || role2 == R_UNKNOWN)
		return R_UNKNOWN;
	if (role1 == R_SECONDARY || role2 == R_SECONDARY)
		return R_SECONDARY;
	return R_PRIMARY;
}

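/*
 * Why a numeric max won't do: in enum drbd_role (drbd.h, if I read it
 * correctly) R_PRIMARY sorts numerically below R_SECONDARY, so max_t()
 * would rank Secondary above Primary.  The helpers above encode the
 * intended Primary > Secondary > Unknown order explicitly.
 */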
enum drbd_role conn_highest_role(struct drbd_connection *connection)
{
	enum drbd_role role = R_SECONDARY;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		role = max_role(role, device->state.role);
	}
	rcu_read_unlock();

	return role;
}

enum drbd_role conn_highest_peer(struct drbd_connection *connection)
{
	enum drbd_role peer = R_UNKNOWN;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		peer = max_role(peer, device->state.peer);
	}
	rcu_read_unlock();

	return peer;
}

enum drbd_disk_state conn_highest_disk(struct drbd_connection *connection)
{
	enum drbd_disk_state disk_state = D_DISKLESS;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		disk_state = max_t(enum drbd_disk_state, disk_state, device->state.disk);
	}
	rcu_read_unlock();

	return disk_state;
}

enum drbd_disk_state conn_lowest_disk(struct drbd_connection *connection)
{
	enum drbd_disk_state disk_state = D_MASK;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		disk_state = min_t(enum drbd_disk_state, disk_state, device->state.disk);
	}
	rcu_read_unlock();

	return disk_state;
}

enum drbd_disk_state conn_highest_pdsk(struct drbd_connection *connection)
{
	enum drbd_disk_state disk_state = D_DISKLESS;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		disk_state = max_t(enum drbd_disk_state, disk_state, device->state.pdsk);
	}
	rcu_read_unlock();

	return disk_state;
}

enum drbd_conns conn_lowest_conn(struct drbd_connection *connection)
{
	enum drbd_conns conn = C_MASK;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		conn = min_t(enum drbd_conns, conn, device->state.conn);
	}
	rcu_read_unlock();

	return conn;
}

static bool no_peer_wf_report_params(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;
	bool rv = true;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		if (peer_device->device->state.conn == C_WF_REPORT_PARAMS) {
			rv = false;
			break;
		}
	rcu_read_unlock();

	return rv;
}

static void wake_up_all_devices(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		wake_up(&peer_device->device->state_wait);
	rcu_read_unlock();
}

/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @device:	DRBD device.
 * @os:		old (current) state.
 * @ns:		new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_device *device,
			  union drbd_state os, union drbd_state ns)
{
	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S) ||
		(os.conn == C_CONNECTED && ns.conn == C_WF_REPORT_PARAMS);
}

static union drbd_state
apply_mask_val(union drbd_state os, union drbd_state mask, union drbd_state val)
{
	union drbd_state ns;

	ns.i = (os.i & ~mask.i) | val.i;
	return ns;
}

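/*
 * Illustrative use of the mask/val pair (a sketch only; the NS() macro,
 * as used by request_detach() below, builds such a pair for one field):
 *
 *	union drbd_state mask = { .i = 0 }, val = { .i = 0 };
 *	mask.conn = C_MASK;		// touch only the connection field
 *	val.conn  = C_DISCONNECTING;	// ... and set it to Disconnecting
 *	ns = apply_mask_val(os, mask, val);
 */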
enum drbd_state_rv
drbd_change_state(struct drbd_device *device, enum chg_state_flags f,
		  union drbd_state mask, union drbd_state val)
{
	unsigned long flags;
	union drbd_state ns;
	enum drbd_state_rv rv;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	ns = apply_mask_val(drbd_read_state(device), mask, val);
	rv = _drbd_set_state(device, ns, f, NULL);
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	return rv;
}

/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @device:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 */
void drbd_force_state(struct drbd_device *device,
		      union drbd_state mask, union drbd_state val)
{
	drbd_change_state(device, CS_HARD, mask, val);
}

static enum drbd_state_rv
_req_st_cond(struct drbd_device *device, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &device->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &device->flags))
		return SS_CW_FAILED_BY_PEER;

	spin_lock_irqsave(&device->resource->req_lock, flags);
	os = drbd_read_state(device);
	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv >= SS_SUCCESS)
		rv = SS_UNKNOWN_ERROR;  /* cont waiting, otherwise fail. */

	if (!cl_wide_st_chg(device, os, ns))
		rv = SS_CW_NO_NEED;
	if (rv == SS_UNKNOWN_ERROR) {
		rv = is_valid_state(device, ns);
		if (rv >= SS_SUCCESS) {
			rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
			if (rv >= SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	return rv;
}

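/*
 * Note on the return-value protocol (as drbd_req_state() below relies on
 * it): SS_UNKNOWN_ERROR is 0, so wait_event(device->state_wait,
 * (rv = _req_st_cond(...))) keeps sleeping while the cluster-wide answer
 * is still outstanding and wakes up on any other (nonzero) verdict.
 */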
/**
 * drbd_req_state() - Perform a possibly cluster-wide state change
 * @device:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_device *device, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;
	void *buffer = NULL;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(device->state_mutex);
	if (f & CS_INHIBIT_MD_IO)
		buffer = drbd_md_get_buffer(device, __func__);

	spin_lock_irqsave(&device->resource->req_lock, flags);
	os = drbd_read_state(device);
	ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);
	rv = is_valid_transition(os, ns);
	if (rv < SS_SUCCESS) {
		spin_unlock_irqrestore(&device->resource->req_lock, flags);
		goto abort;
	}

	if (cl_wide_st_chg(device, os, ns)) {
		rv = is_valid_state(device, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection);
		spin_unlock_irqrestore(&device->resource->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(device, os, ns, rv);
			goto abort;
		}

		if (drbd_send_state_req(first_peer_device(device), mask, val)) {
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(device, os, ns, rv);
			goto abort;
		}

		wait_event(device->state_wait,
			(rv = _req_st_cond(device, mask, val)));

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(device, os, ns, rv);
			goto abort;
		}
		spin_lock_irqsave(&device->resource->req_lock, flags);
		ns = apply_mask_val(drbd_read_state(device), mask, val);
		rv = _drbd_set_state(device, ns, f, &done);
	} else {
		rv = _drbd_set_state(device, ns, f, &done);
	}

	spin_unlock_irqrestore(&device->resource->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		D_ASSERT(device, current != first_peer_device(device)->connection->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (buffer)
		drbd_md_put_buffer(device);
	if (f & CS_SERIALIZE)
		mutex_unlock(device->state_mutex);

	return rv;
}

/**
 * _drbd_request_state() - Request a state change (with flags)
 * @device:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_device *device, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(device->state_wait,
		   (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}

/*
 * We grab drbd_md_get_buffer(), because we don't want to "fail" the disk while
 * there is IO in-flight: the transition into D_FAILED for detach purposes
 * may get misinterpreted as actual IO error in a confused endio function.
 *
 * We wrap it all into wait_event(), to retry in case the drbd_req_state()
 * returns SS_IN_TRANSIENT_STATE.
 *
 * To avoid potential deadlock with e.g. the receiver thread trying to grab
 * drbd_md_get_buffer() while trying to get out of the "transient state", we
 * need to grab and release the meta data buffer inside of that wait_event loop.
 */
static enum drbd_state_rv
request_detach(struct drbd_device *device)
{
	return drbd_req_state(device, NS(disk, D_FAILED),
			CS_VERBOSE | CS_ORDERED | CS_INHIBIT_MD_IO);
}

int drbd_request_detach_interruptible(struct drbd_device *device)
{
	int ret, rv;

	drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
	wait_event_interruptible(device->state_wait,
		(rv = request_detach(device)) != SS_IN_TRANSIENT_STATE);
	drbd_resume_io(device);

	ret = wait_event_interruptible(device->misc_wait,
			device->state.disk != D_FAILED);

	if (rv == SS_IS_DISKLESS)
		rv = SS_NOTHING_TO_DO;
	if (ret)
		rv = ERR_INTR;

	return rv;
}

enum drbd_state_rv
_drbd_request_state_holding_state_mutex(struct drbd_device *device, union drbd_state mask,
					union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	BUG_ON(f & CS_SERIALIZE);

	wait_event_cmd(device->state_wait,
		       (rv = drbd_req_state(device, mask, val, f)) != SS_IN_TRANSIENT_STATE,
		       mutex_unlock(device->state_mutex),
		       mutex_lock(device->state_mutex));

	return rv;
}

static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
{
	drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
	    name,
	    drbd_conn_str(ns.conn),
	    drbd_role_str(ns.role),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-',
	    ns.susp_fen ? 'F' : '-',
	    ns.susp_nod ? 'N' : '-'
	    );
}

void print_st_err(struct drbd_device *device, union drbd_state os,
		  union drbd_state ns, enum drbd_state_rv err)
{
	if (err == SS_IN_TRANSIENT_STATE)
		return;
	drbd_err(device, "State change failed: %s\n", drbd_set_st_err_str(err));
	print_st(device, " state", os);
	print_st(device, "wanted", ns);
}

static long print_state_change(char *pb, union drbd_state os, union drbd_state ns,
			       enum chg_state_flags flags)
{
	char *pbp;
	pbp = pb;
	*pbp = 0;

	if (ns.role != os.role && flags & CS_DC_ROLE)
		pbp += sprintf(pbp, "role( %s -> %s ) ",
			       drbd_role_str(os.role),
			       drbd_role_str(ns.role));
	if (ns.peer != os.peer && flags & CS_DC_PEER)
		pbp += sprintf(pbp, "peer( %s -> %s ) ",
			       drbd_role_str(os.peer),
			       drbd_role_str(ns.peer));
	if (ns.conn != os.conn && flags & CS_DC_CONN)
		pbp += sprintf(pbp, "conn( %s -> %s ) ",
			       drbd_conn_str(os.conn),
			       drbd_conn_str(ns.conn));
	if (ns.disk != os.disk && flags & CS_DC_DISK)
		pbp += sprintf(pbp, "disk( %s -> %s ) ",
			       drbd_disk_str(os.disk),
			       drbd_disk_str(ns.disk));
	if (ns.pdsk != os.pdsk && flags & CS_DC_PDSK)
		pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
			       drbd_disk_str(os.pdsk),
			       drbd_disk_str(ns.pdsk));

	return pbp - pb;
}

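/*
 * The buffer built above ends up in log lines of roughly this shape,
 * one clause per changed field (example output, for illustration only):
 *
 *	role( Secondary -> Primary ) disk( Inconsistent -> UpToDate )
 */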
static void drbd_pr_state_change(struct drbd_device *device, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags ^ CS_DC_MASK);

	if (ns.aftr_isp != os.aftr_isp)
		pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
			       os.aftr_isp,
			       ns.aftr_isp);
	if (ns.peer_isp != os.peer_isp)
		pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
			       os.peer_isp,
			       ns.peer_isp);
	if (ns.user_isp != os.user_isp)
		pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
			       os.user_isp,
			       ns.user_isp);

	if (pbp != pb)
		drbd_info(device, "%s\n", pb);
}

static void conn_pr_state_change(struct drbd_connection *connection, union drbd_state os, union drbd_state ns,
				 enum chg_state_flags flags)
{
	char pb[300];
	char *pbp = pb;

	pbp += print_state_change(pbp, os, ns, flags);

	if (is_susp(ns) != is_susp(os) && flags & CS_DC_SUSP)
		pbp += sprintf(pbp, "susp( %d -> %d ) ",
			       is_susp(os),
			       is_susp(ns));

	if (pbp != pb)
		drbd_info(connection, "%s\n", pb);
}

/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @device:	DRBD device.
 * @ns:		State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_device *device, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;
	struct net_conf *nc;

	rcu_read_lock();
	fp = FP_DONT_CARE;
	if (get_ldev(device)) {
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		put_ldev(device);
	}

	nc = rcu_dereference(first_peer_device(device)->connection->net_conf);
	if (nc) {
		if (!nc->two_primaries && ns.role == R_PRIMARY) {
			if (ns.peer == R_PRIMARY)
				rv = SS_TWO_PRIMARIES;
			else if (conn_highest_peer(first_peer_device(device)->connection) == R_PRIMARY)
				rv = SS_O_VOL_PEER_PRI;
		}
	}

	if (rv <= 0)
		goto out; /* already found a reason to abort */
	else if (ns.role == R_SECONDARY && device->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (nc->verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  first_peer_device(device)->connection->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
		 ns.pdsk == D_UNKNOWN)
		rv = SS_NEED_CONNECTION;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

out:
	rcu_read_unlock();

	return rv;
}

/**
 * is_valid_soft_transition() - Returns an SS_ error code if the state transition is not possible
 * This function limits state transitions that may be declined by DRBD,
 * i.e. user requests (aka soft transitions).
 * @os:		old state.
 * @ns:		new state.
 * @connection:	DRBD connection.
 */
static enum drbd_state_rv
is_valid_soft_transition(union drbd_state os, union drbd_state ns, struct drbd_connection *connection)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	/* While establishing a connection only allow cstate to change.
	   Delay/refuse role changes, detach attach etc... (they do not touch cstate) */
	if (test_bit(STATE_SENT, &connection->flags) &&
	    !((ns.conn == C_WF_REPORT_PARAMS && os.conn == C_WF_CONNECTION) ||
	      (ns.conn >= C_CONNECTED && os.conn == C_WF_REPORT_PARAMS)))
		rv = SS_IN_TRANSIENT_STATE;

	/* Do not promote during resync handshake triggered by "force primary".
	 * This is a hack. It should really be rejected by the peer during the
	 * cluster wide state change request. */
	if (os.role != R_PRIMARY && ns.role == R_PRIMARY
		&& ns.pdsk == D_UP_TO_DATE
		&& ns.disk != D_UP_TO_DATE && ns.disk != D_DISKLESS
		&& (ns.conn <= C_WF_SYNC_UUID || ns.conn != os.conn))
		rv = SS_IN_TRANSIENT_STATE;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	if (ns.conn == C_DISCONNECTING && ns.pdsk == D_OUTDATED &&
	    os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)
		rv = SS_OUTDATE_WO_CONN;

	return rv;
}

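/*
 * The range checks below lean on the numeric order of enum drbd_conns
 * (see drbd.h): roughly C_STANDALONE < C_DISCONNECTING < C_UNCONNECTED
 * < C_TIMEOUT ... C_TEAR_DOWN < C_WF_CONNECTION < C_WF_REPORT_PARAMS
 * < C_CONNECTED < the sync/verify states.  In particular,
 * "oc >= C_TIMEOUT && oc <= C_TEAR_DOWN" is the band of network-failure
 * states.
 */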
static enum drbd_state_rv
is_valid_conn_transition(enum drbd_conns oc, enum drbd_conns nc)
{
	/* no change -> nothing to do, at least for the connection part */
	if (oc == nc)
		return SS_NOTHING_TO_DO;

	/* disconnect of an unconfigured connection does not make sense */
	if (oc == C_STANDALONE && nc == C_DISCONNECTING)
		return SS_ALREADY_STANDALONE;

	/* from C_STANDALONE, we start with C_UNCONNECTED */
	if (oc == C_STANDALONE && nc != C_UNCONNECTED)
		return SS_NEED_CONNECTION;

	/* When establishing a connection we need to go through WF_REPORT_PARAMS!
	   Necessary to do the right thing upon invalidate-remote on a disconnected resource */
	if (oc < C_WF_REPORT_PARAMS && nc >= C_CONNECTED)
		return SS_NEED_CONNECTION;

	/* After a network error only C_UNCONNECTED or C_DISCONNECTING may follow. */
	if (oc >= C_TIMEOUT && oc <= C_TEAR_DOWN && nc != C_UNCONNECTED && nc != C_DISCONNECTING)
		return SS_IN_TRANSIENT_STATE;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (oc == C_DISCONNECTING && nc != C_STANDALONE)
		return SS_IN_TRANSIENT_STATE;

	return SS_SUCCESS;
}

/**
 * is_valid_transition() - Returns an SS_ error code if the state transition is not possible
 * This limits hard state transitions. Hard state transitions are facts that are
 * imposed on DRBD by the environment. E.g. disk broke or network broke down.
 * But those hard state transitions are still not allowed to do everything.
 * @ns:		new state.
 * @os:		old state.
 */
static enum drbd_state_rv
is_valid_transition(union drbd_state os, union drbd_state ns)
{
	enum drbd_state_rv rv;

	rv = is_valid_conn_transition(os.conn, ns.conn);

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	return rv;
}

static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_state_warnings warn)
{
	static const char *msg_table[] = {
		[NO_WARNING] = "",
		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
		[ABORTED_RESYNC] = "Resync aborted.",
		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
	};

	if (warn != NO_WARNING)
		drbd_warn(device, "%s\n", msg_table[warn]);
}

/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @device:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @warn:	placeholder for returned state warning.
 *
 * When we lose connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 */
static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	if (warn)
		*warn = NO_WARNING;

	fp = FP_DONT_CARE;
	if (get_ldev(device)) {
		rcu_read_lock();
		fp = rcu_dereference(device->ldev->disk_conf)->fencing;
		rcu_read_unlock();
		put_ldev(device);
	}

	/* Implications from connection to peer and peer_isp */
	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* An implication of the disk states onto the connection state */
	/* Abort resync if a disk fails/detaches */
	if (ns.conn > C_CONNECTED && (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn)
			*warn = ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T ?
				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		if (device->ed_uuid == device->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = device->new_state_tmp.disk;
			ns.pdsk = device->new_state_tmp.pdsk;
		} else {
			if (warn)
				*warn = CONNECTION_LOST_NEGOTIATING;
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(device);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		break;
	}
	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_DISK;
		ns.disk = disk_min;
	}
	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_PDSK;
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}

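/*
 * Two examples of the implications resolved above (illustrative only):
 * request C_SYNC_TARGET while user_isp is set and the result comes back
 * as C_PAUSED_SYNC_T; drop the connection below C_CONNECTED and peer and
 * pdsk are forced to R_UNKNOWN / D_UNKNOWN regardless of what the caller
 * asked for.
 */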
void drbd_resume_al(struct drbd_device *device)
{
	if (test_and_clear_bit(AL_SUSPENDED, &device->flags))
		drbd_info(device, "Resumed AL updates\n");
}

/* helper for _drbd_set_state */
static void set_ov_position(struct drbd_device *device, enum drbd_conns cs)
{
	if (first_peer_device(device)->connection->agreed_pro_version < 90)
		device->ov_start_sector = 0;
	device->rs_total = drbd_bm_bits(device);
	device->ov_position = 0;
	if (cs == C_VERIFY_T) {
		/* starting online verify from an arbitrary position
		 * does not fit well into the existing protocol.
		 * on C_VERIFY_T, we initialize ov_left and friends
		 * implicitly in receive_DataRequest once the
		 * first P_OV_REQUEST is received */
		device->ov_start_sector = ~(sector_t)0;
	} else {
		unsigned long bit = BM_SECT_TO_BIT(device->ov_start_sector);
		if (bit >= device->rs_total) {
			device->ov_start_sector =
				BM_BIT_TO_SECT(device->rs_total - 1);
			device->rs_total = 1;
		} else
			device->rs_total -= bit;
		device->ov_position = device->ov_start_sector;
	}
	device->ov_left = device->rs_total;
}

  1055. /**
  1056. * _drbd_set_state() - Set a new DRBD state
  1057. * @device: DRBD device.
  1058. * @ns: new state.
  1059. * @flags: Flags
  1060. * @done: Optional completion, that will get completed after the after_state_ch() finished
  1061. *
  1062. * Caller needs to hold req_lock. Do not call directly.
  1063. */
  1064. enum drbd_state_rv
  1065. _drbd_set_state(struct drbd_device *device, union drbd_state ns,
  1066. enum chg_state_flags flags, struct completion *done)
  1067. {
  1068. struct drbd_peer_device *peer_device = first_peer_device(device);
  1069. struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
  1070. union drbd_state os;
  1071. enum drbd_state_rv rv = SS_SUCCESS;
  1072. enum sanitize_state_warnings ssw;
  1073. struct after_state_chg_work *ascw;
  1074. struct drbd_state_change *state_change;
  1075. os = drbd_read_state(device);
  1076. ns = sanitize_state(device, os, ns, &ssw);
  1077. if (ns.i == os.i)
  1078. return SS_NOTHING_TO_DO;
  1079. rv = is_valid_transition(os, ns);
  1080. if (rv < SS_SUCCESS)
  1081. return rv;
  1082. if (!(flags & CS_HARD)) {
  1083. /* pre-state-change checks ; only look at ns */
  1084. /* See drbd_state_sw_errors in drbd_strings.c */
  1085. rv = is_valid_state(device, ns);
  1086. if (rv < SS_SUCCESS) {
  1087. /* If the old state was illegal as well, then let
  1088. this happen...*/
  1089. if (is_valid_state(device, os) == rv)
  1090. rv = is_valid_soft_transition(os, ns, connection);
  1091. } else
  1092. rv = is_valid_soft_transition(os, ns, connection);
  1093. }
  1094. if (rv < SS_SUCCESS) {
  1095. if (flags & CS_VERBOSE)
  1096. print_st_err(device, os, ns, rv);
  1097. return rv;
  1098. }
  1099. print_sanitize_warnings(device, ssw);
  1100. drbd_pr_state_change(device, os, ns, flags);
  1101. /* Display changes to the susp* flags that where caused by the call to
  1102. sanitize_state(). Only display it here if we where not called from
  1103. _conn_request_state() */
  1104. if (!(flags & CS_DC_SUSP))
  1105. conn_pr_state_change(connection, os, ns,
  1106. (flags & ~CS_DC_MASK) | CS_DC_SUSP);
  1107. /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
  1108. * on the ldev here, to be sure the transition -> D_DISKLESS resp.
  1109. * drbd_ldev_destroy() won't happen before our corresponding
  1110. * after_state_ch works run, where we put_ldev again. */
  1111. if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
  1112. (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
  1113. atomic_inc(&device->local_cnt);
  1114. if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
  1115. clear_bit(RS_DONE, &device->flags);
  1116. /* FIXME: Have any flags been set earlier in this function already? */
  1117. state_change = remember_old_state(device->resource, GFP_ATOMIC);
  1118. /* changes to local_cnt and device flags should be visible before
  1119. * changes to state, which again should be visible before anything else
  1120. * depending on that change happens. */
  1121. smp_wmb();
  1122. device->state.i = ns.i;
  1123. device->resource->susp = ns.susp;
  1124. device->resource->susp_nod = ns.susp_nod;
  1125. device->resource->susp_fen = ns.susp_fen;
  1126. smp_wmb();
  1127. remember_new_state(state_change);
  1128. /* put replicated vs not-replicated requests in seperate epochs */
  1129. if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
  1130. drbd_should_do_remote((union drbd_dev_state)ns.i))
  1131. start_new_tl_epoch(connection);
  1132. if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
  1133. drbd_print_uuids(device, "attached to UUIDs");
  1134. /* Wake up role changes, that were delayed because of connection establishing */
  1135. if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
  1136. no_peer_wf_report_params(connection)) {
  1137. clear_bit(STATE_SENT, &connection->flags);
  1138. wake_up_all_devices(connection);
  1139. }
  1140. wake_up(&device->misc_wait);
  1141. wake_up(&device->state_wait);
  1142. wake_up(&connection->ping_wait);
  1143. /* Aborted verify run, or we reached the stop sector.
  1144. * Log the last position, unless end-of-device. */
  1145. if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
  1146. ns.conn <= C_CONNECTED) {
  1147. device->ov_start_sector =
  1148. BM_BIT_TO_SECT(drbd_bm_bits(device) - device->ov_left);
  1149. if (device->ov_left)
  1150. drbd_info(device, "Online Verify reached sector %llu\n",
  1151. (unsigned long long)device->ov_start_sector);
  1152. }

	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		drbd_info(device, "Syncer continues.\n");
		device->rs_paused += (long)jiffies
				  -(long)device->rs_mark_time[device->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&device->resync_timer, jiffies);
	}

	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		drbd_info(device, "Resync suspended\n");
		device->rs_mark_time[device->rs_last_mark] = jiffies;
	}

	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(device, ns.conn);
		device->rs_start = now;
		device->rs_last_sect_ev = 0;
		device->ov_last_oos_size = 0;
		device->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			device->rs_mark_left[i] = device->ov_left;
			device->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(device);

		if (ns.conn == C_VERIFY_S) {
			drbd_info(device, "Starting Online Verify from sector %llu\n",
					(unsigned long long)device->ov_position);
			mod_timer(&device->resync_timer, jiffies);
		}
	}
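
	/* The rs_mark_left[]/rs_mark_time[] slots seeded above form a small
	 * ring of recent progress marks (presumably consumed by the progress
	 * display); starting every slot at "now"/ov_left gives the first
	 * speed estimates a clean baseline, as on resync start. */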

	if (get_ldev(device)) {
		u32 mdf = device->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		mdf &= ~MDF_AL_CLEAN;
		if (test_bit(CRASHED_PRIMARY, &device->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (device->state.role == R_PRIMARY ||
		    (device->state.pdsk < D_INCONSISTENT && device->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (device->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (device->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (device->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (device->state.pdsk <= D_OUTDATED && device->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != device->ldev->md.flags) {
			device->ldev->md.flags = mdf;
			drbd_md_mark_dirty(device);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(device, device->ldev->md.uuid[UI_CURRENT]);
		put_ldev(device);
	}
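
	/* Illustrative example of the recomputation above: a connected
	 * Primary with a local UpToDate disk and an Outdated peer ends up
	 * with
	 *	MDF_PRIMARY_IND | MDF_CONNECTED_IND | MDF_CONSISTENT |
	 *	MDF_WAS_UP_TO_DATE | MDF_PEER_OUT_DATED
	 * in md.flags (plus MDF_CRASHED_PRIMARY if that bit is set), and
	 * MDF_AL_CLEAN always cleared. */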

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &device->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&connection->receiver);

	/* Now that the receiver finished cleaning up after itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&connection->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_WF_CONNECTION &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&connection->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
		drbd_resume_al(device);
		connection->connect_cnt++;
	}

	/* remember last attach time so request_timer_fn() won't
	 * kill newly established sessions while we are still trying to thaw
	 * previously frozen IO */
	if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
	    ns.disk > D_NEGOTIATING)
		device->last_reattach_jif = jiffies;

	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->device = device;
		ascw->done = done;
		ascw->state_change = state_change;
		drbd_queue_work(&connection->sender_work,
				&ascw->w);
	} else {
		drbd_err(device, "Could not kmalloc an ascw\n");
	}

	return rv;
}

static int w_after_state_ch(struct drbd_work *w, int unused)
{
	struct after_state_chg_work *ascw =
		container_of(w, struct after_state_chg_work, w);
	struct drbd_device *device = ascw->device;

	after_state_ch(device, ascw->os, ascw->ns, ascw->flags, ascw->state_change);
	forget_state_change(ascw->state_change);
	if (ascw->flags & CS_WAIT_COMPLETE)
		complete(ascw->done);
	kfree(ascw);

	return 0;
}
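
/* The ascw indirection above exists so that after_state_ch() runs from the
 * worker thread (process context, may sleep and do network/disk IO), while
 * the state transition itself happened under the req_lock spinlock. */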

static void abw_start_sync(struct drbd_device *device, int rv)
{
	if (rv) {
		drbd_err(device, "Writing the bitmap failed, not starting resync.\n");
		_drbd_request_state(device, NS(conn, C_CONNECTED), CS_VERBOSE);
		return;
	}

	switch (device->state.conn) {
	case C_STARTING_SYNC_T:
		_drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		break;
	case C_STARTING_SYNC_S:
		drbd_start_resync(device, C_SYNC_SOURCE);
		break;
	}
}

int drbd_bitmap_io_from_worker(struct drbd_device *device,
		int (*io_fn)(struct drbd_device *),
		char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);

	/* open coded non-blocking drbd_suspend_io(device); */
	atomic_inc(&device->suspend_cnt);

	drbd_bm_lock(device, why, flags);
	rv = io_fn(device);
	drbd_bm_unlock(device);

	drbd_resume_io(device);

	return rv;
}
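
/* Typical use, as in after_state_ch() below:
 *
 *	drbd_bitmap_io_from_worker(device, &drbd_bm_write,
 *			"demote", BM_LOCKED_TEST_ALLOWED);
 *
 * i.e. synchronously write out the bitmap from the worker, with application
 * IO suspended for the duration. */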

int notify_resource_state_change(struct sk_buff *skb,
				  unsigned int seq,
				  struct drbd_resource_state_change *resource_state_change,
				  enum drbd_notification_type type)
{
	struct drbd_resource *resource = resource_state_change->resource;
	struct resource_info resource_info = {
		.res_role = resource_state_change->role[NEW],
		.res_susp = resource_state_change->susp[NEW],
		.res_susp_nod = resource_state_change->susp_nod[NEW],
		.res_susp_fen = resource_state_change->susp_fen[NEW],
	};

	return notify_resource_state(skb, seq, resource, &resource_info, type);
}

int notify_connection_state_change(struct sk_buff *skb,
				    unsigned int seq,
				    struct drbd_connection_state_change *connection_state_change,
				    enum drbd_notification_type type)
{
	struct drbd_connection *connection = connection_state_change->connection;
	struct connection_info connection_info = {
		.conn_connection_state = connection_state_change->cstate[NEW],
		.conn_role = connection_state_change->peer_role[NEW],
	};

	return notify_connection_state(skb, seq, connection, &connection_info, type);
}

int notify_device_state_change(struct sk_buff *skb,
				unsigned int seq,
				struct drbd_device_state_change *device_state_change,
				enum drbd_notification_type type)
{
	struct drbd_device *device = device_state_change->device;
	struct device_info device_info = {
		.dev_disk_state = device_state_change->disk_state[NEW],
	};

	return notify_device_state(skb, seq, device, &device_info, type);
}

int notify_peer_device_state_change(struct sk_buff *skb,
				     unsigned int seq,
				     struct drbd_peer_device_state_change *p,
				     enum drbd_notification_type type)
{
	struct drbd_peer_device *peer_device = p->peer_device;
	struct peer_device_info peer_device_info = {
		.peer_repl_state = p->repl_state[NEW],
		.peer_disk_state = p->disk_state[NEW],
		.peer_resync_susp_user = p->resync_susp_user[NEW],
		.peer_resync_susp_peer = p->resync_susp_peer[NEW],
		.peer_resync_susp_dependency = p->resync_susp_dependency[NEW],
	};

	return notify_peer_device_state(skb, seq, peer_device, &peer_device_info, type);
}

static void broadcast_state_change(struct drbd_state_change *state_change)
{
	struct drbd_resource_state_change *resource_state_change = &state_change->resource[0];
	bool resource_state_has_changed;
	unsigned int n_device, n_connection, n_peer_device, n_peer_devices;
	int (*last_func)(struct sk_buff *, unsigned int, void *,
			 enum drbd_notification_type) = NULL;
	void *last_arg = NULL;

#define HAS_CHANGED(state) ((state)[OLD] != (state)[NEW])
#define FINAL_STATE_CHANGE(type) \
	({ if (last_func) \
		last_func(NULL, 0, last_arg, type); \
	})
#define REMEMBER_STATE_CHANGE(func, arg, type) \
	({ FINAL_STATE_CHANGE(type | NOTIFY_CONTINUES); \
	   last_func = (typeof(last_func))func; \
	   last_arg = arg; \
	 })
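
	/* The two helper macros above implement a one-element lookahead:
	 * each newly remembered change first flushes the previous one with
	 * NOTIFY_CONTINUES set, so only the very last notification of the
	 * batch (emitted by FINAL_STATE_CHANGE(NOTIFY_CHANGE) below) lacks
	 * that flag and thereby marks the end of the sequence. */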

	mutex_lock(&notification_mutex);

	resource_state_has_changed =
	    HAS_CHANGED(resource_state_change->role) ||
	    HAS_CHANGED(resource_state_change->susp) ||
	    HAS_CHANGED(resource_state_change->susp_nod) ||
	    HAS_CHANGED(resource_state_change->susp_fen);

	if (resource_state_has_changed)
		REMEMBER_STATE_CHANGE(notify_resource_state_change,
				      resource_state_change, NOTIFY_CHANGE);

	for (n_connection = 0; n_connection < state_change->n_connections; n_connection++) {
		struct drbd_connection_state_change *connection_state_change =
				&state_change->connections[n_connection];

		if (HAS_CHANGED(connection_state_change->peer_role) ||
		    HAS_CHANGED(connection_state_change->cstate))
			REMEMBER_STATE_CHANGE(notify_connection_state_change,
					      connection_state_change, NOTIFY_CHANGE);
	}

	for (n_device = 0; n_device < state_change->n_devices; n_device++) {
		struct drbd_device_state_change *device_state_change =
			&state_change->devices[n_device];

		if (HAS_CHANGED(device_state_change->disk_state))
			REMEMBER_STATE_CHANGE(notify_device_state_change,
					      device_state_change, NOTIFY_CHANGE);
	}

	n_peer_devices = state_change->n_devices * state_change->n_connections;
	for (n_peer_device = 0; n_peer_device < n_peer_devices; n_peer_device++) {
		struct drbd_peer_device_state_change *p =
			&state_change->peer_devices[n_peer_device];

		if (HAS_CHANGED(p->disk_state) ||
		    HAS_CHANGED(p->repl_state) ||
		    HAS_CHANGED(p->resync_susp_user) ||
		    HAS_CHANGED(p->resync_susp_peer) ||
		    HAS_CHANGED(p->resync_susp_dependency))
			REMEMBER_STATE_CHANGE(notify_peer_device_state_change,
					      p, NOTIFY_CHANGE);
	}

	FINAL_STATE_CHANGE(NOTIFY_CHANGE);
	mutex_unlock(&notification_mutex);

#undef HAS_CHANGED
#undef FINAL_STATE_CHANGE
#undef REMEMBER_STATE_CHANGE
}

/* takes old and new peer disk state */
static bool lost_contact_to_peer_data(enum drbd_disk_state os, enum drbd_disk_state ns)
{
	if ((os >= D_INCONSISTENT && os != D_UNKNOWN && os != D_OUTDATED)
	&&  (ns < D_INCONSISTENT || ns == D_UNKNOWN || ns == D_OUTDATED))
		return true;

	/* Scenario, starting with normal operation
	 * Connected Primary/Secondary UpToDate/UpToDate
	 * NetworkFailure Primary/Unknown UpToDate/DUnknown (frozen)
	 * ...
	 * Connected Primary/Secondary UpToDate/Diskless (resumed; needs to bump uuid!)
	 */
	if (os == D_UNKNOWN
	&&  (ns == D_DISKLESS || ns == D_FAILED || ns == D_OUTDATED))
		return true;

	return false;
}
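
/* Examples: UpToDate -> Outdated and UpToDate -> DUnknown both return true
 * (the peer's data can no longer be trusted to be current), Outdated ->
 * Diskless returns false (contact was already lost earlier), while
 * DUnknown -> Diskless returns true via the second test above. */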

/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @device:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @flags:	state change flags
 * @state_change: state change to broadcast
 */
static void after_state_ch(struct drbd_device *device, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags,
			   struct drbd_state_change *state_change)
{
	struct drbd_resource *resource = device->resource;
	struct drbd_peer_device *peer_device = first_peer_device(device);
	struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
	struct sib_info sib;

	broadcast_state_change(state_change);

	sib.sib_reason = SIB_STATE_CHANGE;
	sib.os = os;
	sib.ns = ns;

	if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
	&&  (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
		clear_bit(CRASHED_PRIMARY, &device->flags);
		if (device->p_uuid)
			device->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	/* Inform userspace about the change... */
	drbd_bcast_event(device, &sib);

	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(device, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	if (ns.susp_nod) {
		enum drbd_req_event what = NOTHING;

		spin_lock_irq(&device->resource->req_lock);
		if (os.conn < C_CONNECTED && conn_lowest_conn(connection) >= C_CONNECTED)
			what = RESEND;

		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
		    conn_lowest_disk(connection) == D_UP_TO_DATE)
			what = RESTART_FROZEN_DISK_IO;

		if (resource->susp_nod && what != NOTHING) {
			_tl_restart(connection, what);
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_nod = 1 } },
					    (union drbd_state) { { .susp_nod = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&device->resource->req_lock);
	}

	if (ns.susp_fen) {
		spin_lock_irq(&device->resource->req_lock);
		if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
			/* case2: The connection was established again: */
			struct drbd_peer_device *peer_device;
			int vnr;

			rcu_read_lock();
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
				clear_bit(NEW_CUR_UUID, &peer_device->device->flags);
			rcu_read_unlock();

			/* We should actively create a new uuid, _before_
			 * we resume/resend, if the peer is diskless
			 * (recovery from a multiple error scenario).
			 * Currently, this happens with a slight delay
			 * below when checking lost_contact_to_peer_data() ...
			 */
			_tl_restart(connection, RESEND);
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
		}
		spin_unlock_irq(&device->resource->req_lock);
	}

	/* Became sync source. With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    connection->agreed_pro_version >= 96 && get_ldev(device)) {
		drbd_gen_and_send_sync_uuid(peer_device);
		put_ldev(device);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS &&
	    ns.pdsk > D_DISKLESS && ns.pdsk != D_UNKNOWN) {	/* attach on the peer */
		/* we probably will start a resync soon.
		 * make sure those things are properly reset. */
		device->rs_total = 0;
		device->rs_failed = 0;
		atomic_set(&device->rs_pending_cnt, 0);
		drbd_rs_cancel_all(device);

		drbd_send_uuids(peer_device);
		drbd_send_state(peer_device, ns);
	}

	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    device->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(device, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if (lost_contact_to_peer_data(os.pdsk, ns.pdsk)) {
		if (get_ldev(device)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (drbd_suspended(device)) {
					set_bit(NEW_CUR_UUID, &device->flags);
				} else {
					drbd_uuid_new_current(device);
					drbd_send_uuids(peer_device);
				}
			}
			put_ldev(device);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(device)) {
		if (os.peer != R_PRIMARY && ns.peer == R_PRIMARY &&
		    device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			drbd_uuid_new_current(device);
			drbd_send_uuids(peer_device);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(device, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(device);
	}

	/* Write out all changed bits on demote.
	 * Though, no need to do that just yet
	 * if there is a resync going on still */
	if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
	    device->state.conn <= C_CONNECTED && get_ldev(device)) {
		/* No changes to the bitmap expected this time, so assert that,
		 * even though no harm was done if it did change. */
		drbd_bitmap_io_from_worker(device, &drbd_bm_write,
				"demote", BM_LOCKED_TEST_ALLOWED);
		put_ldev(device);
	}

	/* Last part of the attaching process ... */
	if (ns.conn >= C_CONNECTED &&
	    os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
		drbd_send_sizes(peer_device, 0, 0);  /* to start sync... */
		drbd_send_uuids(peer_device);
		drbd_send_state(peer_device, ns);
	}

	/* We want to pause/continue resync, tell peer. */
	if (ns.conn >= C_CONNECTED &&
	     ((os.aftr_isp != ns.aftr_isp) ||
	      (os.user_isp != ns.user_isp)))
		drbd_send_state(peer_device, ns);

	/* In case one of the isp bits got set, suspend other devices. */
	if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
	    (ns.aftr_isp || ns.peer_isp || ns.user_isp))
		suspend_other_sg(device);

	/* Make sure the peer gets informed about eventual state
	   changes (ISP bits) while we were in WFReportParams. */
	if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
		drbd_send_state(peer_device, ns);

	if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
		drbd_send_state(peer_device, ns);

	/* We are in the progress to start a full sync... */
	if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
	    (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
		/* no other bitmap changes expected during this phase */
		drbd_queue_bitmap_io(device,
			&drbd_bmio_set_n_write, &abw_start_sync,
			"set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);

	/* first half of local IO error, failure to attach,
	 * or administrative detach */
	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
		enum drbd_io_error_p eh = EP_PASS_ON;
		int was_io_error = 0;

		/* corresponding get_ldev was in _drbd_set_state, to serialize
		 * our cleanup here with the transition to D_DISKLESS.
		 * But it is still not safe to dereference ldev here, since
		 * we might come from a failed Attach before ldev was set. */
		if (device->ldev) {
			rcu_read_lock();
			eh = rcu_dereference(device->ldev->disk_conf)->on_io_error;
			rcu_read_unlock();

			was_io_error = test_and_clear_bit(WAS_IO_ERROR, &device->flags);

			/* Intentionally call this handler first, before drbd_send_state().
			 * See: 2932204 drbd: call local-io-error handler early
			 * People may choose to hard-reset the box from this handler.
			 * It is useful if this looks like a "regular node crash". */
			if (was_io_error && eh == EP_CALL_HELPER)
				drbd_khelper(device, "local-io-error");

			/* Immediately allow completion of all application IO,
			 * that waits for completion from the local disk,
			 * if this was a force-detach due to disk_timeout
			 * or administrator request (drbdsetup detach --force).
			 * Do NOT abort otherwise.
			 * Aborting local requests may cause serious problems,
			 * if requests are completed to upper layers already,
			 * and then later the already submitted local bio completes.
			 * This can cause DMA into former bio pages that meanwhile
			 * have been re-used for other things.
			 * So aborting local requests may cause crashes,
			 * or even worse, silent data corruption.
			 */
			if (test_and_clear_bit(FORCE_DETACH, &device->flags))
				tl_abort_disk_io(device);

			/* current state still has to be D_FAILED,
			 * there is only one way out: to D_DISKLESS,
			 * and that may only happen after our put_ldev below. */
			if (device->state.disk != D_FAILED)
				drbd_err(device,
					"ASSERT FAILED: disk is %s during detach\n",
					drbd_disk_str(device->state.disk));

			if (ns.conn >= C_CONNECTED)
				drbd_send_state(peer_device, ns);

			drbd_rs_cancel_all(device);

			/* In case we want to get something to stable storage still,
			 * this may be the last chance.
			 * Following put_ldev may transition to D_DISKLESS. */
			drbd_md_sync(device);
		}
		put_ldev(device);
	}

	/* second half of local IO error, failure to attach,
	 * or administrative detach,
	 * after local_cnt references have reached zero again */
	if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
		/* We must still be diskless,
		 * re-attach has to be serialized with this! */
		if (device->state.disk != D_DISKLESS)
			drbd_err(device,
				 "ASSERT FAILED: disk is %s while going diskless\n",
				 drbd_disk_str(device->state.disk));

		if (ns.conn >= C_CONNECTED)
			drbd_send_state(peer_device, ns);
		/* corresponding get_ldev in __drbd_set_state
		 * this may finally trigger drbd_ldev_destroy. */
		put_ldev(device);
	}

	/* Notify peer that I had a local IO error and did not detach. */
	if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
		drbd_send_state(peer_device, ns);

	/* Disks got bigger while they were detached */
	if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
	    test_and_clear_bit(RESYNC_AFTER_NEG, &device->flags)) {
		if (ns.conn == C_CONNECTED)
			resync_after_online_grow(device);
	}

	/* A resync finished or aborted, wake paused devices... */
	if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
	    (os.peer_isp && !ns.peer_isp) ||
	    (os.user_isp && !ns.user_isp))
		resume_next_sg(device);

	/* sync target done with resync. Explicitly notify peer, even though
	 * it should (at least for non-empty resyncs) already know itself. */
	if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
		drbd_send_state(peer_device, ns);

	/* Verify finished, or reached stop sector. Peer did not know about
	 * the stop sector, and we may even have changed the stop sector during
	 * verify to interrupt/stop early. Send the new state. */
	if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
	&& verify_can_do_stop_sector(device))
		drbd_send_state(peer_device, ns);

	/* This triggers bitmap writeout of potentially still unwritten pages
	 * if the resync finished cleanly, or aborted because of peer disk
	 * failure, or on transition from resync back to AHEAD/BEHIND.
	 *
	 * Connection loss is handled in drbd_disconnected() by the receiver.
	 *
	 * For resync aborted because of local disk failure, we cannot do
	 * any bitmap writeout anymore.
	 *
	 * No harm done if some bits change during this phase.
	 */
	if ((os.conn > C_CONNECTED && os.conn < C_AHEAD) &&
	    (ns.conn == C_CONNECTED || ns.conn >= C_AHEAD) && get_ldev(device)) {
		drbd_queue_bitmap_io(device, &drbd_bm_write_copy_pages, NULL,
			"write from resync_finished", BM_LOCKED_CHANGE_ALLOWED);
		put_ldev(device);
	}

	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY) {
		if (os.aftr_isp != ns.aftr_isp)
			resume_next_sg(device);
	}

	drbd_md_sync(device);
}
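
/* Note: the unconditional drbd_md_sync() at the end of after_state_ch() is
 * cheap when nothing is dirty (it bails out on a clean dirty-flag), and it
 * makes sure metadata touched by any of the actions above reaches stable
 * storage. */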

struct after_conn_state_chg_work {
	struct drbd_work w;
	enum drbd_conns oc;
	union drbd_state ns_min;
	union drbd_state ns_max; /* new, max state, over all devices */
	enum chg_state_flags flags;
	struct drbd_connection *connection;
	struct drbd_state_change *state_change;
};

static int w_after_conn_state_ch(struct drbd_work *w, int unused)
{
	struct after_conn_state_chg_work *acscw =
		container_of(w, struct after_conn_state_chg_work, w);
	struct drbd_connection *connection = acscw->connection;
	enum drbd_conns oc = acscw->oc;
	union drbd_state ns_max = acscw->ns_max;
	struct drbd_peer_device *peer_device;
	int vnr;

	broadcast_state_change(acscw->state_change);
	forget_state_change(acscw->state_change);
	kfree(acscw);

	/* Upon network configuration, we need to start the receiver */
	if (oc == C_STANDALONE && ns_max.conn == C_UNCONNECTED)
		drbd_thread_start(&connection->receiver);

	if (oc == C_DISCONNECTING && ns_max.conn == C_STANDALONE) {
		struct net_conf *old_conf;

		mutex_lock(&notification_mutex);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
			notify_peer_device_state(NULL, 0, peer_device, NULL,
						 NOTIFY_DESTROY | NOTIFY_CONTINUES);
		notify_connection_state(NULL, 0, connection, NULL, NOTIFY_DESTROY);
		mutex_unlock(&notification_mutex);

		mutex_lock(&connection->resource->conf_update);
		old_conf = connection->net_conf;
		connection->my_addr_len = 0;
		connection->peer_addr_len = 0;
		RCU_INIT_POINTER(connection->net_conf, NULL);
		conn_free_crypto(connection);
		mutex_unlock(&connection->resource->conf_update);

		kvfree_rcu(old_conf);
	}

	if (ns_max.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (ns_max.pdsk <= D_OUTDATED) {
			rcu_read_lock();
			idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
				struct drbd_device *device = peer_device->device;
				if (test_bit(NEW_CUR_UUID, &device->flags)) {
					drbd_uuid_new_current(device);
					clear_bit(NEW_CUR_UUID, &device->flags);
				}
			}
			rcu_read_unlock();
			spin_lock_irq(&connection->resource->req_lock);
			_tl_restart(connection, CONNECTION_LOST_WHILE_PENDING);
			_conn_request_state(connection,
					    (union drbd_state) { { .susp_fen = 1 } },
					    (union drbd_state) { { .susp_fen = 0 } },
					    CS_VERBOSE);
			spin_unlock_irq(&connection->resource->req_lock);
		}
	}
	conn_md_sync(connection);
	kref_put(&connection->kref, drbd_destroy_connection);

	return 0;
}

static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
{
	enum chg_state_flags flags = ~0;
	struct drbd_peer_device *peer_device;
	int vnr, first_vol = 1;
	union drbd_dev_state os, cs = {
		{ .role = R_SECONDARY,
		  .peer = R_UNKNOWN,
		  .conn = connection->cstate,
		  .disk = D_DISKLESS,
		  .pdsk = D_UNKNOWN,
		} };

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		os = device->state;

		if (first_vol) {
			cs = os;
			first_vol = 0;
			continue;
		}

		if (cs.role != os.role)
			flags &= ~CS_DC_ROLE;

		if (cs.peer != os.peer)
			flags &= ~CS_DC_PEER;

		if (cs.conn != os.conn)
			flags &= ~CS_DC_CONN;

		if (cs.disk != os.disk)
			flags &= ~CS_DC_DISK;

		if (cs.pdsk != os.pdsk)
			flags &= ~CS_DC_PDSK;
	}
	rcu_read_unlock();

	*pf |= CS_DC_MASK;
	*pf &= flags;
	(*pcs).i = cs.i;
}
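
/* In other words: a CS_DC_* bit survives in *pf only if the corresponding
 * field has the same value on all volumes of the connection, so that
 * conn_pr_state_change() can print a single common "old" state for the
 * whole connection. */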

static enum drbd_state_rv
conn_is_valid_transition(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
			 enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	union drbd_state ns, os;
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		os = drbd_read_state(device);
		ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		if (ns.i == os.i)
			continue;

		rv = is_valid_transition(os, ns);

		if (rv >= SS_SUCCESS && !(flags & CS_HARD)) {
			rv = is_valid_state(device, ns);
			if (rv < SS_SUCCESS) {
				if (is_valid_state(device, os) == rv)
					rv = is_valid_soft_transition(os, ns, connection);
			} else
				rv = is_valid_soft_transition(os, ns, connection);
		}

		if (rv < SS_SUCCESS) {
			if (flags & CS_VERBOSE)
				print_st_err(device, os, ns, rv);
			break;
		}
	}
	rcu_read_unlock();

	return rv;
}

static void
conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
	       union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
{
	union drbd_state ns, os, ns_max = { };
	union drbd_state ns_min = {
		{ .role = R_MASK,
		  .peer = R_MASK,
		  .conn = val.conn,
		  .disk = D_MASK,
		  .pdsk = D_MASK
		} };
	struct drbd_peer_device *peer_device;
	enum drbd_state_rv rv;
	int vnr, number_of_volumes = 0;

	if (mask.conn == C_MASK) {
		/* remember last connect time so request_timer_fn() won't
		 * kill newly established sessions while we are still trying to thaw
		 * previously frozen IO */
		if (connection->cstate != C_WF_REPORT_PARAMS && val.conn == C_WF_REPORT_PARAMS)
			connection->last_reconnect_jif = jiffies;

		connection->cstate = val.conn;
	}

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		number_of_volumes++;
		os = drbd_read_state(device);
		ns = apply_mask_val(os, mask, val);
		ns = sanitize_state(device, os, ns, NULL);

		if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED)
			ns.disk = os.disk;

		rv = _drbd_set_state(device, ns, flags, NULL);
		BUG_ON(rv < SS_SUCCESS);
		ns.i = device->state.i;
		ns_max.role = max_role(ns.role, ns_max.role);
		ns_max.peer = max_role(ns.peer, ns_max.peer);
		ns_max.conn = max_t(enum drbd_conns, ns.conn, ns_max.conn);
		ns_max.disk = max_t(enum drbd_disk_state, ns.disk, ns_max.disk);
		ns_max.pdsk = max_t(enum drbd_disk_state, ns.pdsk, ns_max.pdsk);

		ns_min.role = min_role(ns.role, ns_min.role);
		ns_min.peer = min_role(ns.peer, ns_min.peer);
		ns_min.conn = min_t(enum drbd_conns, ns.conn, ns_min.conn);
		ns_min.disk = min_t(enum drbd_disk_state, ns.disk, ns_min.disk);
		ns_min.pdsk = min_t(enum drbd_disk_state, ns.pdsk, ns_min.pdsk);
	}
	rcu_read_unlock();

	if (number_of_volumes == 0) {
		ns_min = ns_max = (union drbd_state) { {
				.role = R_SECONDARY,
				.peer = R_UNKNOWN,
				.conn = val.conn,
				.disk = D_DISKLESS,
				.pdsk = D_UNKNOWN
			} };
	}

	ns_min.susp = ns_max.susp = connection->resource->susp;
	ns_min.susp_nod = ns_max.susp_nod = connection->resource->susp_nod;
	ns_min.susp_fen = ns_max.susp_fen = connection->resource->susp_fen;

	*pns_min = ns_min;
	*pns_max = ns_max;
}
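
/* ns_min/ns_max bracket the per-volume results: for each field, ns_min
 * holds the lowest and ns_max the highest value over all volumes (with a
 * single volume they are identical). Callers use ns_max for "did any
 * volume reach this state" checks, see w_after_conn_state_ch(). */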

static enum drbd_state_rv
_conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val)
{
	enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */

	if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags))
		rv = SS_CW_SUCCESS;

	if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags))
		rv = SS_CW_FAILED_BY_PEER;

	err = conn_is_valid_transition(connection, mask, val, 0);
	if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS)
		return rv;

	return err;
}
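
/* Since SS_UNKNOWN_ERROR is 0, returning it keeps the
 * wait_event_lock_irq() in _conn_request_state() waiting; the wait ends
 * once the peer acked (CONN_WD_ST_CHG_OKAY), rejected
 * (CONN_WD_ST_CHG_FAIL), the transition became invalid, or the connection
 * left C_WF_REPORT_PARAMS. */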

enum drbd_state_rv
_conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
		    enum chg_state_flags flags)
{
	enum drbd_state_rv rv = SS_SUCCESS;
	struct after_conn_state_chg_work *acscw;
	enum drbd_conns oc = connection->cstate;
	union drbd_state ns_max, ns_min, os;
	bool have_mutex = false;
	struct drbd_state_change *state_change;

	if (mask.conn) {
		rv = is_valid_conn_transition(oc, val.conn);
		if (rv < SS_SUCCESS)
			goto abort;
	}

	rv = conn_is_valid_transition(connection, mask, val, flags);
	if (rv < SS_SUCCESS)
		goto abort;

	if (oc == C_WF_REPORT_PARAMS && val.conn == C_DISCONNECTING &&
	    !(flags & (CS_LOCAL_ONLY | CS_HARD))) {
		/* This will be a cluster-wide state change.
		 * Need to give up the spinlock, grab the mutex,
		 * then send the state change request, ... */
		spin_unlock_irq(&connection->resource->req_lock);
		mutex_lock(&connection->cstate_mutex);
		have_mutex = true;

		set_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
		if (conn_send_state_req(connection, mask, val)) {
			/* sending failed. */
			clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
			rv = SS_CW_FAILED_BY_PEER;
			/* need to re-acquire the spin lock, though */
			goto abort_unlocked;
		}

		if (val.conn == C_DISCONNECTING)
			set_bit(DISCONNECT_SENT, &connection->flags);

		/* ... and re-acquire the spinlock.
		 * If _conn_rq_cond() returned >= SS_SUCCESS, we must call
		 * conn_set_state() within the same spinlock. */
		spin_lock_irq(&connection->resource->req_lock);
		wait_event_lock_irq(connection->ping_wait,
				(rv = _conn_rq_cond(connection, mask, val)),
				connection->resource->req_lock);
		clear_bit(CONN_WD_ST_CHG_REQ, &connection->flags);
		if (rv < SS_SUCCESS)
			goto abort;
	}

	state_change = remember_old_state(connection->resource, GFP_ATOMIC);
	conn_old_common_state(connection, &os, &flags);
	flags |= CS_DC_SUSP;
	conn_set_state(connection, mask, val, &ns_min, &ns_max, flags);
	conn_pr_state_change(connection, os, ns_max, flags);
	remember_new_state(state_change);

	acscw = kmalloc(sizeof(*acscw), GFP_ATOMIC);
	if (acscw) {
		acscw->oc = os.conn;
		acscw->ns_min = ns_min;
		acscw->ns_max = ns_max;
		acscw->flags = flags;
		acscw->w.cb = w_after_conn_state_ch;
		kref_get(&connection->kref);
		acscw->connection = connection;
		acscw->state_change = state_change;
		drbd_queue_work(&connection->sender_work, &acscw->w);
	} else {
		drbd_err(connection, "Could not kmalloc an acscw\n");
	}

 abort:
	if (have_mutex) {
		/* mutex_unlock() "... must not be used in interrupt context.",
		 * so give up the spinlock, then re-acquire it */
		spin_unlock_irq(&connection->resource->req_lock);
 abort_unlocked:
		mutex_unlock(&connection->cstate_mutex);
		spin_lock_irq(&connection->resource->req_lock);
	}

	if (rv < SS_SUCCESS && flags & CS_VERBOSE) {
		drbd_err(connection, "State change failed: %s\n", drbd_set_st_err_str(rv));
		drbd_err(connection, " mask = 0x%x val = 0x%x\n", mask.i, val.i);
		drbd_err(connection, " old_conn:%s wanted_conn:%s\n", drbd_conn_str(oc), drbd_conn_str(val.conn));
	}

	return rv;
}

enum drbd_state_rv
conn_request_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
		   enum chg_state_flags flags)
{
	enum drbd_state_rv rv;

	spin_lock_irq(&connection->resource->req_lock);
	rv = _conn_request_state(connection, mask, val, flags);
	spin_unlock_irq(&connection->resource->req_lock);

	return rv;
}
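
/* Illustrative only: callers typically build the (mask, val) pair with the
 * NS() helper, e.g.
 *
 *	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
 *
 * which requests the connection-wide transition to C_DISCONNECTING for all
 * volumes, taking and releasing the req_lock internally. */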