kcmsock.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Kernel Connection Multiplexor
  4. *
  5. * Copyright (c) 2016 Tom Herbert <[email protected]>
  6. */
  7. #include <linux/bpf.h>
  8. #include <linux/errno.h>
  9. #include <linux/errqueue.h>
  10. #include <linux/file.h>
  11. #include <linux/filter.h>
  12. #include <linux/in.h>
  13. #include <linux/kernel.h>
  14. #include <linux/module.h>
  15. #include <linux/net.h>
  16. #include <linux/netdevice.h>
  17. #include <linux/poll.h>
  18. #include <linux/rculist.h>
  19. #include <linux/skbuff.h>
  20. #include <linux/socket.h>
  21. #include <linux/uaccess.h>
  22. #include <linux/workqueue.h>
  23. #include <linux/syscalls.h>
  24. #include <linux/sched/signal.h>
  25. #include <net/kcm.h>
  26. #include <net/netns/generic.h>
  27. #include <net/sock.h>
  28. #include <uapi/linux/kcm.h>
  29. unsigned int kcm_net_id;
  30. static struct kmem_cache *kcm_psockp __read_mostly;
  31. static struct kmem_cache *kcm_muxp __read_mostly;
  32. static struct workqueue_struct *kcm_wq;
  /* View a generic sock pointer as a KCM socket pointer.
   *
   * This is a plain pointer cast; the const qualifier on @sk is dropped.
   */
  static inline struct kcm_sock *kcm_sk(const struct sock *sk)
  {
      struct kcm_sock *kcm = (struct kcm_sock *)sk;

      return kcm;
  }
  37. static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
  38. {
  39. return (struct kcm_tx_msg *)skb->cb;
  40. }
  41. static void report_csk_error(struct sock *csk, int err)
  42. {
  43. csk->sk_err = EPIPE;
  44. sk_error_report(csk);
  45. }
  46. static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
  47. bool wakeup_kcm)
  48. {
  49. struct sock *csk = psock->sk;
  50. struct kcm_mux *mux = psock->mux;
  51. /* Unrecoverable error in transmit */
  52. spin_lock_bh(&mux->lock);
  53. if (psock->tx_stopped) {
  54. spin_unlock_bh(&mux->lock);
  55. return;
  56. }
  57. psock->tx_stopped = 1;
  58. KCM_STATS_INCR(psock->stats.tx_aborts);
  59. if (!psock->tx_kcm) {
  60. /* Take off psocks_avail list */
  61. list_del(&psock->psock_avail_list);
  62. } else if (wakeup_kcm) {
  63. /* In this case psock is being aborted while outside of
  64. * write_msgs and psock is reserved. Schedule tx_work
  65. * to handle the failure there. Need to commit tx_stopped
  66. * before queuing work.
  67. */
  68. smp_mb();
  69. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  70. }
  71. spin_unlock_bh(&mux->lock);
  72. /* Report error on lower socket */
  73. report_csk_error(csk, err);
  74. }
  75. /* RX mux lock held. */
  76. static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
  77. struct kcm_psock *psock)
  78. {
  79. STRP_STATS_ADD(mux->stats.rx_bytes,
  80. psock->strp.stats.bytes -
  81. psock->saved_rx_bytes);
  82. mux->stats.rx_msgs +=
  83. psock->strp.stats.msgs - psock->saved_rx_msgs;
  84. psock->saved_rx_msgs = psock->strp.stats.msgs;
  85. psock->saved_rx_bytes = psock->strp.stats.bytes;
  86. }
  87. static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
  88. struct kcm_psock *psock)
  89. {
  90. KCM_STATS_ADD(mux->stats.tx_bytes,
  91. psock->stats.tx_bytes - psock->saved_tx_bytes);
  92. mux->stats.tx_msgs +=
  93. psock->stats.tx_msgs - psock->saved_tx_msgs;
  94. psock->saved_tx_msgs = psock->stats.tx_msgs;
  95. psock->saved_tx_bytes = psock->stats.tx_bytes;
  96. }
  97. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
  98. /* KCM is ready to receive messages on its queue-- either the KCM is new or
  99. * has become unblocked after being blocked on full socket buffer. Queue any
  100. * pending ready messages on a psock. RX mux lock held.
  101. */
  102. static void kcm_rcv_ready(struct kcm_sock *kcm)
  103. {
  104. struct kcm_mux *mux = kcm->mux;
  105. struct kcm_psock *psock;
  106. struct sk_buff *skb;
  107. if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
  108. return;
  109. while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
  110. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  111. /* Assuming buffer limit has been reached */
  112. skb_queue_head(&mux->rx_hold_queue, skb);
  113. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  114. return;
  115. }
  116. }
  117. while (!list_empty(&mux->psocks_ready)) {
  118. psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
  119. psock_ready_list);
  120. if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
  121. /* Assuming buffer limit has been reached */
  122. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  123. return;
  124. }
  125. /* Consumed the ready message on the psock. Schedule rx_work to
  126. * get more messages.
  127. */
  128. list_del(&psock->psock_ready_list);
  129. psock->ready_rx_msg = NULL;
  130. /* Commit clearing of ready_rx_msg for queuing work */
  131. smp_mb();
  132. strp_unpause(&psock->strp);
  133. strp_check_rcv(&psock->strp);
  134. }
  135. /* Buffer limit is okay now, add to ready list */
  136. list_add_tail(&kcm->wait_rx_list,
  137. &kcm->mux->kcm_rx_waiters);
  138. /* paired with lockless reads in kcm_rfree() */
  139. WRITE_ONCE(kcm->rx_wait, true);
  140. }
  141. static void kcm_rfree(struct sk_buff *skb)
  142. {
  143. struct sock *sk = skb->sk;
  144. struct kcm_sock *kcm = kcm_sk(sk);
  145. struct kcm_mux *mux = kcm->mux;
  146. unsigned int len = skb->truesize;
  147. sk_mem_uncharge(sk, len);
  148. atomic_sub(len, &sk->sk_rmem_alloc);
  149. /* For reading rx_wait and rx_psock without holding lock */
  150. smp_mb__after_atomic();
  151. if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
  152. sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
  153. spin_lock_bh(&mux->rx_lock);
  154. kcm_rcv_ready(kcm);
  155. spin_unlock_bh(&mux->rx_lock);
  156. }
  157. }
  158. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  159. {
  160. struct sk_buff_head *list = &sk->sk_receive_queue;
  161. if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
  162. return -ENOMEM;
  163. if (!sk_rmem_schedule(sk, skb, skb->truesize))
  164. return -ENOBUFS;
  165. skb->dev = NULL;
  166. skb_orphan(skb);
  167. skb->sk = sk;
  168. skb->destructor = kcm_rfree;
  169. atomic_add(skb->truesize, &sk->sk_rmem_alloc);
  170. sk_mem_charge(sk, skb->truesize);
  171. skb_queue_tail(list, skb);
  172. if (!sock_flag(sk, SOCK_DEAD))
  173. sk->sk_data_ready(sk);
  174. return 0;
  175. }
  176. /* Requeue received messages for a kcm socket to other kcm sockets. This is
  177. * called with a kcm socket is receive disabled.
  178. * RX mux lock held.
  179. */
  180. static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
  181. {
  182. struct sk_buff *skb;
  183. struct kcm_sock *kcm;
  184. while ((skb = skb_dequeue(head))) {
  185. /* Reset destructor to avoid calling kcm_rcv_ready */
  186. skb->destructor = sock_rfree;
  187. skb_orphan(skb);
  188. try_again:
  189. if (list_empty(&mux->kcm_rx_waiters)) {
  190. skb_queue_tail(&mux->rx_hold_queue, skb);
  191. continue;
  192. }
  193. kcm = list_first_entry(&mux->kcm_rx_waiters,
  194. struct kcm_sock, wait_rx_list);
  195. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  196. /* Should mean socket buffer full */
  197. list_del(&kcm->wait_rx_list);
  198. /* paired with lockless reads in kcm_rfree() */
  199. WRITE_ONCE(kcm->rx_wait, false);
  200. /* Commit rx_wait to read in kcm_free */
  201. smp_wmb();
  202. goto try_again;
  203. }
  204. }
  205. }
  206. /* Lower sock lock held */
  207. static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
  208. struct sk_buff *head)
  209. {
  210. struct kcm_mux *mux = psock->mux;
  211. struct kcm_sock *kcm;
  212. WARN_ON(psock->ready_rx_msg);
  213. if (psock->rx_kcm)
  214. return psock->rx_kcm;
  215. spin_lock_bh(&mux->rx_lock);
  216. if (psock->rx_kcm) {
  217. spin_unlock_bh(&mux->rx_lock);
  218. return psock->rx_kcm;
  219. }
  220. kcm_update_rx_mux_stats(mux, psock);
  221. if (list_empty(&mux->kcm_rx_waiters)) {
  222. psock->ready_rx_msg = head;
  223. strp_pause(&psock->strp);
  224. list_add_tail(&psock->psock_ready_list,
  225. &mux->psocks_ready);
  226. spin_unlock_bh(&mux->rx_lock);
  227. return NULL;
  228. }
  229. kcm = list_first_entry(&mux->kcm_rx_waiters,
  230. struct kcm_sock, wait_rx_list);
  231. list_del(&kcm->wait_rx_list);
  232. /* paired with lockless reads in kcm_rfree() */
  233. WRITE_ONCE(kcm->rx_wait, false);
  234. psock->rx_kcm = kcm;
  235. /* paired with lockless reads in kcm_rfree() */
  236. WRITE_ONCE(kcm->rx_psock, psock);
  237. spin_unlock_bh(&mux->rx_lock);
  238. return kcm;
  239. }
  240. static void kcm_done(struct kcm_sock *kcm);
  241. static void kcm_done_work(struct work_struct *w)
  242. {
  243. kcm_done(container_of(w, struct kcm_sock, done_work));
  244. }
  245. /* Lower sock held */
  246. static void unreserve_rx_kcm(struct kcm_psock *psock,
  247. bool rcv_ready)
  248. {
  249. struct kcm_sock *kcm = psock->rx_kcm;
  250. struct kcm_mux *mux = psock->mux;
  251. if (!kcm)
  252. return;
  253. spin_lock_bh(&mux->rx_lock);
  254. psock->rx_kcm = NULL;
  255. /* paired with lockless reads in kcm_rfree() */
  256. WRITE_ONCE(kcm->rx_psock, NULL);
  257. /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
  258. * kcm_rfree
  259. */
  260. smp_mb();
  261. if (unlikely(kcm->done)) {
  262. spin_unlock_bh(&mux->rx_lock);
  263. /* Need to run kcm_done in a task since we need to qcquire
  264. * callback locks which may already be held here.
  265. */
  266. INIT_WORK(&kcm->done_work, kcm_done_work);
  267. schedule_work(&kcm->done_work);
  268. return;
  269. }
  270. if (unlikely(kcm->rx_disabled)) {
  271. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  272. } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
  273. /* Check for degenerative race with rx_wait that all
  274. * data was dequeued (accounted for in kcm_rfree).
  275. */
  276. kcm_rcv_ready(kcm);
  277. }
  278. spin_unlock_bh(&mux->rx_lock);
  279. }
  280. /* Lower sock lock held */
  281. static void psock_data_ready(struct sock *sk)
  282. {
  283. struct kcm_psock *psock;
  284. read_lock_bh(&sk->sk_callback_lock);
  285. psock = (struct kcm_psock *)sk->sk_user_data;
  286. if (likely(psock))
  287. strp_data_ready(&psock->strp);
  288. read_unlock_bh(&sk->sk_callback_lock);
  289. }
  290. /* Called with lower sock held */
  291. static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
  292. {
  293. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  294. struct kcm_sock *kcm;
  295. try_queue:
  296. kcm = reserve_rx_kcm(psock, skb);
  297. if (!kcm) {
  298. /* Unable to reserve a KCM, message is held in psock and strp
  299. * is paused.
  300. */
  301. return;
  302. }
  303. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  304. /* Should mean socket buffer full */
  305. unreserve_rx_kcm(psock, false);
  306. goto try_queue;
  307. }
  308. }
  309. static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
  310. {
  311. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  312. struct bpf_prog *prog = psock->bpf_prog;
  313. int res;
  314. res = bpf_prog_run_pin_on_cpu(prog, skb);
  315. return res;
  316. }
  317. static int kcm_read_sock_done(struct strparser *strp, int err)
  318. {
  319. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  320. unreserve_rx_kcm(psock, true);
  321. return err;
  322. }
  323. static void psock_state_change(struct sock *sk)
  324. {
  325. /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here
  326. * since application will normally not poll with EPOLLIN
  327. * on the TCP sockets.
  328. */
  329. report_csk_error(sk, EPIPE);
  330. }
  331. static void psock_write_space(struct sock *sk)
  332. {
  333. struct kcm_psock *psock;
  334. struct kcm_mux *mux;
  335. struct kcm_sock *kcm;
  336. read_lock_bh(&sk->sk_callback_lock);
  337. psock = (struct kcm_psock *)sk->sk_user_data;
  338. if (unlikely(!psock))
  339. goto out;
  340. mux = psock->mux;
  341. spin_lock_bh(&mux->lock);
  342. /* Check if the socket is reserved so someone is waiting for sending. */
  343. kcm = psock->tx_kcm;
  344. if (kcm && !unlikely(kcm->tx_stopped))
  345. queue_work(kcm_wq, &kcm->tx_work);
  346. spin_unlock_bh(&mux->lock);
  347. out:
  348. read_unlock_bh(&sk->sk_callback_lock);
  349. }
  350. static void unreserve_psock(struct kcm_sock *kcm);
  351. /* kcm sock is locked. */
  352. static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
  353. {
  354. struct kcm_mux *mux = kcm->mux;
  355. struct kcm_psock *psock;
  356. psock = kcm->tx_psock;
  357. smp_rmb(); /* Must read tx_psock before tx_wait */
  358. if (psock) {
  359. WARN_ON(kcm->tx_wait);
  360. if (unlikely(psock->tx_stopped))
  361. unreserve_psock(kcm);
  362. else
  363. return kcm->tx_psock;
  364. }
  365. spin_lock_bh(&mux->lock);
  366. /* Check again under lock to see if psock was reserved for this
  367. * psock via psock_unreserve.
  368. */
  369. psock = kcm->tx_psock;
  370. if (unlikely(psock)) {
  371. WARN_ON(kcm->tx_wait);
  372. spin_unlock_bh(&mux->lock);
  373. return kcm->tx_psock;
  374. }
  375. if (!list_empty(&mux->psocks_avail)) {
  376. psock = list_first_entry(&mux->psocks_avail,
  377. struct kcm_psock,
  378. psock_avail_list);
  379. list_del(&psock->psock_avail_list);
  380. if (kcm->tx_wait) {
  381. list_del(&kcm->wait_psock_list);
  382. kcm->tx_wait = false;
  383. }
  384. kcm->tx_psock = psock;
  385. psock->tx_kcm = kcm;
  386. KCM_STATS_INCR(psock->stats.reserved);
  387. } else if (!kcm->tx_wait) {
  388. list_add_tail(&kcm->wait_psock_list,
  389. &mux->kcm_tx_waiters);
  390. kcm->tx_wait = true;
  391. }
  392. spin_unlock_bh(&mux->lock);
  393. return psock;
  394. }
  395. /* mux lock held */
  396. static void psock_now_avail(struct kcm_psock *psock)
  397. {
  398. struct kcm_mux *mux = psock->mux;
  399. struct kcm_sock *kcm;
  400. if (list_empty(&mux->kcm_tx_waiters)) {
  401. list_add_tail(&psock->psock_avail_list,
  402. &mux->psocks_avail);
  403. } else {
  404. kcm = list_first_entry(&mux->kcm_tx_waiters,
  405. struct kcm_sock,
  406. wait_psock_list);
  407. list_del(&kcm->wait_psock_list);
  408. kcm->tx_wait = false;
  409. psock->tx_kcm = kcm;
  410. /* Commit before changing tx_psock since that is read in
  411. * reserve_psock before queuing work.
  412. */
  413. smp_mb();
  414. kcm->tx_psock = psock;
  415. KCM_STATS_INCR(psock->stats.reserved);
  416. queue_work(kcm_wq, &kcm->tx_work);
  417. }
  418. }
  419. /* kcm sock is locked. */
  420. static void unreserve_psock(struct kcm_sock *kcm)
  421. {
  422. struct kcm_psock *psock;
  423. struct kcm_mux *mux = kcm->mux;
  424. spin_lock_bh(&mux->lock);
  425. psock = kcm->tx_psock;
  426. if (WARN_ON(!psock)) {
  427. spin_unlock_bh(&mux->lock);
  428. return;
  429. }
  430. smp_rmb(); /* Read tx_psock before tx_wait */
  431. kcm_update_tx_mux_stats(mux, psock);
  432. WARN_ON(kcm->tx_wait);
  433. kcm->tx_psock = NULL;
  434. psock->tx_kcm = NULL;
  435. KCM_STATS_INCR(psock->stats.unreserved);
  436. if (unlikely(psock->tx_stopped)) {
  437. if (psock->done) {
  438. /* Deferred free */
  439. list_del(&psock->psock_list);
  440. mux->psocks_cnt--;
  441. sock_put(psock->sk);
  442. fput(psock->sk->sk_socket->file);
  443. kmem_cache_free(kcm_psockp, psock);
  444. }
  445. /* Don't put back on available list */
  446. spin_unlock_bh(&mux->lock);
  447. return;
  448. }
  449. psock_now_avail(psock);
  450. spin_unlock_bh(&mux->lock);
  451. }
  452. static void kcm_report_tx_retry(struct kcm_sock *kcm)
  453. {
  454. struct kcm_mux *mux = kcm->mux;
  455. spin_lock_bh(&mux->lock);
  456. KCM_STATS_INCR(mux->stats.tx_retries);
  457. spin_unlock_bh(&mux->lock);
  458. }
  459. /* Write any messages ready on the kcm socket. Called with kcm sock lock
  460. * held. Return bytes actually sent or error.
  461. */
  462. static int kcm_write_msgs(struct kcm_sock *kcm)
  463. {
  464. struct sock *sk = &kcm->sk;
  465. struct kcm_psock *psock;
  466. struct sk_buff *skb, *head;
  467. struct kcm_tx_msg *txm;
  468. unsigned short fragidx, frag_offset;
  469. unsigned int sent, total_sent = 0;
  470. int ret = 0;
  471. kcm->tx_wait_more = false;
  472. psock = kcm->tx_psock;
  473. if (unlikely(psock && psock->tx_stopped)) {
  474. /* A reserved psock was aborted asynchronously. Unreserve
  475. * it and we'll retry the message.
  476. */
  477. unreserve_psock(kcm);
  478. kcm_report_tx_retry(kcm);
  479. if (skb_queue_empty(&sk->sk_write_queue))
  480. return 0;
  481. kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
  482. } else if (skb_queue_empty(&sk->sk_write_queue)) {
  483. return 0;
  484. }
  485. head = skb_peek(&sk->sk_write_queue);
  486. txm = kcm_tx_msg(head);
  487. if (txm->sent) {
  488. /* Send of first skbuff in queue already in progress */
  489. if (WARN_ON(!psock)) {
  490. ret = -EINVAL;
  491. goto out;
  492. }
  493. sent = txm->sent;
  494. frag_offset = txm->frag_offset;
  495. fragidx = txm->fragidx;
  496. skb = txm->frag_skb;
  497. goto do_frag;
  498. }
  499. try_again:
  500. psock = reserve_psock(kcm);
  501. if (!psock)
  502. goto out;
  503. do {
  504. skb = head;
  505. txm = kcm_tx_msg(head);
  506. sent = 0;
  507. do_frag_list:
  508. if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
  509. ret = -EINVAL;
  510. goto out;
  511. }
  512. for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
  513. fragidx++) {
  514. skb_frag_t *frag;
  515. frag_offset = 0;
  516. do_frag:
  517. frag = &skb_shinfo(skb)->frags[fragidx];
  518. if (WARN_ON(!skb_frag_size(frag))) {
  519. ret = -EINVAL;
  520. goto out;
  521. }
  522. ret = kernel_sendpage(psock->sk->sk_socket,
  523. skb_frag_page(frag),
  524. skb_frag_off(frag) + frag_offset,
  525. skb_frag_size(frag) - frag_offset,
  526. MSG_DONTWAIT);
  527. if (ret <= 0) {
  528. if (ret == -EAGAIN) {
  529. /* Save state to try again when there's
  530. * write space on the socket
  531. */
  532. txm->sent = sent;
  533. txm->frag_offset = frag_offset;
  534. txm->fragidx = fragidx;
  535. txm->frag_skb = skb;
  536. ret = 0;
  537. goto out;
  538. }
  539. /* Hard failure in sending message, abort this
  540. * psock since it has lost framing
  541. * synchronization and retry sending the
  542. * message from the beginning.
  543. */
  544. kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
  545. true);
  546. unreserve_psock(kcm);
  547. txm->sent = 0;
  548. kcm_report_tx_retry(kcm);
  549. ret = 0;
  550. goto try_again;
  551. }
  552. sent += ret;
  553. frag_offset += ret;
  554. KCM_STATS_ADD(psock->stats.tx_bytes, ret);
  555. if (frag_offset < skb_frag_size(frag)) {
  556. /* Not finished with this frag */
  557. goto do_frag;
  558. }
  559. }
  560. if (skb == head) {
  561. if (skb_has_frag_list(skb)) {
  562. skb = skb_shinfo(skb)->frag_list;
  563. goto do_frag_list;
  564. }
  565. } else if (skb->next) {
  566. skb = skb->next;
  567. goto do_frag_list;
  568. }
  569. /* Successfully sent the whole packet, account for it. */
  570. skb_dequeue(&sk->sk_write_queue);
  571. kfree_skb(head);
  572. sk->sk_wmem_queued -= sent;
  573. total_sent += sent;
  574. KCM_STATS_INCR(psock->stats.tx_msgs);
  575. } while ((head = skb_peek(&sk->sk_write_queue)));
  576. out:
  577. if (!head) {
  578. /* Done with all queued messages. */
  579. WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
  580. unreserve_psock(kcm);
  581. }
  582. /* Check if write space is available */
  583. sk->sk_write_space(sk);
  584. return total_sent ? : ret;
  585. }
  586. static void kcm_tx_work(struct work_struct *w)
  587. {
  588. struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
  589. struct sock *sk = &kcm->sk;
  590. int err;
  591. lock_sock(sk);
  592. /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
  593. * aborts
  594. */
  595. err = kcm_write_msgs(kcm);
  596. if (err < 0) {
  597. /* Hard failure in write, report error on KCM socket */
  598. pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
  599. report_csk_error(&kcm->sk, -err);
  600. goto out;
  601. }
  602. /* Primarily for SOCK_SEQPACKET sockets */
  603. if (likely(sk->sk_socket) &&
  604. test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  605. clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  606. sk->sk_write_space(sk);
  607. }
  608. out:
  609. release_sock(sk);
  610. }
  611. static void kcm_push(struct kcm_sock *kcm)
  612. {
  613. if (kcm->tx_wait_more)
  614. kcm_write_msgs(kcm);
  615. }
  616. static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
  617. int offset, size_t size, int flags)
  618. {
  619. struct sock *sk = sock->sk;
  620. struct kcm_sock *kcm = kcm_sk(sk);
  621. struct sk_buff *skb = NULL, *head = NULL;
  622. long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
  623. bool eor;
  624. int err = 0;
  625. int i;
  626. if (flags & MSG_SENDPAGE_NOTLAST)
  627. flags |= MSG_MORE;
  628. /* No MSG_EOR from splice, only look at MSG_MORE */
  629. eor = !(flags & MSG_MORE);
  630. lock_sock(sk);
  631. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  632. err = -EPIPE;
  633. if (sk->sk_err)
  634. goto out_error;
  635. if (kcm->seq_skb) {
  636. /* Previously opened message */
  637. head = kcm->seq_skb;
  638. skb = kcm_tx_msg(head)->last_skb;
  639. i = skb_shinfo(skb)->nr_frags;
  640. if (skb_can_coalesce(skb, i, page, offset)) {
  641. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
  642. skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
  643. goto coalesced;
  644. }
  645. if (i >= MAX_SKB_FRAGS) {
  646. struct sk_buff *tskb;
  647. tskb = alloc_skb(0, sk->sk_allocation);
  648. while (!tskb) {
  649. kcm_push(kcm);
  650. err = sk_stream_wait_memory(sk, &timeo);
  651. if (err)
  652. goto out_error;
  653. }
  654. if (head == skb)
  655. skb_shinfo(head)->frag_list = tskb;
  656. else
  657. skb->next = tskb;
  658. skb = tskb;
  659. skb->ip_summed = CHECKSUM_UNNECESSARY;
  660. i = 0;
  661. }
  662. } else {
  663. /* Call the sk_stream functions to manage the sndbuf mem. */
  664. if (!sk_stream_memory_free(sk)) {
  665. kcm_push(kcm);
  666. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  667. err = sk_stream_wait_memory(sk, &timeo);
  668. if (err)
  669. goto out_error;
  670. }
  671. head = alloc_skb(0, sk->sk_allocation);
  672. while (!head) {
  673. kcm_push(kcm);
  674. err = sk_stream_wait_memory(sk, &timeo);
  675. if (err)
  676. goto out_error;
  677. }
  678. skb = head;
  679. i = 0;
  680. }
  681. get_page(page);
  682. skb_fill_page_desc_noacc(skb, i, page, offset, size);
  683. skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
  684. coalesced:
  685. skb->len += size;
  686. skb->data_len += size;
  687. skb->truesize += size;
  688. sk->sk_wmem_queued += size;
  689. sk_mem_charge(sk, size);
  690. if (head != skb) {
  691. head->len += size;
  692. head->data_len += size;
  693. head->truesize += size;
  694. }
  695. if (eor) {
  696. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  697. /* Message complete, queue it on send buffer */
  698. __skb_queue_tail(&sk->sk_write_queue, head);
  699. kcm->seq_skb = NULL;
  700. KCM_STATS_INCR(kcm->stats.tx_msgs);
  701. if (flags & MSG_BATCH) {
  702. kcm->tx_wait_more = true;
  703. } else if (kcm->tx_wait_more || not_busy) {
  704. err = kcm_write_msgs(kcm);
  705. if (err < 0) {
  706. /* We got a hard error in write_msgs but have
  707. * already queued this message. Report an error
  708. * in the socket, but don't affect return value
  709. * from sendmsg
  710. */
  711. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  712. report_csk_error(&kcm->sk, -err);
  713. }
  714. }
  715. } else {
  716. /* Message not complete, save state */
  717. kcm->seq_skb = head;
  718. kcm_tx_msg(head)->last_skb = skb;
  719. }
  720. KCM_STATS_ADD(kcm->stats.tx_bytes, size);
  721. release_sock(sk);
  722. return size;
  723. out_error:
  724. kcm_push(kcm);
  725. err = sk_stream_error(sk, flags, err);
  726. /* make sure we wake any epoll edge trigger waiter */
  727. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  728. sk->sk_write_space(sk);
  729. release_sock(sk);
  730. return err;
  731. }
  732. static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
  733. {
  734. struct sock *sk = sock->sk;
  735. struct kcm_sock *kcm = kcm_sk(sk);
  736. struct sk_buff *skb = NULL, *head = NULL;
  737. size_t copy, copied = 0;
  738. long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  739. int eor = (sock->type == SOCK_DGRAM) ?
  740. !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
  741. int err = -EPIPE;
  742. lock_sock(sk);
  743. /* Per tcp_sendmsg this should be in poll */
  744. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  745. if (sk->sk_err)
  746. goto out_error;
  747. if (kcm->seq_skb) {
  748. /* Previously opened message */
  749. head = kcm->seq_skb;
  750. skb = kcm_tx_msg(head)->last_skb;
  751. goto start;
  752. }
  753. /* Call the sk_stream functions to manage the sndbuf mem. */
  754. if (!sk_stream_memory_free(sk)) {
  755. kcm_push(kcm);
  756. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  757. err = sk_stream_wait_memory(sk, &timeo);
  758. if (err)
  759. goto out_error;
  760. }
  761. if (msg_data_left(msg)) {
  762. /* New message, alloc head skb */
  763. head = alloc_skb(0, sk->sk_allocation);
  764. while (!head) {
  765. kcm_push(kcm);
  766. err = sk_stream_wait_memory(sk, &timeo);
  767. if (err)
  768. goto out_error;
  769. head = alloc_skb(0, sk->sk_allocation);
  770. }
  771. skb = head;
  772. /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
  773. * csum_and_copy_from_iter from skb_do_copy_data_nocache.
  774. */
  775. skb->ip_summed = CHECKSUM_UNNECESSARY;
  776. }
  777. start:
  778. while (msg_data_left(msg)) {
  779. bool merge = true;
  780. int i = skb_shinfo(skb)->nr_frags;
  781. struct page_frag *pfrag = sk_page_frag(sk);
  782. if (!sk_page_frag_refill(sk, pfrag))
  783. goto wait_for_memory;
  784. if (!skb_can_coalesce(skb, i, pfrag->page,
  785. pfrag->offset)) {
  786. if (i == MAX_SKB_FRAGS) {
  787. struct sk_buff *tskb;
  788. tskb = alloc_skb(0, sk->sk_allocation);
  789. if (!tskb)
  790. goto wait_for_memory;
  791. if (head == skb)
  792. skb_shinfo(head)->frag_list = tskb;
  793. else
  794. skb->next = tskb;
  795. skb = tskb;
  796. skb->ip_summed = CHECKSUM_UNNECESSARY;
  797. continue;
  798. }
  799. merge = false;
  800. }
  801. copy = min_t(int, msg_data_left(msg),
  802. pfrag->size - pfrag->offset);
  803. if (!sk_wmem_schedule(sk, copy))
  804. goto wait_for_memory;
  805. err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
  806. pfrag->page,
  807. pfrag->offset,
  808. copy);
  809. if (err)
  810. goto out_error;
  811. /* Update the skb. */
  812. if (merge) {
  813. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
  814. } else {
  815. skb_fill_page_desc(skb, i, pfrag->page,
  816. pfrag->offset, copy);
  817. get_page(pfrag->page);
  818. }
  819. pfrag->offset += copy;
  820. copied += copy;
  821. if (head != skb) {
  822. head->len += copy;
  823. head->data_len += copy;
  824. }
  825. continue;
  826. wait_for_memory:
  827. kcm_push(kcm);
  828. err = sk_stream_wait_memory(sk, &timeo);
  829. if (err)
  830. goto out_error;
  831. }
  832. if (eor) {
  833. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  834. if (head) {
  835. /* Message complete, queue it on send buffer */
  836. __skb_queue_tail(&sk->sk_write_queue, head);
  837. kcm->seq_skb = NULL;
  838. KCM_STATS_INCR(kcm->stats.tx_msgs);
  839. }
  840. if (msg->msg_flags & MSG_BATCH) {
  841. kcm->tx_wait_more = true;
  842. } else if (kcm->tx_wait_more || not_busy) {
  843. err = kcm_write_msgs(kcm);
  844. if (err < 0) {
  845. /* We got a hard error in write_msgs but have
  846. * already queued this message. Report an error
  847. * in the socket, but don't affect return value
  848. * from sendmsg
  849. */
  850. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  851. report_csk_error(&kcm->sk, -err);
  852. }
  853. }
  854. } else {
  855. /* Message not complete, save state */
  856. partial_message:
  857. if (head) {
  858. kcm->seq_skb = head;
  859. kcm_tx_msg(head)->last_skb = skb;
  860. }
  861. }
  862. KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
  863. release_sock(sk);
  864. return copied;
  865. out_error:
  866. kcm_push(kcm);
  867. if (sock->type == SOCK_SEQPACKET) {
  868. /* Wrote some bytes before encountering an
  869. * error, return partial success.
  870. */
  871. if (copied)
  872. goto partial_message;
  873. if (head != kcm->seq_skb)
  874. kfree_skb(head);
  875. } else {
  876. kfree_skb(head);
  877. kcm->seq_skb = NULL;
  878. }
  879. err = sk_stream_error(sk, msg->msg_flags, err);
  880. /* make sure we wake any epoll edge trigger waiter */
  881. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  882. sk->sk_write_space(sk);
  883. release_sock(sk);
  884. return err;
  885. }
  886. static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
  887. size_t len, int flags)
  888. {
  889. struct sock *sk = sock->sk;
  890. struct kcm_sock *kcm = kcm_sk(sk);
  891. int err = 0;
  892. struct strp_msg *stm;
  893. int copied = 0;
  894. struct sk_buff *skb;
  895. skb = skb_recv_datagram(sk, flags, &err);
  896. if (!skb)
  897. goto out;
  898. /* Okay, have a message on the receive queue */
  899. stm = strp_msg(skb);
  900. if (len > stm->full_len)
  901. len = stm->full_len;
  902. err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
  903. if (err < 0)
  904. goto out;
  905. copied = len;
  906. if (likely(!(flags & MSG_PEEK))) {
  907. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  908. if (copied < stm->full_len) {
  909. if (sock->type == SOCK_DGRAM) {
  910. /* Truncated message */
  911. msg->msg_flags |= MSG_TRUNC;
  912. goto msg_finished;
  913. }
  914. stm->offset += copied;
  915. stm->full_len -= copied;
  916. } else {
  917. msg_finished:
  918. /* Finished with message */
  919. msg->msg_flags |= MSG_EOR;
  920. KCM_STATS_INCR(kcm->stats.rx_msgs);
  921. }
  922. }
  923. out:
  924. skb_free_datagram(sk, skb);
  925. return copied ? : err;
  926. }
  927. static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
  928. struct pipe_inode_info *pipe, size_t len,
  929. unsigned int flags)
  930. {
  931. struct sock *sk = sock->sk;
  932. struct kcm_sock *kcm = kcm_sk(sk);
  933. struct strp_msg *stm;
  934. int err = 0;
  935. ssize_t copied;
  936. struct sk_buff *skb;
  937. /* Only support splice for SOCKSEQPACKET */
  938. skb = skb_recv_datagram(sk, flags, &err);
  939. if (!skb)
  940. goto err_out;
  941. /* Okay, have a message on the receive queue */
  942. stm = strp_msg(skb);
  943. if (len > stm->full_len)
  944. len = stm->full_len;
  945. copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
  946. if (copied < 0) {
  947. err = copied;
  948. goto err_out;
  949. }
  950. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  951. stm->offset += copied;
  952. stm->full_len -= copied;
  953. /* We have no way to return MSG_EOR. If all the bytes have been
  954. * read we still leave the message in the receive socket buffer.
  955. * A subsequent recvmsg needs to be done to return MSG_EOR and
  956. * finish reading the message.
  957. */
  958. skb_free_datagram(sk, skb);
  959. return copied;
  960. err_out:
  961. skb_free_datagram(sk, skb);
  962. return err;
  963. }
  964. /* kcm sock lock held */
  965. static void kcm_recv_disable(struct kcm_sock *kcm)
  966. {
  967. struct kcm_mux *mux = kcm->mux;
  968. if (kcm->rx_disabled)
  969. return;
  970. spin_lock_bh(&mux->rx_lock);
  971. kcm->rx_disabled = 1;
  972. /* If a psock is reserved we'll do cleanup in unreserve */
  973. if (!kcm->rx_psock) {
  974. if (kcm->rx_wait) {
  975. list_del(&kcm->wait_rx_list);
  976. /* paired with lockless reads in kcm_rfree() */
  977. WRITE_ONCE(kcm->rx_wait, false);
  978. }
  979. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  980. }
  981. spin_unlock_bh(&mux->rx_lock);
  982. }
  983. /* kcm sock lock held */
  984. static void kcm_recv_enable(struct kcm_sock *kcm)
  985. {
  986. struct kcm_mux *mux = kcm->mux;
  987. if (!kcm->rx_disabled)
  988. return;
  989. spin_lock_bh(&mux->rx_lock);
  990. kcm->rx_disabled = 0;
  991. kcm_rcv_ready(kcm);
  992. spin_unlock_bh(&mux->rx_lock);
  993. }
  994. static int kcm_setsockopt(struct socket *sock, int level, int optname,
  995. sockptr_t optval, unsigned int optlen)
  996. {
  997. struct kcm_sock *kcm = kcm_sk(sock->sk);
  998. int val, valbool;
  999. int err = 0;
  1000. if (level != SOL_KCM)
  1001. return -ENOPROTOOPT;
  1002. if (optlen < sizeof(int))
  1003. return -EINVAL;
  1004. if (copy_from_sockptr(&val, optval, sizeof(int)))
  1005. return -EFAULT;
  1006. valbool = val ? 1 : 0;
  1007. switch (optname) {
  1008. case KCM_RECV_DISABLE:
  1009. lock_sock(&kcm->sk);
  1010. if (valbool)
  1011. kcm_recv_disable(kcm);
  1012. else
  1013. kcm_recv_enable(kcm);
  1014. release_sock(&kcm->sk);
  1015. break;
  1016. default:
  1017. err = -ENOPROTOOPT;
  1018. }
  1019. return err;
  1020. }
  1021. static int kcm_getsockopt(struct socket *sock, int level, int optname,
  1022. char __user *optval, int __user *optlen)
  1023. {
  1024. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1025. int val, len;
  1026. if (level != SOL_KCM)
  1027. return -ENOPROTOOPT;
  1028. if (get_user(len, optlen))
  1029. return -EFAULT;
  1030. len = min_t(unsigned int, len, sizeof(int));
  1031. if (len < 0)
  1032. return -EINVAL;
  1033. switch (optname) {
  1034. case KCM_RECV_DISABLE:
  1035. val = kcm->rx_disabled;
  1036. break;
  1037. default:
  1038. return -ENOPROTOOPT;
  1039. }
  1040. if (put_user(len, optlen))
  1041. return -EFAULT;
  1042. if (copy_to_user(optval, &val, len))
  1043. return -EFAULT;
  1044. return 0;
  1045. }
  1046. static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
  1047. {
  1048. struct kcm_sock *tkcm;
  1049. struct list_head *head;
  1050. int index = 0;
  1051. /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
  1052. * we set sk_state, otherwise epoll_wait always returns right away with
  1053. * EPOLLHUP
  1054. */
  1055. kcm->sk.sk_state = TCP_ESTABLISHED;
  1056. /* Add to mux's kcm sockets list */
  1057. kcm->mux = mux;
  1058. spin_lock_bh(&mux->lock);
  1059. head = &mux->kcm_socks;
  1060. list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
  1061. if (tkcm->index != index)
  1062. break;
  1063. head = &tkcm->kcm_sock_list;
  1064. index++;
  1065. }
  1066. list_add(&kcm->kcm_sock_list, head);
  1067. kcm->index = index;
  1068. mux->kcm_socks_cnt++;
  1069. spin_unlock_bh(&mux->lock);
  1070. INIT_WORK(&kcm->tx_work, kcm_tx_work);
  1071. spin_lock_bh(&mux->rx_lock);
  1072. kcm_rcv_ready(kcm);
  1073. spin_unlock_bh(&mux->rx_lock);
  1074. }
  1075. static int kcm_attach(struct socket *sock, struct socket *csock,
  1076. struct bpf_prog *prog)
  1077. {
  1078. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1079. struct kcm_mux *mux = kcm->mux;
  1080. struct sock *csk;
  1081. struct kcm_psock *psock = NULL, *tpsock;
  1082. struct list_head *head;
  1083. int index = 0;
  1084. static const struct strp_callbacks cb = {
  1085. .rcv_msg = kcm_rcv_strparser,
  1086. .parse_msg = kcm_parse_func_strparser,
  1087. .read_sock_done = kcm_read_sock_done,
  1088. };
  1089. int err = 0;
  1090. csk = csock->sk;
  1091. if (!csk)
  1092. return -EINVAL;
  1093. lock_sock(csk);
  1094. /* Only allow TCP sockets to be attached for now */
  1095. if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
  1096. csk->sk_protocol != IPPROTO_TCP) {
  1097. err = -EOPNOTSUPP;
  1098. goto out;
  1099. }
  1100. /* Don't allow listeners or closed sockets */
  1101. if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
  1102. err = -EOPNOTSUPP;
  1103. goto out;
  1104. }
  1105. psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
  1106. if (!psock) {
  1107. err = -ENOMEM;
  1108. goto out;
  1109. }
  1110. psock->mux = mux;
  1111. psock->sk = csk;
  1112. psock->bpf_prog = prog;
  1113. write_lock_bh(&csk->sk_callback_lock);
  1114. /* Check if sk_user_data is already by KCM or someone else.
  1115. * Must be done under lock to prevent race conditions.
  1116. */
  1117. if (csk->sk_user_data) {
  1118. write_unlock_bh(&csk->sk_callback_lock);
  1119. kmem_cache_free(kcm_psockp, psock);
  1120. err = -EALREADY;
  1121. goto out;
  1122. }
  1123. err = strp_init(&psock->strp, csk, &cb);
  1124. if (err) {
  1125. write_unlock_bh(&csk->sk_callback_lock);
  1126. kmem_cache_free(kcm_psockp, psock);
  1127. goto out;
  1128. }
  1129. psock->save_data_ready = csk->sk_data_ready;
  1130. psock->save_write_space = csk->sk_write_space;
  1131. psock->save_state_change = csk->sk_state_change;
  1132. csk->sk_user_data = psock;
  1133. csk->sk_data_ready = psock_data_ready;
  1134. csk->sk_write_space = psock_write_space;
  1135. csk->sk_state_change = psock_state_change;
  1136. write_unlock_bh(&csk->sk_callback_lock);
  1137. sock_hold(csk);
  1138. /* Finished initialization, now add the psock to the MUX. */
  1139. spin_lock_bh(&mux->lock);
  1140. head = &mux->psocks;
  1141. list_for_each_entry(tpsock, &mux->psocks, psock_list) {
  1142. if (tpsock->index != index)
  1143. break;
  1144. head = &tpsock->psock_list;
  1145. index++;
  1146. }
  1147. list_add(&psock->psock_list, head);
  1148. psock->index = index;
  1149. KCM_STATS_INCR(mux->stats.psock_attach);
  1150. mux->psocks_cnt++;
  1151. psock_now_avail(psock);
  1152. spin_unlock_bh(&mux->lock);
  1153. /* Schedule RX work in case there are already bytes queued */
  1154. strp_check_rcv(&psock->strp);
  1155. out:
  1156. release_sock(csk);
  1157. return err;
  1158. }
  1159. static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
  1160. {
  1161. struct socket *csock;
  1162. struct bpf_prog *prog;
  1163. int err;
  1164. csock = sockfd_lookup(info->fd, &err);
  1165. if (!csock)
  1166. return -ENOENT;
  1167. prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
  1168. if (IS_ERR(prog)) {
  1169. err = PTR_ERR(prog);
  1170. goto out;
  1171. }
  1172. err = kcm_attach(sock, csock, prog);
  1173. if (err) {
  1174. bpf_prog_put(prog);
  1175. goto out;
  1176. }
  1177. /* Keep reference on file also */
  1178. return 0;
  1179. out:
  1180. sockfd_put(csock);
  1181. return err;
  1182. }
  1183. static void kcm_unattach(struct kcm_psock *psock)
  1184. {
  1185. struct sock *csk = psock->sk;
  1186. struct kcm_mux *mux = psock->mux;
  1187. lock_sock(csk);
  1188. /* Stop getting callbacks from TCP socket. After this there should
  1189. * be no way to reserve a kcm for this psock.
  1190. */
  1191. write_lock_bh(&csk->sk_callback_lock);
  1192. csk->sk_user_data = NULL;
  1193. csk->sk_data_ready = psock->save_data_ready;
  1194. csk->sk_write_space = psock->save_write_space;
  1195. csk->sk_state_change = psock->save_state_change;
  1196. strp_stop(&psock->strp);
  1197. if (WARN_ON(psock->rx_kcm)) {
  1198. write_unlock_bh(&csk->sk_callback_lock);
  1199. release_sock(csk);
  1200. return;
  1201. }
  1202. spin_lock_bh(&mux->rx_lock);
  1203. /* Stop receiver activities. After this point psock should not be
  1204. * able to get onto ready list either through callbacks or work.
  1205. */
  1206. if (psock->ready_rx_msg) {
  1207. list_del(&psock->psock_ready_list);
  1208. kfree_skb(psock->ready_rx_msg);
  1209. psock->ready_rx_msg = NULL;
  1210. KCM_STATS_INCR(mux->stats.rx_ready_drops);
  1211. }
  1212. spin_unlock_bh(&mux->rx_lock);
  1213. write_unlock_bh(&csk->sk_callback_lock);
  1214. /* Call strp_done without sock lock */
  1215. release_sock(csk);
  1216. strp_done(&psock->strp);
  1217. lock_sock(csk);
  1218. bpf_prog_put(psock->bpf_prog);
  1219. spin_lock_bh(&mux->lock);
  1220. aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
  1221. save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
  1222. KCM_STATS_INCR(mux->stats.psock_unattach);
  1223. if (psock->tx_kcm) {
  1224. /* psock was reserved. Just mark it finished and we will clean
  1225. * up in the kcm paths, we need kcm lock which can not be
  1226. * acquired here.
  1227. */
  1228. KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
  1229. spin_unlock_bh(&mux->lock);
  1230. /* We are unattaching a socket that is reserved. Abort the
  1231. * socket since we may be out of sync in sending on it. We need
  1232. * to do this without the mux lock.
  1233. */
  1234. kcm_abort_tx_psock(psock, EPIPE, false);
  1235. spin_lock_bh(&mux->lock);
  1236. if (!psock->tx_kcm) {
  1237. /* psock now unreserved in window mux was unlocked */
  1238. goto no_reserved;
  1239. }
  1240. psock->done = 1;
  1241. /* Commit done before queuing work to process it */
  1242. smp_mb();
  1243. /* Queue tx work to make sure psock->done is handled */
  1244. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  1245. spin_unlock_bh(&mux->lock);
  1246. } else {
  1247. no_reserved:
  1248. if (!psock->tx_stopped)
  1249. list_del(&psock->psock_avail_list);
  1250. list_del(&psock->psock_list);
  1251. mux->psocks_cnt--;
  1252. spin_unlock_bh(&mux->lock);
  1253. sock_put(csk);
  1254. fput(csk->sk_socket->file);
  1255. kmem_cache_free(kcm_psockp, psock);
  1256. }
  1257. release_sock(csk);
  1258. }
  1259. static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
  1260. {
  1261. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1262. struct kcm_mux *mux = kcm->mux;
  1263. struct kcm_psock *psock;
  1264. struct socket *csock;
  1265. struct sock *csk;
  1266. int err;
  1267. csock = sockfd_lookup(info->fd, &err);
  1268. if (!csock)
  1269. return -ENOENT;
  1270. csk = csock->sk;
  1271. if (!csk) {
  1272. err = -EINVAL;
  1273. goto out;
  1274. }
  1275. err = -ENOENT;
  1276. spin_lock_bh(&mux->lock);
  1277. list_for_each_entry(psock, &mux->psocks, psock_list) {
  1278. if (psock->sk != csk)
  1279. continue;
  1280. /* Found the matching psock */
  1281. if (psock->unattaching || WARN_ON(psock->done)) {
  1282. err = -EALREADY;
  1283. break;
  1284. }
  1285. psock->unattaching = 1;
  1286. spin_unlock_bh(&mux->lock);
  1287. /* Lower socket lock should already be held */
  1288. kcm_unattach(psock);
  1289. err = 0;
  1290. goto out;
  1291. }
  1292. spin_unlock_bh(&mux->lock);
  1293. out:
  1294. sockfd_put(csock);
  1295. return err;
  1296. }
  1297. static struct proto kcm_proto = {
  1298. .name = "KCM",
  1299. .owner = THIS_MODULE,
  1300. .obj_size = sizeof(struct kcm_sock),
  1301. };
  1302. /* Clone a kcm socket. */
  1303. static struct file *kcm_clone(struct socket *osock)
  1304. {
  1305. struct socket *newsock;
  1306. struct sock *newsk;
  1307. newsock = sock_alloc();
  1308. if (!newsock)
  1309. return ERR_PTR(-ENFILE);
  1310. newsock->type = osock->type;
  1311. newsock->ops = osock->ops;
  1312. __module_get(newsock->ops->owner);
  1313. newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
  1314. &kcm_proto, false);
  1315. if (!newsk) {
  1316. sock_release(newsock);
  1317. return ERR_PTR(-ENOMEM);
  1318. }
  1319. sock_init_data(newsock, newsk);
  1320. init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
  1321. return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
  1322. }
  1323. static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  1324. {
  1325. int err;
  1326. switch (cmd) {
  1327. case SIOCKCMATTACH: {
  1328. struct kcm_attach info;
  1329. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1330. return -EFAULT;
  1331. err = kcm_attach_ioctl(sock, &info);
  1332. break;
  1333. }
  1334. case SIOCKCMUNATTACH: {
  1335. struct kcm_unattach info;
  1336. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1337. return -EFAULT;
  1338. err = kcm_unattach_ioctl(sock, &info);
  1339. break;
  1340. }
  1341. case SIOCKCMCLONE: {
  1342. struct kcm_clone info;
  1343. struct file *file;
  1344. info.fd = get_unused_fd_flags(0);
  1345. if (unlikely(info.fd < 0))
  1346. return info.fd;
  1347. file = kcm_clone(sock);
  1348. if (IS_ERR(file)) {
  1349. put_unused_fd(info.fd);
  1350. return PTR_ERR(file);
  1351. }
  1352. if (copy_to_user((void __user *)arg, &info,
  1353. sizeof(info))) {
  1354. put_unused_fd(info.fd);
  1355. fput(file);
  1356. return -EFAULT;
  1357. }
  1358. fd_install(info.fd, file);
  1359. err = 0;
  1360. break;
  1361. }
  1362. default:
  1363. err = -ENOIOCTLCMD;
  1364. break;
  1365. }
  1366. return err;
  1367. }
  1368. static void free_mux(struct rcu_head *rcu)
  1369. {
  1370. struct kcm_mux *mux = container_of(rcu,
  1371. struct kcm_mux, rcu);
  1372. kmem_cache_free(kcm_muxp, mux);
  1373. }
  1374. static void release_mux(struct kcm_mux *mux)
  1375. {
  1376. struct kcm_net *knet = mux->knet;
  1377. struct kcm_psock *psock, *tmp_psock;
  1378. /* Release psocks */
  1379. list_for_each_entry_safe(psock, tmp_psock,
  1380. &mux->psocks, psock_list) {
  1381. if (!WARN_ON(psock->unattaching))
  1382. kcm_unattach(psock);
  1383. }
  1384. if (WARN_ON(mux->psocks_cnt))
  1385. return;
  1386. __skb_queue_purge(&mux->rx_hold_queue);
  1387. mutex_lock(&knet->mutex);
  1388. aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
  1389. aggregate_psock_stats(&mux->aggregate_psock_stats,
  1390. &knet->aggregate_psock_stats);
  1391. aggregate_strp_stats(&mux->aggregate_strp_stats,
  1392. &knet->aggregate_strp_stats);
  1393. list_del_rcu(&mux->kcm_mux_list);
  1394. knet->count--;
  1395. mutex_unlock(&knet->mutex);
  1396. call_rcu(&mux->rcu, free_mux);
  1397. }
  1398. static void kcm_done(struct kcm_sock *kcm)
  1399. {
  1400. struct kcm_mux *mux = kcm->mux;
  1401. struct sock *sk = &kcm->sk;
  1402. int socks_cnt;
  1403. spin_lock_bh(&mux->rx_lock);
  1404. if (kcm->rx_psock) {
  1405. /* Cleanup in unreserve_rx_kcm */
  1406. WARN_ON(kcm->done);
  1407. kcm->rx_disabled = 1;
  1408. kcm->done = 1;
  1409. spin_unlock_bh(&mux->rx_lock);
  1410. return;
  1411. }
  1412. if (kcm->rx_wait) {
  1413. list_del(&kcm->wait_rx_list);
  1414. /* paired with lockless reads in kcm_rfree() */
  1415. WRITE_ONCE(kcm->rx_wait, false);
  1416. }
  1417. /* Move any pending receive messages to other kcm sockets */
  1418. requeue_rx_msgs(mux, &sk->sk_receive_queue);
  1419. spin_unlock_bh(&mux->rx_lock);
  1420. if (WARN_ON(sk_rmem_alloc_get(sk)))
  1421. return;
  1422. /* Detach from MUX */
  1423. spin_lock_bh(&mux->lock);
  1424. list_del(&kcm->kcm_sock_list);
  1425. mux->kcm_socks_cnt--;
  1426. socks_cnt = mux->kcm_socks_cnt;
  1427. spin_unlock_bh(&mux->lock);
  1428. if (!socks_cnt) {
  1429. /* We are done with the mux now. */
  1430. release_mux(mux);
  1431. }
  1432. WARN_ON(kcm->rx_wait);
  1433. sock_put(&kcm->sk);
  1434. }
  1435. /* Called by kcm_release to close a KCM socket.
  1436. * If this is the last KCM socket on the MUX, destroy the MUX.
  1437. */
  1438. static int kcm_release(struct socket *sock)
  1439. {
  1440. struct sock *sk = sock->sk;
  1441. struct kcm_sock *kcm;
  1442. struct kcm_mux *mux;
  1443. struct kcm_psock *psock;
  1444. if (!sk)
  1445. return 0;
  1446. kcm = kcm_sk(sk);
  1447. mux = kcm->mux;
  1448. lock_sock(sk);
  1449. sock_orphan(sk);
  1450. kfree_skb(kcm->seq_skb);
  1451. /* Purge queue under lock to avoid race condition with tx_work trying
  1452. * to act when queue is nonempty. If tx_work runs after this point
  1453. * it will just return.
  1454. */
  1455. __skb_queue_purge(&sk->sk_write_queue);
  1456. /* Set tx_stopped. This is checked when psock is bound to a kcm and we
  1457. * get a writespace callback. This prevents further work being queued
  1458. * from the callback (unbinding the psock occurs after canceling work.
  1459. */
  1460. kcm->tx_stopped = 1;
  1461. release_sock(sk);
  1462. spin_lock_bh(&mux->lock);
  1463. if (kcm->tx_wait) {
  1464. /* Take of tx_wait list, after this point there should be no way
  1465. * that a psock will be assigned to this kcm.
  1466. */
  1467. list_del(&kcm->wait_psock_list);
  1468. kcm->tx_wait = false;
  1469. }
  1470. spin_unlock_bh(&mux->lock);
  1471. /* Cancel work. After this point there should be no outside references
  1472. * to the kcm socket.
  1473. */
  1474. cancel_work_sync(&kcm->tx_work);
  1475. lock_sock(sk);
  1476. psock = kcm->tx_psock;
  1477. if (psock) {
  1478. /* A psock was reserved, so we need to kill it since it
  1479. * may already have some bytes queued from a message. We
  1480. * need to do this after removing kcm from tx_wait list.
  1481. */
  1482. kcm_abort_tx_psock(psock, EPIPE, false);
  1483. unreserve_psock(kcm);
  1484. }
  1485. release_sock(sk);
  1486. WARN_ON(kcm->tx_wait);
  1487. WARN_ON(kcm->tx_psock);
  1488. sock->sk = NULL;
  1489. kcm_done(kcm);
  1490. return 0;
  1491. }
  1492. static const struct proto_ops kcm_dgram_ops = {
  1493. .family = PF_KCM,
  1494. .owner = THIS_MODULE,
  1495. .release = kcm_release,
  1496. .bind = sock_no_bind,
  1497. .connect = sock_no_connect,
  1498. .socketpair = sock_no_socketpair,
  1499. .accept = sock_no_accept,
  1500. .getname = sock_no_getname,
  1501. .poll = datagram_poll,
  1502. .ioctl = kcm_ioctl,
  1503. .listen = sock_no_listen,
  1504. .shutdown = sock_no_shutdown,
  1505. .setsockopt = kcm_setsockopt,
  1506. .getsockopt = kcm_getsockopt,
  1507. .sendmsg = kcm_sendmsg,
  1508. .recvmsg = kcm_recvmsg,
  1509. .mmap = sock_no_mmap,
  1510. .sendpage = kcm_sendpage,
  1511. };
  1512. static const struct proto_ops kcm_seqpacket_ops = {
  1513. .family = PF_KCM,
  1514. .owner = THIS_MODULE,
  1515. .release = kcm_release,
  1516. .bind = sock_no_bind,
  1517. .connect = sock_no_connect,
  1518. .socketpair = sock_no_socketpair,
  1519. .accept = sock_no_accept,
  1520. .getname = sock_no_getname,
  1521. .poll = datagram_poll,
  1522. .ioctl = kcm_ioctl,
  1523. .listen = sock_no_listen,
  1524. .shutdown = sock_no_shutdown,
  1525. .setsockopt = kcm_setsockopt,
  1526. .getsockopt = kcm_getsockopt,
  1527. .sendmsg = kcm_sendmsg,
  1528. .recvmsg = kcm_recvmsg,
  1529. .mmap = sock_no_mmap,
  1530. .sendpage = kcm_sendpage,
  1531. .splice_read = kcm_splice_read,
  1532. };
  1533. /* Create proto operation for kcm sockets */
  1534. static int kcm_create(struct net *net, struct socket *sock,
  1535. int protocol, int kern)
  1536. {
  1537. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1538. struct sock *sk;
  1539. struct kcm_mux *mux;
  1540. switch (sock->type) {
  1541. case SOCK_DGRAM:
  1542. sock->ops = &kcm_dgram_ops;
  1543. break;
  1544. case SOCK_SEQPACKET:
  1545. sock->ops = &kcm_seqpacket_ops;
  1546. break;
  1547. default:
  1548. return -ESOCKTNOSUPPORT;
  1549. }
  1550. if (protocol != KCMPROTO_CONNECTED)
  1551. return -EPROTONOSUPPORT;
  1552. sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
  1553. if (!sk)
  1554. return -ENOMEM;
  1555. /* Allocate a kcm mux, shared between KCM sockets */
  1556. mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
  1557. if (!mux) {
  1558. sk_free(sk);
  1559. return -ENOMEM;
  1560. }
  1561. spin_lock_init(&mux->lock);
  1562. spin_lock_init(&mux->rx_lock);
  1563. INIT_LIST_HEAD(&mux->kcm_socks);
  1564. INIT_LIST_HEAD(&mux->kcm_rx_waiters);
  1565. INIT_LIST_HEAD(&mux->kcm_tx_waiters);
  1566. INIT_LIST_HEAD(&mux->psocks);
  1567. INIT_LIST_HEAD(&mux->psocks_ready);
  1568. INIT_LIST_HEAD(&mux->psocks_avail);
  1569. mux->knet = knet;
  1570. /* Add new MUX to list */
  1571. mutex_lock(&knet->mutex);
  1572. list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
  1573. knet->count++;
  1574. mutex_unlock(&knet->mutex);
  1575. skb_queue_head_init(&mux->rx_hold_queue);
  1576. /* Init KCM socket */
  1577. sock_init_data(sock, sk);
  1578. init_kcm_sock(kcm_sk(sk), mux);
  1579. return 0;
  1580. }
  1581. static const struct net_proto_family kcm_family_ops = {
  1582. .family = PF_KCM,
  1583. .create = kcm_create,
  1584. .owner = THIS_MODULE,
  1585. };
  1586. static __net_init int kcm_init_net(struct net *net)
  1587. {
  1588. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1589. INIT_LIST_HEAD_RCU(&knet->mux_list);
  1590. mutex_init(&knet->mutex);
  1591. return 0;
  1592. }
  1593. static __net_exit void kcm_exit_net(struct net *net)
  1594. {
  1595. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1596. /* All KCM sockets should be closed at this point, which should mean
  1597. * that all multiplexors and psocks have been destroyed.
  1598. */
  1599. WARN_ON(!list_empty(&knet->mux_list));
  1600. mutex_destroy(&knet->mutex);
  1601. }
  1602. static struct pernet_operations kcm_net_ops = {
  1603. .init = kcm_init_net,
  1604. .exit = kcm_exit_net,
  1605. .id = &kcm_net_id,
  1606. .size = sizeof(struct kcm_net),
  1607. };
  1608. static int __init kcm_init(void)
  1609. {
  1610. int err = -ENOMEM;
  1611. kcm_muxp = kmem_cache_create("kcm_mux_cache",
  1612. sizeof(struct kcm_mux), 0,
  1613. SLAB_HWCACHE_ALIGN, NULL);
  1614. if (!kcm_muxp)
  1615. goto fail;
  1616. kcm_psockp = kmem_cache_create("kcm_psock_cache",
  1617. sizeof(struct kcm_psock), 0,
  1618. SLAB_HWCACHE_ALIGN, NULL);
  1619. if (!kcm_psockp)
  1620. goto fail;
  1621. kcm_wq = create_singlethread_workqueue("kkcmd");
  1622. if (!kcm_wq)
  1623. goto fail;
  1624. err = proto_register(&kcm_proto, 1);
  1625. if (err)
  1626. goto fail;
  1627. err = register_pernet_device(&kcm_net_ops);
  1628. if (err)
  1629. goto net_ops_fail;
  1630. err = sock_register(&kcm_family_ops);
  1631. if (err)
  1632. goto sock_register_fail;
  1633. err = kcm_proc_init();
  1634. if (err)
  1635. goto proc_init_fail;
  1636. return 0;
  1637. proc_init_fail:
  1638. sock_unregister(PF_KCM);
  1639. sock_register_fail:
  1640. unregister_pernet_device(&kcm_net_ops);
  1641. net_ops_fail:
  1642. proto_unregister(&kcm_proto);
  1643. fail:
  1644. kmem_cache_destroy(kcm_muxp);
  1645. kmem_cache_destroy(kcm_psockp);
  1646. if (kcm_wq)
  1647. destroy_workqueue(kcm_wq);
  1648. return err;
  1649. }
  1650. static void __exit kcm_exit(void)
  1651. {
  1652. kcm_proc_exit();
  1653. sock_unregister(PF_KCM);
  1654. unregister_pernet_device(&kcm_net_ops);
  1655. proto_unregister(&kcm_proto);
  1656. destroy_workqueue(kcm_wq);
  1657. kmem_cache_destroy(kcm_muxp);
  1658. kmem_cache_destroy(kcm_psockp);
  1659. }
  1660. module_init(kcm_init);
  1661. module_exit(kcm_exit);
  1662. MODULE_LICENSE("GPL");
  1663. MODULE_ALIAS_NETPROTO(PF_KCM);