nsproxy.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2006 IBM Corporation
  4. *
  5. * Author: Serge Hallyn <[email protected]>
  6. *
  7. * Jun 2006 - namespaces support
  8. * OpenVZ, SWsoft Inc.
  9. * Pavel Emelianov <[email protected]>
  10. */
  11. #include <linux/slab.h>
  12. #include <linux/export.h>
  13. #include <linux/nsproxy.h>
  14. #include <linux/init_task.h>
  15. #include <linux/mnt_namespace.h>
  16. #include <linux/utsname.h>
  17. #include <linux/pid_namespace.h>
  18. #include <net/net_namespace.h>
  19. #include <linux/ipc_namespace.h>
  20. #include <linux/time_namespace.h>
  21. #include <linux/fs_struct.h>
  22. #include <linux/proc_fs.h>
  23. #include <linux/proc_ns.h>
  24. #include <linux/file.h>
  25. #include <linux/syscalls.h>
  26. #include <linux/cgroup.h>
  27. #include <linux/perf_event.h>
  28. static struct kmem_cache *nsproxy_cachep;
  29. struct nsproxy init_nsproxy = {
  30. .count = ATOMIC_INIT(1),
  31. .uts_ns = &init_uts_ns,
  32. #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
  33. .ipc_ns = &init_ipc_ns,
  34. #endif
  35. .mnt_ns = NULL,
  36. .pid_ns_for_children = &init_pid_ns,
  37. #ifdef CONFIG_NET
  38. .net_ns = &init_net,
  39. #endif
  40. #ifdef CONFIG_CGROUPS
  41. .cgroup_ns = &init_cgroup_ns,
  42. #endif
  43. #ifdef CONFIG_TIME_NS
  44. .time_ns = &init_time_ns,
  45. .time_ns_for_children = &init_time_ns,
  46. #endif
  47. };
  48. static inline struct nsproxy *create_nsproxy(void)
  49. {
  50. struct nsproxy *nsproxy;
  51. nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
  52. if (nsproxy)
  53. atomic_set(&nsproxy->count, 1);
  54. return nsproxy;
  55. }
  56. /*
  57. * Create new nsproxy and all of its the associated namespaces.
  58. * Return the newly created nsproxy. Do not attach this to the task,
  59. * leave it to the caller to do proper locking and attach it to task.
  60. */
  61. static struct nsproxy *create_new_namespaces(unsigned long flags,
  62. struct task_struct *tsk, struct user_namespace *user_ns,
  63. struct fs_struct *new_fs)
  64. {
  65. struct nsproxy *new_nsp;
  66. int err;
  67. new_nsp = create_nsproxy();
  68. if (!new_nsp)
  69. return ERR_PTR(-ENOMEM);
  70. new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
  71. if (IS_ERR(new_nsp->mnt_ns)) {
  72. err = PTR_ERR(new_nsp->mnt_ns);
  73. goto out_ns;
  74. }
  75. new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
  76. if (IS_ERR(new_nsp->uts_ns)) {
  77. err = PTR_ERR(new_nsp->uts_ns);
  78. goto out_uts;
  79. }
  80. new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
  81. if (IS_ERR(new_nsp->ipc_ns)) {
  82. err = PTR_ERR(new_nsp->ipc_ns);
  83. goto out_ipc;
  84. }
  85. new_nsp->pid_ns_for_children =
  86. copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
  87. if (IS_ERR(new_nsp->pid_ns_for_children)) {
  88. err = PTR_ERR(new_nsp->pid_ns_for_children);
  89. goto out_pid;
  90. }
  91. new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
  92. tsk->nsproxy->cgroup_ns);
  93. if (IS_ERR(new_nsp->cgroup_ns)) {
  94. err = PTR_ERR(new_nsp->cgroup_ns);
  95. goto out_cgroup;
  96. }
  97. new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
  98. if (IS_ERR(new_nsp->net_ns)) {
  99. err = PTR_ERR(new_nsp->net_ns);
  100. goto out_net;
  101. }
  102. new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
  103. tsk->nsproxy->time_ns_for_children);
  104. if (IS_ERR(new_nsp->time_ns_for_children)) {
  105. err = PTR_ERR(new_nsp->time_ns_for_children);
  106. goto out_time;
  107. }
  108. new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
  109. return new_nsp;
  110. out_time:
  111. put_net(new_nsp->net_ns);
  112. out_net:
  113. put_cgroup_ns(new_nsp->cgroup_ns);
  114. out_cgroup:
  115. if (new_nsp->pid_ns_for_children)
  116. put_pid_ns(new_nsp->pid_ns_for_children);
  117. out_pid:
  118. if (new_nsp->ipc_ns)
  119. put_ipc_ns(new_nsp->ipc_ns);
  120. out_ipc:
  121. if (new_nsp->uts_ns)
  122. put_uts_ns(new_nsp->uts_ns);
  123. out_uts:
  124. if (new_nsp->mnt_ns)
  125. put_mnt_ns(new_nsp->mnt_ns);
  126. out_ns:
  127. kmem_cache_free(nsproxy_cachep, new_nsp);
  128. return ERR_PTR(err);
  129. }
  130. /*
  131. * called from clone. This now handles copy for nsproxy and all
  132. * namespaces therein.
  133. */
  134. int copy_namespaces(unsigned long flags, struct task_struct *tsk)
  135. {
  136. struct nsproxy *old_ns = tsk->nsproxy;
  137. struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
  138. struct nsproxy *new_ns;
  139. if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
  140. CLONE_NEWPID | CLONE_NEWNET |
  141. CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
  142. if (likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
  143. get_nsproxy(old_ns);
  144. return 0;
  145. }
  146. } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
  147. return -EPERM;
  148. /*
  149. * CLONE_NEWIPC must detach from the undolist: after switching
  150. * to a new ipc namespace, the semaphore arrays from the old
  151. * namespace are unreachable. In clone parlance, CLONE_SYSVSEM
  152. * means share undolist with parent, so we must forbid using
  153. * it along with CLONE_NEWIPC.
  154. */
  155. if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
  156. (CLONE_NEWIPC | CLONE_SYSVSEM))
  157. return -EINVAL;
  158. new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
  159. if (IS_ERR(new_ns))
  160. return PTR_ERR(new_ns);
  161. timens_on_fork(new_ns, tsk);
  162. tsk->nsproxy = new_ns;
  163. return 0;
  164. }
  165. void free_nsproxy(struct nsproxy *ns)
  166. {
  167. if (ns->mnt_ns)
  168. put_mnt_ns(ns->mnt_ns);
  169. if (ns->uts_ns)
  170. put_uts_ns(ns->uts_ns);
  171. if (ns->ipc_ns)
  172. put_ipc_ns(ns->ipc_ns);
  173. if (ns->pid_ns_for_children)
  174. put_pid_ns(ns->pid_ns_for_children);
  175. if (ns->time_ns)
  176. put_time_ns(ns->time_ns);
  177. if (ns->time_ns_for_children)
  178. put_time_ns(ns->time_ns_for_children);
  179. put_cgroup_ns(ns->cgroup_ns);
  180. put_net(ns->net_ns);
  181. kmem_cache_free(nsproxy_cachep, ns);
  182. }
  183. /*
  184. * Called from unshare. Unshare all the namespaces part of nsproxy.
  185. * On success, returns the new nsproxy.
  186. */
  187. int unshare_nsproxy_namespaces(unsigned long unshare_flags,
  188. struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
  189. {
  190. struct user_namespace *user_ns;
  191. int err = 0;
  192. if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
  193. CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
  194. CLONE_NEWTIME)))
  195. return 0;
  196. user_ns = new_cred ? new_cred->user_ns : current_user_ns();
  197. if (!ns_capable(user_ns, CAP_SYS_ADMIN))
  198. return -EPERM;
  199. *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
  200. new_fs ? new_fs : current->fs);
  201. if (IS_ERR(*new_nsp)) {
  202. err = PTR_ERR(*new_nsp);
  203. goto out;
  204. }
  205. out:
  206. return err;
  207. }
  208. void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
  209. {
  210. struct nsproxy *ns;
  211. might_sleep();
  212. task_lock(p);
  213. ns = p->nsproxy;
  214. p->nsproxy = new;
  215. task_unlock(p);
  216. if (ns)
  217. put_nsproxy(ns);
  218. }
  219. void exit_task_namespaces(struct task_struct *p)
  220. {
  221. switch_task_namespaces(p, NULL);
  222. }
  /*
   * Validate the namespace flags passed to setns().
   *
   * Returns -EINVAL if @flags is zero, contains a bit that is not one
   * of the CLONE_NEW* namespace flags, or names a namespace type whose
   * support is compiled out of this kernel. Returns 0 otherwise.
   */
  static int check_setns_flags(unsigned long flags)
  {
      const unsigned long known = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
                                  CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
                                  CLONE_NEWPID | CLONE_NEWCGROUP;
      /* Namespace types this build has no support for. */
      unsigned long disabled = 0;

  #ifndef CONFIG_USER_NS
      disabled |= CLONE_NEWUSER;
  #endif
  #ifndef CONFIG_PID_NS
      disabled |= CLONE_NEWPID;
  #endif
  #ifndef CONFIG_UTS_NS
      disabled |= CLONE_NEWUTS;
  #endif
  #ifndef CONFIG_IPC_NS
      disabled |= CLONE_NEWIPC;
  #endif
  #ifndef CONFIG_CGROUPS
      disabled |= CLONE_NEWCGROUP;
  #endif
  #ifndef CONFIG_NET_NS
      disabled |= CLONE_NEWNET;
  #endif
  #ifndef CONFIG_TIME_NS
      disabled |= CLONE_NEWTIME;
  #endif

      if (!flags || (flags & ~known) || (flags & disabled))
          return -EINVAL;

      return 0;
  }
  259. static void put_nsset(struct nsset *nsset)
  260. {
  261. unsigned flags = nsset->flags;
  262. if (flags & CLONE_NEWUSER)
  263. put_cred(nsset_cred(nsset));
  264. /*
  265. * We only created a temporary copy if we attached to more than just
  266. * the mount namespace.
  267. */
  268. if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
  269. free_fs_struct(nsset->fs);
  270. if (nsset->nsproxy)
  271. free_nsproxy(nsset->nsproxy);
  272. }
  273. static int prepare_nsset(unsigned flags, struct nsset *nsset)
  274. {
  275. struct task_struct *me = current;
  276. nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
  277. if (IS_ERR(nsset->nsproxy))
  278. return PTR_ERR(nsset->nsproxy);
  279. if (flags & CLONE_NEWUSER)
  280. nsset->cred = prepare_creds();
  281. else
  282. nsset->cred = current_cred();
  283. if (!nsset->cred)
  284. goto out;
  285. /* Only create a temporary copy of fs_struct if we really need to. */
  286. if (flags == CLONE_NEWNS) {
  287. nsset->fs = me->fs;
  288. } else if (flags & CLONE_NEWNS) {
  289. nsset->fs = copy_fs_struct(me->fs);
  290. if (!nsset->fs)
  291. goto out;
  292. }
  293. nsset->flags = flags;
  294. return 0;
  295. out:
  296. put_nsset(nsset);
  297. return -ENOMEM;
  298. }
  299. static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
  300. {
  301. return ns->ops->install(nsset, ns);
  302. }
  303. /*
  304. * This is the inverse operation to unshare().
  305. * Ordering is equivalent to the standard ordering used everywhere else
  306. * during unshare and process creation. The switch to the new set of
  307. * namespaces occurs at the point of no return after installation of
  308. * all requested namespaces was successful in commit_nsset().
  309. */
  310. static int validate_nsset(struct nsset *nsset, struct pid *pid)
  311. {
  312. int ret = 0;
  313. unsigned flags = nsset->flags;
  314. struct user_namespace *user_ns = NULL;
  315. struct pid_namespace *pid_ns = NULL;
  316. struct nsproxy *nsp;
  317. struct task_struct *tsk;
  318. /* Take a "snapshot" of the target task's namespaces. */
  319. rcu_read_lock();
  320. tsk = pid_task(pid, PIDTYPE_PID);
  321. if (!tsk) {
  322. rcu_read_unlock();
  323. return -ESRCH;
  324. }
  325. if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
  326. rcu_read_unlock();
  327. return -EPERM;
  328. }
  329. task_lock(tsk);
  330. nsp = tsk->nsproxy;
  331. if (nsp)
  332. get_nsproxy(nsp);
  333. task_unlock(tsk);
  334. if (!nsp) {
  335. rcu_read_unlock();
  336. return -ESRCH;
  337. }
  338. #ifdef CONFIG_PID_NS
  339. if (flags & CLONE_NEWPID) {
  340. pid_ns = task_active_pid_ns(tsk);
  341. if (unlikely(!pid_ns)) {
  342. rcu_read_unlock();
  343. ret = -ESRCH;
  344. goto out;
  345. }
  346. get_pid_ns(pid_ns);
  347. }
  348. #endif
  349. #ifdef CONFIG_USER_NS
  350. if (flags & CLONE_NEWUSER)
  351. user_ns = get_user_ns(__task_cred(tsk)->user_ns);
  352. #endif
  353. rcu_read_unlock();
  354. /*
  355. * Install requested namespaces. The caller will have
  356. * verified earlier that the requested namespaces are
  357. * supported on this kernel. We don't report errors here
  358. * if a namespace is requested that isn't supported.
  359. */
  360. #ifdef CONFIG_USER_NS
  361. if (flags & CLONE_NEWUSER) {
  362. ret = validate_ns(nsset, &user_ns->ns);
  363. if (ret)
  364. goto out;
  365. }
  366. #endif
  367. if (flags & CLONE_NEWNS) {
  368. ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
  369. if (ret)
  370. goto out;
  371. }
  372. #ifdef CONFIG_UTS_NS
  373. if (flags & CLONE_NEWUTS) {
  374. ret = validate_ns(nsset, &nsp->uts_ns->ns);
  375. if (ret)
  376. goto out;
  377. }
  378. #endif
  379. #ifdef CONFIG_IPC_NS
  380. if (flags & CLONE_NEWIPC) {
  381. ret = validate_ns(nsset, &nsp->ipc_ns->ns);
  382. if (ret)
  383. goto out;
  384. }
  385. #endif
  386. #ifdef CONFIG_PID_NS
  387. if (flags & CLONE_NEWPID) {
  388. ret = validate_ns(nsset, &pid_ns->ns);
  389. if (ret)
  390. goto out;
  391. }
  392. #endif
  393. #ifdef CONFIG_CGROUPS
  394. if (flags & CLONE_NEWCGROUP) {
  395. ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
  396. if (ret)
  397. goto out;
  398. }
  399. #endif
  400. #ifdef CONFIG_NET_NS
  401. if (flags & CLONE_NEWNET) {
  402. ret = validate_ns(nsset, &nsp->net_ns->ns);
  403. if (ret)
  404. goto out;
  405. }
  406. #endif
  407. #ifdef CONFIG_TIME_NS
  408. if (flags & CLONE_NEWTIME) {
  409. ret = validate_ns(nsset, &nsp->time_ns->ns);
  410. if (ret)
  411. goto out;
  412. }
  413. #endif
  414. out:
  415. if (pid_ns)
  416. put_pid_ns(pid_ns);
  417. if (nsp)
  418. put_nsproxy(nsp);
  419. put_user_ns(user_ns);
  420. return ret;
  421. }
  422. /*
  423. * This is the point of no return. There are just a few namespaces
  424. * that do some actual work here and it's sufficiently minimal that
  425. * a separate ns_common operation seems unnecessary for now.
  426. * Unshare is doing the same thing. If we'll end up needing to do
  427. * more in a given namespace or a helper here is ultimately not
  428. * exported anymore a simple commit handler for each namespace
  429. * should be added to ns_common.
  430. */
  431. static void commit_nsset(struct nsset *nsset)
  432. {
  433. unsigned flags = nsset->flags;
  434. struct task_struct *me = current;
  435. #ifdef CONFIG_USER_NS
  436. if (flags & CLONE_NEWUSER) {
  437. /* transfer ownership */
  438. commit_creds(nsset_cred(nsset));
  439. nsset->cred = NULL;
  440. }
  441. #endif
  442. /* We only need to commit if we have used a temporary fs_struct. */
  443. if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
  444. set_fs_root(me->fs, &nsset->fs->root);
  445. set_fs_pwd(me->fs, &nsset->fs->pwd);
  446. }
  447. #ifdef CONFIG_IPC_NS
  448. if (flags & CLONE_NEWIPC)
  449. exit_sem(me);
  450. #endif
  451. #ifdef CONFIG_TIME_NS
  452. if (flags & CLONE_NEWTIME)
  453. timens_commit(me, nsset->nsproxy->time_ns);
  454. #endif
  455. /* transfer ownership */
  456. switch_task_namespaces(me, nsset->nsproxy);
  457. nsset->nsproxy = NULL;
  458. }
  459. SYSCALL_DEFINE2(setns, int, fd, int, flags)
  460. {
  461. struct file *file;
  462. struct ns_common *ns = NULL;
  463. struct nsset nsset = {};
  464. int err = 0;
  465. file = fget(fd);
  466. if (!file)
  467. return -EBADF;
  468. if (proc_ns_file(file)) {
  469. ns = get_proc_ns(file_inode(file));
  470. if (flags && (ns->ops->type != flags))
  471. err = -EINVAL;
  472. flags = ns->ops->type;
  473. } else if (!IS_ERR(pidfd_pid(file))) {
  474. err = check_setns_flags(flags);
  475. } else {
  476. err = -EINVAL;
  477. }
  478. if (err)
  479. goto out;
  480. err = prepare_nsset(flags, &nsset);
  481. if (err)
  482. goto out;
  483. if (proc_ns_file(file))
  484. err = validate_ns(&nsset, ns);
  485. else
  486. err = validate_nsset(&nsset, file->private_data);
  487. if (!err) {
  488. commit_nsset(&nsset);
  489. perf_event_namespaces(current);
  490. }
  491. put_nsset(&nsset);
  492. out:
  493. fput(file);
  494. return err;
  495. }
  496. int __init nsproxy_cache_init(void)
  497. {
  498. nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
  499. return 0;
  500. }