android_kernel_xiaomi_sm8450/net/unix/af_unix.c
Greg Kroah-Hartman 87a7f35a24 Merge 5.10.220 into android12-5.10-lts
Changes in 5.10.220
	SUNRPC: Rename svc_encode_read_payload()
	NFSD: Invoke svc_encode_result_payload() in "read" NFSD encoders
	NFSD: A semicolon is not needed after a switch statement.
	nfsd/nfs3: remove unused macro nfsd3_fhandleres
	NFSD: Clean up the show_nf_may macro
	NFSD: Remove extra "0x" in tracepoint format specifier
	NFSD: Add SPDX header for fs/nfsd/trace.c
	nfsd: Fix error return code in nfsd_file_cache_init()
	SUNRPC: Add xdr_set_scratch_page() and xdr_reset_scratch_buffer()
	SUNRPC: Prepare for xdr_stream-style decoding on the server-side
	NFSD: Add common helpers to decode void args and encode void results
	NFSD: Add tracepoints in nfsd_dispatch()
	NFSD: Add tracepoints in nfsd4_decode/encode_compound()
	NFSD: Replace the internals of the READ_BUF() macro
	NFSD: Replace READ* macros in nfsd4_decode_access()
	NFSD: Replace READ* macros in nfsd4_decode_close()
	NFSD: Replace READ* macros in nfsd4_decode_commit()
	NFSD: Change the way the expected length of a fattr4 is checked
	NFSD: Replace READ* macros that decode the fattr4 size attribute
	NFSD: Replace READ* macros that decode the fattr4 acl attribute
	NFSD: Replace READ* macros that decode the fattr4 mode attribute
	NFSD: Replace READ* macros that decode the fattr4 owner attribute
	NFSD: Replace READ* macros that decode the fattr4 owner_group attribute
	NFSD: Replace READ* macros that decode the fattr4 time_set attributes
	NFSD: Replace READ* macros that decode the fattr4 security label attribute
	NFSD: Replace READ* macros that decode the fattr4 umask attribute
	NFSD: Replace READ* macros in nfsd4_decode_fattr()
	NFSD: Replace READ* macros in nfsd4_decode_create()
	NFSD: Replace READ* macros in nfsd4_decode_delegreturn()
	NFSD: Replace READ* macros in nfsd4_decode_getattr()
	NFSD: Replace READ* macros in nfsd4_decode_link()
	NFSD: Relocate nfsd4_decode_opaque()
	NFSD: Add helpers to decode a clientid4 and an NFSv4 state owner
	NFSD: Add helper for decoding locker4
	NFSD: Replace READ* macros in nfsd4_decode_lock()
	NFSD: Replace READ* macros in nfsd4_decode_lockt()
	NFSD: Replace READ* macros in nfsd4_decode_locku()
	NFSD: Replace READ* macros in nfsd4_decode_lookup()
	NFSD: Add helper to decode NFSv4 verifiers
	NFSD: Add helper to decode OPEN's createhow4 argument
	NFSD: Add helper to decode OPEN's openflag4 argument
	NFSD: Replace READ* macros in nfsd4_decode_share_access()
	NFSD: Replace READ* macros in nfsd4_decode_share_deny()
	NFSD: Add helper to decode OPEN's open_claim4 argument
	NFSD: Replace READ* macros in nfsd4_decode_open()
	NFSD: Replace READ* macros in nfsd4_decode_open_confirm()
	NFSD: Replace READ* macros in nfsd4_decode_open_downgrade()
	NFSD: Replace READ* macros in nfsd4_decode_putfh()
	NFSD: Replace READ* macros in nfsd4_decode_read()
	NFSD: Replace READ* macros in nfsd4_decode_readdir()
	NFSD: Replace READ* macros in nfsd4_decode_remove()
	NFSD: Replace READ* macros in nfsd4_decode_rename()
	NFSD: Replace READ* macros in nfsd4_decode_renew()
	NFSD: Replace READ* macros in nfsd4_decode_secinfo()
	NFSD: Replace READ* macros in nfsd4_decode_setattr()
	NFSD: Replace READ* macros in nfsd4_decode_setclientid()
	NFSD: Replace READ* macros in nfsd4_decode_setclientid_confirm()
	NFSD: Replace READ* macros in nfsd4_decode_verify()
	NFSD: Replace READ* macros in nfsd4_decode_write()
	NFSD: Replace READ* macros in nfsd4_decode_release_lockowner()
	NFSD: Replace READ* macros in nfsd4_decode_cb_sec()
	NFSD: Replace READ* macros in nfsd4_decode_backchannel_ctl()
	NFSD: Replace READ* macros in nfsd4_decode_bind_conn_to_session()
	NFSD: Add a separate decoder to handle state_protect_ops
	NFSD: Add a separate decoder for ssv_sp_parms
	NFSD: Add a helper to decode state_protect4_a
	NFSD: Add a helper to decode nfs_impl_id4
	NFSD: Add a helper to decode channel_attrs4
	NFSD: Replace READ* macros in nfsd4_decode_create_session()
	NFSD: Replace READ* macros in nfsd4_decode_destroy_session()
	NFSD: Replace READ* macros in nfsd4_decode_free_stateid()
	NFSD: Replace READ* macros in nfsd4_decode_getdeviceinfo()
	NFSD: Replace READ* macros in nfsd4_decode_layoutcommit()
	NFSD: Replace READ* macros in nfsd4_decode_layoutget()
	NFSD: Replace READ* macros in nfsd4_decode_layoutreturn()
	NFSD: Replace READ* macros in nfsd4_decode_secinfo_no_name()
	NFSD: Replace READ* macros in nfsd4_decode_sequence()
	NFSD: Replace READ* macros in nfsd4_decode_test_stateid()
	NFSD: Replace READ* macros in nfsd4_decode_destroy_clientid()
	NFSD: Replace READ* macros in nfsd4_decode_reclaim_complete()
	NFSD: Replace READ* macros in nfsd4_decode_fallocate()
	NFSD: Replace READ* macros in nfsd4_decode_nl4_server()
	NFSD: Replace READ* macros in nfsd4_decode_copy()
	NFSD: Replace READ* macros in nfsd4_decode_copy_notify()
	NFSD: Replace READ* macros in nfsd4_decode_offload_status()
	NFSD: Replace READ* macros in nfsd4_decode_seek()
	NFSD: Replace READ* macros in nfsd4_decode_clone()
	NFSD: Replace READ* macros in nfsd4_decode_xattr_name()
	NFSD: Replace READ* macros in nfsd4_decode_setxattr()
	NFSD: Replace READ* macros in nfsd4_decode_listxattrs()
	NFSD: Make nfsd4_ops::opnum a u32
	NFSD: Replace READ* macros in nfsd4_decode_compound()
	NFSD: Remove macros that are no longer used
	nfsd: only call inode_query_iversion in the I_VERSION case
	nfsd: simplify nfsd4_change_info
	nfsd: minor nfsd4_change_attribute cleanup
	nfsd4: don't query change attribute in v2/v3 case
	Revert "nfsd4: support change_attr_type attribute"
	nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations
	nfsd: allow filesystems to opt out of subtree checking
	nfsd: close cached files prior to a REMOVE or RENAME that would replace target
	exportfs: Add a function to return the raw output from fh_to_dentry()
	nfsd: Fix up nfsd to ensure that timeout errors don't result in ESTALE
	nfsd: Set PF_LOCAL_THROTTLE on local filesystems only
	nfsd: Record NFSv4 pre/post-op attributes as non-atomic
	exec: Don't open code get_close_on_exec
	exec: Move unshare_files to fix posix file locking during exec
	exec: Simplify unshare_files
	exec: Remove reset_files_struct
	kcmp: In kcmp_epoll_target use fget_task
	bpf: In bpf_task_fd_query use fget_task
	proc/fd: In proc_fd_link use fget_task
	Revert "fget: clarify and improve __fget_files() implementation"
	file: Rename __fcheck_files to files_lookup_fd_raw
	file: Factor files_lookup_fd_locked out of fcheck_files
	file: Replace fcheck_files with files_lookup_fd_rcu
	file: Rename fcheck lookup_fd_rcu
	file: Implement task_lookup_fd_rcu
	proc/fd: In tid_fd_mode use task_lookup_fd_rcu
	kcmp: In get_file_raw_ptr use task_lookup_fd_rcu
	file: Implement task_lookup_next_fd_rcu
	proc/fd: In proc_readfd_common use task_lookup_next_fd_rcu
	proc/fd: In fdinfo seq_show don't use get_files_struct
	file: Merge __fd_install into fd_install
	file: In f_dupfd read RLIMIT_NOFILE once.
	file: Merge __alloc_fd into alloc_fd
	file: Rename __close_fd to close_fd and remove the files parameter
	file: Replace ksys_close with close_fd
	inotify: Increase default inotify.max_user_watches limit to 1048576
	fs/lockd: convert comma to semicolon
	NFSD: Fix sparse warning in nfssvc.c
	NFSD: Restore NFSv4 decoding's SAVEMEM functionality
	SUNRPC: Make trace_svc_process() display the RPC procedure symbolically
	SUNRPC: Display RPC procedure names instead of proc numbers
	SUNRPC: Move definition of XDR_UNIT
	NFSD: Update GETATTR3args decoder to use struct xdr_stream
	NFSD: Update ACCESS3arg decoder to use struct xdr_stream
	NFSD: Update READ3arg decoder to use struct xdr_stream
	NFSD: Update WRITE3arg decoder to use struct xdr_stream
	NFSD: Update READLINK3arg decoder to use struct xdr_stream
	NFSD: Fix returned READDIR offset cookie
	NFSD: Add helper to set up the pages where the dirlist is encoded
	NFSD: Update READDIR3args decoders to use struct xdr_stream
	NFSD: Update COMMIT3arg decoder to use struct xdr_stream
	NFSD: Update the NFSv3 DIROPargs decoder to use struct xdr_stream
	NFSD: Update the RENAME3args decoder to use struct xdr_stream
	NFSD: Update the LINK3args decoder to use struct xdr_stream
	NFSD: Update the SETATTR3args decoder to use struct xdr_stream
	NFSD: Update the CREATE3args decoder to use struct xdr_stream
	NFSD: Update the MKDIR3args decoder to use struct xdr_stream
	NFSD: Update the SYMLINK3args decoder to use struct xdr_stream
	NFSD: Update the MKNOD3args decoder to use struct xdr_stream
	NFSD: Update the NFSv2 GETATTR argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 READ argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 WRITE argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 READLINK argument decoder to use struct xdr_stream
	NFSD: Add helper to set up the pages where the dirlist is encoded
	NFSD: Update the NFSv2 READDIR argument decoder to use struct xdr_stream
	NFSD: Update NFSv2 diropargs decoding to use struct xdr_stream
	NFSD: Update the NFSv2 RENAME argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 LINK argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 SETATTR argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 CREATE argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 SYMLINK argument decoder to use struct xdr_stream
	NFSD: Remove argument length checking in nfsd_dispatch()
	NFSD: Update the NFSv2 GETACL argument decoder to use struct xdr_stream
	NFSD: Add an xdr_stream-based decoder for NFSv2/3 ACLs
	NFSD: Update the NFSv2 SETACL argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 ACL GETATTR argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 ACL ACCESS argument decoder to use struct xdr_stream
	NFSD: Clean up after updating NFSv2 ACL decoders
	NFSD: Update the NFSv3 GETACL argument decoder to use struct xdr_stream
	NFSD: Update the NFSv2 SETACL argument decoder to use struct xdr_stream
	NFSD: Clean up after updating NFSv3 ACL decoders
	nfsd: remove unused stats counters
	nfsd: protect concurrent access to nfsd stats counters
	nfsd: report per-export stats
	nfsd4: simplify process_lookup1
	nfsd: simplify process_lock
	nfsd: simplify nfsd_renew
	nfsd: rename lookup_clientid->set_client
	nfsd: refactor set_client
	nfsd: find_cpntf_state cleanup
	nfsd: remove unused set_client argument
	nfsd: simplify nfsd4_check_open_reclaim
	nfsd: cstate->session->se_client -> cstate->clp
	NFSv4_2: SSC helper should use its own config.
	nfs: use change attribute for NFS re-exports
	nfsd: skip some unnecessary stats in the v4 case
	inotify, memcg: account inotify instances to kmemcg
	module: unexport find_module and module_mutex
	module: use RCU to synchronize find_module
	kallsyms: refactor {,module_}kallsyms_on_each_symbol
	kallsyms: only build {,module_}kallsyms_on_each_symbol when required
	fs: add file and path permissions helpers
	namei: introduce struct renamedata
	NFSD: Extract the svcxdr_init_encode() helper
	NFSD: Update the GETATTR3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 ACCESS3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 LOOKUP3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 wccstat result encoder to use struct xdr_stream
	NFSD: Update the NFSv3 READLINK3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 READ3res encode to use struct xdr_stream
	NFSD: Update the NFSv3 WRITE3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 CREATE family of encoders to use struct xdr_stream
	NFSD: Update the NFSv3 RENAMEv3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 LINK3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 FSSTAT3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 FSINFO3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 PATHCONF3res encoder to use struct xdr_stream
	NFSD: Update the NFSv3 COMMIT3res encoder to use struct xdr_stream
	NFSD: Add a helper that encodes NFSv3 directory offset cookies
	NFSD: Count bytes instead of pages in the NFSv3 READDIR encoder
	NFSD: Update the NFSv3 READDIR3res encoder to use struct xdr_stream
	NFSD: Update NFSv3 READDIR entry encoders to use struct xdr_stream
	NFSD: Remove unused NFSv3 directory entry encoders
	NFSD: Reduce svc_rqst::rq_pages churn during READDIR operations
	NFSD: Update the NFSv2 stat encoder to use struct xdr_stream
	NFSD: Update the NFSv2 attrstat encoder to use struct xdr_stream
	NFSD: Update the NFSv2 diropres encoder to use struct xdr_stream
	NFSD: Update the NFSv2 READLINK result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 READ result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 STATFS result encoder to use struct xdr_stream
	NFSD: Add a helper that encodes NFSv3 directory offset cookies
	NFSD: Count bytes instead of pages in the NFSv2 READDIR encoder
	NFSD: Update the NFSv2 READDIR result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 READDIR entry encoder to use struct xdr_stream
	NFSD: Remove unused NFSv2 directory entry encoders
	NFSD: Add an xdr_stream-based encoder for NFSv2/3 ACLs
	NFSD: Update the NFSv2 GETACL result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 SETACL result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 ACL GETATTR result encoder to use struct xdr_stream
	NFSD: Update the NFSv2 ACL ACCESS result encoder to use struct xdr_stream
	NFSD: Clean up after updating NFSv2 ACL encoders
	NFSD: Update the NFSv3 GETACL result encoder to use struct xdr_stream
	NFSD: Update the NFSv3 SETACL result encoder to use struct xdr_stream
	NFSD: Clean up after updating NFSv3 ACL encoders
	NFSD: Add a tracepoint to record directory entry encoding
	NFSD: Clean up NFSDDBG_FACILITY macro
	nfsd: helper for laundromat expiry calculations
	nfsd: Log client tracking type log message as info instead of warning
	nfsd: Fix typo "accesible"
	nfsd: COPY with length 0 should copy to end of file
	nfsd: don't ignore high bits of copy count
	nfsd: report client confirmation status in "info" file
	SUNRPC: Export svc_xprt_received()
	UAPI: nfsfh.h: Replace one-element array with flexible-array member
	NFSD: Use DEFINE_SPINLOCK() for spinlock
	fsnotify: allow fsnotify_{peek,remove}_first_event with empty queue
	Revert "fanotify: limit number of event merge attempts"
	fanotify: reduce event objectid to 29-bit hash
	fanotify: mix event info and pid into merge key hash
	fsnotify: use hash table for faster events merge
	fanotify: limit number of event merge attempts
	fanotify: configurable limits via sysfs
	fanotify: support limited functionality for unprivileged users
	fanotify_user: use upper_32_bits() to verify mask
	nfsd: remove unused function
	nfsd: removed unused argument in nfsd_startup_generic()
	nfsd: hash nfs4_files by inode number
	nfsd: track filehandle aliasing in nfs4_files
	nfsd: reshuffle some code
	nfsd: grant read delegations to clients holding writes
	nfsd: Fix fall-through warnings for Clang
	NFSv4.2: Remove ifdef CONFIG_NFSD from NFSv4.2 client SSC code.
	NFS: fix nfs_fetch_iversion()
	fanotify: fix permission model of unprivileged group
	NFSD: Add an RPC authflavor tracepoint display helper
	NFSD: Add nfsd_clid_cred_mismatch tracepoint
	NFSD: Add nfsd_clid_verf_mismatch tracepoint
	NFSD: Remove trace_nfsd_clid_inuse_err
	NFSD: Add nfsd_clid_confirmed tracepoint
	NFSD: Add nfsd_clid_reclaim_complete tracepoint
	NFSD: Add nfsd_clid_destroyed tracepoint
	NFSD: Add a couple more nfsd_clid_expired call sites
	NFSD: Add tracepoints for SETCLIENTID edge cases
	NFSD: Add tracepoints for EXCHANGEID edge cases
	NFSD: Constify @fh argument of knfsd_fh_hash()
	NFSD: Capture every CB state transition
	NFSD: Drop TRACE_DEFINE_ENUM for NFSD4_CB_<state> macros
	NFSD: Add cb_lost tracepoint
	NFSD: Adjust cb_shutdown tracepoint
	NFSD: Enhance the nfsd_cb_setup tracepoint
	NFSD: Add an nfsd_cb_lm_notify tracepoint
	NFSD: Add an nfsd_cb_offload tracepoint
	NFSD: Replace the nfsd_deleg_break tracepoint
	NFSD: Add an nfsd_cb_probe tracepoint
	NFSD: Remove the nfsd_cb_work and nfsd_cb_done tracepoints
	NFSD: Update nfsd_cb_args tracepoint
	nfsd: Prevent truncation of an unlinked inode from blocking access to its directory
	nfsd: move some commit_metadata()s outside the inode lock
	NFSD add vfs_fsync after async copy is done
	NFSD: delay unmount source's export after inter-server copy completed.
	nfsd: move fsnotify on client creation outside spinlock
	nfsd4: Expose the callback address and state of each NFS4 client
	nfsd: fix kernel test robot warning in SSC code
	NFSD: Fix error return code in nfsd4_interssc_connect()
	nfsd: rpc_peeraddr2str needs rcu lock
	lockd: Remove stale comments
	lockd: Create a simplified .vs_dispatch method for NLM requests
	lockd: Common NLM XDR helpers
	lockd: Update the NLMv1 void argument decoder to use struct xdr_stream
	lockd: Update the NLMv1 TEST arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 LOCK arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 CANCEL arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 UNLOCK arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 nlm_res arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 SM_NOTIFY arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 SHARE arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 FREE_ALL arguments decoder to use struct xdr_stream
	lockd: Update the NLMv1 void results encoder to use struct xdr_stream
	lockd: Update the NLMv1 TEST results encoder to use struct xdr_stream
	lockd: Update the NLMv1 nlm_res results encoder to use struct xdr_stream
	lockd: Update the NLMv1 SHARE results encoder to use struct xdr_stream
	lockd: Update the NLMv4 void arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 TEST arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 LOCK arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 CANCEL arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 UNLOCK arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 nlm_res arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 SM_NOTIFY arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 SHARE arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 FREE_ALL arguments decoder to use struct xdr_stream
	lockd: Update the NLMv4 void results encoder to use struct xdr_stream
	lockd: Update the NLMv4 TEST results encoder to use struct xdr_stream
	lockd: Update the NLMv4 nlm_res results encoder to use struct xdr_stream
	lockd: Update the NLMv4 SHARE results encoder to use struct xdr_stream
	nfsd: remove redundant assignment to pointer 'this'
	NFSD: Prevent a possible oops in the nfs_dirent() tracepoint
	nfsd: fix NULL dereference in nfs3svc_encode_getaclres
	kernel/pid.c: remove static qualifier from pidfd_create()
	kernel/pid.c: implement additional checks upon pidfd_create() parameters
	fanotify: minor cosmetic adjustments to fid labels
	fanotify: introduce a generic info record copying helper
	fanotify: add pidfd support to the fanotify API
	fsnotify: replace igrab() with ihold() on attach connector
	fsnotify: count s_fsnotify_inode_refs for attached connectors
	fsnotify: count all objects with attached connectors
	fsnotify: optimize the case of no marks of any type
	NFSD: Clean up splice actor
	SUNRPC: Add svc_rqst_replace_page() API
	NFSD: Batch release pages during splice read
	NFSD: remove vanity comments
	sysctl: introduce new proc handler proc_dobool
	lockd: change the proc_handler for nsm_use_hostnames
	nlm: minor nlm_lookup_file argument change
	nlm: minor refactoring
	lockd: update nlm_lookup_file reexport comment
	Keep read and write fds with each nlm_file
	nfs: don't atempt blocking locks on nfs reexports
	lockd: don't attempt blocking locks on nfs reexports
	nfs: don't allow reexport reclaims
	SUNRPC: Add svc_rqst::rq_auth_stat
	SUNRPC: Set rq_auth_stat in the pg_authenticate() callout
	SUNRPC: Eliminate the RQ_AUTHERR flag
	NFS: Add a private local dispatcher for NFSv4 callback operations
	NFS: Remove unused callback void decoder
	fsnotify: fix sb_connectors leak
	NLM: Fix svcxdr_encode_owner()
	nfsd: Fix a warning for nfsd_file_close_inode
	fsnotify: pass data_type to fsnotify_name()
	fsnotify: pass dentry instead of inode data
	fsnotify: clarify contract for create event hooks
	fsnotify: Don't insert unmergeable events in hashtable
	fanotify: Fold event size calculation to its own function
	fanotify: Split fsid check from other fid mode checks
	inotify: Don't force FS_IN_IGNORED
	fsnotify: Add helper to detect overflow_event
	fsnotify: Add wrapper around fsnotify_add_event
	fsnotify: Retrieve super block from the data field
	fsnotify: Protect fsnotify_handle_inode_event from no-inode events
	fsnotify: Pass group argument to free_event
	fanotify: Support null inode event in fanotify_dfid_inode
	fanotify: Allow file handle encoding for unhashed events
	fanotify: Encode empty file handle when no inode is provided
	fanotify: Require fid_mode for any non-fd event
	fsnotify: Support FS_ERROR event type
	fanotify: Reserve UAPI bits for FAN_FS_ERROR
	fanotify: Pre-allocate pool of error events
	fanotify: Support enqueueing of error events
	fanotify: Support merging of error events
	fanotify: Wrap object_fh inline space in a creator macro
	fanotify: Add helpers to decide whether to report FID/DFID
	fanotify: WARN_ON against too large file handles
	fanotify: Report fid info for file related file system errors
	fanotify: Emit generic error info for error event
	fanotify: Allow users to request FAN_FS_ERROR events
	SUNRPC: Trace calls to .rpc_call_done
	NFSD: Optimize DRC bucket pruning
	NFSD: move filehandle format declarations out of "uapi".
	NFSD: drop support for ancient filehandles
	NFSD: simplify struct nfsfh
	NFSD: Initialize pointer ni with NULL and not plain integer 0
	NFSD: Have legacy NFSD WRITE decoders use xdr_stream_subsegment()
	SUNRPC: Replace the "__be32 *p" parameter to .pc_decode
	SUNRPC: Change return value type of .pc_decode
	NFSD: Save location of NFSv4 COMPOUND status
	SUNRPC: Replace the "__be32 *p" parameter to .pc_encode
	SUNRPC: Change return value type of .pc_encode
	nfsd: update create verifier comment
	NFSD:fix boolreturn.cocci warning
	nfsd4: remove obselete comment
	NFSD: Fix exposure in nfsd4_decode_bitmap()
	NFSD: Fix READDIR buffer overflow
	fsnotify: clarify object type argument
	fsnotify: separate mark iterator type from object type enum
	fanotify: introduce group flag FAN_REPORT_TARGET_FID
	fsnotify: generate FS_RENAME event with rich information
	fanotify: use macros to get the offset to fanotify_info buffer
	fanotify: use helpers to parcel fanotify_info buffer
	fanotify: support secondary dir fh and name in fanotify_info
	fanotify: record old and new parent and name in FAN_RENAME event
	fanotify: record either old name new name or both for FAN_RENAME
	fanotify: report old and/or new parent+name in FAN_RENAME event
	fanotify: wire up FAN_RENAME event
	exit: Implement kthread_exit
	exit: Rename module_put_and_exit to module_put_and_kthread_exit
	NFSD: Fix sparse warning
	NFSD: handle errors better in write_ports_addfd()
	SUNRPC: change svc_get() to return the svc.
	SUNRPC/NFSD: clean up get/put functions.
	SUNRPC: stop using ->sv_nrthreads as a refcount
	nfsd: make nfsd_stats.th_cnt atomic_t
	SUNRPC: use sv_lock to protect updates to sv_nrthreads.
	NFSD: narrow nfsd_mutex protection in nfsd thread
	NFSD: Make it possible to use svc_set_num_threads_sync
	SUNRPC: discard svo_setup and rename svc_set_num_threads_sync()
	NFSD: simplify locking for network notifier.
	lockd: introduce nlmsvc_serv
	lockd: simplify management of network status notifiers
	lockd: move lockd_start_svc() call into lockd_create_svc()
	lockd: move svc_exit_thread() into the thread
	lockd: introduce lockd_put()
	lockd: rename lockd_create_svc() to lockd_get()
	SUNRPC: move the pool_map definitions (back) into svc.c
	SUNRPC: always treat sv_nrpools==1 as "not pooled"
	lockd: use svc_set_num_threads() for thread start and stop
	NFS: switch the callback service back to non-pooled.
	NFSD: Remove be32_to_cpu() from DRC hash function
	NFSD: Fix inconsistent indenting
	NFSD: simplify per-net file cache management
	NFSD: Combine XDR error tracepoints
	nfsd: improve stateid access bitmask documentation
	NFSD: De-duplicate nfsd4_decode_bitmap4()
	nfs: block notification on fs with its own ->lock
	nfsd4: add refcount for nfsd4_blocked_lock
	NFSD: Fix zero-length NFSv3 WRITEs
	nfsd: map EBADF
	nfsd: Add errno mapping for EREMOTEIO
	nfsd: Retry once in nfsd_open on an -EOPENSTALE return
	NFSD: Clean up nfsd_vfs_write()
	NFSD: De-duplicate net_generic(SVC_NET(rqstp), nfsd_net_id)
	NFSD: De-duplicate net_generic(nf->nf_net, nfsd_net_id)
	nfsd: Add a tracepoint for errors in nfsd4_clone_file_range()
	NFSD: Write verifier might go backwards
	NFSD: Clean up the nfsd_net::nfssvc_boot field
	NFSD: Rename boot verifier functions
	NFSD: Trace boot verifier resets
	Revert "nfsd: skip some unnecessary stats in the v4 case"
	NFSD: Move fill_pre_wcc() and fill_post_wcc()
	nfsd: fix crash on COPY_NOTIFY with special stateid
	fanotify: remove variable set but not used
	lockd: fix server crash on reboot of client holding lock
	lockd: fix failure to cleanup client locks
	NFSD: Fix the behavior of READ near OFFSET_MAX
	NFSD: Fix ia_size underflow
	NFSD: Fix NFSv3 SETATTR/CREATE's handling of large file sizes
	NFSD: COMMIT operations must not return NFS?ERR_INVAL
	NFSD: Deprecate NFS_OFFSET_MAX
	nfsd: Add support for the birth time attribute
	NFSD: De-duplicate hash bucket indexing
	NFSD: Skip extra computation for RC_NOCACHE case
	NFSD: Streamline the rare "found" case
	SUNRPC: Remove the .svo_enqueue_xprt method
	SUNRPC: Merge svc_do_enqueue_xprt() into svc_enqueue_xprt()
	SUNRPC: Remove svo_shutdown method
	SUNRPC: Rename svc_create_xprt()
	SUNRPC: Rename svc_close_xprt()
	SUNRPC: Remove svc_shutdown_net()
	NFSD: Remove svc_serv_ops::svo_module
	NFSD: Move svc_serv_ops::svo_function into struct svc_serv
	NFSD: Remove CONFIG_NFSD_V3
	NFSD: Clean up _lm_ operation names
	nfsd: fix using the correct variable for sizeof()
	fsnotify: fix merge with parent's ignored mask
	fsnotify: optimize FS_MODIFY events with no ignored masks
	fsnotify: remove redundant parameter judgment
	SUNRPC: Return true/false (not 1/0) from bool functions
	nfsd: Fix a write performance regression
	nfsd: Clean up nfsd_file_put()
	fanotify: do not allow setting dirent events in mask of non-dir
	fs/lock: documentation cleanup. Replace inode->i_lock with flc_lock.
	inotify: move control flags from mask to mark flags
	fsnotify: pass flags argument to fsnotify_alloc_group()
	fsnotify: make allow_dups a property of the group
	fsnotify: create helpers for group mark_mutex lock
	inotify: use fsnotify group lock helpers
	nfsd: use fsnotify group lock helpers
	dnotify: use fsnotify group lock helpers
	fsnotify: allow adding an inode mark without pinning inode
	fanotify: create helper fanotify_mark_user_flags()
	fanotify: factor out helper fanotify_mark_update_flags()
	fanotify: implement "evictable" inode marks
	fanotify: use fsnotify group lock helpers
	fanotify: enable "evictable" inode marks
	fsnotify: introduce mark type iterator
	fsnotify: consistent behavior for parent not watching children
	fanotify: fix incorrect fmode_t casts
	NFSD: Clean up nfsd_splice_actor()
	NFSD: add courteous server support for thread with only delegation
	NFSD: add support for share reservation conflict to courteous server
	NFSD: move create/destroy of laundry_wq to init_nfsd and exit_nfsd
	fs/lock: add helper locks_owner_has_blockers to check for blockers
	fs/lock: add 2 callbacks to lock_manager_operations to resolve conflict
	NFSD: add support for lock conflict to courteous server
	NFSD: Show state of courtesy client in client info
	NFSD: Clean up nfsd3_proc_create()
	NFSD: Avoid calling fh_drop_write() twice in do_nfsd_create()
	NFSD: Refactor nfsd_create_setattr()
	NFSD: Refactor NFSv3 CREATE
	NFSD: Refactor NFSv4 OPEN(CREATE)
	NFSD: Remove do_nfsd_create()
	NFSD: Clean up nfsd_open_verified()
	NFSD: Instantiate a struct file when creating a regular NFSv4 file
	NFSD: Remove dprintk call sites from tail of nfsd4_open()
	NFSD: Fix whitespace
	NFSD: Move documenting comment for nfsd4_process_open2()
	NFSD: Trace filecache opens
	NFSD: Clean up the show_nf_flags() macro
	SUNRPC: Use RMW bitops in single-threaded hot paths
	nfsd: Unregister the cld notifier when laundry_wq create failed
	nfsd: Fix null-ptr-deref in nfsd_fill_super()
	nfsd: destroy percpu stats counters after reply cache shutdown
	NFSD: Modernize nfsd4_release_lockowner()
	NFSD: Add documenting comment for nfsd4_release_lockowner()
	NFSD: nfsd_file_put() can sleep
	NFSD: Fix potential use-after-free in nfsd_file_put()
	SUNRPC: Optimize xdr_reserve_space()
	fanotify: refine the validation checks on non-dir inode mask
	NFS: restore module put when manager exits.
	NFSD: Decode NFSv4 birth time attribute
	lockd: set fl_owner when unlocking files
	lockd: fix nlm_close_files
	fs: inotify: Fix typo in inotify comment
	fanotify: prepare for setting event flags in ignore mask
	fanotify: cleanups for fanotify_mark() input validations
	fanotify: introduce FAN_MARK_IGNORE
	fsnotify: Fix comment typo
	nfsd: eliminate the NFSD_FILE_BREAK_* flags
	SUNRPC: Fix xdr_encode_bool()
	NLM: Defend against file_lock changes after vfs_test_lock()
	NFSD: Fix space and spelling mistake
	nfsd: remove redundant assignment to variable len
	NFSD: Demote a WARN to a pr_warn()
	NFSD: Report filecache LRU size
	NFSD: Report count of calls to nfsd_file_acquire()
	NFSD: Report count of freed filecache items
	NFSD: Report average age of filecache items
	NFSD: Add nfsd_file_lru_dispose_list() helper
	NFSD: Refactor nfsd_file_gc()
	NFSD: Refactor nfsd_file_lru_scan()
	NFSD: Report the number of items evicted by the LRU walk
	NFSD: Record number of flush calls
	NFSD: Zero counters when the filecache is re-initialized
	NFSD: Hook up the filecache stat file
	NFSD: WARN when freeing an item still linked via nf_lru
	NFSD: Trace filecache LRU activity
	NFSD: Leave open files out of the filecache LRU
	NFSD: Fix the filecache LRU shrinker
	NFSD: Never call nfsd_file_gc() in foreground paths
	NFSD: No longer record nf_hashval in the trace log
	NFSD: Remove lockdep assertion from unhash_and_release_locked()
	NFSD: nfsd_file_unhash can compute hashval from nf->nf_inode
	NFSD: Refactor __nfsd_file_close_inode()
	NFSD: nfsd_file_hash_remove can compute hashval
	NFSD: Remove nfsd_file::nf_hashval
	NFSD: Replace the "init once" mechanism
	NFSD: Set up an rhashtable for the filecache
	NFSD: Convert the filecache to use rhashtable
	NFSD: Clean up unused code after rhashtable conversion
	NFSD: Separate tracepoints for acquire and create
	NFSD: Move nfsd_file_trace_alloc() tracepoint
	NFSD: NFSv4 CLOSE should release an nfsd_file immediately
	NFSD: Ensure nf_inode is never dereferenced
	NFSD: refactoring v4 specific code to a helper in nfs4state.c
	NFSD: keep track of the number of v4 clients in the system
	NFSD: limit the number of v4 clients to 1024 per 1GB of system memory
	nfsd: silence extraneous printk on nfsd.ko insertion
	NFSD: Optimize nfsd4_encode_operation()
	NFSD: Optimize nfsd4_encode_fattr()
	NFSD: Clean up SPLICE_OK in nfsd4_encode_read()
	NFSD: Add an nfsd4_read::rd_eof field
	NFSD: Optimize nfsd4_encode_readv()
	NFSD: Simplify starting_len
	NFSD: Use xdr_pad_size()
	NFSD: Clean up nfsd4_encode_readlink()
	NFSD: Fix strncpy() fortify warning
	NFSD: nfserrno(-ENOMEM) is nfserr_jukebox
	NFSD: Shrink size of struct nfsd4_copy_notify
	NFSD: Shrink size of struct nfsd4_copy
	NFSD: Reorder the fields in struct nfsd4_op
	NFSD: Make nfs4_put_copy() static
	NFSD: Replace boolean fields in struct nfsd4_copy
	NFSD: Refactor nfsd4_cleanup_inter_ssc() (1/2)
	NFSD: Refactor nfsd4_cleanup_inter_ssc() (2/2)
	NFSD: Refactor nfsd4_do_copy()
	NFSD: Remove kmalloc from nfsd4_do_async_copy()
	NFSD: Add nfsd4_send_cb_offload()
	NFSD: Move copy offload callback arguments into a separate structure
	NFSD: drop fh argument from alloc_init_deleg
	NFSD: verify the opened dentry after setting a delegation
	NFSD: introduce struct nfsd_attrs
	NFSD: set attributes when creating symlinks
	NFSD: add security label to struct nfsd_attrs
	NFSD: add posix ACLs to struct nfsd_attrs
	NFSD: change nfsd_create()/nfsd_symlink() to unlock directory before returning.
	NFSD: always drop directory lock in nfsd_unlink()
	NFSD: only call fh_unlock() once in nfsd_link()
	NFSD: reduce locking in nfsd_lookup()
	NFSD: use explicit lock/unlock for directory ops
	NFSD: use (un)lock_inode instead of fh_(un)lock for file operations
	NFSD: discard fh_locked flag and fh_lock/fh_unlock
	lockd: detect and reject lock arguments that overflow
	NFSD: fix regression with setting ACLs.
	nfsd_splice_actor(): handle compound pages
	NFSD: move from strlcpy with unused retval to strscpy
	lockd: move from strlcpy with unused retval to strscpy
	NFSD enforce filehandle check for source file in COPY
	NFSD: remove redundant variable status
	nfsd: Avoid some useless tests
	nfsd: Propagate some error code returned by memdup_user()
	NFSD: Increase NFSD_MAX_OPS_PER_COMPOUND
	NFSD: Protect against send buffer overflow in NFSv2 READDIR
	NFSD: Protect against send buffer overflow in NFSv3 READDIR
	NFSD: Protect against send buffer overflow in NFSv2 READ
	NFSD: Protect against send buffer overflow in NFSv3 READ
	NFSD: drop fname and flen args from nfsd_create_locked()
	NFSD: Fix handling of oversized NFSv4 COMPOUND requests
	nfsd: clean up mounted_on_fileid handling
	nfsd: remove nfsd4_prepare_cb_recall() declaration
	NFSD: Add tracepoints to report NFSv4 callback completions
	NFSD: Add a mechanism to wait for a DELEGRETURN
	NFSD: Refactor nfsd_setattr()
	NFSD: Make nfsd4_setattr() wait before returning NFS4ERR_DELAY
	NFSD: Make nfsd4_rename() wait before returning NFS4ERR_DELAY
	NFSD: Make nfsd4_remove() wait before returning NFS4ERR_DELAY
	NFSD: keep track of the number of courtesy clients in the system
	NFSD: add shrinker to reap courtesy clients on low memory condition
	SUNRPC: Parametrize how much of argsize should be zeroed
	NFSD: Reduce amount of struct nfsd4_compoundargs that needs clearing
	NFSD: Refactor common code out of dirlist helpers
	NFSD: Use xdr_inline_decode() to decode NFSv3 symlinks
	NFSD: Clean up WRITE arg decoders
	NFSD: Clean up nfs4svc_encode_compoundres()
	NFSD: Remove "inline" directives on op_rsize_bop helpers
	NFSD: Remove unused nfsd4_compoundargs::cachetype field
	NFSD: Pack struct nfsd4_compoundres
	nfsd: use DEFINE_PROC_SHOW_ATTRIBUTE to define nfsd_proc_ops
	nfsd: use DEFINE_SHOW_ATTRIBUTE to define export_features_fops and supported_enctypes_fops
	nfsd: use DEFINE_SHOW_ATTRIBUTE to define client_info_fops
	nfsd: use DEFINE_SHOW_ATTRIBUTE to define nfsd_reply_cache_stats_fops
	nfsd: use DEFINE_SHOW_ATTRIBUTE to define nfsd_file_cache_stats_fops
	NFSD: Rename the fields in copy_stateid_t
	NFSD: Cap rsize_bop result based on send buffer size
	nfsd: only fill out return pointer on success in nfsd4_lookup_stateid
	nfsd: fix comments about spinlock handling with delegations
	nfsd: make nfsd4_run_cb a bool return function
	nfsd: extra checks when freeing delegation stateids
	fs/notify: constify path
	fsnotify: remove unused declaration
	fanotify: Remove obsoleted fanotify_event_has_path()
	nfsd: fix nfsd_file_unhash_and_dispose
	nfsd: rework hashtable handling in nfsd_do_file_acquire
	NFSD: unregister shrinker when nfsd_init_net() fails
	nfsd: fix net-namespace logic in __nfsd_file_cache_purge
	nfsd: fix use-after-free in nfsd_file_do_acquire tracepoint
	nfsd: put the export reference in nfsd4_verify_deleg_dentry
	NFSD: Fix reads with a non-zero offset that don't end on a page boundary
	filelock: add a new locks_inode_context accessor function
	lockd: use locks_inode_context helper
	nfsd: use locks_inode_context helper
	NFSD: Simplify READ_PLUS
	NFSD: Remove redundant assignment to variable host_err
	NFSD: Finish converting the NFSv2 GETACL result encoder
	NFSD: Finish converting the NFSv3 GETACL result encoder
	nfsd: ignore requests to disable unsupported versions
	nfsd: move nfserrno() to vfs.c
	nfsd: allow disabling NFSv2 at compile time
	exportfs: use pr_debug for unreachable debug statements
	NFSD: Pass the target nfsd_file to nfsd_commit()
	NFSD: Revert "NFSD: NFSv4 CLOSE should release an nfsd_file immediately"
	NFSD: Add an NFSD_FILE_GC flag to enable nfsd_file garbage collection
	NFSD: Flesh out a documenting comment for filecache.c
	NFSD: Clean up nfs4_preprocess_stateid_op() call sites
	NFSD: Trace stateids returned via DELEGRETURN
	NFSD: Trace delegation revocations
	NFSD: Use const pointers as parameters to fh_ helpers
	NFSD: Update file_hashtbl() helpers
	NFSD: Clean up nfsd4_init_file()
	NFSD: Add a nfsd4_file_hash_remove() helper
	NFSD: Clean up find_or_add_file()
	NFSD: Refactor find_file()
	NFSD: Use rhashtable for managing nfs4_file objects
	NFSD: Fix licensing header in filecache.c
	nfsd: remove the pages_flushed statistic from filecache
	nfsd: reorganize filecache.c
	nfsd: fix up the filecache laundrette scheduling
	NFSD: Add an nfsd_file_fsync tracepoint
	lockd: set other missing fields when unlocking files
	nfsd: return error if nfs4_setacl fails
	NFSD: Use struct_size() helper in alloc_session()
	lockd: set missing fl_flags field when retrieving args
	lockd: ensure we use the correct file descriptor when unlocking
	lockd: fix file selection in nlmsvc_cancel_blocked
	NFSD: pass range end to vfs_fsync_range() instead of count
	NFSD: refactoring courtesy_client_reaper to a generic low memory shrinker
	NFSD: add support for sending CB_RECALL_ANY
	NFSD: add delegation reaper to react to low memory condition
	NFSD: Use only RQ_DROPME to signal the need to drop a reply
	NFSD: Avoid clashing function prototypes
	nfsd: rework refcounting in filecache
	nfsd: fix handling of cached open files in nfsd4_open codepath
	Revert "SUNRPC: Use RMW bitops in single-threaded hot paths"
	NFSD: Use set_bit(RQ_DROPME)
	NFSD: fix use-after-free in nfsd4_ssc_setup_dul()
	NFSD: register/unregister of nfsd-client shrinker at nfsd startup/shutdown time
	NFSD: replace delayed_work with work_struct for nfsd_client_shrinker
	nfsd: don't free files unconditionally in __nfsd_file_cache_purge
	nfsd: don't destroy global nfs4_file table in per-net shutdown
	NFSD: enhance inter-server copy cleanup
	nfsd: allow nfsd_file_get to sanely handle a NULL pointer
	nfsd: clean up potential nfsd_file refcount leaks in COPY codepath
	NFSD: fix leaked reference count of nfsd4_ssc_umount_item
	nfsd: don't hand out delegation on setuid files being opened for write
	NFSD: fix problems with cleanup on errors in nfsd4_copy
	nfsd: fix courtesy client with deny mode handling in nfs4_upgrade_open
	nfsd: don't fsync nfsd_files on last close
	NFSD: copy the whole verifier in nfsd_copy_write_verifier
	NFSD: Protect against filesystem freezing
	lockd: set file_lock start and end when decoding nlm4 testargs
	nfsd: don't replace page in rq_pages if it's a continuation of last page
	NFSD: Avoid calling OPDESC() with ops->opnum == OP_ILLEGAL
	nfsd: call op_release, even when op_func returns an error
	nfsd: don't open-code clear_and_wake_up_bit
	nfsd: NFSD_FILE_KEY_INODE only needs to find GC'ed entries
	nfsd: simplify test_bit return in NFSD_FILE_KEY_FULL comparator
	nfsd: don't kill nfsd_files because of lease break error
	nfsd: add some comments to nfsd_file_do_acquire
	nfsd: don't take/put an extra reference when putting a file
	nfsd: update comment over __nfsd_file_cache_purge
	nfsd: allow reaping files still under writeback
	NFSD: Convert filecache to rhltable
	nfsd: simplify the delayed disposal list code
	NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
	nfsd: make a copy of struct iattr before calling notify_change
	nfsd: fix double fget() bug in __write_ports_addfd()
	lockd: drop inappropriate svc_get() from locked_get()
	NFSD: Add an nfsd4_encode_nfstime4() helper
	nfsd: Fix creation time serialization order
	nfsd: don't allow nfsd threads to be signalled.
	nfsd: Simplify code around svc_exit_thread() call in nfsd()
	nfsd: separate nfsd_last_thread() from nfsd_put()
	Documentation: Add missing documentation for EXPORT_OP flags
	NFSD: fix possible oops when nfsd/pool_stats is closed.
	nfsd: call nfsd_last_thread() before final nfsd_put()
	nfsd: drop the nfsd_put helper
	nfsd: fix RELEASE_LOCKOWNER
	nfsd: don't take fi_lock in nfsd_break_deleg_cb()
	nfsd: don't call locks_release_private() twice concurrently
	nfsd: Fix a regression in nfsd_setattr()
	Linux 5.10.220

Change-Id: I589ec5e63d1f985ab69f9755b9a87330627d44c5
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
2024-07-16 15:58:54 +00:00


// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * NET4:	Implementation of BSD Unix domain sockets.
 *
 * Authors:	Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *
 * Fixes:
 *	Linus Torvalds		: Assorted bug cures.
 *	Niibe Yutaka		: async I/O support.
 *	Carsten Paeth		: PF_UNIX check, address fixes.
 *	Alan Cox		: Limit size of allocated blocks.
 *	Alan Cox		: Fixed the stupid socketpair bug.
 *	Alan Cox		: BSD compatibility fine tuning.
 *	Alan Cox		: Fixed a bug in connect when interrupted.
 *	Alan Cox		: Sorted out a proper draft version of
 *				  file descriptor passing hacked up from
 *				  Mike Shaver's work.
 *	Marty Leisner		: Fixes to fd passing
 *	Nick Nevin		: recvmsg bugfix.
 *	Alan Cox		: Started proper garbage collector
 *	Heiko EiBfeldt		: Missing verify_area check
 *	Alan Cox		: Started POSIXisms
 *	Andreas Schwab		: Replace inode by dentry for proper
 *				  reference counting
 *	Kirk Petersen		: Made this a module
 *	Christoph Rohland	: Elegant non-blocking accept/connect algorithm.
 *				  Lots of bug fixes.
 *	Alexey Kuznetsov	: Repaired (I hope) bugs introduced
 *				  by the above two patches.
 *	Andrea Arcangeli	: If possible we block in connect(2)
 *				  if the max backlog of the listen socket
 *				  has been reached. This won't break
 *				  old apps and it will avoid huge amount
 *				  of socks hashed (this for unix_gc()
 *				  performance reasons).
 *				  Security fix that limits the max
 *				  number of socks to 2*max_files and
 *				  the number of skb queueable in the
 *				  dgram receiver.
 *	Artur Skawina		: Hash function optimizations
 *	Alexey Kuznetsov	: Full scale SMP. Lot of bugs are introduced 8)
 *	Malcolm Beattie		: Set peercred for socketpair
 *	Michal Ostrowski	: Module initialization cleanup.
 *	Arnaldo C. Melo		: Remove MOD_{INC,DEC}_USE_COUNT,
 *				  the core infrastructure is doing that
 *				  for all net proto families now (2.5.69+)
 *
 * Known differences from reference BSD that was tested:
 *
 *	[TO FIX]
 *	ECONNREFUSED is not returned from one end of a connected() socket to the
 *		other the moment one end closes.
 *	fstat() doesn't return st_dev=0, and gives the blksize as high water mark
 *		and a fake inode identifier (nor the BSD first socket fstat twice bug).
 *	[NOT TO FIX]
 *	accept() returns a path name even if the connecting socket has closed
 *		in the meantime (BSD loses the path and gives up).
 *	accept() returns 0 length path for an unbound connector. BSD returns 16
 *		and a null first byte in the path (but not for gethost/peername - BSD bug ??)
 *	socketpair(...SOCK_RAW..) doesn't panic the kernel.
 *	BSD af_unix apparently has connect forgetting to block properly.
 *		(need to check this with the POSIX spec in detail)
 *
 * Differences from 2.0.0-11-... (ANK)
 *	Bug fixes and improvements.
 *		- client shutdown killed server socket.
 *		- removed all useless cli/sti pairs.
 *
 *	Semantic changes/extensions.
 *		- generic control message passing.
 *		- SCM_CREDENTIALS control message.
 *		- "Abstract" (not FS based) socket bindings.
 *		  Abstract names are sequences of bytes (not zero terminated)
 *		  started by 0, so that this name space does not intersect
 *		  with BSD names.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <linux/fcntl.h>
#include <linux/termios.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/af_unix.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/scm.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/rtnetlink.h>
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include "scm.h"

struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
static atomic_long_t unix_nr_socks;

static struct hlist_head *unix_sockets_unbound(void *addr)
{
	unsigned long hash = (unsigned long)addr;

	hash ^= hash >> 16;
	hash ^= hash >> 8;
	hash %= UNIX_HASH_SIZE;
	return &unix_socket_table[UNIX_HASH_SIZE + hash];
}

#define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)

#ifdef CONFIG_SECURITY_NETWORK
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
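
/*
 * Editor's note: the secid carried by these helpers is what a receiver
 * can observe as SCM_SECURITY ancillary data when SO_PASSSEC is enabled.
 * A minimal, illustrative userspace sketch (assumed usage, error handling
 * omitted; not part of this file's kernel code):
 *
 *	int on = 1;
 *	setsockopt(fd, SOL_SOCKET, SO_PASSSEC, &on, sizeof(on));
 *	// A subsequent recvmsg() yields a cmsg with cmsg_level == SOL_SOCKET
 *	// and cmsg_type == SCM_SECURITY carrying the sender's security label.
 */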

/*
 * SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */

static inline unsigned int unix_hash_fold(__wsum n)
{
	unsigned int hash = (__force unsigned int)csum_fold(n);

	hash ^= hash >> 8;
	return hash & (UNIX_HASH_SIZE - 1);
}

#define unix_peer(sk) (unix_sk(sk)->peer)

static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}

static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
}

static inline int unix_recvq_full(const struct sock *sk)
{
	return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
}

static inline int unix_recvq_full_lockless(const struct sock *sk)
{
	return skb_queue_len_lockless(&sk->sk_receive_queue) >
		READ_ONCE(sk->sk_max_ack_backlog);
}

struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
EXPORT_SYMBOL_GPL(unix_peer_get);

static inline void unix_release_addr(struct unix_address *addr)
{
	if (refcount_dec_and_test(&addr->refcnt))
		kfree(addr);
}

/*
 *	Check unix socket name:
 *		- should not be zero length.
 *		- if it starts with a non-zero byte, it must be NUL-terminated
 *		  (a filesystem object)
 *		- if it starts with a zero byte, it is an abstract name.
 */

static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist. However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		len = strlen(sunaddr->sun_path) + 1 + sizeof(short);
		return len;
	}

	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
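
/*
 * Editor's note: illustrative sketch of the two address forms that
 * unix_mkname() accepts, as seen from userspace (error handling omitted;
 * not part of this file's kernel code):
 *
 *	struct sockaddr_un fs = { .sun_family = AF_UNIX };
 *	strcpy(fs.sun_path, "/tmp/demo.sock");	// filesystem name, NUL-terminated
 *	bind(fd, (struct sockaddr *)&fs, sizeof(fs));
 *
 *	struct sockaddr_un ab = { .sun_family = AF_UNIX };
 *	ab.sun_path[0] = '\0';			// leading NUL => abstract name
 *	memcpy(ab.sun_path + 1, "demo", 4);
 *	bind(fd, (struct sockaddr *)&ab,
 *	     offsetof(struct sockaddr_un, sun_path) + 1 + 4);
 */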

static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}

static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}

static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}

static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}

static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}

static inline struct sock *unix_find_socket_byname(struct net *net,
						   struct sockaddr_un *sunname,
						   int len, int type,
						   unsigned int hash)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	s = __unix_find_socket_byname(net, sunname, len, type, hash);
	if (s)
		sock_hold(s);
	spin_unlock(&unix_table_lock);
	return s;
}

static struct sock *unix_find_socket_byinode(struct inode *i)
{
	struct sock *s;

	spin_lock(&unix_table_lock);
	sk_for_each(s,
		    &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
		struct dentry *dentry = unix_sk(s)->path.dentry;

		if (dentry && d_backing_inode(dentry) == i) {
			sock_hold(s);
			goto found;
		}
	}
	s = NULL;
found:
	spin_unlock(&unix_table_lock);
	return s;
}

/* Support code for asymmetrically connected dgram sockets
 *
 * If a datagram socket is connected to a socket not itself connected
 * to the first socket (eg, /dev/log), clients may only enqueue more
 * messages if the present receive queue of the server socket is not
 * "too large". This means there's a second writeability condition
 * poll and sendmsg need to test. The dgram recv code will do a wake
 * up on the peer_wait wait queue of a socket upon reception of a
 * datagram which needs to be propagated to sleeping would-be writers
 * since these might not have sent anything so far. This can't be
 * accomplished via poll_wait because the lifetime of the server
 * socket might be less than that of its clients if these break their
 * association with it or if the server socket is closed while clients
 * are still connected to it and there's no way to inform "a polling
 * implementation" that it should let go of a certain wait queue
 *
 * In order to propagate a wake up, a wait_queue_entry_t of the client
 * socket is enqueued on the peer_wait queue of the server socket
 * whose wake function does a wake_up on the ordinary client socket
 * wait queue. This connection is established whenever a write (or
 * poll for write) hits the flow control condition and is broken when
 * the association to the server socket is dissolved or after a wake
 * up was relayed.
 */
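
/*
 * Editor's note: illustrative userspace view of the second writeability
 * condition described above (a sketch, error handling omitted; not part
 * of this file's kernel code):
 *
 *	// Client connected to a busy datagram server (e.g. a logger):
 *	connect(fd, (struct sockaddr *)&srv, srv_len);
 *	struct pollfd p = { .fd = fd, .events = POLLOUT };
 *	poll(&p, 1, -1);	// blocks while the *peer's* receive queue is
 *				// full; the relay in
 *				// unix_dgram_peer_wake_relay() below wakes
 *				// this poll when the server drains a datagram.
 *	send(fd, buf, len, 0);
 */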

static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}

static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
{
	struct unix_sock *u, *u_other;
	int rc;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	rc = 0;
	spin_lock(&u_other->peer_wait.lock);

	if (!u->peer_wake.private) {
		u->peer_wake.private = other;
		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);

		rc = 1;
	}

	spin_unlock(&u_other->peer_wait.lock);
	return rc;
}

static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}

static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
						   struct sock *other)
{
	unix_dgram_peer_wake_disconnect(sk, other);
	wake_up_interruptible_poll(sk_sleep(sk),
				   EPOLLOUT |
				   EPOLLWRNORM |
				   EPOLLWRBAND);
}

/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and it's full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}

static int unix_writable(const struct sock *sk)
{
	return sk->sk_state != TCP_LISTEN &&
	       (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
}

static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}

/* When a dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from the previous peer. First, this allows flow
 * control to be based only on wmem_alloc; second, an sk connected to a peer
 * may receive messages only from that peer. (An illustrative userspace
 * sketch follows this function.)
 */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is
		 * disconnected, we signal error. Messages are lost.
		 * Do not do this when the peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
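
/*
 * Editor's note: observable effect of the purge above, sketched from
 * userspace (illustrative only; not part of this file's kernel code):
 *
 *	// a and b are SOCK_DGRAM sockets connected to each other, and
 *	// b still has unread datagrams queued from a.
 *	connect(b, (struct sockaddr *)&third, third_len);
 *	// b's stale receive queue is purged, and a (whose peer is still b)
 *	// has ECONNRESET raised, so its next send/recv call fails.
 */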

static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}

static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
	path = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	skpair = unix_peer(sk);
	unix_peer(sk) = NULL;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (READ_ONCE(unix_tot_inflight))
		unix_gc();	/* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
const struct cred *old_cred;
struct pid *old_pid;
spin_lock(&sk->sk_peer_lock);
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
spin_unlock(&sk->sk_peer_lock);
put_pid(old_pid);
put_cred(old_cred);
}
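/* Copy peer credentials from @peersk to @sk. Both sk_peer_locks are
 * taken, always lowest address first, so two sockets cross-copying
 * credentials cannot ABBA-deadlock on each other's locks.
 */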
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
const struct cred *old_cred;
struct pid *old_pid;
if (sk < peersk) {
spin_lock(&sk->sk_peer_lock);
spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
} else {
spin_lock(&peersk->sk_peer_lock);
spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
}
old_pid = sk->sk_peer_pid;
old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
spin_unlock(&sk->sk_peer_lock);
spin_unlock(&peersk->sk_peer_lock);
put_pid(old_pid);
put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
{
int err;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out; /* Only stream/seqpacket sockets accept */
err = -EINVAL;
if (!u->addr)
goto out; /* No listens on an unbound socket */
unix_state_lock(sk);
if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
goto out_unlock;
if (backlog > sk->sk_max_ack_backlog)
wake_up_interruptible_all(&u->peer_wait);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
/* set credentials so connect can copy them */
init_peercred(sk);
err = 0;
out_unlock:
unix_state_unlock(sk);
out:
return err;
}
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
static __poll_t unix_dgram_poll(struct file *, struct socket *,
poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
#endif
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
struct pipe_inode_info *, size_t size,
unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
int);
static int unix_set_peek_off(struct sock *sk, int val)
{
struct unix_sock *u = unix_sk(sk);
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);
return 0;
}
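/* Surfaced through /proc/<pid>/fdinfo/<fd>: reports how many SCM_RIGHTS
 * file descriptors are currently queued on this socket's receive queue
 * (see scm_stat_add()/scm_stat_del() below).
 */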
#ifdef CONFIG_PROC_FS
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
struct unix_sock *u;
if (sk) {
u = unix_sk(sock->sk);
seq_printf(m, "scm_fds: %u\n",
atomic_read(&u->scm_stat.nr_fds));
}
}
#else
#define unix_show_fdinfo NULL
#endif
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
.owner = THIS_MODULE,
.release = unix_release,
.bind = unix_bind,
.connect = unix_stream_connect,
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
.poll = unix_poll,
.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = unix_compat_ioctl,
#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_stream_sendmsg,
.recvmsg = unix_stream_recvmsg,
.mmap = sock_no_mmap,
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
.set_peek_off = unix_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_dgram_ops = {
.family = PF_UNIX,
.owner = THIS_MODULE,
.release = unix_release,
.bind = unix_bind,
.connect = unix_dgram_connect,
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
.poll = unix_dgram_poll,
.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = unix_compat_ioctl,
#endif
.listen = sock_no_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_dgram_sendmsg,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_seqpacket_ops = {
.family = PF_UNIX,
.owner = THIS_MODULE,
.release = unix_release,
.bind = unix_bind,
.connect = unix_stream_connect,
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
.poll = unix_dgram_poll,
.ioctl = unix_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = unix_compat_ioctl,
#endif
.listen = unix_listen,
.shutdown = unix_shutdown,
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
static struct proto unix_proto = {
.name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
};
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
struct sock *sk = NULL;
struct unix_sock *u;
atomic_long_inc(&unix_nr_socks);
if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
goto out;
sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
if (!sk)
goto out;
sock_init_data(sock, sk);
sk->sk_allocation = GFP_KERNEL_ACCOUNT;
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
u = unix_sk(sk);
u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
atomic_long_dec(&unix_nr_socks);
else {
local_bh_disable();
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
local_bh_enable();
}
return sk;
}
static int unix_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
if (protocol && protocol != PF_UNIX)
return -EPROTONOSUPPORT;
sock->state = SS_UNCONNECTED;
switch (sock->type) {
case SOCK_STREAM:
sock->ops = &unix_stream_ops;
break;
/*
 * Believe it or not, BSD has AF_UNIX, SOCK_RAW,
 * though nothing uses it.
 */
case SOCK_RAW:
sock->type = SOCK_DGRAM;
fallthrough;
case SOCK_DGRAM:
sock->ops = &unix_dgram_ops;
break;
case SOCK_SEQPACKET:
sock->ops = &unix_seqpacket_ops;
break;
default:
return -ESOCKTNOSUPPORT;
}
return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
static int unix_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (!sk)
return 0;
unix_release_sock(sk, 0);
sock->sk = NULL;
return 0;
}
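/* Autobind: pick a free name in the abstract namespace. The address is
 * a leading NUL (from kzalloc) followed by five hex digits taken from a
 * global counter; on collision the counter is bumped and we retry,
 * giving up with -ENOSPC after 0xFFFFF attempts. Reached from bind()
 * with only sa_family set, or implicitly from connect()/sendmsg() on
 * unbound SOCK_PASSCRED sockets.
 */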
static int unix_autobind(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
static u32 ordernum = 1;
struct unix_address *addr;
int err;
unsigned int retries = 0;
err = mutex_lock_interruptible(&u->bindlock);
if (err)
return err;
if (u->addr)
goto out;
err = -ENOMEM;
addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
if (!addr)
goto out;
addr->name->sun_family = AF_UNIX;
refcount_set(&addr->refcnt, 1);
retry:
addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
spin_lock(&unix_table_lock);
ordernum = (ordernum+1)&0xFFFFF;
if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
addr->hash)) {
spin_unlock(&unix_table_lock);
/*
* __unix_find_socket_byname() may take a long time if many names
* are already in use.
*/
cond_resched();
/* Give up if all names seem to be in use. */
if (retries++ == 0xFFFFF) {
err = -ENOSPC;
kfree(addr);
goto out;
}
goto retry;
}
addr->hash ^= sk->sk_type;
__unix_remove_socket(sk);
smp_store_release(&u->addr, addr);
__unix_insert_socket(&unix_socket_table[addr->hash], sk);
spin_unlock(&unix_table_lock);
err = 0;
out: mutex_unlock(&u->bindlock);
return err;
}
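/* Resolve a peer address to a sock. Pathname addresses (sun_path[0] !=
 * '\0') go through the filesystem and are matched by inode; abstract
 * addresses are looked up in the name hash. Returns a referenced sock,
 * or NULL with *error set.
 */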
static struct sock *unix_find_other(struct net *net,
struct sockaddr_un *sunname, int len,
int type, unsigned int hash, int *error)
{
struct sock *u;
struct path path;
int err = 0;
if (sunname->sun_path[0]) {
struct inode *inode;
err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
inode = d_backing_inode(path.dentry);
err = path_permission(&path, MAY_WRITE);
if (err)
goto put_fail;
err = -ECONNREFUSED;
if (!S_ISSOCK(inode->i_mode))
goto put_fail;
u = unix_find_socket_byinode(inode);
if (!u)
goto put_fail;
if (u->sk_type == type)
touch_atime(&path);
path_put(&path);
err = -EPROTOTYPE;
if (u->sk_type != type) {
sock_put(u);
goto fail;
}
} else {
err = -ECONNREFUSED;
u = unix_find_socket_byname(net, sunname, len, type, hash);
if (u) {
struct dentry *dentry;
dentry = unix_sk(u)->path.dentry;
if (dentry)
touch_atime(&unix_sk(u)->path);
} else
goto fail;
}
return u;
put_fail:
path_put(&path);
fail:
*error = err;
return NULL;
}
static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
{
struct dentry *dentry;
struct path path;
int err = 0;
/*
 * Get the parent directory and calculate the hash for the last
 * component.
 */
dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
return err;
/*
* All right, let's create it.
*/
err = security_path_mknod(&path, dentry, mode, 0);
if (!err) {
err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
if (!err) {
res->mnt = mntget(path.mnt);
res->dentry = dget(dentry);
}
}
done_path_create(&path, dentry);
return err;
}
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
int err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
struct path path = { };
err = -EINVAL;
if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
sunaddr->sun_family != AF_UNIX)
goto out;
if (addr_len == sizeof(short)) {
err = unix_autobind(sock);
goto out;
}
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
goto out;
addr_len = err;
if (sun_path[0]) {
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
err = unix_mknod(sun_path, mode, &path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
goto out;
}
}
err = mutex_lock_interruptible(&u->bindlock);
if (err)
goto out_put;
err = -EINVAL;
if (u->addr)
goto out_up;
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
goto out_up;
memcpy(addr->name, sunaddr, addr_len);
addr->len = addr_len;
addr->hash = hash ^ sk->sk_type;
refcount_set(&addr->refcnt, 1);
if (sun_path[0]) {
addr->hash = UNIX_HASH_SIZE;
hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
u->path = path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
err = -EADDRINUSE;
if (__unix_find_socket_byname(net, sunaddr, addr_len,
sk->sk_type, hash)) {
unix_release_addr(addr);
goto out_unlock;
}
list = &unix_socket_table[addr->hash];
}
err = 0;
__unix_remove_socket(sk);
smp_store_release(&u->addr, addr);
__unix_insert_socket(list, sk);
out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->bindlock);
out_put:
if (err)
path_put(&path);
out:
return err;
}
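/* Lock two socket states without risking an ABBA deadlock: the lock at
 * the lower address is always taken first. sk2 may be NULL (the
 * AF_UNSPEC disconnect path), in which case only sk1 is locked.
 */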
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
if (unlikely(sk1 == sk2) || !sk2) {
unix_state_lock(sk1);
return;
}
if (sk1 > sk2)
swap(sk1, sk2);
unix_state_lock(sk1);
unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
if (unlikely(sk1 == sk2) || !sk2) {
unix_state_unlock(sk1);
return;
}
unix_state_unlock(sk1);
unix_state_unlock(sk2);
}
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
struct sock *other;
unsigned int hash;
int err;
err = -EINVAL;
if (alen < offsetofend(struct sockaddr, sa_family))
goto out;
if (addr->sa_family != AF_UNSPEC) {
err = unix_mkname(sunaddr, alen, &hash);
if (err < 0)
goto out;
alen = err;
if (test_bit(SOCK_PASSCRED, &sock->flags) &&
!unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
goto out;
restart:
other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
if (!other)
goto out;
unix_state_double_lock(sk, other);
/* Apparently VFS overslept socket death. Retry. */
if (sock_flag(other, SOCK_DEAD)) {
unix_state_double_unlock(sk, other);
sock_put(other);
goto restart;
}
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
err = security_unix_may_send(sk->sk_socket, other->sk_socket);
if (err)
goto out_unlock;
} else {
/*
 * 1003.1g: breaking the connected state with AF_UNSPEC
 */
other = NULL;
unix_state_double_lock(sk, other);
}
/*
* If it was connected, reconnect.
*/
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk) = other;
unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
unix_state_double_unlock(sk, other);
if (other != old_peer)
unix_dgram_disconnected(sk, old_peer);
sock_put(old_peer);
} else {
unix_peer(sk) = other;
unix_state_double_unlock(sk, other);
}
return 0;
out_unlock:
unix_state_double_unlock(sk, other);
sock_put(other);
out:
return err;
}
static long unix_wait_for_peer(struct sock *other, long timeo)
__releases(&unix_sk(other)->lock)
{
struct unix_sock *u = unix_sk(other);
int sched;
DEFINE_WAIT(wait);
prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
sched = !sock_flag(other, SOCK_DEAD) &&
!(other->sk_shutdown & RCV_SHUTDOWN) &&
unix_recvq_full_lockless(other);
unix_state_unlock(other);
if (sched)
timeo = schedule_timeout(timeo);
finish_wait(&u->peer_wait, &wait);
return timeo;
}
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk), *newu, *otheru;
struct sock *newsk = NULL;
struct sock *other = NULL;
struct sk_buff *skb = NULL;
unsigned int hash;
int st;
int err;
long timeo;
err = unix_mkname(sunaddr, addr_len, &hash);
if (err < 0)
goto out;
addr_len = err;
if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
(err = unix_autobind(sock)) != 0)
goto out;
timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
/* First of all, allocate resources.
   If we do it after the state is locked,
   we will have to recheck everything again in any case.
 */
err = -ENOMEM;
/* create new sock for complete connection */
newsk = unix_create1(sock_net(sk), NULL, 0);
if (newsk == NULL)
goto out;
/* Allocate skb for sending to listening sock */
skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
if (skb == NULL)
goto out;
restart:
/* Find listening sock. */
other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
if (!other)
goto out;
/* Latch state of peer */
unix_state_lock(other);
/* Apparently VFS overslept socket death. Retry. */
if (sock_flag(other, SOCK_DEAD)) {
unix_state_unlock(other);
sock_put(other);
goto restart;
}
err = -ECONNREFUSED;
if (other->sk_state != TCP_LISTEN)
goto out_unlock;
if (other->sk_shutdown & RCV_SHUTDOWN)
goto out_unlock;
if (unix_recvq_full(other)) {
err = -EAGAIN;
if (!timeo)
goto out_unlock;
timeo = unix_wait_for_peer(other, timeo);
err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out;
sock_put(other);
goto restart;
}
/* Latch our state.
   This is a tricky place. We need to grab our state lock and cannot
   drop the lock on the peer. That is dangerous because a deadlock is
   possible. The connect-to-self case and simultaneous
   connection attempts are eliminated by checking the socket
   state: other is TCP_LISTEN, and if sk were TCP_LISTEN we
   would have checked that before attempting to grab the lock.
   Well, and we have to recheck the state after the socket is locked.
 */
st = sk->sk_state;
switch (st) {
case TCP_CLOSE:
/* This is ok... continue with connect */
break;
case TCP_ESTABLISHED:
/* Socket is already connected */
err = -EISCONN;
goto out_unlock;
default:
err = -EINVAL;
goto out_unlock;
}
unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
unix_state_unlock(other);
sock_put(other);
goto restart;
}
err = security_unix_stream_connect(sk, other, newsk);
if (err) {
unix_state_unlock(sk);
goto out_unlock;
}
/* The way is open! Quickly set all the necessary fields... */
sock_hold(sk);
unix_peer(newsk) = sk;
newsk->sk_state = TCP_ESTABLISHED;
newsk->sk_type = sk->sk_type;
init_peercred(newsk);
newu = unix_sk(newsk);
RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
otheru = unix_sk(other);
/* copy address information from listening to new sock
*
* The contents of *(otheru->addr) and otheru->path
* are seen fully set up here, since we have found
* otheru in hash under unix_table_lock. Insertion
* into the hash chain we'd found it in had been done
* in an earlier critical area protected by unix_table_lock,
* the same one where we'd set *(otheru->addr) contents,
* as well as otheru->path and otheru->addr itself.
*
* Using smp_store_release() here to set newu->addr
* is enough to make those stores, as well as stores
* to newu->path visible to anyone who gets newu->addr
* by smp_load_acquire(). IOW, the same guarantees
* as for unix_sock instances bound in unix_bind() or
* in unix_autobind().
*/
if (otheru->path.dentry) {
path_get(&otheru->path);
newu->path = otheru->path;
}
refcount_inc(&otheru->addr->refcnt);
smp_store_release(&newu->addr, otheru->addr);
/* Set credentials */
copy_peercred(sk, other);
sock->state = SS_CONNECTED;
sk->sk_state = TCP_ESTABLISHED;
sock_hold(newsk);
smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
unix_peer(sk) = newsk;
unix_state_unlock(sk);
/* take ten and send info to the listening sock */
spin_lock(&other->sk_receive_queue.lock);
__skb_queue_tail(&other->sk_receive_queue, skb);
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
return 0;
out_unlock:
if (other)
unix_state_unlock(other);
out:
kfree_skb(skb);
if (newsk)
unix_release_sock(newsk, 0);
if (other)
sock_put(other);
return err;
}
static int unix_socketpair(struct socket *socka, struct socket *sockb)
{
struct sock *ska = socka->sk, *skb = sockb->sk;
/* Join our sockets back to back */
sock_hold(ska);
sock_hold(skb);
unix_peer(ska) = skb;
unix_peer(skb) = ska;
init_peercred(ska);
init_peercred(skb);
if (ska->sk_type != SOCK_DGRAM) {
ska->sk_state = TCP_ESTABLISHED;
skb->sk_state = TCP_ESTABLISHED;
socka->state = SS_CONNECTED;
sockb->state = SS_CONNECTED;
}
return 0;
}
static void unix_sock_inherit_flags(const struct socket *old,
struct socket *new)
{
if (test_bit(SOCK_PASSCRED, &old->flags))
set_bit(SOCK_PASSCRED, &new->flags);
if (test_bit(SOCK_PASSSEC, &old->flags))
set_bit(SOCK_PASSSEC, &new->flags);
}
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
bool kern)
{
struct sock *sk = sock->sk;
struct sock *tsk;
struct sk_buff *skb;
int err;
err = -EOPNOTSUPP;
if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
goto out;
err = -EINVAL;
if (sk->sk_state != TCP_LISTEN)
goto out;
/* If the socket state is TCP_LISTEN it cannot change (for now...),
 * so no locks are necessary.
 */
skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
if (!skb) {
/* This means receive shutdown. */
if (err == 0)
err = -EINVAL;
goto out;
}
tsk = skb->sk;
skb_free_datagram(sk, skb);
wake_up_interruptible(&unix_sk(sk)->peer_wait);
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
unix_state_unlock(tsk);
return 0;
out:
return err;
}
static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
{
struct sock *sk = sock->sk;
struct unix_address *addr;
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
int err = 0;
if (peer) {
sk = unix_peer_get(sk);
err = -ENOTCONN;
if (!sk)
goto out;
err = 0;
} else {
sock_hold(sk);
}
addr = smp_load_acquire(&unix_sk(sk)->addr);
if (!addr) {
sunaddr->sun_family = AF_UNIX;
sunaddr->sun_path[0] = 0;
err = sizeof(short);
} else {
err = addr->len;
memcpy(sunaddr, addr->name, addr->len);
}
sock_put(sk);
out:
return err;
}
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
/*
* Garbage collection of unix sockets starts by selecting a set of
* candidate sockets which have reference only from being in flight
* (total_refs == inflight_refs). This condition is checked once during
* the candidate collection phase, and candidates are marked as such, so
* that non-candidates can later be ignored. While inflight_refs is
* protected by unix_gc_lock, total_refs (file count) is not, hence this
* is an instantaneous decision.
*
* Once a candidate, however, the socket must not be reinstalled into a
* file descriptor while the garbage collection is in progress.
*
* If the above conditions are met, then the directed graph of
* candidates (*) does not change while unix_gc_lock is held.
*
* Any operation that changes the file count through file descriptors
* (dup, close, sendmsg) does not change the graph since candidates are
* not installed in fds.
*
* Dequeuing a candidate via recvmsg would install it into an fd, but
* that takes unix_gc_lock to decrement the inflight count, so it's
* serialized with garbage collection.
*
* MSG_PEEK is special in that it does not change the inflight count,
* yet does install the socket into an fd. The following lock/unlock
* pair is to ensure serialization with garbage collection. It must be
* done between incrementing the file count and installing the file into
* an fd.
*
* If garbage collection starts after the barrier provided by the
* lock/unlock, then it will see the elevated refcount and not mark this
* as a candidate. If a garbage collection is already in progress
* before the file count was incremented, then the lock/unlock pair will
* ensure that garbage collection is finished before progressing to
* installing the fd.
*
* (*) A -> B where B is on the queue of A or B is on the queue of C
* which is on the queue of listening socket A.
*/
spin_lock(&unix_gc_lock);
spin_unlock(&unix_gc_lock);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
{
int err = 0;
UNIXCB(skb).pid = get_pid(scm->pid);
UNIXCB(skb).uid = scm->creds.uid;
UNIXCB(skb).gid = scm->creds.gid;
UNIXCB(skb).fp = NULL;
unix_get_secdata(scm, skb);
if (scm->fp && send_fds)
err = unix_attach_fds(scm, skb);
skb->destructor = unix_destruct_scm;
return err;
}
static bool unix_passcred_enabled(const struct socket *sock,
const struct sock *other)
{
return test_bit(SOCK_PASSCRED, &sock->flags) ||
!other->sk_socket ||
test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
}
/*
 * Some apps rely on write() giving SCM_CREDENTIALS.
 * We include credentials if the source or destination
 * socket asserted SOCK_PASSCRED.
 */
static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
const struct sock *other)
{
if (UNIXCB(skb).pid)
return;
if (unix_passcred_enabled(sock, other)) {
UNIXCB(skb).pid = get_pid(task_tgid(current));
current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
}
}
static int maybe_init_creds(struct scm_cookie *scm,
struct socket *socket,
const struct sock *other)
{
int err;
struct msghdr msg = { .msg_controllen = 0 };
err = scm_send(socket, &msg, scm, false);
if (err)
return err;
if (unix_passcred_enabled(socket, other)) {
scm->pid = get_pid(task_tgid(current));
current_uid_gid(&scm->creds.uid, &scm->creds.gid);
}
return err;
}
static bool unix_skb_scm_eq(struct sk_buff *skb,
struct scm_cookie *scm)
{
const struct unix_skb_parms *u = &UNIXCB(skb);
return u->pid == scm->pid &&
uid_eq(u->uid, scm->creds.uid) &&
gid_eq(u->gid, scm->creds.gid) &&
unix_secdata_eq(scm, skb);
}
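/* scm_stat.nr_fds tracks how many SCM_RIGHTS fds sit in this socket's
 * receive queue; it is the value unix_show_fdinfo() reports. Updated as
 * fd-carrying skbs are queued and dequeued.
 */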
static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
{
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
if (unlikely(fp && fp->count))
atomic_add(fp->count, &u->scm_stat.nr_fds);
}
static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
{
struct scm_fp_list *fp = UNIXCB(skb).fp;
struct unix_sock *u = unix_sk(sk);
if (unlikely(fp && fp->count))
atomic_sub(fp->count, &u->scm_stat.nr_fds);
}
/*
* Send AF_UNIX data.
*/
static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
struct unix_sock *u = unix_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
struct sock *other = NULL;
int namelen = 0; /* initialized only to silence a bogus GCC warning */
int err;
unsigned int hash;
struct sk_buff *skb;
long timeo;
struct scm_cookie scm;
int data_len = 0;
int sk_locked;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out;
if (msg->msg_namelen) {
err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
if (err < 0)
goto out;
namelen = err;
} else {
sunaddr = NULL;
err = -ENOTCONN;
other = unix_peer_get(sk);
if (!other)
goto out;
}
if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
&& (err = unix_autobind(sock)) != 0)
goto out;
err = -EMSGSIZE;
if (len > sk->sk_sndbuf - 32)
goto out;
if (len > SKB_MAX_ALLOC) {
data_len = min_t(size_t,
len - SKB_MAX_ALLOC,
MAX_SKB_FRAGS * PAGE_SIZE);
data_len = PAGE_ALIGN(data_len);
BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
}
skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
PAGE_ALLOC_COSTLY_ORDER);
if (skb == NULL)
goto out;
err = unix_scm_to_skb(&scm, skb, true);
if (err < 0)
goto out_free;
skb_put(skb, len - data_len);
skb->data_len = data_len;
skb->len = len;
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
if (err)
goto out_free;
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
restart:
if (!other) {
err = -ECONNRESET;
if (sunaddr == NULL)
goto out_free;
other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
hash, &err);
if (other == NULL)
goto out_free;
}
if (sk_filter(other, skb) < 0) {
/* Toss the packet but do not return any error to the sender */
err = len;
goto out_free;
}
sk_locked = 0;
unix_state_lock(other);
restart_locked:
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
if (unlikely(sock_flag(other, SOCK_DEAD))) {
/*
 * Check with 1003.1g - what should the
 * datagram error be here?
 */
unix_state_unlock(other);
sock_put(other);
if (!sk_locked)
unix_state_lock(sk);
err = 0;
if (unix_peer(sk) == other) {
unix_peer(sk) = NULL;
unix_dgram_peer_wake_disconnect_wakeup(sk, other);
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
sock_put(other);
err = -ECONNREFUSED;
} else {
unix_state_unlock(sk);
}
other = NULL;
if (err)
goto out_free;
goto restart;
}
err = -EPIPE;
if (other->sk_shutdown & RCV_SHUTDOWN)
goto out_unlock;
if (sk->sk_type != SOCK_SEQPACKET) {
err = security_unix_may_send(sk->sk_socket, other->sk_socket);
if (err)
goto out_unlock;
}
/* other == sk && unix_peer(other) != sk if
 * - unix_peer(sk) == NULL, destination address bound to sk
 * - unix_peer(sk) == sk at the time of the get, but disconnected before the lock
 */
if (other != sk &&
unlikely(unix_peer(other) != sk &&
unix_recvq_full_lockless(other))) {
if (timeo) {
timeo = unix_wait_for_peer(other, timeo);
err = sock_intr_errno(timeo);
if (signal_pending(current))
goto out_free;
goto restart;
}
if (!sk_locked) {
unix_state_unlock(other);
unix_state_double_lock(sk, other);
}
if (unix_peer(sk) != other ||
unix_dgram_peer_wake_me(sk, other)) {
err = -EAGAIN;
sk_locked = 1;
goto out_unlock;
}
if (!sk_locked) {
sk_locked = 1;
goto restart_locked;
}
}
if (unlikely(sk_locked))
unix_state_unlock(sk);
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
scm_destroy(&scm);
return len;
out_unlock:
if (sk_locked)
unix_state_unlock(sk);
unix_state_unlock(other);
out_free:
kfree_skb(skb);
out:
if (other)
sock_put(other);
scm_destroy(&scm);
return err;
}
/* We use paged skbs for stream sockets, and limit occupancy to 32768
 * bytes, with a minimum of a full page.
 */
#define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
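/* For example, with 4 KiB pages get_order(32768) is 3, so
 * UNIX_SKB_FRAGS_SZ works out to 4096 << 3 == 32768 bytes of page
 * fragments per skb.
 */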
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
struct sock *other = NULL;
int err, size;
struct sk_buff *skb;
int sent = 0;
struct scm_cookie scm;
bool fds_sent = false;
int data_len;
wait_for_unix_gc();
err = scm_send(sock, msg, &scm, false);
if (err < 0)
return err;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out_err;
if (msg->msg_namelen) {
err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out_err;
} else {
err = -ENOTCONN;
other = unix_peer(sk);
if (!other)
goto out_err;
}
if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto pipe_err;
while (sent < len) {
size = len - sent;
/* Keep two messages in the pipe so it schedules better */
size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
/* allow fallback to order-0 allocations */
size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
msg->msg_flags & MSG_DONTWAIT, &err,
get_order(UNIX_SKB_FRAGS_SZ));
if (!skb)
goto out_err;
/* Only send the fds in the first buffer */
err = unix_scm_to_skb(&scm, skb, !fds_sent);
if (err < 0) {
kfree_skb(skb);
goto out_err;
}
fds_sent = true;
skb_put(skb, size - data_len);
skb->data_len = data_len;
skb->len = size;
err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
if (err) {
kfree_skb(skb);
goto out_err;
}
unix_state_lock(other);
if (sock_flag(other, SOCK_DEAD) ||
(other->sk_shutdown & RCV_SHUTDOWN))
goto pipe_err_free;
maybe_add_creds(skb, sock, other);
scm_stat_add(other, skb);
skb_queue_tail(&other->sk_receive_queue, skb);
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
}
scm_destroy(&scm);
return sent;
pipe_err_free:
unix_state_unlock(other);
kfree_skb(skb);
pipe_err:
if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
err = -EPIPE;
out_err:
scm_destroy(&scm);
return sent ? : err;
}
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
int err;
bool send_sigpipe = false;
bool init_scm = true;
struct scm_cookie scm;
struct sock *other, *sk = socket->sk;
struct sk_buff *skb, *newskb = NULL, *tail = NULL;
if (flags & MSG_OOB)
return -EOPNOTSUPP;
other = unix_peer(sk);
if (!other || sk->sk_state != TCP_ESTABLISHED)
return -ENOTCONN;
if (false) {
alloc_skb:
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
&err, 0);
if (!newskb)
goto err;
}
/* We must acquire the iolock because we modify skbs already
 * present in the sk_receive_queue and mess with skb->len
 */
err = mutex_lock_interruptible(&unix_sk(other)->iolock);
if (err) {
err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
goto err;
}
if (sk->sk_shutdown & SEND_SHUTDOWN) {
err = -EPIPE;
send_sigpipe = true;
goto err_unlock;
}
unix_state_lock(other);
if (sock_flag(other, SOCK_DEAD) ||
other->sk_shutdown & RCV_SHUTDOWN) {
err = -EPIPE;
send_sigpipe = true;
goto err_state_unlock;
}
if (init_scm) {
err = maybe_init_creds(&scm, socket, other);
if (err)
goto err_state_unlock;
init_scm = false;
}
spin_lock(&other->sk_receive_queue.lock);
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
if (newskb) {
skb = newskb;
} else {
tail = skb;
goto alloc_skb;
}
} else if (newskb) {
/* This is the fast path; we don't necessarily need to
 * free the skb here, and even calling this with
 * newskb == NULL would do no harm.
 */
consume_skb(newskb);
newskb = NULL;
}
if (skb_append_pagefrags(skb, page, offset, size)) {
tail = skb;
goto alloc_skb;
}
skb->len += size;
skb->data_len += size;
skb->truesize += size;
refcount_add(size, &sk->sk_wmem_alloc);
if (newskb) {
unix_scm_to_skb(&scm, skb, false);
__skb_queue_tail(&other->sk_receive_queue, newskb);
}
spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
other->sk_data_ready(other);
scm_destroy(&scm);
return size;
err_state_unlock:
unix_state_unlock(other);
err_unlock:
mutex_unlock(&unix_sk(other)->iolock);
err:
kfree_skb(newskb);
if (send_sigpipe && !(flags & MSG_NOSIGNAL))
send_sig(SIGPIPE, current, 0);
if (!init_scm)
scm_destroy(&scm);
return err;
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
int err;
struct sock *sk = sock->sk;
err = sock_error(sk);
if (err)
return err;
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTCONN;
if (msg->msg_namelen)
msg->msg_namelen = 0;
return unix_dgram_sendmsg(sock, msg, len);
}
static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct sock *sk = sock->sk;
if (sk->sk_state != TCP_ESTABLISHED)
return -ENOTCONN;
return unix_dgram_recvmsg(sock, msg, size, flags);
}
static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
if (addr) {
msg->msg_namelen = addr->len;
memcpy(msg->msg_name, addr->name, addr->len);
}
}
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct scm_cookie scm;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
struct sk_buff *skb, *last;
long timeo;
int skip;
int err;
err = -EOPNOTSUPP;
if (flags&MSG_OOB)
goto out;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
do {
mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
&skip, &err, &last);
if (skb) {
if (!(flags & MSG_PEEK))
scm_stat_del(sk, skb);
break;
}
mutex_unlock(&u->iolock);
if (err != -EAGAIN)
break;
} while (timeo &&
!__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
&err, &timeo, last));
if (!skb) { /* implies iolock unlocked */
unix_state_lock(sk);
/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
(sk->sk_shutdown & RCV_SHUTDOWN))
err = 0;
unix_state_unlock(sk);
goto out;
}
if (wq_has_sleeper(&u->peer_wait))
wake_up_interruptible_sync_poll(&u->peer_wait,
EPOLLOUT | EPOLLWRNORM |
EPOLLWRBAND);
if (msg->msg_name)
unix_copy_addr(msg, skb->sk);
if (size > skb->len - skip)
size = skb->len - skip;
else if (size < skb->len - skip)
msg->msg_flags |= MSG_TRUNC;
err = skb_copy_datagram_msg(skb, skip, msg, size);
if (err)
goto out_free;
if (sock_flag(sk, SOCK_RCVTSTAMP))
__sock_recv_timestamp(msg, sk, skb);
memset(&scm, 0, sizeof(scm));
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(&scm, skb);
if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(&scm, skb);
sk_peek_offset_bwd(sk, skb->len);
} else {
/* It is questionable: on PEEK we could:
   - not return fds - good, but too simple 8)
   - return fds, and not return them on read (the old strategy,
     apparently wrong)
   - clone fds (I chose this for now; it is the most universal
     solution)
   POSIX 1003.1g does not actually define this clearly
   at all. POSIX 1003.1g doesn't define a lot of things
   clearly, however!
 */
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
scm_recv(sock, msg, &scm, flags);
out_free:
skb_free_datagram(sk, skb);
mutex_unlock(&u->iolock);
out:
return err;
}
/*
 * Sleep until more data has arrived. But check for races.
 */
static long unix_stream_data_wait(struct sock *sk, long timeo,
struct sk_buff *last, unsigned int last_len,
bool freezable)
{
struct sk_buff *tail;
DEFINE_WAIT(wait);
unix_state_lock(sk);
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
(tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
!timeo)
break;
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
unix_state_unlock(sk);
if (freezable)
timeo = freezable_schedule_timeout(timeo);
else
timeo = schedule_timeout(timeo);
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD))
break;
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
}
finish_wait(sk_sleep(sk), &wait);
unix_state_unlock(sk);
return timeo;
}
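/* Bytes of this skb not yet consumed by the stream reader. Stream
 * receives need not unlink an skb from the queue; UNIXCB(skb).consumed
 * records how far into it earlier reads have progressed.
 */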
static unsigned int unix_skb_len(const struct sk_buff *skb)
{
return skb->len - UNIXCB(skb).consumed;
}
struct unix_stream_read_state {
int (*recv_actor)(struct sk_buff *, int, int,
struct unix_stream_read_state *);
struct socket *socket;
struct msghdr *msg;
struct pipe_inode_info *pipe;
size_t size;
int flags;
unsigned int splice_flags;
};
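/* Generic stream receive machinery: both recvmsg() and splice_read()
 * drive unix_stream_read_generic(), and the recv_actor callback decides
 * where each chunk goes - copied into the user msghdr or spliced into a
 * pipe (see unix_stream_read_actor() and unix_stream_splice_actor()).
 */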
static int unix_stream_read_generic(struct unix_stream_read_state *state,
bool freezable)
{
struct scm_cookie scm;
struct socket *sock = state->socket;
struct sock *sk = sock->sk;
struct unix_sock *u = unix_sk(sk);
int copied = 0;
int flags = state->flags;
int noblock = flags & MSG_DONTWAIT;
bool check_creds = false;
int target;
int err = 0;
long timeo;
int skip;
size_t size = state->size;
unsigned int last_len;
if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
err = -EINVAL;
goto out;
}
if (unlikely(flags & MSG_OOB)) {
err = -EOPNOTSUPP;
goto out;
}
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
memset(&scm, 0, sizeof(scm));
/* Lock the socket to prevent the queue from being reordered
 * while we sleep in memcpy_to_msg()
 */
mutex_lock(&u->iolock);
skip = max(sk_peek_offset(sk, flags), 0);
do {
int chunk;
bool drop_skb;
struct sk_buff *skb, *last;
redo:
unix_state_lock(sk);
if (sock_flag(sk, SOCK_DEAD)) {
err = -ECONNRESET;
goto unlock;
}
last = skb = skb_peek(&sk->sk_receive_queue);
last_len = last ? last->len : 0;
again:
if (skb == NULL) {
if (copied >= target)
goto unlock;
/*
* POSIX 1003.1g mandates this order.
*/
err = sock_error(sk);
if (err)
goto unlock;
if (sk->sk_shutdown & RCV_SHUTDOWN)
goto unlock;
unix_state_unlock(sk);
if (!timeo) {
err = -EAGAIN;
break;
}
mutex_unlock(&u->iolock);
timeo = unix_stream_data_wait(sk, timeo, last,
last_len, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
scm_destroy(&scm);
goto out;
}
mutex_lock(&u->iolock);
goto redo;
unlock:
unix_state_unlock(sk);
break;
}
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
}
unix_state_unlock(sk);
if (check_creds) {
/* Never glue messages from different writers */
if (!unix_skb_scm_eq(skb, &scm))
break;
} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(&scm, skb);
check_creds = true;
}
/* Copy address just once */
if (state->msg && state->msg->msg_name) {
DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
state->msg->msg_name);
unix_copy_addr(state->msg, skb->sk);
sunaddr = NULL;
}
chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
skb_get(skb);
chunk = state->recv_actor(skb, skip, chunk, state);
drop_skb = !unix_skb_len(skb);
/* skb is only safe to use if !drop_skb */
consume_skb(skb);
if (chunk < 0) {
if (copied == 0)
copied = -EFAULT;
break;
}
copied += chunk;
size -= chunk;
if (drop_skb) {
/* the skb was touched by a concurrent reader;
* we should not expect anything from this skb
* anymore and assume it invalid - we can be
* sure it was dropped from the socket queue
*
* let's report a short read
*/
err = 0;
break;
}
/* Mark read part of skb as used */
if (!(flags & MSG_PEEK)) {
UNIXCB(skb).consumed += chunk;
sk_peek_offset_bwd(sk, chunk);
if (UNIXCB(skb).fp) {
scm_stat_del(sk, skb);
unix_detach_fds(&scm, skb);
}
if (unix_skb_len(skb))
break;
skb_unlink(skb, &sk->sk_receive_queue);
consume_skb(skb);
if (scm.fp)
break;
} else {
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);
if (UNIXCB(skb).fp)
break;
skip = 0;
last = skb;
last_len = skb->len;
unix_state_lock(sk);
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb)
goto again;
unix_state_unlock(sk);
break;
}
} while (size);
mutex_unlock(&u->iolock);
if (state->msg)
scm_recv(sock, state->msg, &scm, flags);
else
scm_destroy(&scm);
out:
return copied ? : err;
}
static int unix_stream_read_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
int ret;
ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
state->msg, chunk);
return ret ?: chunk;
}
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t size, int flags)
{
struct unix_stream_read_state state = {
.recv_actor = unix_stream_read_actor,
.socket = sock,
.msg = msg,
.size = size,
.flags = flags
};
return unix_stream_read_generic(&state, true);
}
static int unix_stream_splice_actor(struct sk_buff *skb,
int skip, int chunk,
struct unix_stream_read_state *state)
{
return skb_splice_bits(skb, state->socket->sk,
UNIXCB(skb).consumed + skip,
state->pipe, chunk, state->splice_flags);
}
static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t size, unsigned int flags)
{
struct unix_stream_read_state state = {
.recv_actor = unix_stream_splice_actor,
.socket = sock,
.pipe = pipe,
.size = size,
.splice_flags = flags,
};
if (unlikely(*ppos))
return -ESPIPE;
if (sock->file->f_flags & O_NONBLOCK ||
flags & SPLICE_F_NONBLOCK)
state.flags = MSG_DONTWAIT;
return unix_stream_read_generic(&state, false);
}
static int unix_shutdown(struct socket *sock, int mode)
{
struct sock *sk = sock->sk;
struct sock *other;
if (mode < SHUT_RD || mode > SHUT_RDWR)
return -EINVAL;
/* This maps:
* SHUT_RD (0) -> RCV_SHUTDOWN (1)
* SHUT_WR (1) -> SEND_SHUTDOWN (2)
* SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
*/
++mode;
unix_state_lock(sk);
WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
other = unix_peer(sk);
if (other)
sock_hold(other);
unix_state_unlock(sk);
sk->sk_state_change(sk);
if (other &&
(sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
int peer_mode = 0;
if (mode&RCV_SHUTDOWN)
peer_mode |= SEND_SHUTDOWN;
if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN;
unix_state_lock(other);
WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
unix_state_unlock(other);
other->sk_state_change(other);
if (peer_mode == SHUTDOWN_MASK)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
else if (peer_mode & RCV_SHUTDOWN)
sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
}
if (other)
sock_put(other);
return 0;
}
long unix_inq_len(struct sock *sk)
{
struct sk_buff *skb;
long amount = 0;
if (sk->sk_state == TCP_LISTEN)
return -EINVAL;
spin_lock(&sk->sk_receive_queue.lock);
if (sk->sk_type == SOCK_STREAM ||
sk->sk_type == SOCK_SEQPACKET) {
skb_queue_walk(&sk->sk_receive_queue, skb)
amount += unix_skb_len(skb);
} else {
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
amount = skb->len;
}
spin_unlock(&sk->sk_receive_queue.lock);
return amount;
}
EXPORT_SYMBOL_GPL(unix_inq_len);
long unix_outq_len(struct sock *sk)
{
return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
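/* SIOCUNIXFILE: open the filesystem object a socket is bound to and
 * return it as an O_PATH file descriptor. Requires CAP_NET_ADMIN in the
 * socket's network namespace; unbound and abstract sockets (no dentry)
 * get -ENOENT. Illustrative userspace call:
 *
 *	int pfd = ioctl(unix_fd, SIOCUNIXFILE);
 */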
static int unix_open_file(struct sock *sk)
{
struct path path;
struct file *f;
int fd;
if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
if (!smp_load_acquire(&unix_sk(sk)->addr))
return -ENOENT;
path = unix_sk(sk)->path;
if (!path.dentry)
return -ENOENT;
path_get(&path);
fd = get_unused_fd_flags(O_CLOEXEC);
if (fd < 0)
goto out;
f = dentry_open(&path, O_PATH, current_cred());
if (IS_ERR(f)) {
put_unused_fd(fd);
fd = PTR_ERR(f);
goto out;
}
fd_install(fd, f);
out:
path_put(&path);
return fd;
}
static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
struct sock *sk = sock->sk;
long amount = 0;
int err;
switch (cmd) {
case SIOCOUTQ:
amount = unix_outq_len(sk);
err = put_user(amount, (int __user *)arg);
break;
case SIOCINQ:
amount = unix_inq_len(sk);
if (amount < 0)
err = amount;
else
err = put_user(amount, (int __user *)arg);
break;
case SIOCUNIXFILE:
err = unix_open_file(sk);
break;
default:
err = -ENOIOCTLCMD;
break;
}
return err;
}
#ifdef CONFIG_COMPAT
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err)
mask |= EPOLLERR;
if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based sockets need to check for termination and startup */
if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
sk->sk_state == TCP_CLOSE)
mask |= EPOLLHUP;
/*
 * We set writable also when the other side has shut down the
 * connection. This prevents stuck sockets.
 */
if (unix_writable(sk))
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
return mask;
}
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
struct sock *sk = sock->sk, *other;
unsigned int writable;
__poll_t mask;
u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */
if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Connection-based sockets need to check for termination and startup */
if (sk->sk_type == SOCK_SEQPACKET) {
if (sk->sk_state == TCP_CLOSE)
mask |= EPOLLHUP;
/* connection hasn't started yet? */
if (sk->sk_state == TCP_SYN_SENT)
return mask;
}
/* No write status requested, avoid expensive OUT tests. */
if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
if (writable) {
unix_state_lock(sk);
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
unix_state_unlock(sk);
}
if (writable)
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
else
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
return mask;
}
#ifdef CONFIG_PROC_FS
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
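/* A seq_file position encodes both the hash bucket (upper bits) and a
 * 1-based offset within that bucket's chain (low BUCKET_SPACE bits), so
 * iteration can resume mid-chain between reads of /proc/net/unix.
 */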
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
unsigned long offset = get_offset(*pos);
unsigned long bucket = get_bucket(*pos);
struct sock *sk;
unsigned long count = 0;
for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
if (sock_net(sk) != seq_file_net(seq))
continue;
if (++count == offset)
break;
}
return sk;
}
static struct sock *unix_next_socket(struct seq_file *seq,
struct sock *sk,
loff_t *pos)
{
unsigned long bucket;
while (sk > (struct sock *)SEQ_START_TOKEN) {
sk = sk_next(sk);
if (!sk)
goto next_bucket;
if (sock_net(sk) == seq_file_net(seq))
return sk;
}
do {
sk = unix_from_bucket(seq, pos);
if (sk)
return sk;
next_bucket:
bucket = get_bucket(*pos) + 1;
*pos = set_bucket_offset(bucket, 1);
} while (bucket < ARRAY_SIZE(unix_socket_table));
return NULL;
}
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(unix_table_lock)
{
spin_lock(&unix_table_lock);
if (!*pos)
return SEQ_START_TOKEN;
if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
return NULL;
return unix_next_socket(seq, NULL, pos);
}
static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
return unix_next_socket(seq, v, pos);
}
static void unix_seq_stop(struct seq_file *seq, void *v)
__releases(unix_table_lock)
{
spin_unlock(&unix_table_lock);
}
static int unix_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Num RefCount Protocol Flags Type St "
"Inode Path\n");
else {
struct sock *s = v;
struct unix_sock *u = unix_sk(s);
unix_state_lock(s);
seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
s,
refcount_read(&s->sk_refcnt),
0,
s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
s->sk_type,
s->sk_socket ?
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
sock_i_ino(s));
if (u->addr) { /* under unix_table_lock here */
int i, len;
seq_putc(seq, ' ');
i = 0;
len = u->addr->len - sizeof(short);
if (!UNIX_ABSTRACT(s))
len--;
else {
seq_putc(seq, '@');
i++;
}
for ( ; i < len; i++)
seq_putc(seq, u->addr->name->sun_path[i] ?:
'@');
}
unix_state_unlock(s);
seq_putc(seq, '\n');
}
return 0;
}
static const struct seq_operations unix_seq_ops = {
.start = unix_seq_start,
.next = unix_seq_next,
.stop = unix_seq_stop,
.show = unix_seq_show,
};
#endif
static const struct net_proto_family unix_family_ops = {
.family = PF_UNIX,
.create = unix_create,
.owner = THIS_MODULE,
};
static int __net_init unix_net_init(struct net *net)
{
int error = -ENOMEM;
net->unx.sysctl_max_dgram_qlen = 10;
if (unix_sysctl_register(net))
goto out;
#ifdef CONFIG_PROC_FS
if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
sizeof(struct seq_net_private))) {
unix_sysctl_unregister(net);
goto out;
}
#endif
error = 0;
out:
return error;
}
static void __net_exit unix_net_exit(struct net *net)
{
unix_sysctl_unregister(net);
remove_proc_entry("unix", net->proc_net);
}
static struct pernet_operations unix_net_ops = {
.init = unix_net_init,
.exit = unix_net_exit,
};
static int __init af_unix_init(void)
{
int rc = -1;
BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
goto out;
}
sock_register(&unix_family_ops);
register_pernet_subsys(&unix_net_ops);
out:
return rc;
}
static void __exit af_unix_exit(void)
{
sock_unregister(PF_UNIX);
proto_unregister(&unix_proto);
unregister_pernet_subsys(&unix_net_ops);
}
/* Earlier than device_initcall() so that other drivers invoking
request_module() don't end up in a loop when modprobe tries
to use a UNIX socket. But later than subsys_initcall() because
we depend on stuff initialised there */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver);
MODULE_ALIAS_NETPROTO(PF_UNIX);