/spare/repo/netdev-2.6 branch 'master'

This commit is contained in:
Jeff Garzik
2005-09-01 18:02:01 -04:00
1068 changed files with 63465 additions and 21801 deletions

View File

@@ -63,7 +63,7 @@ struct tc_action_ops
__u32 type; /* TBD to match kind */
__u32 capab; /* capabilities includes 4 bit version */
struct module *owner;
int (*act)(struct sk_buff **, struct tc_action *);
int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *);
int (*get_stats)(struct sk_buff *, struct tc_action *);
int (*dump)(struct sk_buff *, struct tc_action *,int , int);
int (*cleanup)(struct tc_action *, int bind);

View File

@@ -45,6 +45,7 @@ struct prefix_info {
#ifdef __KERNEL__
#include <linux/config.h>
#include <linux/netdevice.h>
#include <net/if_inet6.h>
#include <net/ipv6.h>
@@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
addr->s6_addr32[3] == htonl(0x00000002));
}
#ifdef CONFIG_PROC_FS
extern int if6_proc_init(void);
extern void if6_proc_exit(void);
#endif
#endif
#endif

View File

@@ -1,5 +1,11 @@
#ifndef __LINUX_NET_AFUNIX_H
#define __LINUX_NET_AFUNIX_H
#include <linux/config.h>
#include <linux/socket.h>
#include <linux/un.h>
#include <net/sock.h>
extern void unix_inflight(struct file *fp);
extern void unix_notinflight(struct file *fp);
extern void unix_gc(void);
@@ -74,5 +80,14 @@ struct unix_sock {
wait_queue_head_t peer_wait;
};
#define unix_sk(__sk) ((struct unix_sock *)__sk)
#ifdef CONFIG_SYSCTL
extern int sysctl_unix_max_dgram_qlen;
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#else
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#endif
#endif
#endif

View File

@@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl;
extern void arp_init(void);
extern int arp_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt);
struct packet_type *pt, struct net_device *orig_dev);
extern int arp_find(unsigned char *haddr, struct sk_buff *skb);
extern int arp_ioctl(unsigned int cmd, void __user *arg);
extern void arp_send(int type, int ptype, u32 dest_ip,

View File

@@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int);
/* ax25_in.c */
extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *);
extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *);
extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
/* ax25_ip.c */
extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int);

View File

@@ -131,11 +131,12 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock);
/* Skb helpers */
struct bt_skb_cb {
int incoming;
__u8 pkt_type;
__u8 incoming;
};
#define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb))
static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how)
static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how)
{
struct sk_buff *skb;

View File

@@ -453,6 +453,15 @@ struct inquiry_info_with_rssi {
__u16 clock_offset;
__s8 rssi;
} __attribute__ ((packed));
struct inquiry_info_with_rssi_and_pscan_mode {
bdaddr_t bdaddr;
__u8 pscan_rep_mode;
__u8 pscan_period_mode;
__u8 pscan_mode;
__u8 dev_class[3];
__u16 clock_offset;
__s8 rssi;
} __attribute__ ((packed));
#define HCI_EV_CONN_COMPLETE 0x03
struct hci_ev_conn_complete {
@@ -584,6 +593,12 @@ struct hci_ev_clock_offset {
__u16 clock_offset;
} __attribute__ ((packed));
#define HCI_EV_PSCAN_REP_MODE 0x20
struct hci_ev_pscan_rep_mode {
bdaddr_t bdaddr;
__u8 pscan_rep_mode;
} __attribute__ ((packed));
/* Internal events generated by Bluetooth stack */
#define HCI_EV_STACK_INTERNAL 0xFD
struct hci_ev_stack_internal {

View File

@@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb)
bt_cb(skb)->incoming = 1;
/* Time stamp */
do_gettimeofday(&skb->stamp);
__net_timestamp(skb);
/* Queue frame for rx task */
skb_queue_tail(&hdev->rx_q, skb);

View File

@@ -80,9 +80,9 @@
#define RFCOMM_RPN_STOP_15 1
#define RFCOMM_RPN_PARITY_NONE 0x0
#define RFCOMM_RPN_PARITY_ODD 0x4
#define RFCOMM_RPN_PARITY_EVEN 0x5
#define RFCOMM_RPN_PARITY_MARK 0x6
#define RFCOMM_RPN_PARITY_ODD 0x1
#define RFCOMM_RPN_PARITY_EVEN 0x3
#define RFCOMM_RPN_PARITY_MARK 0x5
#define RFCOMM_RPN_PARITY_SPACE 0x7
#define RFCOMM_RPN_FLOW_NONE 0x00
@@ -223,8 +223,14 @@ struct rfcomm_dlc {
#define RFCOMM_CFC_DISABLED 0
#define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS
/* ---- RFCOMM SEND RPN ---- */
int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci,
u8 bit_rate, u8 data_bits, u8 stop_bits,
u8 parity, u8 flow_ctrl_settings,
u8 xon_char, u8 xoff_char, u16 param_mask);
/* ---- RFCOMM DLCs (channels) ---- */
struct rfcomm_dlc *rfcomm_dlc_alloc(int prio);
struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio);
void rfcomm_dlc_free(struct rfcomm_dlc *d);
int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel);
int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason);

View File

@@ -9,7 +9,7 @@ struct datalink_proto {
unsigned short header_length;
int (*rcvfunc)(struct sk_buff *, struct net_device *,
struct packet_type *);
struct packet_type *, struct net_device *);
int (*request)(struct datalink_proto *, struct sk_buff *,
unsigned char *);
struct list_head node;

View File

@@ -3,6 +3,7 @@
#include <linux/dn.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <asm/byteorder.h>
typedef unsigned short dn_address;

View File

@@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk)
return (struct raw_sock *)sk;
}
extern int sysctl_icmp_echo_ignore_all;
extern int sysctl_icmp_echo_ignore_broadcasts;
extern int sysctl_icmp_ignore_bogus_error_responses;
extern int sysctl_icmp_errors_use_inbound_ifaddr;
extern int sysctl_icmp_ratelimit;
extern int sysctl_icmp_ratemask;
#endif /* _ICMP_H */

View File

@@ -0,0 +1,130 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Authors: Lotsa people, from code originally in tcp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _INET6_HASHTABLES_H
#define _INET6_HASHTABLES_H
#include <linux/config.h>
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/types.h>
#include <net/ipv6.h>
struct inet_hashinfo;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport,
const struct in6_addr *faddr, const u16 fport,
const int ehash_size)
{
int hashent = (lport ^ fport);
hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
hashent ^= hashent >> 16;
hashent ^= hashent >> 8;
return (hashent & (ehash_size - 1));
}
static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ipv6_pinfo *np = inet6_sk(sk);
const struct in6_addr *laddr = &np->rcv_saddr;
const struct in6_addr *faddr = &np->daddr;
const __u16 lport = inet->num;
const __u16 fport = inet->dport;
return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size);
}
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
* we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
*
* The sockhash lock must be held as a reader here.
*/
static inline struct sock *
__inet6_lookup_established(struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const u16 sport,
const struct in6_addr *daddr,
const u16 hnum,
const int dif)
{
struct sock *sk;
const struct hlist_node *node;
const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
const int hash = inet6_ehashfn(daddr, hnum, saddr, sport,
hashinfo->ehash_size);
struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
/* For IPV6 do the cheaper port and family tests first. */
if (INET6_MATCH(sk, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
const struct inet_timewait_sock *tw = inet_twsk(sk);
if(*((__u32 *)&(tw->tw_dport)) == ports &&
sk->sk_family == PF_INET6) {
const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
(!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
goto hit;
}
}
read_unlock(&head->lock);
return NULL;
hit:
sock_hold(sk);
read_unlock(&head->lock);
return sk;
}
extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
const struct in6_addr *daddr,
const unsigned short hnum,
const int dif);
static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr,
const u16 sport,
const struct in6_addr *daddr,
const u16 hnum,
const int dif)
{
struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport,
daddr, hnum, dif);
if (sk)
return sk;
return inet6_lookup_listener(hashinfo, daddr, hnum, dif);
}
extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
const struct in6_addr *saddr, const u16 sport,
const struct in6_addr *daddr, const u16 dport,
const int dif);
#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
#endif /* _INET6_HASHTABLES_H */

View File

@@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops;
* INET4 prototypes used by INET6
*/
struct msghdr;
struct sock;
struct sockaddr;
struct socket;
extern void inet_remove_sock(struct sock *sk1);
extern void inet_put_sock(unsigned short num,
struct sock *sk);
@@ -29,7 +34,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p
extern int inet_listen(struct socket *sock, int backlog);
extern void inet_sock_destruct(struct sock *sk);
extern atomic_t inet_sock_nr;
extern int inet_bind(struct socket *sock,
struct sockaddr *uaddr, int addr_len);

View File

@@ -0,0 +1,276 @@
/*
* NET Generic infrastructure for INET connection oriented protocols.
*
* Definitions for inet_connection_sock
*
* Authors: Many people, see the TCP sources
*
* From code originally in TCP
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _INET_CONNECTION_SOCK_H
#define _INET_CONNECTION_SOCK_H
#include <linux/ip.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <net/request_sock.h>
#define INET_CSK_DEBUG 1
/* Cancel timers, when they are not required. */
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
struct inet_hashinfo;
struct tcp_congestion_ops;
/** inet_connection_sock - INET connection oriented sock
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
* @icsk_ca_ops Pluggable congestion control hook
* @icsk_ca_state: Congestion control state
* @icsk_retransmits: Number of unrecovered [RTO] timeouts
* @icsk_pending: Scheduled timer event
* @icsk_backoff: Backoff
* @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
* @icsk_probes_out: unanswered 0 window probes
* @icsk_ack: Delayed ACK control data
*/
struct inet_connection_sock {
/* inet_sock has to be the first member! */
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
__u32 icsk_rto;
struct tcp_congestion_ops *icsk_ca_ops;
__u8 icsk_ca_state;
__u8 icsk_retransmits;
__u8 icsk_pending;
__u8 icsk_backoff;
__u8 icsk_syn_retries;
__u8 icsk_probes_out;
/* 2 BYTES HOLE, TRY TO PACK! */
struct {
__u8 pending; /* ACK is pending */
__u8 quick; /* Scheduled number of quick acks */
__u8 pingpong; /* The session is interactive */
__u8 blocked; /* Delayed ACK was blocked by socket lock */
__u32 ato; /* Predicted tick of soft clock */
unsigned long timeout; /* Currently scheduled timeout */
__u32 lrcvtime; /* timestamp of last received data packet */
__u16 last_seg_size; /* Size of last incoming segment */
__u16 rcv_mss; /* MSS used for delayed ACK decisions */
} icsk_ack;
u32 icsk_ca_priv[16];
#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
};
#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
#define ICSK_TIME_DACK 2 /* Delayed ack timer */
#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */
#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
return (struct inet_connection_sock *)sk;
}
static inline void *inet_csk_ca(const struct sock *sk)
{
return (void *)inet_csk(sk)->icsk_ca_priv;
}
extern struct sock *inet_csk_clone(struct sock *sk,
const struct request_sock *req,
const unsigned int __nocast priority);
enum inet_csk_ack_state_t {
ICSK_ACK_SCHED = 1,
ICSK_ACK_TIMER = 2,
ICSK_ACK_PUSHED = 4
};
extern void inet_csk_init_xmit_timers(struct sock *sk,
void (*retransmit_handler)(unsigned long),
void (*delack_handler)(unsigned long),
void (*keepalive_handler)(unsigned long));
extern void inet_csk_clear_xmit_timers(struct sock *sk);
static inline void inet_csk_schedule_ack(struct sock *sk)
{
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED;
}
static inline int inet_csk_ack_scheduled(const struct sock *sk)
{
return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED;
}
static inline void inet_csk_delack_init(struct sock *sk)
{
memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack));
}
extern void inet_csk_delete_keepalive_timer(struct sock *sk);
extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout);
#ifdef INET_CSK_DEBUG
extern const char inet_csk_timer_bug_msg[];
#endif
static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what)
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
sk_stop_timer(sk, &icsk->icsk_retransmit_timer);
#endif
} else if (what == ICSK_TIME_DACK) {
icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0;
#ifdef INET_CSK_CLEAR_TIMERS
sk_stop_timer(sk, &icsk->icsk_delack_timer);
#endif
}
#ifdef INET_CSK_DEBUG
else {
pr_debug("%s", inet_csk_timer_bug_msg);
}
#endif
}
/*
* Reset the retransmission timer
*/
static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what,
unsigned long when,
const unsigned long max_when)
{
struct inet_connection_sock *icsk = inet_csk(sk);
if (when > max_when) {
#ifdef INET_CSK_DEBUG
pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n",
sk, what, when, current_text_addr());
#endif
when = max_when;
}
if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) {
icsk->icsk_pending = what;
icsk->icsk_timeout = jiffies + when;
sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout);
} else if (what == ICSK_TIME_DACK) {
icsk->icsk_ack.pending |= ICSK_ACK_TIMER;
icsk->icsk_ack.timeout = jiffies + when;
sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
}
#ifdef INET_CSK_DEBUG
else {
pr_debug("%s", inet_csk_timer_bug_msg);
}
#endif
}
extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
extern struct request_sock *inet_csk_search_req(const struct sock *sk,
struct request_sock ***prevp,
const __u16 rport,
const __u32 raddr,
const __u32 laddr);
extern int inet_csk_get_port(struct inet_hashinfo *hashinfo,
struct sock *sk, unsigned short snum);
extern struct dst_entry* inet_csk_route_req(struct sock *sk,
const struct request_sock *req);
static inline void inet_csk_reqsk_queue_add(struct sock *sk,
struct request_sock *req,
struct sock *child)
{
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
}
extern void inet_csk_reqsk_queue_hash_add(struct sock *sk,
struct request_sock *req,
const unsigned timeout);
static inline void inet_csk_reqsk_queue_removed(struct sock *sk,
struct request_sock *req)
{
if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0)
inet_csk_delete_keepalive_timer(sk);
}
static inline void inet_csk_reqsk_queue_added(struct sock *sk,
const unsigned long timeout)
{
if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0)
inet_csk_reset_keepalive_timer(sk, timeout);
}
static inline int inet_csk_reqsk_queue_len(const struct sock *sk)
{
return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue);
}
static inline int inet_csk_reqsk_queue_young(const struct sock *sk)
{
return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue);
}
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
}
static inline void inet_csk_reqsk_queue_unlink(struct sock *sk,
struct request_sock *req,
struct request_sock **prev)
{
reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev);
}
static inline void inet_csk_reqsk_queue_drop(struct sock *sk,
struct request_sock *req,
struct request_sock **prev)
{
inet_csk_reqsk_queue_unlink(sk, req, prev);
inet_csk_reqsk_queue_removed(sk, req);
reqsk_free(req);
}
extern void inet_csk_reqsk_queue_prune(struct sock *parent,
const unsigned long interval,
const unsigned long timeout,
const unsigned long max_rto);
extern void inet_csk_destroy_sock(struct sock *sk);
/*
* LISTEN is a special case for poll..
*/
static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
{
return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
(POLLIN | POLLRDNORM) : 0;
}
extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries);
extern void inet_csk_listen_stop(struct sock *sk);
#endif /* _INET_CONNECTION_SOCK_H */

View File

@@ -0,0 +1,427 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Authors: Lotsa people, from code originally in tcp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _INET_HASHTABLES_H
#define _INET_HASHTABLES_H
#include <linux/config.h>
#include <linux/interrupt.h>
#include <linux/ipv6.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>
#include <net/inet_connection_sock.h>
#include <net/route.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>
/* This is for all connections with a full identity, no wildcards.
* New scheme, half the table is for TIME_WAIT, the other half is
* for the rest. I'll experiment with dynamic table growth later.
*/
struct inet_ehash_bucket {
rwlock_t lock;
struct hlist_head chain;
} __attribute__((__aligned__(8)));
/* There are a few simple rules, which allow for local port reuse by
* an application. In essence:
*
* 1) Sockets bound to different interfaces may share a local port.
* Failing that, goto test 2.
* 2) If all sockets have sk->sk_reuse set, and none of them are in
* TCP_LISTEN state, the port may be shared.
* Failing that, goto test 3.
* 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
* address, and none of them are the same, the port may be
* shared.
* Failing this, the port cannot be shared.
*
* The interesting point, is test #2. This is what an FTP server does
* all day. To optimize this case we use a specific flag bit defined
* below. As we add sockets to a bind bucket list, we perform a
* check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
* As long as all sockets added to a bind bucket pass this test,
* the flag bit will be set.
* The resulting situation is that tcp_v[46]_verify_bind() can just check
* for this flag bit, if it is set and the socket trying to bind has
* sk->sk_reuse set, we don't even have to walk the owners list at all,
* we return that it is ok to bind this socket to the requested local port.
*
* Sounds like a lot of work, but it is worth it. In a more naive
* implementation (ie. current FreeBSD etc.) the entire list of ports
* must be walked for each data port opened by an ftp server. Needless
* to say, this does not scale at all. With a couple thousand FTP
* users logged onto your box, isn't it nice to know that new data
* ports are created in O(1) time? I thought so. ;-) -DaveM
*/
struct inet_bind_bucket {
unsigned short port;
signed short fastreuse;
struct hlist_node node;
struct hlist_head owners;
};
#define inet_bind_bucket_for_each(tb, node, head) \
hlist_for_each_entry(tb, node, head, node)
struct inet_bind_hashbucket {
spinlock_t lock;
struct hlist_head chain;
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
struct inet_hashinfo {
/* This is for sockets with full identity only. Sockets here will
* always be without wildcards and will have the following invariant:
*
* TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
*
* First half of the table is for sockets not in TIME_WAIT, second half
* is for TIME_WAIT sockets only.
*/
struct inet_ehash_bucket *ehash;
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
*/
struct inet_bind_hashbucket *bhash;
int bhash_size;
int ehash_size;
/* All sockets in TCP_LISTEN state will be in here. This is the only
* table where wildcard'd TCP sockets can exist. Hash function here
* is just local port number.
*/
struct hlist_head listening_hash[INET_LHTABLE_SIZE];
/* All the above members are written once at bootup and
* never written again _or_ are predominantly read-access.
*
* Now align to a new cache line as all the following members
* are often dirty.
*/
rwlock_t lhash_lock ____cacheline_aligned;
atomic_t lhash_users;
wait_queue_head_t lhash_wait;
spinlock_t portalloc_lock;
kmem_cache_t *bind_bucket_cachep;
int port_rover;
};
static inline int inet_ehashfn(const __u32 laddr, const __u16 lport,
const __u32 faddr, const __u16 fport,
const int ehash_size)
{
int h = (laddr ^ lport) ^ (faddr ^ fport);
h ^= h >> 16;
h ^= h >> 8;
return h & (ehash_size - 1);
}
static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size)
{
const struct inet_sock *inet = inet_sk(sk);
const __u32 laddr = inet->rcv_saddr;
const __u16 lport = inet->num;
const __u32 faddr = inet->daddr;
const __u16 fport = inet->dport;
return inet_ehashfn(laddr, lport, faddr, fport, ehash_size);
}
extern struct inet_bind_bucket *
inet_bind_bucket_create(kmem_cache_t *cachep,
struct inet_bind_hashbucket *head,
const unsigned short snum);
extern void inet_bind_bucket_destroy(kmem_cache_t *cachep,
struct inet_bind_bucket *tb);
static inline int inet_bhashfn(const __u16 lport, const int bhash_size)
{
return lport & (bhash_size - 1);
}
extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
const unsigned short snum);
/* These can have wildcards, don't try too hard. */
static inline int inet_lhashfn(const unsigned short num)
{
return num & (INET_LHTABLE_SIZE - 1);
}
static inline int inet_sk_listen_hashfn(const struct sock *sk)
{
return inet_lhashfn(inet_sk(sk)->num);
}
/* Caller must disable local BH processing. */
static inline void __inet_inherit_port(struct inet_hashinfo *table,
struct sock *sk, struct sock *child)
{
const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
sk_add_bind_node(child, &tb->owners);
inet_csk(child)->icsk_bind_hash = tb;
spin_unlock(&head->lock);
}
static inline void inet_inherit_port(struct inet_hashinfo *table,
struct sock *sk, struct sock *child)
{
local_bh_disable();
__inet_inherit_port(table, sk, child);
local_bh_enable();
}
extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk);
extern void inet_listen_wlock(struct inet_hashinfo *hashinfo);
/*
* - We may sleep inside this lock.
* - If sleeping is not required (or called from BH),
* use plain read_(un)lock(&inet_hashinfo.lhash_lock).
*/
static inline void inet_listen_lock(struct inet_hashinfo *hashinfo)
{
/* read_lock synchronizes to candidates to writers */
read_lock(&hashinfo->lhash_lock);
atomic_inc(&hashinfo->lhash_users);
read_unlock(&hashinfo->lhash_lock);
}
static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo)
{
if (atomic_dec_and_test(&hashinfo->lhash_users))
wake_up(&hashinfo->lhash_wait);
}
static inline void __inet_hash(struct inet_hashinfo *hashinfo,
struct sock *sk, const int listen_possible)
{
struct hlist_head *list;
rwlock_t *lock;
BUG_TRAP(sk_unhashed(sk));
if (listen_possible && sk->sk_state == TCP_LISTEN) {
list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
lock = &hashinfo->lhash_lock;
inet_listen_wlock(hashinfo);
} else {
sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size);
list = &hashinfo->ehash[sk->sk_hashent].chain;
lock = &hashinfo->ehash[sk->sk_hashent].lock;
write_lock(lock);
}
__sk_add_node(sk, list);
sock_prot_inc_use(sk->sk_prot);
write_unlock(lock);
if (listen_possible && sk->sk_state == TCP_LISTEN)
wake_up(&hashinfo->lhash_wait);
}
static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
if (sk->sk_state != TCP_CLOSE) {
local_bh_disable();
__inet_hash(hashinfo, sk, 1);
local_bh_enable();
}
}
static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk)
{
rwlock_t *lock;
if (sk_unhashed(sk))
goto out;
if (sk->sk_state == TCP_LISTEN) {
local_bh_disable();
inet_listen_wlock(hashinfo);
lock = &hashinfo->lhash_lock;
} else {
struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent];
lock = &head->lock;
write_lock_bh(&head->lock);
}
if (__sk_del_node_init(sk))
sock_prot_dec_use(sk->sk_prot);
write_unlock_bh(lock);
out:
if (sk->sk_state == TCP_LISTEN)
wake_up(&hashinfo->lhash_wait);
}
static inline int inet_iif(const struct sk_buff *skb)
{
return ((struct rtable *)skb->dst)->rt_iif;
}
extern struct sock *__inet_lookup_listener(const struct hlist_head *head,
const u32 daddr,
const unsigned short hnum,
const int dif);
/* Optimize the common listener case. */
static inline struct sock *
inet_lookup_listener(struct inet_hashinfo *hashinfo,
const u32 daddr,
const unsigned short hnum, const int dif)
{
struct sock *sk = NULL;
const struct hlist_head *head;
read_lock(&hashinfo->lhash_lock);
head = &hashinfo->listening_hash[inet_lhashfn(hnum)];
if (!hlist_empty(head)) {
const struct inet_sock *inet = inet_sk((sk = __sk_head(head)));
if (inet->num == hnum && !sk->sk_node.next &&
(!inet->rcv_saddr || inet->rcv_saddr == daddr) &&
(sk->sk_family == PF_INET || !ipv6_only_sock(sk)) &&
!sk->sk_bound_dev_if)
goto sherry_cache;
sk = __inet_lookup_listener(head, daddr, hnum, dif);
}
if (sk) {
sherry_cache:
sock_hold(sk);
}
read_unlock(&hashinfo->lhash_lock);
return sk;
}
/* Socket demux engine toys. */
#ifdef __BIG_ENDIAN
#define INET_COMBINED_PORTS(__sport, __dport) \
(((__u32)(__sport) << 16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define INET_COMBINED_PORTS(__sport, __dport) \
(((__u32)(__dport) << 16) | (__u32)(__sport))
#endif
#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \
const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
(((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
(((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \
((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define INET_ADDR_COOKIE(__name, __saddr, __daddr)
#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
((inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \
((inet_twsk(__sk)->tw_daddr == (__saddr)) && \
(inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \
((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
*
* Local BH must be disabled here.
*/
static inline struct sock *
__inet_lookup_established(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 hnum,
const int dif)
{
INET_ADDR_COOKIE(acookie, saddr, daddr)
const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
struct sock *sk;
const struct hlist_node *node;
/* Optimize here for direct hit, only listening connections can
* have wildcards anyways.
*/
const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size);
struct inet_ehash_bucket *head = &hashinfo->ehash[hash];
read_lock(&head->lock);
sk_for_each(sk, node, &head->chain) {
if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit; /* You sunk my battleship! */
}
/* Must check for a TIME_WAIT'er before going to listener hash. */
sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) {
if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif))
goto hit;
}
sk = NULL;
out:
read_unlock(&head->lock);
return sk;
hit:
sock_hold(sk);
goto out;
}
static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 hnum,
const int dif)
{
struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr,
hnum, dif);
return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif);
}
static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
const u32 saddr, const u16 sport,
const u32 daddr, const u16 dport,
const int dif)
{
struct sock *sk;
local_bh_disable();
sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif);
local_bh_enable();
return sk;
}
#endif /* _INET_HASHTABLES_H */

View File

@@ -0,0 +1,219 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for a generic INET TIMEWAIT sock
*
* From code originally in net/tcp.h
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _INET_TIMEWAIT_SOCK_
#define _INET_TIMEWAIT_SOCK_
#include <linux/config.h>
#include <linux/ip.h>
#include <linux/list.h>
#include <linux/timer.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <asm/atomic.h>
struct inet_hashinfo;
#define INET_TWDR_RECYCLE_SLOTS_LOG 5
#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG)
/*
* If time > 4sec, it is "slow" path, no recycling is required,
* so that we select tick to get range about 4 seconds.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#else
# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG)
#endif
/* TIME_WAIT reaping mechanism. */
#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */
#define INET_TWDR_TWKILL_QUOTA 100
struct inet_timewait_death_row {
/* Short-time timewait calendar */
int twcal_hand;
int twcal_jiffie;
struct timer_list twcal_timer;
struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS];
spinlock_t death_lock;
int tw_count;
int period;
u32 thread_slots;
struct work_struct twkill_work;
struct timer_list tw_timer;
int slot;
struct hlist_head cells[INET_TWDR_TWKILL_SLOTS];
struct inet_hashinfo *hashinfo;
int sysctl_tw_recycle;
int sysctl_max_tw_buckets;
};
extern void inet_twdr_hangman(unsigned long data);
extern void inet_twdr_twkill_work(void *data);
extern void inet_twdr_twcal_tick(unsigned long data);
#if (BITS_PER_LONG == 64)
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
#else
#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4
#endif
struct inet_bind_bucket;
/*
* This is a TIME_WAIT sock. It works around the memory consumption
* problems of sockets in such a state on heavily loaded servers, but
* without violating the protocol specification.
*/
struct inet_timewait_sock {
/*
* Now struct sock also uses sock_common, so please just
* don't add nothing before this first member (__tw_common) --acme
*/
struct sock_common __tw_common;
#define tw_family __tw_common.skc_family
#define tw_state __tw_common.skc_state
#define tw_reuse __tw_common.skc_reuse
#define tw_bound_dev_if __tw_common.skc_bound_dev_if
#define tw_node __tw_common.skc_node
#define tw_bind_node __tw_common.skc_bind_node
#define tw_refcnt __tw_common.skc_refcnt
#define tw_prot __tw_common.skc_prot
volatile unsigned char tw_substate;
/* 3 bits hole, try to pack */
unsigned char tw_rcv_wscale;
/* Socket demultiplex comparisons on incoming packets. */
/* these five are in inet_sock */
__u16 tw_sport;
__u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES)));
__u32 tw_rcv_saddr;
__u16 tw_dport;
__u16 tw_num;
/* And these are ours. */
__u8 tw_ipv6only:1;
/* 31 bits hole, try to pack */
int tw_hashent;
int tw_timeout;
unsigned long tw_ttd;
struct inet_bind_bucket *tw_tb;
struct hlist_node tw_death_node;
};
static inline void inet_twsk_add_node(struct inet_timewait_sock *tw,
struct hlist_head *list)
{
hlist_add_head(&tw->tw_node, list);
}
static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
struct hlist_head *list)
{
hlist_add_head(&tw->tw_bind_node, list);
}
static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw)
{
return tw->tw_death_node.pprev != NULL;
}
static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw)
{
tw->tw_death_node.pprev = NULL;
}
static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
{
__hlist_del(&tw->tw_death_node);
inet_twsk_dead_node_init(tw);
}
static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw)
{
if (inet_twsk_dead_hashed(tw)) {
__inet_twsk_del_dead_node(tw);
return 1;
}
return 0;
}
#define inet_twsk_for_each(tw, node, head) \
hlist_for_each_entry(tw, node, head, tw_node)
#define inet_twsk_for_each_inmate(tw, node, jail) \
hlist_for_each_entry(tw, node, jail, tw_death_node)
#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \
hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
{
return (struct inet_timewait_sock *)sk;
}
static inline u32 inet_rcv_saddr(const struct sock *sk)
{
return likely(sk->sk_state != TCP_TIME_WAIT) ?
inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr;
}
static inline void inet_twsk_put(struct inet_timewait_sock *tw)
{
if (atomic_dec_and_test(&tw->tw_refcnt)) {
#ifdef SOCK_REFCNT_DEBUG
printk(KERN_DEBUG "%s timewait_sock %p released\n",
tw->tw_prot->name, tw);
#endif
kmem_cache_free(tw->tw_prot->twsk_slab, tw);
}
}
extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
const int state);
extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo);
extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
struct sock *sk,
struct inet_hashinfo *hashinfo);
extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr,
const int timeo, const int timewait_len);
extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
struct inet_timewait_death_row *twdr);
#endif /* _INET_TIMEWAIT_SOCK_ */

View File

@@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
u32 saddr, u32 daddr,
struct ip_options *opt);
extern int ip_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt);
struct packet_type *pt, struct net_device *orig_dev);
extern int ip_local_deliver(struct sk_buff *skb);
extern int ip_mr_input(struct sk_buff *skb);
extern int ip_output(struct sk_buff *skb);
@@ -140,8 +140,6 @@ struct ip_reply_arg {
void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
unsigned int len);
extern int ip_finish_output(struct sk_buff *skb);
struct ipv4_config
{
int log_martians;
@@ -165,6 +163,24 @@ extern int sysctl_local_port_range[2];
extern int sysctl_ip_default_ttl;
extern int sysctl_ip_nonlocal_bind;
/* From ip_fragment.c */
extern int sysctl_ipfrag_high_thresh;
extern int sysctl_ipfrag_low_thresh;
extern int sysctl_ipfrag_time;
extern int sysctl_ipfrag_secret_interval;
/* From inetpeer.c */
extern int inet_peer_threshold;
extern int inet_peer_minttl;
extern int inet_peer_maxttl;
extern int inet_peer_gc_mintime;
extern int inet_peer_gc_maxtime;
/* From ip_output.c */
extern int sysctl_ip_dynaddr;
extern void ipfrag_init(void);
#ifdef CONFIG_INET
/* The function in 2.2 was invalid, producing wrong result for
* check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
@@ -319,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da
extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb);
extern void ip_options_fragment(struct sk_buff *skb);
extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb);
extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user);
extern int ip_options_get(struct ip_options **optp,
unsigned char *data, int optlen);
extern int ip_options_get_from_user(struct ip_options **optp,
unsigned char __user *data, int optlen);
extern void ip_options_undo(struct ip_options * opt);
extern void ip_forward_options(struct sk_buff *skb);
extern int ip_options_rcv_srr(struct sk_buff *skb);
@@ -350,5 +369,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
void __user *oldval, size_t __user *oldlenp,
void __user *newval, size_t newlen,
void **context);
#ifdef CONFIG_PROC_FS
extern int ip_misc_proc_init(void);
#endif
extern struct ctl_table ipv4_table[];
#endif /* _IP_H */

View File

@@ -12,7 +12,6 @@
#include <net/flow.h>
#include <net/ip6_fib.h>
#include <net/sock.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ipv6.h>

View File

@@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res)
#endif
}
#ifdef CONFIG_PROC_FS
extern int fib_proc_init(void);
extern void fib_proc_exit(void);
#endif
#endif /* _NET_FIB_H */

View File

@@ -255,7 +255,6 @@ struct ip_vs_daemon_user {
#include <asm/atomic.h> /* for struct atomic_t */
#include <linux/netdevice.h> /* for struct neighbour */
#include <net/dst.h> /* for struct dst_entry */
#include <net/tcp.h>
#include <net/udp.h>
#include <linux/compiler.h>

View File

@@ -104,6 +104,7 @@ struct frag_hdr {
#ifdef __KERNEL__
#include <linux/config.h>
#include <net/sock.h>
/* sysctls */
@@ -145,7 +146,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6);
#define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field)
#define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field)
#define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field)
extern atomic_t inet6_sock_nr;
int snmp6_register_dev(struct inet6_dev *idev);
int snmp6_unregister_dev(struct inet6_dev *idev);
@@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
extern int ipv6_rcv(struct sk_buff *skb,
struct net_device *dev,
struct packet_type *pt);
struct packet_type *pt,
struct net_device *orig_dev);
/*
* upper-layer output functions
@@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh;
extern int sysctl_ip6frag_time;
extern int sysctl_ip6frag_secret_interval;
extern struct proto_ops inet6_stream_ops;
extern struct proto_ops inet6_dgram_ops;
extern int ip6_mc_source(int add, int omode, struct sock *sk,
struct group_source_req *pgsr);
extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
struct group_filter __user *optval,
int __user *optlen);
#ifdef CONFIG_PROC_FS
extern int ac6_proc_init(void);
extern void ac6_proc_exit(void);
extern int raw6_proc_init(void);
extern void raw6_proc_exit(void);
extern int tcp6_proc_init(void);
extern void tcp6_proc_exit(void);
extern int udp6_proc_init(void);
extern void udp6_proc_exit(void);
extern int ipv6_misc_proc_init(void);
extern void ipv6_misc_proc_exit(void);
extern struct rt6_statistics rt6_stats;
#endif
#ifdef CONFIG_SYSCTL
extern ctl_table ipv6_route_table[];
extern ctl_table ipv6_icmp_table[];
extern void ipv6_sysctl_register(void);
extern void ipv6_sysctl_unregister(void);
#endif
#endif /* __KERNEL__ */
#endif /* _NET_IPV6_H */

View File

@@ -46,7 +46,8 @@ struct llc_sap {
unsigned char f_bit;
int (*rcv_func)(struct sk_buff *skb,
struct net_device *dev,
struct packet_type *pt);
struct packet_type *pt,
struct net_device *orig_dev);
struct llc_addr laddr;
struct list_head node;
struct {
@@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock;
extern unsigned char llc_station_mac_sa[ETH_ALEN];
extern int llc_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt);
struct packet_type *pt, struct net_device *orig_dev);
extern int llc_mac_hdr_init(struct sk_buff *skb,
unsigned char *sa, unsigned char *da);
@@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb));
extern struct llc_sap *llc_sap_open(unsigned char lsap,
int (*rcv)(struct sk_buff *skb,
struct net_device *dev,
struct packet_type *pt));
struct packet_type *pt,
struct net_device *orig_dev));
extern void llc_sap_close(struct llc_sap *sap);
extern struct llc_sap *llc_sap_find(unsigned char sap_value);

View File

@@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey,
return neigh_create(tbl, pkey, dev);
}
#define LOCALLY_ENQUEUED -2
struct neighbour_cb {
unsigned long sched_next;
unsigned int flags;
};
#define LOCALLY_ENQUEUED 0x1
#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb)
#endif
#endif

View File

@@ -4,7 +4,10 @@ extern struct datalink_proto *
register_8022_client(unsigned char type,
int (*func)(struct sk_buff *skb,
struct net_device *dev,
struct packet_type *pt));
struct packet_type *pt,
struct net_device *orig_dev));
extern void unregister_8022_client(struct datalink_proto *proto);
extern struct datalink_proto *make_8023_client(void);
extern void destroy_8023_client(struct datalink_proto *dl);
#endif

View File

@@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
static inline int
tcf_match_indev(struct sk_buff *skb, char *indev)
{
if (0 != indev[0]) {
if (NULL == skb->input_dev)
if (indev[0]) {
if (!skb->input_dev)
return 0;
else if (0 != strcmp(indev, skb->input_dev->name))
if (strcmp(indev, skb->input_dev->name))
return 0;
}

View File

@@ -1,7 +1,7 @@
#ifndef _NET_PSNAP_H
#define _NET_PSNAP_H
extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *));
extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev));
extern void unregister_snap_client(struct datalink_proto *proto);
#endif

View File

@@ -17,10 +17,10 @@
#ifndef _RAW_H
#define _RAW_H
#include <linux/config.h>
extern struct proto raw_prot;
extern void raw_err(struct sock *, struct sk_buff *, u32 info);
extern int raw_rcv(struct sock *, struct sk_buff *);
@@ -37,6 +37,11 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num,
unsigned long raddr, unsigned long laddr,
int dif);
extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
#ifdef CONFIG_PROC_FS
extern int raw_proc_init(void);
extern void raw_proc_exit(void);
#endif
#endif /* _RAW_H */

View File

@@ -7,10 +7,11 @@
extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE];
extern rwlock_t raw_v6_lock;
extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
struct in6_addr *loc_addr, struct in6_addr *rmt_addr);
struct in6_addr *loc_addr, struct in6_addr *rmt_addr,
int dif);
extern int rawv6_rcv(struct sock *sk,
struct sk_buff *skb);

View File

@@ -89,6 +89,7 @@ struct listen_sock {
int qlen_young;
int clock_hand;
u32 hash_rnd;
u32 nr_table_entries;
struct request_sock *syn_table[0];
};
@@ -96,6 +97,7 @@ struct listen_sock {
*
* @rskq_accept_head - FIFO head of established children
* @rskq_accept_tail - FIFO tail of established children
* @rskq_defer_accept - User waits for some data after accept()
* @syn_wait_lock - serializer
*
* %syn_wait_lock is necessary only to avoid proc interface having to grab the main
@@ -111,6 +113,8 @@ struct request_sock_queue {
struct request_sock *rskq_accept_head;
struct request_sock *rskq_accept_tail;
rwlock_t syn_wait_lock;
u8 rskq_defer_accept;
/* 3 bytes hole, try to pack */
struct listen_sock *listen_opt;
};
@@ -129,11 +133,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock
return lopt;
}
static inline void reqsk_queue_destroy(struct request_sock_queue *queue)
static inline void __reqsk_queue_destroy(struct request_sock_queue *queue)
{
kfree(reqsk_queue_yank_listen_sk(queue));
}
extern void reqsk_queue_destroy(struct request_sock_queue *queue);
static inline struct request_sock *
reqsk_queue_yank_acceptq(struct request_sock_queue *queue)
{
@@ -221,17 +227,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue)
return prev_qlen;
}
static inline int reqsk_queue_len(struct request_sock_queue *queue)
static inline int reqsk_queue_len(const struct request_sock_queue *queue)
{
return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0;
}
static inline int reqsk_queue_len_young(struct request_sock_queue *queue)
static inline int reqsk_queue_len_young(const struct request_sock_queue *queue)
{
return queue->listen_opt->qlen_young;
}
static inline int reqsk_queue_is_full(struct request_sock_queue *queue)
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}

View File

@@ -105,10 +105,6 @@ struct rt_cache_stat
unsigned int out_hlist_search;
};
extern struct rt_cache_stat *rt_cache_stat;
#define RT_CACHE_STAT_INC(field) \
(per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++)
extern struct ip_rt_acct *ip_rt_acct;
struct in_device;
@@ -199,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt)
return rt->peer;
}
extern ctl_table ipv4_route_table[];
#endif /* _ROUTE_H */

View File

@@ -47,10 +47,10 @@
#ifndef __sctp_constants_h__
#define __sctp_constants_h__
#include <linux/tcp.h> /* For TCP states used in sctp_sock_state_t */
#include <linux/sctp.h>
#include <linux/ipv6.h> /* For ipv6hdr. */
#include <net/sctp/user.h>
#include <net/tcp_states.h> /* For TCP states used in sctp_sock_state_t */
/* Value used for stream negotiation. */
enum { SCTP_MAX_STREAM = 0xffff };

View File

@@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \
} while(0)
struct sock;
struct proto;
/**
* struct sock_common - minimal network layer representation of sockets
@@ -98,10 +99,11 @@ struct sock;
* @skc_node: main hash linkage for various protocol lookup tables
* @skc_bind_node: bind hash linkage for various protocol lookup tables
* @skc_refcnt: reference count
* @skc_prot: protocol handlers inside a network family
*
* This is the minimal network layer representation of sockets, the header
* for struct sock and struct tcp_tw_bucket.
*/
* for struct sock and struct inet_timewait_sock.
*/
struct sock_common {
unsigned short skc_family;
volatile unsigned char skc_state;
@@ -110,11 +112,12 @@ struct sock_common {
struct hlist_node skc_node;
struct hlist_node skc_bind_node;
atomic_t skc_refcnt;
struct proto *skc_prot;
};
/**
* struct sock - network layer representation of sockets
* @__sk_common: shared layout with tcp_tw_bucket
* @__sk_common: shared layout with inet_timewait_sock
* @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
* @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
* @sk_lock: synchronizer
@@ -136,11 +139,10 @@ struct sock_common {
* @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
* @sk_lingertime: %SO_LINGER l_linger setting
* @sk_hashent: hash entry in several tables (e.g. tcp_ehash)
* @sk_hashent: hash entry in several tables (e.g. inet_hashinfo.ehash)
* @sk_backlog: always used with the per-socket spinlock held
* @sk_callback_lock: used with the callbacks in the end of this struct
* @sk_error_queue: rarely used
* @sk_prot: protocol handlers inside a network family
* @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance)
* @sk_err: last error
* @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
@@ -173,7 +175,7 @@ struct sock_common {
*/
struct sock {
/*
* Now struct tcp_tw_bucket also uses sock_common, so please just
* Now struct inet_timewait_sock also uses sock_common, so please just
* don't add nothing before this first member (__sk_common) --acme
*/
struct sock_common __sk_common;
@@ -184,6 +186,7 @@ struct sock {
#define sk_node __sk_common.skc_node
#define sk_bind_node __sk_common.skc_bind_node
#define sk_refcnt __sk_common.skc_refcnt
#define sk_prot __sk_common.skc_prot
unsigned char sk_shutdown : 2,
sk_no_check : 2,
sk_userlocks : 4;
@@ -218,7 +221,6 @@ struct sock {
struct sk_buff *tail;
} sk_backlog;
struct sk_buff_head sk_error_queue;
struct proto *sk_prot;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
int sk_err,
@@ -253,28 +255,28 @@ struct sock {
/*
* Hashed lists helper routines
*/
static inline struct sock *__sk_head(struct hlist_head *head)
static inline struct sock *__sk_head(const struct hlist_head *head)
{
return hlist_entry(head->first, struct sock, sk_node);
}
static inline struct sock *sk_head(struct hlist_head *head)
static inline struct sock *sk_head(const struct hlist_head *head)
{
return hlist_empty(head) ? NULL : __sk_head(head);
}
static inline struct sock *sk_next(struct sock *sk)
static inline struct sock *sk_next(const struct sock *sk)
{
return sk->sk_node.next ?
hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL;
}
static inline int sk_unhashed(struct sock *sk)
static inline int sk_unhashed(const struct sock *sk)
{
return hlist_unhashed(&sk->sk_node);
}
static inline int sk_hashed(struct sock *sk)
static inline int sk_hashed(const struct sock *sk)
{
return sk->sk_node.pprev != NULL;
}
@@ -554,6 +556,10 @@ struct proto {
kmem_cache_t *slab;
unsigned int obj_size;
kmem_cache_t *twsk_slab;
unsigned int twsk_obj_size;
atomic_t *orphan_count;
struct request_sock_ops *rsk_prot;
struct module *owner;
@@ -561,7 +567,9 @@ struct proto {
char name[32];
struct list_head node;
#ifdef SOCK_REFCNT_DEBUG
atomic_t socks;
#endif
struct {
int inuse;
u8 __pad[SMP_CACHE_BYTES - sizeof(int)];
@@ -571,6 +579,31 @@ struct proto {
extern int proto_register(struct proto *prot, int alloc_slab);
extern void proto_unregister(struct proto *prot);
#ifdef SOCK_REFCNT_DEBUG
static inline void sk_refcnt_debug_inc(struct sock *sk)
{
atomic_inc(&sk->sk_prot->socks);
}
static inline void sk_refcnt_debug_dec(struct sock *sk)
{
atomic_dec(&sk->sk_prot->socks);
printk(KERN_DEBUG "%s socket %p released, %d are still alive\n",
sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
}
static inline void sk_refcnt_debug_release(const struct sock *sk)
{
if (atomic_read(&sk->sk_refcnt) != 1)
printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt));
}
#else /* SOCK_REFCNT_DEBUG */
#define sk_refcnt_debug_inc(sk) do { } while (0)
#define sk_refcnt_debug_dec(sk) do { } while (0)
#define sk_refcnt_debug_release(sk) do { } while (0)
#endif /* SOCK_REFCNT_DEBUG */
/* Called with local bh disabled */
static __inline__ void sock_prot_inc_use(struct proto *prot)
{
@@ -582,6 +615,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot)
prot->stats[smp_processor_id()].inuse--;
}
/* With per-bucket locks this operation is not-atomic, so that
* this version is not worse.
*/
static inline void __sk_prot_rehash(struct sock *sk)
{
sk->sk_prot->unhash(sk);
sk->sk_prot->hash(sk);
}
/* About 10 seconds */
#define SOCK_DESTROY_TIME (10*HZ)
@@ -693,6 +735,8 @@ extern struct sock *sk_alloc(int family,
unsigned int __nocast priority,
struct proto *prot, int zero_it);
extern void sk_free(struct sock *sk);
extern struct sock *sk_clone(const struct sock *sk,
const unsigned int __nocast priority);
extern struct sk_buff *sock_wmalloc(struct sock *sk,
unsigned long size, int force,
@@ -986,6 +1030,16 @@ sk_dst_check(struct sock *sk, u32 cookie)
return dst;
}
static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
__sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_TSO) {
if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
sk->sk_route_caps &= ~NETIF_F_TSO;
}
}
static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
{
sk->sk_wmem_queued += skb->truesize;
@@ -1146,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
int hdr_len;
hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
skb = alloc_skb(size + hdr_len, gfp);
skb = alloc_skb_fclone(size + hdr_len, gfp);
if (skb) {
skb->truesize += mem;
if (sk->sk_forward_alloc >= (int)skb->truesize ||
@@ -1228,16 +1282,19 @@ static inline int sock_intr_errno(long timeo)
static __inline__ void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
struct timeval *stamp = &skb->stamp;
struct timeval stamp;
skb_get_timestamp(skb, &stamp);
if (sock_flag(sk, SOCK_RCVTSTAMP)) {
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
if (stamp->tv_sec == 0)
do_gettimeofday(stamp);
if (stamp.tv_sec == 0)
do_gettimeofday(&stamp);
skb_set_timestamp(skb, &stamp);
put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
stamp);
&stamp);
} else
sk->sk_stamp = *stamp;
sk->sk_stamp = stamp;
}
/**
@@ -1262,11 +1319,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *);
*/
#if 0
#define NETDEBUG(x) do { } while (0)
#define LIMIT_NETDEBUG(x) do {} while(0)
#define NETDEBUG(fmt, args...) do { } while (0)
#define LIMIT_NETDEBUG(fmt, args...) do { } while(0)
#else
#define NETDEBUG(x) do { x; } while (0)
#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0)
#define NETDEBUG(fmt, args...) printk(fmt,##args)
#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0)
#endif
/*
@@ -1313,4 +1370,14 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign
}
#endif
extern void sk_init(void);
#ifdef CONFIG_SYSCTL
extern struct ctl_table core_table[];
extern int sysctl_optmem_max;
#endif
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
#endif /* _SOCK_H */

View File

@@ -21,360 +21,29 @@
#define TCP_DEBUG 1
#define FASTRETRANS_DEBUG 1
/* Cancel timers, when they are not required. */
#undef TCP_CLEAR_TIMERS
#include <linux/config.h>
#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
#include <linux/ipv6.h>
#endif
#include <net/tcp_states.h>
#include <linux/seq_file.h>
/* This is for all connections with a full identity, no wildcards.
* New scheme, half the table is for TIME_WAIT, the other half is
* for the rest. I'll experiment with dynamic table growth later.
*/
struct tcp_ehash_bucket {
rwlock_t lock;
struct hlist_head chain;
} __attribute__((__aligned__(8)));
/* This is for listening sockets, thus all sockets which possess wildcards. */
#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */
/* There are a few simple rules, which allow for local port reuse by
* an application. In essence:
*
* 1) Sockets bound to different interfaces may share a local port.
* Failing that, goto test 2.
* 2) If all sockets have sk->sk_reuse set, and none of them are in
* TCP_LISTEN state, the port may be shared.
* Failing that, goto test 3.
* 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local
* address, and none of them are the same, the port may be
* shared.
* Failing this, the port cannot be shared.
*
* The interesting point, is test #2. This is what an FTP server does
* all day. To optimize this case we use a specific flag bit defined
* below. As we add sockets to a bind bucket list, we perform a
* check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN))
* As long as all sockets added to a bind bucket pass this test,
* the flag bit will be set.
* The resulting situation is that tcp_v[46]_verify_bind() can just check
* for this flag bit, if it is set and the socket trying to bind has
* sk->sk_reuse set, we don't even have to walk the owners list at all,
* we return that it is ok to bind this socket to the requested local port.
*
* Sounds like a lot of work, but it is worth it. In a more naive
* implementation (ie. current FreeBSD etc.) the entire list of ports
* must be walked for each data port opened by an ftp server. Needless
* to say, this does not scale at all. With a couple thousand FTP
* users logged onto your box, isn't it nice to know that new data
* ports are created in O(1) time? I thought so. ;-) -DaveM
*/
struct tcp_bind_bucket {
unsigned short port;
signed short fastreuse;
struct hlist_node node;
struct hlist_head owners;
};
#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node)
struct tcp_bind_hashbucket {
spinlock_t lock;
struct hlist_head chain;
};
static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head)
{
return hlist_entry(head->chain.first, struct tcp_bind_bucket, node);
}
static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head)
{
return hlist_empty(&head->chain) ? NULL : __tb_head(head);
}
extern struct tcp_hashinfo {
/* This is for sockets with full identity only. Sockets here will
* always be without wildcards and will have the following invariant:
*
* TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE
*
* First half of the table is for sockets not in TIME_WAIT, second half
* is for TIME_WAIT sockets only.
*/
struct tcp_ehash_bucket *__tcp_ehash;
/* Ok, let's try this, I give up, we do need a local binding
* TCP hash as well as the others for fast bind/connect.
*/
struct tcp_bind_hashbucket *__tcp_bhash;
int __tcp_bhash_size;
int __tcp_ehash_size;
/* All sockets in TCP_LISTEN state will be in here. This is the only
* table where wildcard'd TCP sockets can exist. Hash function here
* is just local port number.
*/
struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE];
/* All the above members are written once at bootup and
* never written again _or_ are predominantly read-access.
*
* Now align to a new cache line as all the following members
* are often dirty.
*/
rwlock_t __tcp_lhash_lock ____cacheline_aligned;
atomic_t __tcp_lhash_users;
wait_queue_head_t __tcp_lhash_wait;
spinlock_t __tcp_portalloc_lock;
} tcp_hashinfo;
#define tcp_ehash (tcp_hashinfo.__tcp_ehash)
#define tcp_bhash (tcp_hashinfo.__tcp_bhash)
#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size)
#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size)
#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash)
#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock)
#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users)
#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait)
#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock)
extern kmem_cache_t *tcp_bucket_cachep;
extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head,
unsigned short snum);
extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb);
extern void tcp_bucket_unlock(struct sock *sk);
extern int tcp_port_rover;
/* These are AF independent. */
static __inline__ int tcp_bhashfn(__u16 lport)
{
return (lport & (tcp_bhash_size - 1));
}
extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb,
unsigned short snum);
#if (BITS_PER_LONG == 64)
#define TCP_ADDRCMP_ALIGN_BYTES 8
#else
#define TCP_ADDRCMP_ALIGN_BYTES 4
#endif
/* This is a TIME_WAIT bucket. It works around the memory consumption
* problems of sockets in such a state on heavily loaded servers, but
* without violating the protocol specification.
*/
struct tcp_tw_bucket {
/*
* Now struct sock also uses sock_common, so please just
* don't add nothing before this first member (__tw_common) --acme
*/
struct sock_common __tw_common;
#define tw_family __tw_common.skc_family
#define tw_state __tw_common.skc_state
#define tw_reuse __tw_common.skc_reuse
#define tw_bound_dev_if __tw_common.skc_bound_dev_if
#define tw_node __tw_common.skc_node
#define tw_bind_node __tw_common.skc_bind_node
#define tw_refcnt __tw_common.skc_refcnt
volatile unsigned char tw_substate;
unsigned char tw_rcv_wscale;
__u16 tw_sport;
/* Socket demultiplex comparisons on incoming packets. */
/* these five are in inet_sock */
__u32 tw_daddr
__attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES)));
__u32 tw_rcv_saddr;
__u16 tw_dport;
__u16 tw_num;
/* And these are ours. */
int tw_hashent;
int tw_timeout;
__u32 tw_rcv_nxt;
__u32 tw_snd_nxt;
__u32 tw_rcv_wnd;
__u32 tw_ts_recent;
long tw_ts_recent_stamp;
unsigned long tw_ttd;
struct tcp_bind_bucket *tw_tb;
struct hlist_node tw_death_node;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr tw_v6_daddr;
struct in6_addr tw_v6_rcv_saddr;
int tw_v6_ipv6only;
#endif
};
static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
struct hlist_head *list)
{
hlist_add_head(&tw->tw_node, list);
}
static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw,
struct hlist_head *list)
{
hlist_add_head(&tw->tw_bind_node, list);
}
static inline int tw_dead_hashed(struct tcp_tw_bucket *tw)
{
return tw->tw_death_node.pprev != NULL;
}
static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw)
{
tw->tw_death_node.pprev = NULL;
}
static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw)
{
__hlist_del(&tw->tw_death_node);
tw_dead_node_init(tw);
}
static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw)
{
if (tw_dead_hashed(tw)) {
__tw_del_dead_node(tw);
return 1;
}
return 0;
}
#define tw_for_each(tw, node, head) \
hlist_for_each_entry(tw, node, head, tw_node)
#define tw_for_each_inmate(tw, node, jail) \
hlist_for_each_entry(tw, node, jail, tw_death_node)
#define tw_for_each_inmate_safe(tw, node, safe, jail) \
hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node)
#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk))
static inline u32 tcp_v4_rcv_saddr(const struct sock *sk)
{
return likely(sk->sk_state != TCP_TIME_WAIT) ?
inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr;
}
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
{
return likely(sk->sk_state != TCP_TIME_WAIT) ?
&inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr;
}
static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
{
return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
}
#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only)
static inline int tcp_v6_ipv6only(const struct sock *sk)
{
return likely(sk->sk_state != TCP_TIME_WAIT) ?
ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk);
}
#else
# define __tcp_v6_rcv_saddr(__sk) NULL
# define tcp_v6_rcv_saddr(__sk) NULL
# define tcptw_sk_ipv6only(__sk) 0
# define tcp_v6_ipv6only(__sk) 0
#endif
extern kmem_cache_t *tcp_timewait_cachep;
static inline void tcp_tw_put(struct tcp_tw_bucket *tw)
{
if (atomic_dec_and_test(&tw->tw_refcnt)) {
#ifdef INET_REFCNT_DEBUG
printk(KERN_DEBUG "tw_bucket %p released\n", tw);
#endif
kmem_cache_free(tcp_timewait_cachep, tw);
}
}
extern struct inet_hashinfo tcp_hashinfo;
extern atomic_t tcp_orphan_count;
extern int tcp_tw_count;
extern void tcp_time_wait(struct sock *sk, int state, int timeo);
extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
/* Socket demux engine toys. */
#ifdef __BIG_ENDIAN
#define TCP_COMBINED_PORTS(__sport, __dport) \
(((__u32)(__sport)<<16) | (__u32)(__dport))
#else /* __LITTLE_ENDIAN */
#define TCP_COMBINED_PORTS(__sport, __dport) \
(((__u32)(__dport)<<16) | (__u32)(__sport))
#endif
#if (BITS_PER_LONG == 64)
#ifdef __BIG_ENDIAN
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
__u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr));
#else /* __LITTLE_ENDIAN */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \
__u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr));
#endif /* __BIG_ENDIAN */
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
(((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \
((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
(((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \
((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#else /* 32-bit arch */
#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr)
#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
((inet_sk(__sk)->daddr == (__saddr)) && \
(inet_sk(__sk)->rcv_saddr == (__daddr)) && \
((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\
((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \
(tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \
((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \
(((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \
((__sk)->sk_family == AF_INET6) && \
ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \
ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
/* These can have wildcards, don't try too hard. */
static __inline__ int tcp_lhashfn(unsigned short num)
{
return num & (TCP_LHTABLE_SIZE - 1);
}
static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
{
return tcp_lhashfn(inet_sk(sk)->num);
}
#define MAX_TCP_HEADER (128 + MAX_HEADER)
@@ -478,33 +147,6 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
* timestamps. It must be less than
* minimal timewait lifetime.
*/
#define TCP_TW_RECYCLE_SLOTS_LOG 5
#define TCP_TW_RECYCLE_SLOTS (1<<TCP_TW_RECYCLE_SLOTS_LOG)
/* If time > 4sec, it is "slow" path, no recycling is required,
so that we select tick to get range about 4 seconds.
*/
#if HZ <= 16 || HZ > 4096
# error Unsupported: HZ <= 16 or HZ > 4096
#elif HZ <= 32
# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 64
# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 128
# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 256
# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 512
# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 1024
# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG)
#elif HZ <= 2048
# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG)
#else
# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG)
#endif
/*
* TCP option
*/
@@ -534,22 +176,18 @@ static __inline__ int tcp_sk_listen_hashfn(struct sock *sk)
#define TCPOLEN_SACK_BASE_ALIGNED 4
#define TCPOLEN_SACK_PERBLOCK 8
#define TCP_TIME_RETRANS 1 /* Retransmit timer */
#define TCP_TIME_DACK 2 /* Delayed ack timer */
#define TCP_TIME_PROBE0 3 /* Zero window probe timer */
#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */
/* Flags in tp->nonagle */
#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
#define TCP_NAGLE_CORK 2 /* Socket is corked */
#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fin_timeout;
extern int sysctl_tcp_tw_recycle;
extern int sysctl_tcp_keepalive_time;
extern int sysctl_tcp_keepalive_probes;
extern int sysctl_tcp_keepalive_intvl;
@@ -564,7 +202,6 @@ extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_max_tw_buckets;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_ecn;
@@ -585,12 +222,6 @@ extern atomic_t tcp_memory_allocated;
extern atomic_t tcp_sockets_allocated;
extern int tcp_memory_pressure;
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
#define TCP_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define TCP_INET_FAMILY(fam) 1
#endif
/*
* Pointers to address related TCP functions
* (i.e. things that depend on the address family)
@@ -671,9 +302,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics);
#define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val)
#define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val)
extern void tcp_put_port(struct sock *sk);
extern void tcp_inherit_port(struct sock *sk, struct sock *child);
extern void tcp_v4_err(struct sk_buff *skb, u32);
extern void tcp_shutdown (struct sock *sk, int how);
@@ -682,7 +310,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb);
extern int tcp_v4_remember_stamp(struct sock *sk);
extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw);
extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t size);
@@ -704,42 +332,22 @@ extern int tcp_rcv_established(struct sock *sk,
extern void tcp_rcv_space_adjust(struct sock *sk);
enum tcp_ack_state_t
static inline void tcp_dec_quickack_mode(struct sock *sk,
const unsigned int pkts)
{
TCP_ACK_SCHED = 1,
TCP_ACK_TIMER = 2,
TCP_ACK_PUSHED= 4
};
static inline void tcp_schedule_ack(struct tcp_sock *tp)
{
tp->ack.pending |= TCP_ACK_SCHED;
}
static inline int tcp_ack_scheduled(struct tcp_sock *tp)
{
return tp->ack.pending&TCP_ACK_SCHED;
}
static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
{
if (tp->ack.quick) {
if (pkts >= tp->ack.quick) {
tp->ack.quick = 0;
struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ack.quick) {
if (pkts >= icsk->icsk_ack.quick) {
icsk->icsk_ack.quick = 0;
/* Leaving quickack mode we deflate ATO. */
tp->ack.ato = TCP_ATO_MIN;
icsk->icsk_ack.ato = TCP_ATO_MIN;
} else
tp->ack.quick -= pkts;
icsk->icsk_ack.quick -= pkts;
}
}
extern void tcp_enter_quickack_mode(struct tcp_sock *tp);
static __inline__ void tcp_delack_init(struct tcp_sock *tp)
{
memset(&tp->ack, 0, sizeof(tp->ack));
}
extern void tcp_enter_quickack_mode(struct sock *sk);
static inline void tcp_clear_options(struct tcp_options_received *rx_opt)
{
@@ -755,10 +363,9 @@ enum tcp_tw_status
};
extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw,
extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw,
struct sk_buff *skb,
struct tcphdr *th,
unsigned len);
const struct tcphdr *th);
extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb,
struct request_sock *req,
@@ -773,7 +380,6 @@ extern void tcp_update_metrics(struct sock *sk);
extern void tcp_close(struct sock *sk,
long timeout);
extern struct sock * tcp_accept(struct sock *sk, int flags, int *err);
extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait);
extern int tcp_getsockopt(struct sock *sk, int level,
@@ -789,8 +395,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk,
size_t len, int nonblock,
int flags, int *addr_len);
extern int tcp_listen_start(struct sock *sk);
extern void tcp_parse_options(struct sk_buff *skb,
struct tcp_options_received *opt_rx,
int estab);
@@ -799,11 +403,6 @@ extern void tcp_parse_options(struct sk_buff *skb,
* TCP v4 functions exported for the inet6 API
*/
extern int tcp_v4_rebuild_header(struct sock *sk);
extern int tcp_v4_build_header(struct sock *sk,
struct sk_buff *skb);
extern void tcp_v4_send_check(struct sock *sk,
struct tcphdr *th, int len,
struct sk_buff *skb);
@@ -872,18 +471,15 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
/* tcp_timer.c */
extern void tcp_init_xmit_timers(struct sock *);
extern void tcp_clear_xmit_timers(struct sock *);
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
inet_csk_clear_xmit_timers(sk);
}
extern void tcp_delete_keepalive_timer(struct sock *);
extern void tcp_reset_keepalive_timer(struct sock *, unsigned long);
extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
extern unsigned int tcp_current_mss(struct sock *sk, int large);
#ifdef TCP_DEBUG
extern const char tcp_timer_bug_msg[];
#endif
/* tcp_diag.c */
/* tcp.c */
extern void tcp_get_info(struct sock *, struct tcp_info *);
/* Read 'sendfile()'-style from a TCP socket */
@@ -892,72 +488,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_read_actor_t recv_actor);
static inline void tcp_clear_xmit_timer(struct sock *sk, int what)
{
struct tcp_sock *tp = tcp_sk(sk);
switch (what) {
case TCP_TIME_RETRANS:
case TCP_TIME_PROBE0:
tp->pending = 0;
#ifdef TCP_CLEAR_TIMERS
sk_stop_timer(sk, &tp->retransmit_timer);
#endif
break;
case TCP_TIME_DACK:
tp->ack.blocked = 0;
tp->ack.pending = 0;
#ifdef TCP_CLEAR_TIMERS
sk_stop_timer(sk, &tp->delack_timer);
#endif
break;
default:
#ifdef TCP_DEBUG
printk(tcp_timer_bug_msg);
#endif
return;
};
}
/*
* Reset the retransmission timer
*/
static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when)
{
struct tcp_sock *tp = tcp_sk(sk);
if (when > TCP_RTO_MAX) {
#ifdef TCP_DEBUG
printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr());
#endif
when = TCP_RTO_MAX;
}
switch (what) {
case TCP_TIME_RETRANS:
case TCP_TIME_PROBE0:
tp->pending = what;
tp->timeout = jiffies+when;
sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout);
break;
case TCP_TIME_DACK:
tp->ack.pending |= TCP_ACK_TIMER;
tp->ack.timeout = jiffies+when;
sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout);
break;
default:
#ifdef TCP_DEBUG
printk(tcp_timer_bug_msg);
#endif
return;
};
}
/* Initialize RCV_MSS value.
* RCV_MSS is an our guess about MSS used by the peer.
* We haven't any direct information about the MSS.
@@ -975,7 +505,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk)
hint = min(hint, TCP_MIN_RCVMSS);
hint = max(hint, TCP_MIN_MSS);
tp->ack.rcv_mss = hint;
inet_csk(sk)->icsk_ack.rcv_mss = hint;
}
static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
@@ -1110,7 +640,8 @@ static inline void tcp_packets_out_inc(struct sock *sk,
tp->packets_out += tcp_skb_pcount(skb);
if (!orig)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}
static inline void tcp_packets_out_dec(struct tcp_sock *tp,
@@ -1138,29 +669,29 @@ struct tcp_congestion_ops {
struct list_head list;
/* initialize private data (optional) */
void (*init)(struct tcp_sock *tp);
void (*init)(struct sock *sk);
/* cleanup private data (optional) */
void (*release)(struct tcp_sock *tp);
void (*release)(struct sock *sk);
/* return slow start threshold (required) */
u32 (*ssthresh)(struct tcp_sock *tp);
u32 (*ssthresh)(struct sock *sk);
/* lower bound for congestion window (optional) */
u32 (*min_cwnd)(struct tcp_sock *tp);
u32 (*min_cwnd)(struct sock *sk);
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct tcp_sock *tp, u32 ack,
void (*cong_avoid)(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int good_ack);
/* round trip time sample per acked packet (optional) */
void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt);
void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */
void (*set_state)(struct tcp_sock *tp, u8 new_state);
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev);
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
/* new value of cwnd after loss (optional) */
u32 (*undo_cwnd)(struct tcp_sock *tp);
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked);
/* get info for tcp_diag (optional) */
void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb);
void (*pkts_acked)(struct sock *sk, u32 num_acked);
/* get info for inet_diag (optional) */
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
char name[TCP_CA_NAME_MAX];
struct module *owner;
@@ -1169,30 +700,34 @@ struct tcp_congestion_ops {
extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
extern void tcp_init_congestion_control(struct tcp_sock *tp);
extern void tcp_cleanup_congestion_control(struct tcp_sock *tp);
extern void tcp_init_congestion_control(struct sock *sk);
extern void tcp_cleanup_congestion_control(struct sock *sk);
extern int tcp_set_default_congestion_control(const char *name);
extern void tcp_get_default_congestion_control(char *name);
extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name);
extern int tcp_set_congestion_control(struct sock *sk, const char *name);
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct tcp_sock *tp);
extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack,
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int flag);
extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp);
extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state)
static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
{
if (tp->ca_ops->set_state)
tp->ca_ops->set_state(tp, ca_state);
tp->ca_state = ca_state;
struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->set_state)
icsk->icsk_ca_ops->set_state(sk, ca_state);
icsk->icsk_ca_state = ca_state;
}
static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event)
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
if (tp->ca_ops->cwnd_event)
tp->ca_ops->cwnd_event(tp, event);
const struct inet_connection_sock *icsk = inet_csk(sk);
if (icsk->icsk_ca_ops->cwnd_event)
icsk->icsk_ca_ops->cwnd_event(sk, event);
}
/* This determines how many packets are "in the network" to the best
@@ -1218,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
* The exception is rate halving phase, when cwnd is decreasing towards
* ssthresh.
*/
static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
static inline __u32 tcp_current_ssthresh(const struct sock *sk)
{
if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery))
const struct tcp_sock *tp = tcp_sk(sk);
if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
return tp->snd_ssthresh;
else
return max(tp->snd_ssthresh,
@@ -1237,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
}
/* Set slow start threshold and cwnd not falling to slow start */
static inline void __tcp_enter_cwr(struct tcp_sock *tp)
static inline void __tcp_enter_cwr(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
tp->undo_marker = 0;
tp->snd_ssthresh = tp->ca_ops->ssthresh(tp);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp) + 1U);
tp->snd_cwnd_cnt = 0;
@@ -1249,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
TCP_ECN_queue_cwr(tp);
}
static inline void tcp_enter_cwr(struct tcp_sock *tp)
static inline void tcp_enter_cwr(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
tp->prior_ssthresh = 0;
if (tp->ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(tp);
tcp_set_ca_state(tp, TCP_CA_CWR);
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
__tcp_enter_cwr(sk);
tcp_set_ca_state(sk, TCP_CA_CWR);
}
}
@@ -1277,8 +818,10 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
{
if (!tp->packets_out && !tp->pending)
tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
const struct inet_connection_sock *icsk = inet_csk(sk);
if (!tp->packets_out && !icsk->icsk_pending)
inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
icsk->icsk_rto, TCP_RTO_MAX);
}
static __inline__ void tcp_push_pending_frames(struct sock *sk,
@@ -1297,9 +840,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
tp->snd_wl1 = seq;
}
extern void tcp_destroy_sock(struct sock *sk);
/*
* Calculate(/check) TCP checksum
*/
@@ -1359,8 +899,10 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
tp->ucopy.memory = 0;
} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
wake_up_interruptible(sk->sk_sleep);
if (!tcp_ack_scheduled(tp))
tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4);
if (!inet_csk_ack_scheduled(sk))
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
(3 * TCP_RTO_MIN) / 4,
TCP_RTO_MAX);
}
return 1;
}
@@ -1393,9 +935,9 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
TCP_INC_STATS(TCP_MIB_ESTABRESETS);
sk->sk_prot->unhash(sk);
if (tcp_sk(sk)->bind_hash &&
if (inet_csk(sk)->icsk_bind_hash &&
!(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
tcp_put_port(sk);
inet_put_port(&tcp_hashinfo, sk);
/* fall through */
default:
if (oldstate==TCP_ESTABLISHED)
@@ -1422,7 +964,7 @@ static __inline__ void tcp_done(struct sock *sk)
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
else
tcp_destroy_sock(sk);
inet_csk_destroy_sock(sk);
}
static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt)
@@ -1524,54 +1066,6 @@ static inline int tcp_full_space(const struct sock *sk)
return tcp_win_from_space(sk->sk_rcvbuf);
}
static inline void tcp_acceptq_queue(struct sock *sk, struct request_sock *req,
struct sock *child)
{
reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child);
}
static inline void
tcp_synq_removed(struct sock *sk, struct request_sock *req)
{
if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0)
tcp_delete_keepalive_timer(sk);
}
static inline void tcp_synq_added(struct sock *sk)
{
if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0)
tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT);
}
static inline int tcp_synq_len(struct sock *sk)
{
return reqsk_queue_len(&tcp_sk(sk)->accept_queue);
}
static inline int tcp_synq_young(struct sock *sk)
{
return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue);
}
static inline int tcp_synq_is_full(struct sock *sk)
{
return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue);
}
static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req,
struct request_sock **prev)
{
reqsk_queue_unlink(&tp->accept_queue, req, prev);
}
static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req,
struct request_sock **prev)
{
tcp_synq_unlink(tcp_sk(sk), req, prev);
tcp_synq_removed(sk, req);
reqsk_free(req);
}
static __inline__ void tcp_openreq_init(struct request_sock *req,
struct tcp_options_received *rx_opt,
struct sk_buff *skb)
@@ -1593,27 +1087,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req,
extern void tcp_enter_memory_pressure(void);
extern void tcp_listen_wlock(void);
/* - We may sleep inside this lock.
* - If sleeping is not required (or called from BH),
* use plain read_(un)lock(&tcp_lhash_lock).
*/
static inline void tcp_listen_lock(void)
{
/* read_lock synchronizes to candidates to writers */
read_lock(&tcp_lhash_lock);
atomic_inc(&tcp_lhash_users);
read_unlock(&tcp_lhash_lock);
}
static inline void tcp_listen_unlock(void)
{
if (atomic_dec_and_test(&tcp_lhash_users))
wake_up(&tcp_lhash_wait);
}
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
@@ -1624,12 +1097,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
}
static inline int tcp_fin_time(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout;
int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (tp->rto<<2) - (tp->rto>>1))
fin_timeout = (tp->rto<<2) - (tp->rto>>1);
if (fin_timeout < (rto << 2) - (rto >> 1))
fin_timeout = (rto << 2) - (rto >> 1);
return fin_timeout;
}
@@ -1658,15 +1132,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int
return 1;
}
static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst)
{
sk->sk_route_caps = dst->dev->features;
if (sk->sk_route_caps & NETIF_F_TSO) {
if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len)
sk->sk_route_caps &= ~NETIF_F_TSO;
}
}
#define TCP_CHECK_TIMER(sk) do { } while (0)
static inline int tcp_use_frto(const struct sock *sk)
@@ -1718,4 +1183,16 @@ struct tcp_iter_state {
extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo);
extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo);
extern struct request_sock_ops tcp_request_sock_ops;
extern int tcp_v4_destroy_sock(struct sock *sk);
#ifdef CONFIG_PROC_FS
extern int tcp4_proc_init(void);
extern void tcp4_proc_exit(void);
#endif
extern void tcp_v4_init(struct net_proto_family *ops);
extern void tcp_init(void);
#endif /* _TCP_H */

View File

@@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
* it is surely retransmit. It is not in ECN RFC,
* but Linux follows this rule. */
else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
tcp_enter_quickack_mode(tp);
tcp_enter_quickack_mode((struct sock *)tp);
}
}

34
include/net/tcp_states.h Normal file
View File

@@ -0,0 +1,34 @@
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for the TCP protocol sk_state field.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#ifndef _LINUX_TCP_STATES_H
#define _LINUX_TCP_STATES_H
enum {
TCP_ESTABLISHED = 1,
TCP_SYN_SENT,
TCP_SYN_RECV,
TCP_FIN_WAIT1,
TCP_FIN_WAIT2,
TCP_TIME_WAIT,
TCP_CLOSE,
TCP_CLOSE_WAIT,
TCP_LAST_ACK,
TCP_LISTEN,
TCP_CLOSING, /* Now a valid state */
TCP_MAX_STATES /* Leave at the end! */
};
#define TCP_STATE_MASK 0xF
#endif /* _LINUX_TCP_STATES_H */

View File

@@ -94,6 +94,11 @@ struct udp_iter_state {
struct seq_operations seq_ops;
};
#ifdef CONFIG_PROC_FS
extern int udp_proc_register(struct udp_seq_afinfo *afinfo);
extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo);
extern int udp4_proc_init(void);
extern void udp4_proc_exit(void);
#endif
#endif /* _UDP_H */

View File

@@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *);
/* x25_dev.c */
extern void x25_send_frame(struct sk_buff *, struct x25_neigh *);
extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *);
extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
extern void x25_establish_link(struct x25_neigh *);
extern void x25_terminate_link(struct x25_neigh *);

View File

@@ -8,7 +8,6 @@
static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
skb->mac.raw = skb->data;
skb->input_dev = skb->dev = dev;
skb->pkt_type = PACKET_HOST;
return htons(ETH_P_X25);

View File

@@ -818,7 +818,6 @@ extern void xfrm6_init(void);
extern void xfrm6_fini(void);
extern void xfrm_state_init(void);
extern void xfrm4_state_init(void);
extern void xfrm4_state_fini(void);
extern void xfrm6_state_init(void);
extern void xfrm6_state_fini(void);