This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp"
as it accidentally contained the wrong set of patches. These will be submitted separately. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
This commit is contained in:
@@ -1,8 +1,10 @@
|
||||
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
|
||||
depends on EXPERIMENTAL
|
||||
|
||||
config IP_DCCP_CCID2
|
||||
tristate "CCID2 (TCP-Like)"
|
||||
tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
|
||||
def_tristate IP_DCCP
|
||||
select IP_DCCP_ACKVEC
|
||||
---help---
|
||||
CCID 2, TCP-like Congestion Control, denotes Additive Increase,
|
||||
Multiplicative Decrease (AIMD) congestion control with behavior
|
||||
@@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG
|
||||
If in doubt, say N.
|
||||
|
||||
config IP_DCCP_CCID3
|
||||
tristate "CCID3 (TCP-Friendly)"
|
||||
tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
|
||||
def_tristate IP_DCCP
|
||||
select IP_DCCP_TFRC_LIB
|
||||
---help---
|
||||
@@ -62,9 +64,9 @@ config IP_DCCP_CCID3
|
||||
|
||||
If in doubt, say M.
|
||||
|
||||
if IP_DCCP_CCID3
|
||||
config IP_DCCP_CCID3_DEBUG
|
||||
bool "CCID3 debugging messages"
|
||||
depends on IP_DCCP_CCID3
|
||||
---help---
|
||||
Enable CCID3-specific debugging messages.
|
||||
|
||||
@@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG
|
||||
|
||||
If in doubt, say N.
|
||||
|
||||
choice
|
||||
prompt "Select method for measuring the packet size s"
|
||||
default IP_DCCP_CCID3_MEASURE_S_AS_MPS
|
||||
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_MPS
|
||||
bool "Always use MPS in place of s"
|
||||
---help---
|
||||
This use is recommended as it is consistent with the initialisation
|
||||
of X and suggested when s varies (rfc3448bis, (1) in section 4.1).
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_AVG
|
||||
bool "Use moving average"
|
||||
---help---
|
||||
An alternative way of tracking s, also supported by rfc3448bis.
|
||||
This used to be the default for CCID-3 in previous kernels.
|
||||
config IP_DCCP_CCID3_MEASURE_S_AS_MAX
|
||||
bool "Track the maximum payload length"
|
||||
---help---
|
||||
An experimental method based on tracking the maximum packet size.
|
||||
endchoice
|
||||
|
||||
config IP_DCCP_CCID3_RTO
|
||||
int "Use higher bound for nofeedback timer"
|
||||
default 100
|
||||
depends on IP_DCCP_CCID3 && EXPERIMENTAL
|
||||
---help---
|
||||
Use higher lower bound for nofeedback timer expiration.
|
||||
|
||||
@@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO
|
||||
The purpose of the nofeedback timer is to slow DCCP down when there
|
||||
is serious network congestion: experimenting with larger values should
|
||||
therefore not be performed on WANs.
|
||||
endif # IP_DCCP_CCID3
|
||||
|
||||
config IP_DCCP_TFRC_LIB
|
||||
tristate
|
||||
|
@@ -25,7 +25,7 @@
|
||||
/*
|
||||
* This implementation should follow RFC 4341
|
||||
*/
|
||||
#include "../feat.h"
|
||||
|
||||
#include "../ccid.h"
|
||||
#include "../dccp.h"
|
||||
#include "ccid2.h"
|
||||
@@ -34,8 +34,51 @@
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
static int ccid2_debug;
|
||||
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
|
||||
|
||||
/*
 * ccid2_hc_tx_check_sanity - debug-only consistency check of the sequence ring
 * @hctx: CCID2 sender state to verify (read-only)
 *
 * Walks the ccid2_seq ring backwards (via ccid2s_prev) starting at the head
 * ccid2hctx_seqh.  The first pass covers the entries between head and tail
 * (ccid2hctx_seqt), counting them and counting those not yet marked acked;
 * the second pass continues backwards until the head is reached again so
 * that the total ring length is known.  Any violated invariant triggers
 * BUG_ON().
 */
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
|
||||
{
|
||||
/* number of ring entries visited so far */
int len = 0;
|
||||
/* number of visited entries whose ccid2s_acked flag is clear */
int pipe = 0;
|
||||
struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
|
||||
|
||||
/* there is data in the chain */
|
||||
if (seqp != hctx->ccid2hctx_seqt) {
|
||||
/* step back from the head to the entry preceding it and account for it */
seqp = seqp->ccid2s_prev;
|
||||
len++;
|
||||
if (!seqp->ccid2s_acked)
|
||||
pipe++;
|
||||
|
||||
/* keep stepping back until the tail entry has been reached */
while (seqp != hctx->ccid2hctx_seqt) {
|
||||
struct ccid2_seq *prev = seqp->ccid2s_prev;
|
||||
|
||||
len++;
|
||||
if (!prev->ccid2s_acked)
|
||||
pipe++;
|
||||
|
||||
/* packets are sent sequentially */
|
||||
/*
 * NOTE(review): presumably both checks assert that 'prev' was sent
 * before 'seqp' (by sequence number and by send time) - confirm
 * against the argument order of dccp_delta_seqno().
 */
BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
|
||||
prev->ccid2s_seq ) >= 0);
|
||||
BUG_ON(time_before(seqp->ccid2s_sent,
|
||||
prev->ccid2s_sent));
|
||||
|
||||
seqp = prev;
|
||||
}
|
||||
}
|
||||
|
||||
/* the count of un-acked entries must equal the stored pipe value */
BUG_ON(pipe != hctx->ccid2hctx_pipe);
|
||||
ccid2_pr_debug("len of chain=%d\n", len);
|
||||
|
||||
/* continue backwards from where the first pass stopped until the head is hit */
do {
|
||||
seqp = seqp->ccid2s_prev;
|
||||
len++;
|
||||
} while (seqp != hctx->ccid2hctx_seqh);
|
||||
|
||||
ccid2_pr_debug("total len=%d\n", len);
|
||||
/* the ring must hold exactly CCID2_SEQBUF_LEN entries per allocated buffer */
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
|
||||
}
|
||||
#else
|
||||
#define ccid2_pr_debug(format, a...)
|
||||
#define ccid2_hc_tx_check_sanity(hctx)
|
||||
#endif
|
||||
|
||||
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
@@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
int i;
|
||||
|
||||
/* check if we have space to preserve the pointer to the buffer */
|
||||
if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *))
|
||||
if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
|
||||
sizeof(struct ccid2_seq*)))
|
||||
return -ENOMEM;
|
||||
|
||||
/* allocate buffer and initialize linked list */
|
||||
@@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
|
||||
seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
|
||||
/* This is the first allocation. Initiate the head and tail. */
|
||||
if (hctx->seqbufc == 0)
|
||||
hctx->seqh = hctx->seqt = seqp;
|
||||
if (hctx->ccid2hctx_seqbufc == 0)
|
||||
hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
|
||||
else {
|
||||
/* link the existing list with the one we just created */
|
||||
hctx->seqh->ccid2s_next = seqp;
|
||||
seqp->ccid2s_prev = hctx->seqh;
|
||||
hctx->ccid2hctx_seqh->ccid2s_next = seqp;
|
||||
seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
|
||||
|
||||
hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt;
|
||||
hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
|
||||
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
|
||||
}
|
||||
|
||||
/* store the original pointer to the buffer so we can free it */
|
||||
hctx->seqbuf[hctx->seqbufc] = seqp;
|
||||
hctx->seqbufc++;
|
||||
hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
|
||||
hctx->ccid2hctx_seqbufc++;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
|
||||
return CCID_PACKET_WILL_DEQUEUE_LATER;
|
||||
return CCID_PACKET_SEND_AT_ONCE;
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
|
||||
return 0;
|
||||
|
||||
return 1; /* XXX CCID should dequeue when ready instead of polling */
|
||||
}
|
||||
|
||||
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2);
|
||||
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
|
||||
|
||||
/*
|
||||
* Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
|
||||
@@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
|
||||
val = max_ratio;
|
||||
}
|
||||
if (val > DCCPF_ACK_RATIO_MAX)
|
||||
val = DCCPF_ACK_RATIO_MAX;
|
||||
if (val > 0xFFFF) /* RFC 4340, 11.3 */
|
||||
val = 0xFFFF;
|
||||
|
||||
if (val == dp->dccps_l_ack_ratio)
|
||||
return;
|
||||
@@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
|
||||
dp->dccps_l_ack_ratio = val;
|
||||
}
|
||||
|
||||
/*
 * ccid2_change_srtt - store a new SRTT value in the sender state
 * @hctx: CCID2 sender state to update
 * @val:  value written to ccid2hctx_srtt
 *
 * Emits a debug message with the new value before storing it.
 */
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
|
||||
{
|
||||
ccid2_pr_debug("change SRTT to %ld\n", val);
|
||||
hctx->ccid2hctx_srtt = val;
|
||||
}
|
||||
|
||||
static void ccid2_start_rto_timer(struct sock *sk);
|
||||
|
||||
static void ccid2_hc_tx_rto_expire(unsigned long data)
|
||||
{
|
||||
struct sock *sk = (struct sock *)data;
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
|
||||
long s;
|
||||
|
||||
bh_lock_sock(sk);
|
||||
if (sock_owned_by_user(sk)) {
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5);
|
||||
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
|
||||
jiffies + HZ / 5);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ccid2_pr_debug("RTO_EXPIRE\n");
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
|
||||
/* back-off timer */
|
||||
hctx->rto <<= 1;
|
||||
if (hctx->rto > DCCP_RTO_MAX)
|
||||
hctx->rto = DCCP_RTO_MAX;
|
||||
hctx->ccid2hctx_rto <<= 1;
|
||||
|
||||
s = hctx->ccid2hctx_rto / HZ;
|
||||
if (s > 60)
|
||||
hctx->ccid2hctx_rto = 60 * HZ;
|
||||
|
||||
ccid2_start_rto_timer(sk);
|
||||
|
||||
/* adjust pipe, cwnd etc */
|
||||
hctx->ssthresh = hctx->cwnd / 2;
|
||||
if (hctx->ssthresh < 2)
|
||||
hctx->ssthresh = 2;
|
||||
hctx->cwnd = 1;
|
||||
hctx->pipe = 0;
|
||||
hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
|
||||
if (hctx->ccid2hctx_ssthresh < 2)
|
||||
hctx->ccid2hctx_ssthresh = 2;
|
||||
hctx->ccid2hctx_cwnd = 1;
|
||||
hctx->ccid2hctx_pipe = 0;
|
||||
|
||||
/* clear state about stuff we sent */
|
||||
hctx->seqt = hctx->seqh;
|
||||
hctx->packets_acked = 0;
|
||||
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
|
||||
/* clear ack ratio state. */
|
||||
hctx->rpseq = 0;
|
||||
hctx->rpdupack = -1;
|
||||
hctx->ccid2hctx_rpseq = 0;
|
||||
hctx->ccid2hctx_rpdupack = -1;
|
||||
ccid2_change_l_ack_ratio(sk, 1);
|
||||
|
||||
/* if we were blocked before, we may now send cwnd=1 packet */
|
||||
if (sender_was_blocked)
|
||||
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
|
||||
/* restart backed-off timer */
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
out:
|
||||
bh_unlock_sock(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
/*
 * ccid2_start_rto_timer - arm the RTO timer of a socket
 * @sk: socket whose CCID2 sender state holds the timer
 *
 * Sets ccid2hctx_rtotimer to expire ccid2hctx_rto jiffies from now.
 */
static void ccid2_start_rto_timer(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
|
||||
|
||||
/* this helper must only be called while the timer is not pending */
BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
|
||||
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
|
||||
jiffies + hctx->ccid2hctx_rto);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
struct ccid2_seq *next;
|
||||
|
||||
hctx->pipe++;
|
||||
hctx->ccid2hctx_pipe++;
|
||||
|
||||
hctx->seqh->ccid2s_seq = dp->dccps_gss;
|
||||
hctx->seqh->ccid2s_acked = 0;
|
||||
hctx->seqh->ccid2s_sent = jiffies;
|
||||
hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
|
||||
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
|
||||
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
|
||||
|
||||
next = hctx->seqh->ccid2s_next;
|
||||
next = hctx->ccid2hctx_seqh->ccid2s_next;
|
||||
/* check if we need to alloc more space */
|
||||
if (next == hctx->seqt) {
|
||||
if (next == hctx->ccid2hctx_seqt) {
|
||||
if (ccid2_hc_tx_alloc_seq(hctx)) {
|
||||
DCCP_CRIT("packet history - out of memory!");
|
||||
/* FIXME: find a more graceful way to bail out */
|
||||
return;
|
||||
}
|
||||
next = hctx->seqh->ccid2s_next;
|
||||
BUG_ON(next == hctx->seqt);
|
||||
next = hctx->ccid2hctx_seqh->ccid2s_next;
|
||||
BUG_ON(next == hctx->ccid2hctx_seqt);
|
||||
}
|
||||
hctx->seqh = next;
|
||||
hctx->ccid2hctx_seqh = next;
|
||||
|
||||
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe);
|
||||
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
|
||||
hctx->ccid2hctx_pipe);
|
||||
|
||||
/*
|
||||
* FIXME: The code below is broken and the variables have been removed
|
||||
@@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
*/
|
||||
#if 0
|
||||
/* Ack Ratio. Need to maintain a concept of how many windows we sent */
|
||||
hctx->arsent++;
|
||||
hctx->ccid2hctx_arsent++;
|
||||
/* We had an ack loss in this window... */
|
||||
if (hctx->ackloss) {
|
||||
if (hctx->arsent >= hctx->cwnd) {
|
||||
hctx->arsent = 0;
|
||||
hctx->ackloss = 0;
|
||||
if (hctx->ccid2hctx_ackloss) {
|
||||
if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
|
||||
hctx->ccid2hctx_arsent = 0;
|
||||
hctx->ccid2hctx_ackloss = 0;
|
||||
}
|
||||
} else {
|
||||
/* No acks lost up to now... */
|
||||
@@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
|
||||
dp->dccps_l_ack_ratio;
|
||||
|
||||
denom = hctx->cwnd * hctx->cwnd / denom;
|
||||
denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
|
||||
|
||||
if (hctx->arsent >= denom) {
|
||||
if (hctx->ccid2hctx_arsent >= denom) {
|
||||
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
|
||||
hctx->arsent = 0;
|
||||
hctx->ccid2hctx_arsent = 0;
|
||||
}
|
||||
} else {
|
||||
/* we can't increase ack ratio further [1] */
|
||||
hctx->arsent = 0; /* or maybe set it to cwnd*/
|
||||
hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/* setup RTO timer */
|
||||
if (!timer_pending(&hctx->rtotimer))
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
|
||||
ccid2_start_rto_timer(sk);
|
||||
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
do {
|
||||
struct ccid2_seq *seqp = hctx->seqt;
|
||||
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
|
||||
|
||||
while (seqp != hctx->seqh) {
|
||||
while (seqp != hctx->ccid2hctx_seqh) {
|
||||
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
|
||||
(unsigned long long)seqp->ccid2s_seq,
|
||||
seqp->ccid2s_acked, seqp->ccid2s_sent);
|
||||
@@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
|
||||
}
|
||||
} while (0);
|
||||
ccid2_pr_debug("=========\n");
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
|
||||
* This code is almost identical with TCP's tcp_rtt_estimator(), since
|
||||
* - it has a higher sampling frequency (recommended by RFC 1323),
|
||||
* - the RTO does not collapse into RTT due to RTTVAR going towards zero,
|
||||
* - it is simple (cf. more complex proposals such as Eifel timer or research
|
||||
* which suggests that the gain should be set according to window size),
|
||||
* - in tests it was found to work well with CCID2 [gerrit].
|
||||
/* XXX Lame code duplication!
|
||||
* returns -1 if none was found.
|
||||
* else returns the next offset to use in the function call.
|
||||
*/
|
||||
static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
|
||||
static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
|
||||
unsigned char **vec, unsigned char *veclen)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
long m = mrtt ? : 1;
|
||||
const struct dccp_hdr *dh = dccp_hdr(skb);
|
||||
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
|
||||
unsigned char *opt_ptr;
|
||||
const unsigned char *opt_end = (unsigned char *)dh +
|
||||
(dh->dccph_doff * 4);
|
||||
unsigned char opt, len;
|
||||
unsigned char *value;
|
||||
|
||||
if (hctx->srtt == 0) {
|
||||
/* First measurement m */
|
||||
hctx->srtt = m << 3;
|
||||
hctx->mdev = m << 1;
|
||||
BUG_ON(offset < 0);
|
||||
options += offset;
|
||||
opt_ptr = options;
|
||||
if (opt_ptr >= opt_end)
|
||||
return -1;
|
||||
|
||||
hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev);
|
||||
hctx->rttvar = hctx->mdev_max;
|
||||
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
|
||||
} else {
|
||||
/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
|
||||
m -= (hctx->srtt >> 3);
|
||||
hctx->srtt += m;
|
||||
while (opt_ptr != opt_end) {
|
||||
opt = *opt_ptr++;
|
||||
len = 0;
|
||||
value = NULL;
|
||||
|
||||
/* Similarly, update scaled mdev with regard to |m| */
|
||||
if (m < 0) {
|
||||
m = -m;
|
||||
m -= (hctx->mdev >> 2);
|
||||
/* Check if this isn't a single byte option */
|
||||
if (opt > DCCPO_MAX_RESERVED) {
|
||||
if (opt_ptr == opt_end)
|
||||
goto out_invalid_option;
|
||||
|
||||
len = *opt_ptr++;
|
||||
if (len < 3)
|
||||
goto out_invalid_option;
|
||||
/*
|
||||
* This neutralises RTO increase when RTT < SRTT - mdev
|
||||
* (see P. Sarolahti, A. Kuznetsov,"Congestion Control
|
||||
* in Linux TCP", USENIX 2002, pp. 49-62).
|
||||
* Remove the type and len fields, leaving
|
||||
* just the value size
|
||||
*/
|
||||
if (m > 0)
|
||||
m >>= 3;
|
||||
} else {
|
||||
m -= (hctx->mdev >> 2);
|
||||
}
|
||||
hctx->mdev += m;
|
||||
len -= 2;
|
||||
value = opt_ptr;
|
||||
opt_ptr += len;
|
||||
|
||||
if (hctx->mdev > hctx->mdev_max) {
|
||||
hctx->mdev_max = hctx->mdev;
|
||||
if (hctx->mdev_max > hctx->rttvar)
|
||||
hctx->rttvar = hctx->mdev_max;
|
||||
if (opt_ptr > opt_end)
|
||||
goto out_invalid_option;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decay RTTVAR at most once per flight, exploiting that
|
||||
* 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
|
||||
* 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
|
||||
* GAR is a useful bound for FlightSize = pipe, AWL is probably
|
||||
* too low as it over-estimates pipe.
|
||||
*/
|
||||
if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) {
|
||||
if (hctx->mdev_max < hctx->rttvar)
|
||||
hctx->rttvar -= (hctx->rttvar -
|
||||
hctx->mdev_max) >> 2;
|
||||
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
|
||||
hctx->mdev_max = TCP_RTO_MIN;
|
||||
switch (opt) {
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
*vec = value;
|
||||
*veclen = len;
|
||||
return offset + (opt_ptr - options);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Set RTO from SRTT and RTTVAR
|
||||
* Clock granularity is ignored since the minimum error for RTTVAR is
|
||||
* clamped to 50msec (corresponding to HZ=20). This leads to a minimum
|
||||
* RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP
|
||||
* does not retransmit data, DCCP does not require TCP's recommended
|
||||
* minimum timeout of one second".
|
||||
*/
|
||||
hctx->rto = (hctx->srtt >> 3) + hctx->rttvar;
|
||||
return -1;
|
||||
|
||||
if (hctx->rto > DCCP_RTO_MAX)
|
||||
hctx->rto = DCCP_RTO_MAX;
|
||||
out_invalid_option:
|
||||
DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
|
||||
unsigned int *maxincr)
|
||||
static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->cwnd < hctx->ssthresh) {
|
||||
if (*maxincr > 0 && ++hctx->packets_acked == 2) {
|
||||
hctx->cwnd += 1;
|
||||
*maxincr -= 1;
|
||||
hctx->packets_acked = 0;
|
||||
sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
|
||||
ccid2_pr_debug("deleted RTO timer\n");
|
||||
}
|
||||
|
||||
static inline void ccid2_new_ack(struct sock *sk,
|
||||
struct ccid2_seq *seqp,
|
||||
unsigned int *maxincr)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
|
||||
if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
|
||||
hctx->ccid2hctx_cwnd += 1;
|
||||
*maxincr -= 1;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
}
|
||||
} else if (++hctx->packets_acked >= hctx->cwnd) {
|
||||
hctx->cwnd += 1;
|
||||
hctx->packets_acked = 0;
|
||||
} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
|
||||
hctx->ccid2hctx_cwnd += 1;
|
||||
hctx->ccid2hctx_packets_acked = 0;
|
||||
}
|
||||
/*
|
||||
* FIXME: RTT is sampled several times per acknowledgment (for each
|
||||
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
|
||||
* This causes the RTT to be over-estimated, since the older entries
|
||||
* in the Ack Vector have earlier sending times.
|
||||
* The cleanest solution is to not use the ccid2s_sent field at all
|
||||
* and instead use DCCP timestamps - need to be resolved at some time.
|
||||
*/
|
||||
ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
|
||||
|
||||
/* update RTO */
|
||||
if (hctx->ccid2hctx_srtt == -1 ||
|
||||
time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
|
||||
unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
|
||||
int s;
|
||||
|
||||
/* first measurement */
|
||||
if (hctx->ccid2hctx_srtt == -1) {
|
||||
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
|
||||
r, jiffies,
|
||||
(unsigned long long)seqp->ccid2s_seq);
|
||||
ccid2_change_srtt(hctx, r);
|
||||
hctx->ccid2hctx_rttvar = r >> 1;
|
||||
} else {
|
||||
/* RTTVAR */
|
||||
long tmp = hctx->ccid2hctx_srtt - r;
|
||||
long srtt;
|
||||
|
||||
if (tmp < 0)
|
||||
tmp *= -1;
|
||||
|
||||
tmp >>= 2;
|
||||
hctx->ccid2hctx_rttvar *= 3;
|
||||
hctx->ccid2hctx_rttvar >>= 2;
|
||||
hctx->ccid2hctx_rttvar += tmp;
|
||||
|
||||
/* SRTT */
|
||||
srtt = hctx->ccid2hctx_srtt;
|
||||
srtt *= 7;
|
||||
srtt >>= 3;
|
||||
tmp = r >> 3;
|
||||
srtt += tmp;
|
||||
ccid2_change_srtt(hctx, srtt);
|
||||
}
|
||||
s = hctx->ccid2hctx_rttvar << 2;
|
||||
/* clock granularity is 1 when based on jiffies */
|
||||
if (!s)
|
||||
s = 1;
|
||||
hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
|
||||
|
||||
/* must be at least a second */
|
||||
s = hctx->ccid2hctx_rto / HZ;
|
||||
/* DCCP doesn't require this [but I like it cuz my code sux] */
|
||||
#if 1
|
||||
if (s < 1)
|
||||
hctx->ccid2hctx_rto = HZ;
|
||||
#endif
|
||||
/* max 60 seconds */
|
||||
if (s > 60)
|
||||
hctx->ccid2hctx_rto = HZ * 60;
|
||||
|
||||
hctx->ccid2hctx_lastrtt = jiffies;
|
||||
|
||||
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
|
||||
hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
|
||||
hctx->ccid2hctx_rto, HZ, r);
|
||||
}
|
||||
|
||||
/* we got a new ack, so re-start RTO timer */
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
ccid2_start_rto_timer(sk);
|
||||
}
|
||||
|
||||
/*
 * ccid2_hc_tx_dec_pipe - decrement the pipe counter of the sender
 * @sk: socket whose CCID2 sender state is updated
 *
 * Decrements ccid2hctx_pipe by one.  If the counter is already zero it is
 * left unchanged and the condition is reported through DCCP_BUG().  When
 * the counter is zero afterwards, the RTO timer is stopped via
 * ccid2_hc_tx_kill_rto_timer().
 */
static void ccid2_hc_tx_dec_pipe(struct sock *sk)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
/* guard against decrementing below zero */
if (hctx->ccid2hctx_pipe == 0)
|
||||
DCCP_BUG("pipe == 0");
|
||||
else
|
||||
hctx->ccid2hctx_pipe--;
|
||||
|
||||
/* pipe is zero here (also in the error case above): stop the RTO timer */
if (hctx->ccid2hctx_pipe == 0)
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
}
|
||||
|
||||
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
if (time_before(seqp->ccid2s_sent, hctx->last_cong)) {
|
||||
if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
|
||||
ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
|
||||
return;
|
||||
}
|
||||
|
||||
hctx->last_cong = jiffies;
|
||||
hctx->ccid2hctx_last_cong = jiffies;
|
||||
|
||||
hctx->cwnd = hctx->cwnd / 2 ? : 1U;
|
||||
hctx->ssthresh = max(hctx->cwnd, 2U);
|
||||
hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
|
||||
hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
|
||||
|
||||
/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
|
||||
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd)
|
||||
ccid2_change_l_ack_ratio(sk, hctx->cwnd);
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
|
||||
u8 option, u8 *optval, u8 optlen)
|
||||
{
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
|
||||
switch (option) {
|
||||
case DCCPO_ACK_VECTOR_0:
|
||||
case DCCPO_ACK_VECTOR_1:
|
||||
return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen,
|
||||
option - DCCPO_ACK_VECTOR_0);
|
||||
}
|
||||
return 0;
|
||||
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
|
||||
ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
|
||||
}
|
||||
|
||||
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
{
|
||||
struct dccp_sock *dp = dccp_sk(sk);
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
|
||||
struct dccp_ackvec_parsed *avp;
|
||||
u64 ackno, seqno;
|
||||
struct ccid2_seq *seqp;
|
||||
unsigned char *vector;
|
||||
unsigned char veclen;
|
||||
int offset = 0;
|
||||
int done = 0;
|
||||
unsigned int maxincr = 0;
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
/* check reverse path congestion */
|
||||
seqno = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
|
||||
@@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* -sorbo.
|
||||
*/
|
||||
/* need to bootstrap */
|
||||
if (hctx->rpdupack == -1) {
|
||||
hctx->rpdupack = 0;
|
||||
hctx->rpseq = seqno;
|
||||
if (hctx->ccid2hctx_rpdupack == -1) {
|
||||
hctx->ccid2hctx_rpdupack = 0;
|
||||
hctx->ccid2hctx_rpseq = seqno;
|
||||
} else {
|
||||
/* check if packet is consecutive */
|
||||
if (dccp_delta_seqno(hctx->rpseq, seqno) == 1)
|
||||
hctx->rpseq = seqno;
|
||||
if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
|
||||
hctx->ccid2hctx_rpseq = seqno;
|
||||
/* it's a later packet */
|
||||
else if (after48(seqno, hctx->rpseq)) {
|
||||
hctx->rpdupack++;
|
||||
else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
|
||||
hctx->ccid2hctx_rpdupack++;
|
||||
|
||||
/* check if we got enough dupacks */
|
||||
if (hctx->rpdupack >= NUMDUPACK) {
|
||||
hctx->rpdupack = -1; /* XXX lame */
|
||||
hctx->rpseq = 0;
|
||||
if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
|
||||
hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
|
||||
hctx->ccid2hctx_rpseq = 0;
|
||||
|
||||
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
|
||||
}
|
||||
@@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
}
|
||||
|
||||
/* check forward path congestion */
|
||||
if (dccp_packet_without_ack(skb))
|
||||
/* still didn't send out new data packets */
|
||||
if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
|
||||
return;
|
||||
|
||||
/* still didn't send out new data packets */
|
||||
if (hctx->seqh == hctx->seqt)
|
||||
goto done;
|
||||
switch (DCCP_SKB_CB(skb)->dccpd_type) {
|
||||
case DCCP_PKT_ACK:
|
||||
case DCCP_PKT_DATAACK:
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
|
||||
if (after48(ackno, hctx->high_ack))
|
||||
hctx->high_ack = ackno;
|
||||
if (after48(ackno, hctx->ccid2hctx_high_ack))
|
||||
hctx->ccid2hctx_high_ack = ackno;
|
||||
|
||||
seqp = hctx->seqt;
|
||||
seqp = hctx->ccid2hctx_seqt;
|
||||
while (before48(seqp->ccid2s_seq, ackno)) {
|
||||
seqp = seqp->ccid2s_next;
|
||||
if (seqp == hctx->seqh) {
|
||||
seqp = hctx->seqh->ccid2s_prev;
|
||||
if (seqp == hctx->ccid2hctx_seqh) {
|
||||
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* packets per acknowledgement. Rounding up avoids that cwnd is not
|
||||
* advanced when Ack Ratio is 1 and gives a slight edge otherwise.
|
||||
*/
|
||||
if (hctx->cwnd < hctx->ssthresh)
|
||||
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
|
||||
maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
|
||||
|
||||
/* go through all ack vectors */
|
||||
list_for_each_entry(avp, &hctx->av_chunks, node) {
|
||||
while ((offset = ccid2_ackvector(sk, skb, offset,
|
||||
&vector, &veclen)) != -1) {
|
||||
/* go through this ack vector */
|
||||
for (; avp->len--; avp->vec++) {
|
||||
u64 ackno_end_rl = SUB48(ackno,
|
||||
dccp_ackvec_runlen(avp->vec));
|
||||
while (veclen--) {
|
||||
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
|
||||
u64 ackno_end_rl = SUB48(ackno, rl);
|
||||
|
||||
ccid2_pr_debug("ackvec %llu |%u,%u|\n",
|
||||
ccid2_pr_debug("ackvec start:%llu end:%llu\n",
|
||||
(unsigned long long)ackno,
|
||||
dccp_ackvec_state(avp->vec) >> 6,
|
||||
dccp_ackvec_runlen(avp->vec));
|
||||
(unsigned long long)ackno_end_rl);
|
||||
/* if the seqno we are analyzing is larger than the
|
||||
* current ackno, then move towards the tail of our
|
||||
* seqnos.
|
||||
*/
|
||||
while (after48(seqp->ccid2s_seq, ackno)) {
|
||||
if (seqp == hctx->seqt) {
|
||||
if (seqp == hctx->ccid2hctx_seqt) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
@@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* run length
|
||||
*/
|
||||
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
|
||||
const u8 state = dccp_ackvec_state(avp->vec);
|
||||
const u8 state = *vector &
|
||||
DCCP_ACKVEC_STATE_MASK;
|
||||
|
||||
/* new packet received or marked */
|
||||
if (state != DCCPAV_NOT_RECEIVED &&
|
||||
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
|
||||
!seqp->ccid2s_acked) {
|
||||
if (state == DCCPAV_ECN_MARKED)
|
||||
if (state ==
|
||||
DCCP_ACKVEC_STATE_ECN_MARKED) {
|
||||
ccid2_congestion_event(sk,
|
||||
seqp);
|
||||
else
|
||||
} else
|
||||
ccid2_new_ack(sk, seqp,
|
||||
&maxincr);
|
||||
|
||||
seqp->ccid2s_acked = 1;
|
||||
ccid2_pr_debug("Got ack for %llu\n",
|
||||
(unsigned long long)seqp->ccid2s_seq);
|
||||
hctx->pipe--;
|
||||
ccid2_hc_tx_dec_pipe(sk);
|
||||
}
|
||||
if (seqp == hctx->seqt) {
|
||||
if (seqp == hctx->ccid2hctx_seqt) {
|
||||
done = 1;
|
||||
break;
|
||||
}
|
||||
@@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
break;
|
||||
|
||||
ackno = SUB48(ackno_end_rl, 1);
|
||||
vector++;
|
||||
}
|
||||
if (done)
|
||||
break;
|
||||
@@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
/* The state about what is acked should be correct now
|
||||
* Check for NUMDUPACK
|
||||
*/
|
||||
seqp = hctx->seqt;
|
||||
while (before48(seqp->ccid2s_seq, hctx->high_ack)) {
|
||||
seqp = hctx->ccid2hctx_seqt;
|
||||
while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
|
||||
seqp = seqp->ccid2s_next;
|
||||
if (seqp == hctx->seqh) {
|
||||
seqp = hctx->seqh->ccid2s_prev;
|
||||
if (seqp == hctx->ccid2hctx_seqh) {
|
||||
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
if (done == NUMDUPACK)
|
||||
break;
|
||||
}
|
||||
if (seqp == hctx->seqt)
|
||||
if (seqp == hctx->ccid2hctx_seqt)
|
||||
break;
|
||||
seqp = seqp->ccid2s_prev;
|
||||
}
|
||||
@@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
* one ack vector.
|
||||
*/
|
||||
ccid2_congestion_event(sk, seqp);
|
||||
hctx->pipe--;
|
||||
ccid2_hc_tx_dec_pipe(sk);
|
||||
}
|
||||
if (seqp == hctx->seqt)
|
||||
if (seqp == hctx->ccid2hctx_seqt)
|
||||
break;
|
||||
seqp = seqp->ccid2s_prev;
|
||||
}
|
||||
|
||||
hctx->seqt = last_acked;
|
||||
hctx->ccid2hctx_seqt = last_acked;
|
||||
}
|
||||
|
||||
/* trim acked packets in tail */
|
||||
while (hctx->seqt != hctx->seqh) {
|
||||
if (!hctx->seqt->ccid2s_acked)
|
||||
while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
|
||||
if (!hctx->ccid2hctx_seqt->ccid2s_acked)
|
||||
break;
|
||||
|
||||
hctx->seqt = hctx->seqt->ccid2s_next;
|
||||
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
|
||||
}
|
||||
|
||||
/* restart RTO timer if not all outstanding data has been acked */
|
||||
if (hctx->pipe == 0)
|
||||
sk_stop_timer(sk, &hctx->rtotimer);
|
||||
else
|
||||
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
|
||||
done:
|
||||
/* check if incoming Acks allow pending packets to be sent */
|
||||
if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
|
||||
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
|
||||
dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
}
|
||||
|
||||
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
@@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
u32 max_ratio;
|
||||
|
||||
/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
|
||||
hctx->ssthresh = ~0U;
|
||||
hctx->ccid2hctx_ssthresh = ~0U;
|
||||
|
||||
/* Use larger initial windows (RFC 3390, rfc2581bis) */
|
||||
hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
|
||||
/*
|
||||
* RFC 4341, 5: "The cwnd parameter is initialized to at most four
|
||||
* packets for new connections, following the rules from [RFC3390]".
|
||||
* We need to convert the bytes of RFC3390 into the packets of RFC 4341.
|
||||
*/
|
||||
hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
|
||||
|
||||
/* Make sure that Ack Ratio is enabled and within bounds. */
|
||||
max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
|
||||
max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
|
||||
if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
|
||||
dp->dccps_l_ack_ratio = max_ratio;
|
||||
|
||||
@@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
|
||||
if (ccid2_hc_tx_alloc_seq(hctx))
|
||||
return -ENOMEM;
|
||||
|
||||
hctx->rto = DCCP_TIMEOUT_INIT;
|
||||
hctx->rpdupack = -1;
|
||||
hctx->last_cong = jiffies;
|
||||
setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
|
||||
INIT_LIST_HEAD(&hctx->av_chunks);
|
||||
hctx->ccid2hctx_rto = 3 * HZ;
|
||||
ccid2_change_srtt(hctx, -1);
|
||||
hctx->ccid2hctx_rttvar = -1;
|
||||
hctx->ccid2hctx_rpdupack = -1;
|
||||
hctx->ccid2hctx_last_cong = jiffies;
|
||||
setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
|
||||
(unsigned long)sk);
|
||||
|
||||
ccid2_hc_tx_check_sanity(hctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk)
|
||||
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
|
||||
int i;
|
||||
|
||||
sk_stop_timer(sk, &hctx->rtotimer);
|
||||
ccid2_hc_tx_kill_rto_timer(sk);
|
||||
|
||||
for (i = 0; i < hctx->seqbufc; i++)
|
||||
kfree(hctx->seqbuf[i]);
|
||||
hctx->seqbufc = 0;
|
||||
for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
|
||||
kfree(hctx->ccid2hctx_seqbuf[i]);
|
||||
hctx->ccid2hctx_seqbufc = 0;
|
||||
}
|
||||
|
||||
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
@@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
|
||||
switch (DCCP_SKB_CB(skb)->dccpd_type) {
|
||||
case DCCP_PKT_DATA:
|
||||
case DCCP_PKT_DATAACK:
|
||||
hcrx->data++;
|
||||
if (hcrx->data >= dp->dccps_r_ack_ratio) {
|
||||
hcrx->ccid2hcrx_data++;
|
||||
if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
|
||||
dccp_send_ack(sk);
|
||||
hcrx->data = 0;
|
||||
hcrx->ccid2hcrx_data = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static struct ccid_operations ccid2 = {
|
||||
.ccid_id = DCCPC_CCID2,
|
||||
.ccid_name = "TCP-like",
|
||||
.ccid_owner = THIS_MODULE,
|
||||
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
|
||||
.ccid_hc_tx_init = ccid2_hc_tx_init,
|
||||
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
|
||||
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
|
||||
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
|
||||
.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
|
||||
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
|
||||
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
|
||||
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
|
||||
.ccid_id = DCCPC_CCID2,
|
||||
.ccid_name = "TCP-like",
|
||||
.ccid_owner = THIS_MODULE,
|
||||
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
|
||||
.ccid_hc_tx_init = ccid2_hc_tx_init,
|
||||
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
|
||||
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
|
||||
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
|
||||
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
|
||||
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
|
||||
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
|
||||
|
@@ -42,49 +42,34 @@ struct ccid2_seq {
|
||||
|
||||
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
|
||||
*
|
||||
* @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
|
||||
* @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
|
||||
* @srtt: smoothed RTT estimate, scaled by 2^3
|
||||
* @mdev: smoothed RTT variation, scaled by 2^2
|
||||
* @mdev_max: maximum of @mdev during one flight
|
||||
* @rttvar: moving average/maximum of @mdev_max
|
||||
* @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
|
||||
* @rtt_seq: to decay RTTVAR at most once per flight
|
||||
* @rpseq: last consecutive seqno
|
||||
* @rpdupack: dupacks since rpseq
|
||||
* @av_chunks: list of Ack Vectors received on current skb
|
||||
*/
|
||||
* @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
|
||||
* @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
|
||||
* @ccid2hctx_lastrtt -time RTT was last measured
|
||||
* @ccid2hctx_rpseq - last consecutive seqno
|
||||
* @ccid2hctx_rpdupack - dupacks since rpseq
|
||||
*/
|
||||
struct ccid2_hc_tx_sock {
|
||||
u32 cwnd;
|
||||
u32 ssthresh;
|
||||
u32 pipe;
|
||||
u32 packets_acked;
|
||||
struct ccid2_seq *seqbuf[CCID2_SEQBUF_MAX];
|
||||
int seqbufc;
|
||||
struct ccid2_seq *seqh;
|
||||
struct ccid2_seq *seqt;
|
||||
/* RTT measurement: variables/principles are the same as in TCP */
|
||||
u32 srtt,
|
||||
mdev,
|
||||
mdev_max,
|
||||
rttvar,
|
||||
rto;
|
||||
u64 rtt_seq:48;
|
||||
struct timer_list rtotimer;
|
||||
u64 rpseq;
|
||||
int rpdupack;
|
||||
unsigned long last_cong;
|
||||
u64 high_ack;
|
||||
struct list_head av_chunks;
|
||||
u32 ccid2hctx_cwnd;
|
||||
u32 ccid2hctx_ssthresh;
|
||||
u32 ccid2hctx_pipe;
|
||||
u32 ccid2hctx_packets_acked;
|
||||
struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
|
||||
int ccid2hctx_seqbufc;
|
||||
struct ccid2_seq *ccid2hctx_seqh;
|
||||
struct ccid2_seq *ccid2hctx_seqt;
|
||||
long ccid2hctx_rto;
|
||||
long ccid2hctx_srtt;
|
||||
long ccid2hctx_rttvar;
|
||||
unsigned long ccid2hctx_lastrtt;
|
||||
struct timer_list ccid2hctx_rtotimer;
|
||||
u64 ccid2hctx_rpseq;
|
||||
int ccid2hctx_rpdupack;
|
||||
unsigned long ccid2hctx_last_cong;
|
||||
u64 ccid2hctx_high_ack;
|
||||
};
|
||||
|
||||
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx)
|
||||
{
|
||||
return (hctx->pipe >= hctx->cwnd);
|
||||
}
|
||||
|
||||
struct ccid2_hc_rx_sock {
|
||||
int data;
|
||||
int ccid2hcrx_data;
|
||||
};
|
||||
|
||||
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -47,22 +47,11 @@
|
||||
/* Two seconds as per RFC 3448 4.2 */
|
||||
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
|
||||
|
||||
/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
|
||||
#define TFRC_T_MBI (64 * USEC_PER_SEC)
|
||||
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
|
||||
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
|
||||
|
||||
/*
|
||||
* The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
|
||||
* rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
|
||||
* Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
|
||||
* resolution of HZ < 500 means that the error is below one timer tick (t_gran)
|
||||
* when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
|
||||
*/
|
||||
#if (HZ >= 500)
|
||||
# define TFRC_T_DELTA USEC_PER_MSEC
|
||||
#else
|
||||
# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
|
||||
#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
|
||||
#endif
|
||||
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
|
||||
#define TFRC_T_MBI 64
|
||||
|
||||
enum ccid3_options {
|
||||
TFRC_OPT_LOSS_EVENT_RATE = 192,
|
||||
@@ -70,43 +59,62 @@ enum ccid3_options {
|
||||
TFRC_OPT_RECEIVE_RATE = 194,
|
||||
};
|
||||
|
||||
struct ccid3_options_received {
|
||||
u64 ccid3or_seqno:48,
|
||||
ccid3or_loss_intervals_idx:16;
|
||||
u16 ccid3or_loss_intervals_len;
|
||||
u32 ccid3or_loss_event_rate;
|
||||
u32 ccid3or_receive_rate;
|
||||
};
|
||||
|
||||
/* TFRC sender states */
|
||||
enum ccid3_hc_tx_states {
|
||||
TFRC_SSTATE_NO_SENT = 1,
|
||||
TFRC_SSTATE_NO_FBACK,
|
||||
TFRC_SSTATE_FBACK,
|
||||
TFRC_SSTATE_TERM,
|
||||
};
|
||||
|
||||
/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
|
||||
*
|
||||
* @x - Current sending rate in 64 * bytes per second
|
||||
* @x_recv - Receive rate in 64 * bytes per second
|
||||
* @x_calc - Calculated rate in bytes per second
|
||||
* @rtt - Estimate of current round trip time in usecs
|
||||
* @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
|
||||
* @p - Current loss event rate (0-1) scaled by 1000000
|
||||
* @s - Packet size in bytes
|
||||
* @t_rto - Nofeedback Timer setting in usecs
|
||||
* @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
|
||||
* @feedback - Whether feedback has been received or not
|
||||
* @last_win_count - Last window counter sent
|
||||
* @t_last_win_count - Timestamp of earliest packet with
|
||||
* last_win_count value sent
|
||||
* @no_feedback_timer - Handle to no feedback timer
|
||||
* @t_ld - Time last doubled during slow start
|
||||
* @t_nom - Nominal send time of next packet
|
||||
* @hist - Packet history
|
||||
* @ccid3hctx_x - Current sending rate in 64 * bytes per second
|
||||
* @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
|
||||
* @ccid3hctx_x_calc - Calculated rate in bytes per second
|
||||
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
|
||||
* @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
|
||||
* @ccid3hctx_s - Packet size in bytes
|
||||
* @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
|
||||
* @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
|
||||
* @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
|
||||
* @ccid3hctx_last_win_count - Last window counter sent
|
||||
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet
|
||||
* with last_win_count value sent
|
||||
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
|
||||
* @ccid3hctx_t_ld - Time last doubled during slow start
|
||||
* @ccid3hctx_t_nom - Nominal send time of next packet
|
||||
* @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
|
||||
* @ccid3hctx_hist - Packet history
|
||||
* @ccid3hctx_options_received - Parsed set of retrieved options
|
||||
*/
|
||||
struct ccid3_hc_tx_sock {
|
||||
u64 x;
|
||||
u64 x_recv;
|
||||
u32 x_calc;
|
||||
u32 rtt;
|
||||
u16 r_sqmean;
|
||||
u32 p;
|
||||
u32 t_rto;
|
||||
u32 t_ipi;
|
||||
u16 s;
|
||||
bool feedback:1;
|
||||
u8 last_win_count;
|
||||
ktime_t t_last_win_count;
|
||||
struct timer_list no_feedback_timer;
|
||||
ktime_t t_ld;
|
||||
ktime_t t_nom;
|
||||
struct tfrc_tx_hist_entry *hist;
|
||||
struct tfrc_tx_info ccid3hctx_tfrc;
|
||||
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
|
||||
#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv
|
||||
#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc
|
||||
#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt
|
||||
#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p
|
||||
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
|
||||
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
|
||||
u16 ccid3hctx_s;
|
||||
enum ccid3_hc_tx_states ccid3hctx_state:8;
|
||||
u8 ccid3hctx_last_win_count;
|
||||
ktime_t ccid3hctx_t_last_win_count;
|
||||
struct timer_list ccid3hctx_no_feedback_timer;
|
||||
ktime_t ccid3hctx_t_ld;
|
||||
ktime_t ccid3hctx_t_nom;
|
||||
u32 ccid3hctx_delta;
|
||||
struct tfrc_tx_hist_entry *ccid3hctx_hist;
|
||||
struct ccid3_options_received ccid3hctx_options_received;
|
||||
};
|
||||
|
||||
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
|
||||
@@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
|
||||
return hctx;
|
||||
}
|
||||
|
||||
|
||||
enum ccid3_fback_type {
|
||||
CCID3_FBACK_NONE = 0,
|
||||
CCID3_FBACK_INITIAL,
|
||||
CCID3_FBACK_PERIODIC,
|
||||
CCID3_FBACK_PARAM_CHANGE
|
||||
/* TFRC receiver states */
|
||||
enum ccid3_hc_rx_states {
|
||||
TFRC_RSTATE_NO_DATA = 1,
|
||||
TFRC_RSTATE_DATA,
|
||||
TFRC_RSTATE_TERM = 127,
|
||||
};
|
||||
|
||||
/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
|
||||
*
|
||||
* @last_counter - Tracks window counter (RFC 4342, 8.1)
|
||||
* @feedback - The type of the feedback last sent
|
||||
* @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
|
||||
* @tstamp_last_feedback - Time at which last feedback was sent
|
||||
* @hist - Packet history (loss detection + RTT sampling)
|
||||
* @li_hist - Loss Interval database
|
||||
* @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
|
||||
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
|
||||
* @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
|
||||
* @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4)
|
||||
* @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
|
||||
* @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states
|
||||
* @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
|
||||
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
|
||||
* @ccid3hcrx_rtt - Receiver estimate of RTT
|
||||
* @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
|
||||
* @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
|
||||
* @ccid3hcrx_hist - Packet history (loss detection + RTT sampling)
|
||||
* @ccid3hcrx_li_hist - Loss Interval database
|
||||
* @ccid3hcrx_s - Received packet size in bytes
|
||||
* @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
|
||||
*/
|
||||
struct ccid3_hc_rx_sock {
|
||||
u8 last_counter:4;
|
||||
enum ccid3_fback_type feedback:4;
|
||||
u32 x_recv;
|
||||
ktime_t tstamp_last_feedback;
|
||||
struct tfrc_rx_hist hist;
|
||||
struct tfrc_loss_hist li_hist;
|
||||
#define p_inverse li_hist.i_mean
|
||||
u8 ccid3hcrx_last_counter:4;
|
||||
enum ccid3_hc_rx_states ccid3hcrx_state:8;
|
||||
u32 ccid3hcrx_bytes_recv;
|
||||
u32 ccid3hcrx_x_recv;
|
||||
u32 ccid3hcrx_rtt;
|
||||
ktime_t ccid3hcrx_tstamp_last_feedback;
|
||||
struct tfrc_rx_hist ccid3hcrx_hist;
|
||||
struct tfrc_loss_hist ccid3hcrx_li_hist;
|
||||
u16 ccid3hcrx_s;
|
||||
#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean
|
||||
};
|
||||
|
||||
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
|
||||
|
@@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
|
||||
|
||||
/**
|
||||
* tfrc_lh_update_i_mean - Update the `open' loss interval I_0
|
||||
* This updates I_mean as the sequence numbers increase. As a consequence, the
|
||||
* open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
|
||||
* decreases, and thus there is no need to send renewed feedback.
|
||||
* For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
|
||||
*/
|
||||
void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
{
|
||||
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
|
||||
u32 old_i_mean = lh->i_mean;
|
||||
s64 len;
|
||||
|
||||
if (cur == NULL) /* not initialised */
|
||||
return;
|
||||
|
||||
/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
|
||||
if (!dccp_data_packet(skb))
|
||||
return;
|
||||
return 0;
|
||||
|
||||
len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
|
||||
|
||||
if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
|
||||
return;
|
||||
return 0;
|
||||
|
||||
if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
|
||||
/*
|
||||
@@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
|
||||
cur->li_is_closed = 1;
|
||||
|
||||
if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
|
||||
return;
|
||||
return 0;
|
||||
|
||||
cur->li_length = len;
|
||||
tfrc_lh_calc_i_mean(lh);
|
||||
|
||||
return (lh->i_mean < old_i_mean);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
|
||||
|
||||
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
|
||||
static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
|
||||
@@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
|
||||
* @sk: Used by @calc_first_li in caller-specific way (subtyping)
|
||||
* Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
|
||||
*/
|
||||
bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
{
|
||||
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
|
||||
|
||||
if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
|
||||
return false;
|
||||
return 0;
|
||||
|
||||
new = tfrc_lh_demand_next(lh);
|
||||
if (unlikely(new == NULL)) {
|
||||
DCCP_CRIT("Cannot allocate/add loss record.");
|
||||
return false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
|
||||
@@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
|
||||
|
||||
tfrc_lh_calc_i_mean(lh);
|
||||
}
|
||||
return true;
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);
|
||||
|
||||
|
@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
|
||||
|
||||
struct tfrc_rx_hist;
|
||||
|
||||
extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
|
||||
extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
|
||||
u32 (*first_li)(struct sock *), struct sock *);
|
||||
extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
|
||||
extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
|
||||
extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
|
||||
|
||||
#endif /* _DCCP_LI_HIST_ */
|
||||
|
@@ -40,6 +40,18 @@
|
||||
#include "packet_history.h"
|
||||
#include "../../dccp.h"
|
||||
|
||||
/**
|
||||
* tfrc_tx_hist_entry - Simple singly-linked TX history list
|
||||
* @next: next oldest entry (LIFO order)
|
||||
* @seqno: sequence number of this entry
|
||||
* @stamp: send time of packet with sequence number @seqno
|
||||
*/
|
||||
struct tfrc_tx_hist_entry {
|
||||
struct tfrc_tx_hist_entry *next;
|
||||
u64 seqno;
|
||||
ktime_t stamp;
|
||||
};
|
||||
|
||||
/*
|
||||
* Transmitter History Routines
|
||||
*/
|
||||
@@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void)
|
||||
}
|
||||
}
|
||||
|
||||
static struct tfrc_tx_hist_entry *
|
||||
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
|
||||
{
|
||||
while (head != NULL && head->seqno != seqno)
|
||||
head = head->next;
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
|
||||
{
|
||||
struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
|
||||
@@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
|
||||
|
||||
u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
|
||||
const ktime_t now)
|
||||
{
|
||||
u32 rtt = 0;
|
||||
struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
|
||||
|
||||
if (packet != NULL) {
|
||||
rtt = ktime_us_delta(now, packet->stamp);
|
||||
/*
|
||||
* Garbage-collect older (irrelevant) entries:
|
||||
*/
|
||||
tfrc_tx_hist_purge(&packet->next);
|
||||
}
|
||||
|
||||
return rtt;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
|
||||
|
||||
|
||||
/*
|
||||
* Receiver History Routines
|
||||
*/
|
||||
@@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
|
||||
|
||||
|
||||
static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
|
||||
{
|
||||
struct tfrc_rx_hist_entry *tmp = h->ring[a];
|
||||
|
||||
h->ring[a] = h->ring[b];
|
||||
h->ring[b] = tmp;
|
||||
}
|
||||
|
||||
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
|
||||
{
|
||||
__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
|
||||
tfrc_rx_hist_index(h, b));
|
||||
}
|
||||
const u8 idx_a = tfrc_rx_hist_index(h, a),
|
||||
idx_b = tfrc_rx_hist_index(h, b);
|
||||
struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
|
||||
|
||||
/**
|
||||
* tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling
|
||||
* This is called after loss detection has finished, when the history entry
|
||||
* with the index of `loss_count' holds the highest-received sequence number.
|
||||
* RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
|
||||
*/
|
||||
static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
|
||||
{
|
||||
__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
|
||||
h->loss_count = h->loss_start = 0;
|
||||
h->ring[idx_a] = h->ring[idx_b];
|
||||
h->ring[idx_b] = tmp;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
|
||||
u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
|
||||
s1 = DCCP_SKB_CB(skb)->dccpd_seq;
|
||||
|
||||
if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */
|
||||
if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
|
||||
h->loss_count = 1;
|
||||
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
|
||||
}
|
||||
}
|
||||
|
||||
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
|
||||
@@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
|
||||
|
||||
if (dccp_loss_free(s2, s1, n1)) {
|
||||
/* hole is filled: S0, S2, and S1 are consecutive */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_count = 0;
|
||||
h->loss_start = tfrc_rx_hist_index(h, 1);
|
||||
} else
|
||||
/* gap between S2 and S1: just update loss_prev */
|
||||
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
|
||||
@@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
|
||||
|
||||
if (dccp_loss_free(s1, s2, n2)) {
|
||||
/* entire hole filled by S0, S3, S1, S2 */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_start = tfrc_rx_hist_index(h, 2);
|
||||
h->loss_count = 0;
|
||||
} else {
|
||||
/* gap remains between S1 and S2 */
|
||||
h->loss_start = tfrc_rx_hist_index(h, 1);
|
||||
@@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
|
||||
if (dccp_loss_free(s2, s3, n3)) {
|
||||
/* no gap between S2 and S3: entire hole is filled */
|
||||
tfrc_rx_hist_resume_rtt_sampling(h);
|
||||
h->loss_start = tfrc_rx_hist_index(h, 3);
|
||||
h->loss_count = 0;
|
||||
} else {
|
||||
/* gap between S2 and S3 */
|
||||
h->loss_start = tfrc_rx_hist_index(h, 2);
|
||||
@@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_rx_congestion_event - Loss detection and further processing
|
||||
* @h: The non-empty RX history object
|
||||
* @lh: Loss Intervals database to update
|
||||
* @skb: Currently received packet
|
||||
* @ndp: The NDP count belonging to @skb
|
||||
* @first_li: Caller-dependent computation of first loss interval in @lh
|
||||
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
|
||||
* tfrc_rx_handle_loss - Loss detection and further processing
|
||||
* @h: The non-empty RX history object
|
||||
* @lh: Loss Intervals database to update
|
||||
* @skb: Currently received packet
|
||||
* @ndp: The NDP count belonging to @skb
|
||||
* @calc_first_li: Caller-dependent computation of first loss interval in @lh
|
||||
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
|
||||
* Chooses action according to pending loss, updates LI database when a new
|
||||
* loss was detected, and does required post-processing. Returns 1 when caller
|
||||
* should send feedback, 0 otherwise.
|
||||
@@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
|
||||
* records accordingly, the caller should not perform any more RX history
|
||||
* operations when loss_count is greater than 0 after calling this function.
|
||||
*/
|
||||
bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *), struct sock *sk)
|
||||
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*calc_first_li)(struct sock *), struct sock *sk)
|
||||
{
|
||||
bool new_event = false;
|
||||
|
||||
if (tfrc_rx_hist_duplicate(h, skb))
|
||||
return 0;
|
||||
int is_new_loss = 0;
|
||||
|
||||
if (h->loss_count == 0) {
|
||||
__do_track_loss(h, skb, ndp);
|
||||
tfrc_rx_hist_sample_rtt(h, skb);
|
||||
tfrc_rx_hist_add_packet(h, skb, ndp);
|
||||
} else if (h->loss_count == 1) {
|
||||
__one_after_loss(h, skb, ndp);
|
||||
} else if (h->loss_count != 2) {
|
||||
@@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
/*
|
||||
* Update Loss Interval database and recycle RX records
|
||||
*/
|
||||
new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
|
||||
is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
|
||||
__three_after_loss(h);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update moving-average of `s' and the sum of received payload bytes.
|
||||
*/
|
||||
if (dccp_data_packet(skb)) {
|
||||
const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
|
||||
|
||||
h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
|
||||
h->bytes_recvd += payload;
|
||||
}
|
||||
|
||||
/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
|
||||
if (!new_event)
|
||||
tfrc_lh_update_i_mean(lh, skb);
|
||||
|
||||
return new_event;
|
||||
return is_new_loss;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
|
||||
|
||||
/* Compute the sending rate X_recv measured between feedback intervals */
|
||||
u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
|
||||
int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
|
||||
{
|
||||
u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
|
||||
s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
|
||||
int i;
|
||||
|
||||
WARN_ON(delta <= 0);
|
||||
/*
|
||||
* Ensure that the sampling interval for X_recv is at least one RTT,
|
||||
* by extending the sampling interval backwards in time, over the last
|
||||
* R_(m-1) seconds, as per rfc3448bis-06, 6.2.
|
||||
* To reduce noise (e.g. when the RTT changes often), this is only
|
||||
* done when delta is smaller than RTT/2.
|
||||
*/
|
||||
if (last_x_recv > 0 && delta < last_rtt/2) {
|
||||
tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
|
||||
(long)delta, (unsigned)last_rtt);
|
||||
|
||||
delta = (bytes ? delta : 0) + last_rtt;
|
||||
bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
|
||||
for (i = 0; i <= TFRC_NDUPACK; i++) {
|
||||
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
|
||||
if (h->ring[i] == NULL)
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (unlikely(bytes == 0)) {
|
||||
DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
|
||||
return last_x_recv;
|
||||
h->loss_count = h->loss_start = 0;
|
||||
return 0;
|
||||
|
||||
out_free:
|
||||
while (i-- != 0) {
|
||||
kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
|
||||
h->ring[i] = NULL;
|
||||
}
|
||||
return scaled_div32(bytes, delta);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
|
||||
|
||||
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
|
||||
{
|
||||
@@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
|
||||
|
||||
static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
|
||||
/**
|
||||
* tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
|
||||
*/
|
||||
static inline struct tfrc_rx_hist_entry *
|
||||
tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
int i;
|
||||
|
||||
memset(h, 0, sizeof(*h));
|
||||
|
||||
for (i = 0; i <= TFRC_NDUPACK; i++) {
|
||||
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
|
||||
if (h->ring[i] == NULL) {
|
||||
tfrc_rx_hist_purge(h);
|
||||
return -ENOBUFS;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
return h->ring[0];
|
||||
}
|
||||
|
||||
int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
|
||||
/**
|
||||
* tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
|
||||
*/
|
||||
static inline struct tfrc_rx_hist_entry *
|
||||
tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (tfrc_rx_hist_alloc(h))
|
||||
return -ENOBUFS;
|
||||
/*
|
||||
* Initialise first entry with GSR to start loss detection as early as
|
||||
* possible. Code using this must not use any other fields. The entry
|
||||
* will be overwritten once the CCID updates its received packets.
|
||||
*/
|
||||
tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
|
||||
return 0;
|
||||
return h->ring[h->rtt_sample_prev];
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
|
||||
|
||||
/**
|
||||
* tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
|
||||
* Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
|
||||
* is pending and uses the following history entries (via rtt_sample_prev):
|
||||
* - h->ring[0] contains the most recent history entry prior to @skb;
|
||||
* - h->ring[1] is an unused `dummy' entry when the current difference is 0;
|
||||
* Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
|
||||
* to compute a sample with given data - calling function should check this.
|
||||
*/
|
||||
void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
|
||||
u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
|
||||
{
|
||||
struct tfrc_rx_hist_entry *last = h->ring[0];
|
||||
u32 sample, delta_v;
|
||||
u32 sample = 0,
|
||||
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
|
||||
/*
|
||||
* When not to sample:
|
||||
* - on non-data packets
|
||||
* (RFC 4342, 8.1: CCVal only fully defined for data packets);
|
||||
* - when no data packets have been received yet
|
||||
* (FIXME: using sampled packet size as indicator here);
|
||||
* - as long as there are gaps in the sequence space (pending loss).
|
||||
*/
|
||||
if (!dccp_data_packet(skb) || h->packet_size == 0 ||
|
||||
tfrc_rx_hist_loss_pending(h))
|
||||
return;
|
||||
if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
|
||||
if (h->rtt_sample_prev == 2) { /* previous candidate stored */
|
||||
sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
if (sample)
|
||||
sample = 4 / sample *
|
||||
ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
|
||||
else /*
|
||||
* FIXME: This condition is in principle not
|
||||
* possible but occurs when CCID is used for
|
||||
* two-way data traffic. I have tried to trace
|
||||
* it, but the cause does not seem to be here.
|
||||
*/
|
||||
DCCP_BUG("please report to dccp@vger.kernel.org"
|
||||
" => prev = %u, last = %u",
|
||||
tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
|
||||
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
|
||||
} else if (delta_v < 1) {
|
||||
h->rtt_sample_prev = 1;
|
||||
goto keep_ref_for_next_time;
|
||||
}
|
||||
|
||||
h->rtt_sample_prev = 0; /* reset previous candidate */
|
||||
|
||||
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
|
||||
if (delta_v == 0) { /* less than RTT/4 difference */
|
||||
h->rtt_sample_prev = 1;
|
||||
return;
|
||||
} else if (delta_v == 4) /* optimal match */
|
||||
sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
|
||||
else { /* suboptimal match */
|
||||
h->rtt_sample_prev = 2;
|
||||
goto keep_ref_for_next_time;
|
||||
}
|
||||
sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
|
||||
|
||||
if (delta_v <= 4) /* between RTT/4 and RTT */
|
||||
sample *= 4 / delta_v;
|
||||
else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
|
||||
/*
|
||||
* Optimisation: CCVal difference is greater than 1 RTT, yet the
|
||||
* sample is less than the local RTT estimate; which means that
|
||||
* the RTT estimate is too high.
|
||||
* To avoid noise, it is not done if the sample is below RTT/2.
|
||||
*/
|
||||
return;
|
||||
if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
|
||||
DCCP_WARN("RTT sample %u too large, using max\n", sample);
|
||||
sample = DCCP_SANE_RTT_MAX;
|
||||
}
|
||||
|
||||
/* Use a lower weight than usual to increase responsiveness */
|
||||
h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
|
||||
h->rtt_sample_prev = 0; /* use current entry as next reference */
|
||||
keep_ref_for_next_time:
|
||||
|
||||
return sample;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
|
||||
|
@@ -40,28 +40,12 @@
|
||||
#include <linux/slab.h>
|
||||
#include "tfrc.h"
|
||||
|
||||
/**
|
||||
* tfrc_tx_hist_entry - Simple singly-linked TX history list
|
||||
* @next: next oldest entry (LIFO order)
|
||||
* @seqno: sequence number of this entry
|
||||
* @stamp: send time of packet with sequence number @seqno
|
||||
*/
|
||||
struct tfrc_tx_hist_entry {
|
||||
struct tfrc_tx_hist_entry *next;	/* next oldest entry; list is kept in LIFO order */
|
||||
u64 seqno;	/* sequence number recorded for this entry */
|
||||
ktime_t stamp;	/* send time of the packet carrying @seqno */
|
||||
};
|
||||
|
||||
static inline struct tfrc_tx_hist_entry *
|
||||
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
|
||||
{
|
||||
while (head != NULL && head->seqno != seqno)
|
||||
head = head->next;
|
||||
return head;
|
||||
}
|
||||
struct tfrc_tx_hist_entry;
|
||||
|
||||
extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
|
||||
extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
|
||||
extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
|
||||
const u64 seqno, const ktime_t now);
|
||||
|
||||
/* Subtraction a-b modulo-16, respects circular wrap-around */
|
||||
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
|
||||
@@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry {
|
||||
* @loss_count: Number of entries in circular history
|
||||
* @loss_start: Movable index (for loss detection)
|
||||
* @rtt_sample_prev: Used during RTT sampling, points to candidate entry
|
||||
* @rtt_estimate: Receiver RTT estimate
|
||||
* @packet_size: Packet size in bytes (as per RFC 3448, 3.1)
|
||||
* @bytes_recvd: Number of bytes received since @bytes_start
|
||||
* @bytes_start: Start time for counting @bytes_recvd
|
||||
*/
|
||||
struct tfrc_rx_hist {
|
||||
struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
|
||||
u8 loss_count:2,
|
||||
loss_start:2;
|
||||
/* Receiver RTT sampling */
|
||||
#define rtt_sample_prev loss_start
|
||||
u32 rtt_estimate;
|
||||
/* Receiver sampling of application payload lengths */
|
||||
u32 packet_size,
|
||||
bytes_recvd;
|
||||
ktime_t bytes_start;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
|
||||
return h->loss_count > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Accessor functions to retrieve parameters sampled by the RX history
|
||||
*/
|
||||
static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (h->packet_size == 0) {
|
||||
DCCP_WARN("No sample for s, using fallback\n");
|
||||
return TCP_MIN_RCVMSS;
|
||||
}
|
||||
return h->packet_size;
|
||||
|
||||
}
|
||||
static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
|
||||
{
|
||||
if (h->rtt_estimate == 0) {
|
||||
DCCP_WARN("No RTT estimate available, using fallback RTT\n");
|
||||
return DCCP_FALLBACK_RTT;
|
||||
}
|
||||
return h->rtt_estimate;
|
||||
}
|
||||
|
||||
static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
|
||||
{
|
||||
h->bytes_recvd = 0;
|
||||
h->bytes_start = ktime_get_real();
|
||||
}
|
||||
|
||||
extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
|
||||
|
||||
|
||||
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb, const u64 ndp);
|
||||
|
||||
extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
|
||||
|
||||
struct tfrc_loss_hist;
|
||||
extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *sk),
|
||||
struct sock *sk);
|
||||
extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb);
|
||||
extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
|
||||
extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
|
||||
struct tfrc_loss_hist *lh,
|
||||
struct sk_buff *skb, const u64 ndp,
|
||||
u32 (*first_li)(struct sock *sk),
|
||||
struct sock *sk);
|
||||
extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
|
||||
const struct sk_buff *skb);
|
||||
extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
|
||||
extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
|
||||
|
||||
#endif /* _DCCP_PKT_HIST_ */
|
||||
|
@@ -47,21 +47,6 @@ static inline u32 scaled_div32(u64 a, u64 b)
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
|
||||
* Uses scaling to improve accuracy of the integer approximation of sqrt(). The
|
||||
* scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
|
||||
* clamped RTT samples (dccp_sample_rtt).
|
||||
* Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
|
||||
* scaling factor is neutralised. For this purpose, it avoids returning zero.
|
||||
*/
|
||||
static inline u16 tfrc_scaled_sqrt(const u32 sample)
|
||||
{
|
||||
const unsigned long non_zero_sample = sample ? : 1;
|
||||
|
||||
return int_sqrt(non_zero_sample << 10);
|
||||
}
|
||||
|
||||
/**
|
||||
* tfrc_ewma - Exponentially weighted moving average
|
||||
* @weight: Weight to be used as damping factor, in units of 1/10
|
||||
@@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
|
||||
|
||||
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
|
||||
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
|
||||
extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
|
||||
|
||||
extern int tfrc_tx_packet_history_init(void);
|
||||
extern void tfrc_tx_packet_history_exit(void);
|
||||
|
@@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
|
||||
if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
|
||||
if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
|
||||
/*
|
||||
* In the congestion-avoidance phase p decays towards 0
|
||||
* when there are no further losses, so this case is
|
||||
* natural. Truncating to p_min = 0.01% means that the
|
||||
* maximum achievable throughput is limited to about
|
||||
* X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
|
||||
* with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
|
||||
*/
|
||||
tfrc_pr_debug("Value of p (%d) below resolution. "
|
||||
"Substituting %d\n", p, TFRC_SMALLEST_P);
|
||||
DCCP_WARN("Value of p (%d) below resolution. "
|
||||
"Substituting %d\n", p, TFRC_SMALLEST_P);
|
||||
index = 0;
|
||||
} else /* 0.0001 <= p <= 0.05 */
|
||||
index = p/TFRC_SMALLEST_P - 1;
|
||||
@@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
|
||||
result = scaled_div(s, R);
|
||||
return scaled_div32(result, f);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x);
|
||||
|
||||
/**
|
||||
@@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
|
||||
index = tfrc_binsearch(fvalue, 0);
|
||||
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
|
||||
|
||||
/**
|
||||
* tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
|
||||
* When @loss_event_rate is large, there is a chance that p is truncated to 0.
|
||||
* To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
|
||||
*/
|
||||
u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
|
||||
{
|
||||
if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
|
||||
return 0;
|
||||
if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
|
||||
return 1000000;
|
||||
return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
|
||||
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
|
||||
|
Reference in New Issue
Block a user