This reverts "Merge branch 'dccp' of git://eden-feed.erg.abdn.ac.uk/dccp_exp"

as it accidentally contained the wrong set of patches. These will be
submitted separately.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Gerrit Renker
2008-09-09 13:27:22 +02:00
parent 0a68a20cc3
commit 410e27a49b
36 changed files with 3035 additions and 4122 deletions

net/dccp/ccids/Kconfig

@@ -1,8 +1,10 @@
menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
depends on EXPERIMENTAL
config IP_DCCP_CCID2
tristate "CCID2 (TCP-Like)"
tristate "CCID2 (TCP-Like) (EXPERIMENTAL)"
def_tristate IP_DCCP
select IP_DCCP_ACKVEC
---help---
CCID 2, TCP-like Congestion Control, denotes Additive Increase,
Multiplicative Decrease (AIMD) congestion control with behavior
@@ -34,7 +36,7 @@ config IP_DCCP_CCID2_DEBUG
If in doubt, say N.
config IP_DCCP_CCID3
tristate "CCID3 (TCP-Friendly)"
tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)"
def_tristate IP_DCCP
select IP_DCCP_TFRC_LIB
---help---
@@ -62,9 +64,9 @@ config IP_DCCP_CCID3
If in doubt, say M.
if IP_DCCP_CCID3
config IP_DCCP_CCID3_DEBUG
bool "CCID3 debugging messages"
depends on IP_DCCP_CCID3
---help---
Enable CCID3-specific debugging messages.
@@ -74,29 +76,10 @@ config IP_DCCP_CCID3_DEBUG
If in doubt, say N.
choice
prompt "Select method for measuring the packet size s"
default IP_DCCP_CCID3_MEASURE_S_AS_MPS
config IP_DCCP_CCID3_MEASURE_S_AS_MPS
bool "Always use MPS in place of s"
---help---
This use is recommended as it is consistent with the initialisation
of X and suggested when s varies (rfc3448bis, (1) in section 4.1).
config IP_DCCP_CCID3_MEASURE_S_AS_AVG
bool "Use moving average"
---help---
An alternative way of tracking s, also supported by rfc3448bis.
This used to be the default for CCID-3 in previous kernels.
config IP_DCCP_CCID3_MEASURE_S_AS_MAX
bool "Track the maximum payload length"
---help---
An experimental method based on tracking the maximum packet size.
endchoice
config IP_DCCP_CCID3_RTO
int "Use higher bound for nofeedback timer"
default 100
depends on IP_DCCP_CCID3 && EXPERIMENTAL
---help---
Use higher lower bound for nofeedback timer expiration.
@@ -123,7 +106,6 @@ config IP_DCCP_CCID3_RTO
The purpose of the nofeedback timer is to slow DCCP down when there
is serious network congestion: experimenting with larger values should
therefore not be performed on WANs.
endif # IP_DCCP_CCID3
config IP_DCCP_TFRC_LIB
tristate

net/dccp/ccids/ccid2.c

@@ -25,7 +25,7 @@
/*
* This implementation should follow RFC 4341
*/
#include "../feat.h"
#include "../ccid.h"
#include "../dccp.h"
#include "ccid2.h"
@@ -34,8 +34,51 @@
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
static int ccid2_debug;
#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx)
{
int len = 0;
int pipe = 0;
struct ccid2_seq *seqp = hctx->ccid2hctx_seqh;
/* there is data in the chain */
if (seqp != hctx->ccid2hctx_seqt) {
seqp = seqp->ccid2s_prev;
len++;
if (!seqp->ccid2s_acked)
pipe++;
while (seqp != hctx->ccid2hctx_seqt) {
struct ccid2_seq *prev = seqp->ccid2s_prev;
len++;
if (!prev->ccid2s_acked)
pipe++;
/* packets are sent sequentially */
BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
prev->ccid2s_seq ) >= 0);
BUG_ON(time_before(seqp->ccid2s_sent,
prev->ccid2s_sent));
seqp = prev;
}
}
BUG_ON(pipe != hctx->ccid2hctx_pipe);
ccid2_pr_debug("len of chain=%d\n", len);
do {
seqp = seqp->ccid2s_prev;
len++;
} while (seqp != hctx->ccid2hctx_seqh);
ccid2_pr_debug("total len=%d\n", len);
BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN);
}
#else
#define ccid2_pr_debug(format, a...)
#define ccid2_hc_tx_check_sanity(hctx)
#endif
static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
@@ -44,7 +87,8 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
int i;
/* check if we have space to preserve the pointer to the buffer */
if (hctx->seqbufc >= sizeof(hctx->seqbuf) / sizeof(struct ccid2_seq *))
if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) /
sizeof(struct ccid2_seq*)))
return -ENOMEM;
/* allocate buffer and initialize linked list */
@@ -60,35 +104,38 @@ static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx)
seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
/* This is the first allocation. Initiate the head and tail. */
if (hctx->seqbufc == 0)
hctx->seqh = hctx->seqt = seqp;
if (hctx->ccid2hctx_seqbufc == 0)
hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp;
else {
/* link the existing list with the one we just created */
hctx->seqh->ccid2s_next = seqp;
seqp->ccid2s_prev = hctx->seqh;
hctx->ccid2hctx_seqh->ccid2s_next = seqp;
seqp->ccid2s_prev = hctx->ccid2hctx_seqh;
hctx->seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->seqt;
hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hctx->ccid2hctx_seqt;
}
/* store the original pointer to the buffer so we can free it */
hctx->seqbuf[hctx->seqbufc] = seqp;
hctx->seqbufc++;
hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp;
hctx->ccid2hctx_seqbufc++;
return 0;
}
static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
{
if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
return CCID_PACKET_WILL_DEQUEUE_LATER;
return CCID_PACKET_SEND_AT_ONCE;
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
if (hctx->ccid2hctx_pipe < hctx->ccid2hctx_cwnd)
return 0;
return 1; /* XXX CCID should dequeue when ready instead of polling */
}
static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
{
struct dccp_sock *dp = dccp_sk(sk);
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->cwnd, 2);
u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->ccid2hctx_cwnd, 2);
/*
* Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
@@ -100,8 +147,8 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
val = max_ratio;
}
if (val > DCCPF_ACK_RATIO_MAX)
val = DCCPF_ACK_RATIO_MAX;
if (val > 0xFFFF) /* RFC 4340, 11.3 */
val = 0xFFFF;
if (val == dp->dccps_l_ack_ratio)
return;
@@ -110,77 +157,99 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
dp->dccps_l_ack_ratio = val;
}
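
To make the bound concrete, here is a minimal user-space sketch (not kernel code; the helper name is made up) of the clamping applied in ccid2_change_l_ack_ratio() above: Ack Ratio may not exceed ceil(cwnd/2), per (2) of RFC 4341, and never the 2-byte feature maximum 0xFFFF (RFC 4340, 11.3):

```c
#include <stdio.h>

/* clamp_ack_ratio() is a hypothetical stand-in for the bounds above */
static unsigned int clamp_ack_ratio(unsigned int val, unsigned int cwnd)
{
	unsigned int max_ratio = (cwnd + 1) / 2;   /* DIV_ROUND_UP(cwnd, 2) */

	if (val > max_ratio)
		val = max_ratio;
	if (val > 0xFFFF)                          /* RFC 4340, 11.3 */
		val = 0xFFFF;
	return val;
}

int main(void)
{
	printf("%u\n", clamp_ack_ratio(7, 10));    /* -> 5, i.e. ceil(10/2) */
	printf("%u\n", clamp_ack_ratio(3, 10));    /* -> 3, already in bounds */
	return 0;
}
```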
static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val)
{
ccid2_pr_debug("change SRTT to %ld\n", val);
hctx->ccid2hctx_srtt = val;
}
static void ccid2_start_rto_timer(struct sock *sk);
static void ccid2_hc_tx_rto_expire(unsigned long data)
{
struct sock *sk = (struct sock *)data;
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
long s;
bh_lock_sock(sk);
if (sock_owned_by_user(sk)) {
sk_reset_timer(sk, &hctx->rtotimer, jiffies + HZ / 5);
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
jiffies + HZ / 5);
goto out;
}
ccid2_pr_debug("RTO_EXPIRE\n");
ccid2_hc_tx_check_sanity(hctx);
/* back-off timer */
hctx->rto <<= 1;
if (hctx->rto > DCCP_RTO_MAX)
hctx->rto = DCCP_RTO_MAX;
hctx->ccid2hctx_rto <<= 1;
s = hctx->ccid2hctx_rto / HZ;
if (s > 60)
hctx->ccid2hctx_rto = 60 * HZ;
ccid2_start_rto_timer(sk);
/* adjust pipe, cwnd etc */
hctx->ssthresh = hctx->cwnd / 2;
if (hctx->ssthresh < 2)
hctx->ssthresh = 2;
hctx->cwnd = 1;
hctx->pipe = 0;
hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd / 2;
if (hctx->ccid2hctx_ssthresh < 2)
hctx->ccid2hctx_ssthresh = 2;
hctx->ccid2hctx_cwnd = 1;
hctx->ccid2hctx_pipe = 0;
/* clear state about stuff we sent */
hctx->seqt = hctx->seqh;
hctx->packets_acked = 0;
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh;
hctx->ccid2hctx_packets_acked = 0;
/* clear ack ratio state. */
hctx->rpseq = 0;
hctx->rpdupack = -1;
hctx->ccid2hctx_rpseq = 0;
hctx->ccid2hctx_rpdupack = -1;
ccid2_change_l_ack_ratio(sk, 1);
/* if we were blocked before, we may now send cwnd=1 packet */
if (sender_was_blocked)
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
/* restart backed-off timer */
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
ccid2_hc_tx_check_sanity(hctx);
out:
bh_unlock_sock(sk);
sock_put(sk);
}
static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
static void ccid2_start_rto_timer(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
ccid2_pr_debug("setting RTO timeout=%ld\n", hctx->ccid2hctx_rto);
BUG_ON(timer_pending(&hctx->ccid2hctx_rtotimer));
sk_reset_timer(sk, &hctx->ccid2hctx_rtotimer,
jiffies + hctx->ccid2hctx_rto);
}
static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
struct ccid2_seq *next;
hctx->pipe++;
hctx->ccid2hctx_pipe++;
hctx->seqh->ccid2s_seq = dp->dccps_gss;
hctx->seqh->ccid2s_acked = 0;
hctx->seqh->ccid2s_sent = jiffies;
hctx->ccid2hctx_seqh->ccid2s_seq = dp->dccps_gss;
hctx->ccid2hctx_seqh->ccid2s_acked = 0;
hctx->ccid2hctx_seqh->ccid2s_sent = jiffies;
next = hctx->seqh->ccid2s_next;
next = hctx->ccid2hctx_seqh->ccid2s_next;
/* check if we need to alloc more space */
if (next == hctx->seqt) {
if (next == hctx->ccid2hctx_seqt) {
if (ccid2_hc_tx_alloc_seq(hctx)) {
DCCP_CRIT("packet history - out of memory!");
/* FIXME: find a more graceful way to bail out */
return;
}
next = hctx->seqh->ccid2s_next;
BUG_ON(next == hctx->seqt);
next = hctx->ccid2hctx_seqh->ccid2s_next;
BUG_ON(next == hctx->ccid2hctx_seqt);
}
hctx->seqh = next;
hctx->ccid2hctx_seqh = next;
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->cwnd, hctx->pipe);
ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd,
hctx->ccid2hctx_pipe);
/*
* FIXME: The code below is broken and the variables have been removed
@@ -203,12 +272,12 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
*/
#if 0
/* Ack Ratio. Need to maintain a concept of how many windows we sent */
hctx->arsent++;
hctx->ccid2hctx_arsent++;
/* We had an ack loss in this window... */
if (hctx->ackloss) {
if (hctx->arsent >= hctx->cwnd) {
hctx->arsent = 0;
hctx->ackloss = 0;
if (hctx->ccid2hctx_ackloss) {
if (hctx->ccid2hctx_arsent >= hctx->ccid2hctx_cwnd) {
hctx->ccid2hctx_arsent = 0;
hctx->ccid2hctx_ackloss = 0;
}
} else {
/* No acks lost up to now... */
@@ -218,28 +287,28 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
dp->dccps_l_ack_ratio;
denom = hctx->cwnd * hctx->cwnd / denom;
denom = hctx->ccid2hctx_cwnd * hctx->ccid2hctx_cwnd / denom;
if (hctx->arsent >= denom) {
if (hctx->ccid2hctx_arsent >= denom) {
ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
hctx->arsent = 0;
hctx->ccid2hctx_arsent = 0;
}
} else {
/* we can't increase ack ratio further [1] */
hctx->arsent = 0; /* or maybe set it to cwnd*/
hctx->ccid2hctx_arsent = 0; /* or maybe set it to cwnd*/
}
}
#endif
/* setup RTO timer */
if (!timer_pending(&hctx->rtotimer))
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
if (!timer_pending(&hctx->ccid2hctx_rtotimer))
ccid2_start_rto_timer(sk);
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
do {
struct ccid2_seq *seqp = hctx->seqt;
struct ccid2_seq *seqp = hctx->ccid2hctx_seqt;
while (seqp != hctx->seqh) {
while (seqp != hctx->ccid2hctx_seqh) {
ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
(unsigned long long)seqp->ccid2s_seq,
seqp->ccid2s_acked, seqp->ccid2s_sent);
@@ -247,158 +316,205 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
}
} while (0);
ccid2_pr_debug("=========\n");
ccid2_hc_tx_check_sanity(hctx);
#endif
}
/**
* ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
* This code is almost identical with TCP's tcp_rtt_estimator(), since
* - it has a higher sampling frequency (recommended by RFC 1323),
* - the RTO does not collapse into RTT due to RTTVAR going towards zero,
* - it is simple (cf. more complex proposals such as Eifel timer or research
* which suggests that the gain should be set according to window size),
* - in tests it was found to work well with CCID2 [gerrit].
/* XXX Lame code duplication!
* returns -1 if none was found.
* else returns the next offset to use in the function call.
*/
static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
static int ccid2_ackvector(struct sock *sk, struct sk_buff *skb, int offset,
unsigned char **vec, unsigned char *veclen)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
long m = mrtt ? : 1;
const struct dccp_hdr *dh = dccp_hdr(skb);
unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
unsigned char *opt_ptr;
const unsigned char *opt_end = (unsigned char *)dh +
(dh->dccph_doff * 4);
unsigned char opt, len;
unsigned char *value;
if (hctx->srtt == 0) {
/* First measurement m */
hctx->srtt = m << 3;
hctx->mdev = m << 1;
BUG_ON(offset < 0);
options += offset;
opt_ptr = options;
if (opt_ptr >= opt_end)
return -1;
hctx->mdev_max = max(TCP_RTO_MIN, hctx->mdev);
hctx->rttvar = hctx->mdev_max;
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
} else {
/* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
m -= (hctx->srtt >> 3);
hctx->srtt += m;
while (opt_ptr != opt_end) {
opt = *opt_ptr++;
len = 0;
value = NULL;
/* Similarly, update scaled mdev with regard to |m| */
if (m < 0) {
m = -m;
m -= (hctx->mdev >> 2);
/* Check if this isn't a single byte option */
if (opt > DCCPO_MAX_RESERVED) {
if (opt_ptr == opt_end)
goto out_invalid_option;
len = *opt_ptr++;
if (len < 3)
goto out_invalid_option;
/*
* This neutralises RTO increase when RTT < SRTT - mdev
* (see P. Sarolahti, A. Kuznetsov,"Congestion Control
* in Linux TCP", USENIX 2002, pp. 49-62).
* Remove the type and len fields, leaving
* just the value size
*/
if (m > 0)
m >>= 3;
} else {
m -= (hctx->mdev >> 2);
}
hctx->mdev += m;
len -= 2;
value = opt_ptr;
opt_ptr += len;
if (hctx->mdev > hctx->mdev_max) {
hctx->mdev_max = hctx->mdev;
if (hctx->mdev_max > hctx->rttvar)
hctx->rttvar = hctx->mdev_max;
if (opt_ptr > opt_end)
goto out_invalid_option;
}
/*
* Decay RTTVAR at most once per flight, exploiting that
* 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
* 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
* GAR is a useful bound for FlightSize = pipe, AWL is probably
* too low as it over-estimates pipe.
*/
if (after48(dccp_sk(sk)->dccps_gar, hctx->rtt_seq)) {
if (hctx->mdev_max < hctx->rttvar)
hctx->rttvar -= (hctx->rttvar -
hctx->mdev_max) >> 2;
hctx->rtt_seq = dccp_sk(sk)->dccps_gss;
hctx->mdev_max = TCP_RTO_MIN;
switch (opt) {
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
*vec = value;
*veclen = len;
return offset + (opt_ptr - options);
}
}
/*
* Set RTO from SRTT and RTTVAR
* Clock granularity is ignored since the minimum error for RTTVAR is
* clamped to 50msec (corresponding to HZ=20). This leads to a minimum
* RTO of 200msec. This agrees with TCP and RFC 4341, 5.: "Because DCCP
* does not retransmit data, DCCP does not require TCP's recommended
* minimum timeout of one second".
*/
hctx->rto = (hctx->srtt >> 3) + hctx->rttvar;
return -1;
if (hctx->rto > DCCP_RTO_MAX)
hctx->rto = DCCP_RTO_MAX;
out_invalid_option:
DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
return -1;
}
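
For reference, the RFC 2988 update rules that the reverted ccid2_rtt_estimator() is documented to follow (see the comment above) can be sketched as below. This is a plain floating-point illustration with made-up samples; the kernel code instead keeps SRTT scaled by 2^3 and RTTVAR by 2^2 in integer arithmetic:

```c
#include <stdio.h>
#include <math.h>

static double srtt, rttvar, rto;

/* Feed one RTT measurement r, per RFC 2988, 2.2/2.3 */
static void rtt_sample(double r)
{
	if (srtt == 0) {			/* first measurement */
		srtt   = r;
		rttvar = r / 2;
	} else {
		rttvar = 0.75 * rttvar + 0.25 * fabs(srtt - r);
		srtt   = 0.875 * srtt + 0.125 * r;
	}
	rto = srtt + 4 * rttvar;		/* kernel also clamps to DCCP_RTO_MAX */
}

int main(void)
{
	double samples[] = { 100, 120, 90, 110 };	/* hypothetical RTTs in ms */

	for (int i = 0; i < 4; i++) {
		rtt_sample(samples[i]);
		printf("srtt=%5.1f rttvar=%5.1f rto=%5.1f\n", srtt, rttvar, rto);
	}
	return 0;
}
```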
static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
unsigned int *maxincr)
static void ccid2_hc_tx_kill_rto_timer(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
if (hctx->cwnd < hctx->ssthresh) {
if (*maxincr > 0 && ++hctx->packets_acked == 2) {
hctx->cwnd += 1;
*maxincr -= 1;
hctx->packets_acked = 0;
sk_stop_timer(sk, &hctx->ccid2hctx_rtotimer);
ccid2_pr_debug("deleted RTO timer\n");
}
static inline void ccid2_new_ack(struct sock *sk,
struct ccid2_seq *seqp,
unsigned int *maxincr)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh) {
if (*maxincr > 0 && ++hctx->ccid2hctx_packets_acked == 2) {
hctx->ccid2hctx_cwnd += 1;
*maxincr -= 1;
hctx->ccid2hctx_packets_acked = 0;
}
} else if (++hctx->packets_acked >= hctx->cwnd) {
hctx->cwnd += 1;
hctx->packets_acked = 0;
} else if (++hctx->ccid2hctx_packets_acked >= hctx->ccid2hctx_cwnd) {
hctx->ccid2hctx_cwnd += 1;
hctx->ccid2hctx_packets_acked = 0;
}
/*
* FIXME: RTT is sampled several times per acknowledgment (for each
* entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
* This causes the RTT to be over-estimated, since the older entries
* in the Ack Vector have earlier sending times.
* The cleanest solution is to not use the ccid2s_sent field at all
* and instead use DCCP timestamps - need to be resolved at some time.
*/
ccid2_rtt_estimator(sk, jiffies - seqp->ccid2s_sent);
/* update RTO */
if (hctx->ccid2hctx_srtt == -1 ||
time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) {
unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent;
int s;
/* first measurement */
if (hctx->ccid2hctx_srtt == -1) {
ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
r, jiffies,
(unsigned long long)seqp->ccid2s_seq);
ccid2_change_srtt(hctx, r);
hctx->ccid2hctx_rttvar = r >> 1;
} else {
/* RTTVAR */
long tmp = hctx->ccid2hctx_srtt - r;
long srtt;
if (tmp < 0)
tmp *= -1;
tmp >>= 2;
hctx->ccid2hctx_rttvar *= 3;
hctx->ccid2hctx_rttvar >>= 2;
hctx->ccid2hctx_rttvar += tmp;
/* SRTT */
srtt = hctx->ccid2hctx_srtt;
srtt *= 7;
srtt >>= 3;
tmp = r >> 3;
srtt += tmp;
ccid2_change_srtt(hctx, srtt);
}
s = hctx->ccid2hctx_rttvar << 2;
/* clock granularity is 1 when based on jiffies */
if (!s)
s = 1;
hctx->ccid2hctx_rto = hctx->ccid2hctx_srtt + s;
/* must be at least a second */
s = hctx->ccid2hctx_rto / HZ;
/* DCCP doesn't require this [but I like it cuz my code sux] */
#if 1
if (s < 1)
hctx->ccid2hctx_rto = HZ;
#endif
/* max 60 seconds */
if (s > 60)
hctx->ccid2hctx_rto = HZ * 60;
hctx->ccid2hctx_lastrtt = jiffies;
ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
hctx->ccid2hctx_srtt, hctx->ccid2hctx_rttvar,
hctx->ccid2hctx_rto, HZ, r);
}
/* we got a new ack, so re-start RTO timer */
ccid2_hc_tx_kill_rto_timer(sk);
ccid2_start_rto_timer(sk);
}
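
The growth rule at the top of ccid2_new_ack() above is the TCP-like one: cwnd grows by one packet per two newly acked packets in slow start (cf. RFC 3465) and by one packet per full window of acks in congestion avoidance. A toy model of just that rule, ignoring the per-Ack maxincr cap and using an arbitrary ssthresh:

```c
#include <stdio.h>

static unsigned int cwnd = 1, ssthresh = 8, packets_acked;

static void on_new_ack(void)
{
	if (cwnd < ssthresh) {			/* slow start */
		if (++packets_acked == 2) {
			cwnd += 1;
			packets_acked = 0;
		}
	} else if (++packets_acked >= cwnd) {	/* congestion avoidance */
		cwnd += 1;
		packets_acked = 0;
	}
}

int main(void)
{
	for (int ack = 1; ack <= 40; ack++) {
		on_new_ack();
		if (ack % 10 == 0)
			printf("after %2d acks: cwnd=%u\n", ack, cwnd);
	}
	return 0;
}
```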
static void ccid2_hc_tx_dec_pipe(struct sock *sk)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
if (hctx->ccid2hctx_pipe == 0)
DCCP_BUG("pipe == 0");
else
hctx->ccid2hctx_pipe--;
if (hctx->ccid2hctx_pipe == 0)
ccid2_hc_tx_kill_rto_timer(sk);
}
static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
if (time_before(seqp->ccid2s_sent, hctx->last_cong)) {
if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) {
ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
return;
}
hctx->last_cong = jiffies;
hctx->ccid2hctx_last_cong = jiffies;
hctx->cwnd = hctx->cwnd / 2 ? : 1U;
hctx->ssthresh = max(hctx->cwnd, 2U);
hctx->ccid2hctx_cwnd = hctx->ccid2hctx_cwnd / 2 ? : 1U;
hctx->ccid2hctx_ssthresh = max(hctx->ccid2hctx_cwnd, 2U);
/* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->cwnd)
ccid2_change_l_ack_ratio(sk, hctx->cwnd);
}
static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
u8 option, u8 *optval, u8 optlen)
{
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
switch (option) {
case DCCPO_ACK_VECTOR_0:
case DCCPO_ACK_VECTOR_1:
return dccp_ackvec_parsed_add(&hctx->av_chunks, optval, optlen,
option - DCCPO_ACK_VECTOR_0);
}
return 0;
if (dccp_sk(sk)->dccps_l_ack_ratio > hctx->ccid2hctx_cwnd)
ccid2_change_l_ack_ratio(sk, hctx->ccid2hctx_cwnd);
}
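
A quick table of the multiplicative-decrease step in ccid2_congestion_event() above: the GNU `?:` form `cwnd / 2 ? : 1U` halves cwnd but never below 1, and ssthresh is floored at 2. Hypothetical values:

```c
#include <stdio.h>

int main(void)
{
	for (unsigned int cwnd = 1; cwnd <= 10; cwnd++) {
		unsigned int halved = cwnd / 2 ? cwnd / 2 : 1;	 /* cwnd / 2 ? : 1U */
		unsigned int ssthresh = halved > 2 ? halved : 2; /* max(cwnd, 2U) */

		printf("cwnd=%2u -> cwnd'=%u ssthresh=%u\n", cwnd, halved, ssthresh);
	}
	return 0;
}
```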
static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
{
struct dccp_sock *dp = dccp_sk(sk);
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
const bool sender_was_blocked = ccid2_cwnd_network_limited(hctx);
struct dccp_ackvec_parsed *avp;
u64 ackno, seqno;
struct ccid2_seq *seqp;
unsigned char *vector;
unsigned char veclen;
int offset = 0;
int done = 0;
unsigned int maxincr = 0;
ccid2_hc_tx_check_sanity(hctx);
/* check reverse path congestion */
seqno = DCCP_SKB_CB(skb)->dccpd_seq;
@@ -407,21 +523,21 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* -sorbo.
*/
/* need to bootstrap */
if (hctx->rpdupack == -1) {
hctx->rpdupack = 0;
hctx->rpseq = seqno;
if (hctx->ccid2hctx_rpdupack == -1) {
hctx->ccid2hctx_rpdupack = 0;
hctx->ccid2hctx_rpseq = seqno;
} else {
/* check if packet is consecutive */
if (dccp_delta_seqno(hctx->rpseq, seqno) == 1)
hctx->rpseq = seqno;
if (dccp_delta_seqno(hctx->ccid2hctx_rpseq, seqno) == 1)
hctx->ccid2hctx_rpseq = seqno;
/* it's a later packet */
else if (after48(seqno, hctx->rpseq)) {
hctx->rpdupack++;
else if (after48(seqno, hctx->ccid2hctx_rpseq)) {
hctx->ccid2hctx_rpdupack++;
/* check if we got enough dupacks */
if (hctx->rpdupack >= NUMDUPACK) {
hctx->rpdupack = -1; /* XXX lame */
hctx->rpseq = 0;
if (hctx->ccid2hctx_rpdupack >= NUMDUPACK) {
hctx->ccid2hctx_rpdupack = -1; /* XXX lame */
hctx->ccid2hctx_rpseq = 0;
ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
}
@@ -429,22 +545,27 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
}
/* check forward path congestion */
if (dccp_packet_without_ack(skb))
/* still didn't send out new data packets */
if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt)
return;
/* still didn't send out new data packets */
if (hctx->seqh == hctx->seqt)
goto done;
switch (DCCP_SKB_CB(skb)->dccpd_type) {
case DCCP_PKT_ACK:
case DCCP_PKT_DATAACK:
break;
default:
return;
}
ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
if (after48(ackno, hctx->high_ack))
hctx->high_ack = ackno;
if (after48(ackno, hctx->ccid2hctx_high_ack))
hctx->ccid2hctx_high_ack = ackno;
seqp = hctx->seqt;
seqp = hctx->ccid2hctx_seqt;
while (before48(seqp->ccid2s_seq, ackno)) {
seqp = seqp->ccid2s_next;
if (seqp == hctx->seqh) {
seqp = hctx->seqh->ccid2s_prev;
if (seqp == hctx->ccid2hctx_seqh) {
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
break;
}
}
@@ -454,26 +575,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* packets per acknowledgement. Rounding up avoids that cwnd is not
* advanced when Ack Ratio is 1 and gives a slight edge otherwise.
*/
if (hctx->cwnd < hctx->ssthresh)
if (hctx->ccid2hctx_cwnd < hctx->ccid2hctx_ssthresh)
maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
/* go through all ack vectors */
list_for_each_entry(avp, &hctx->av_chunks, node) {
while ((offset = ccid2_ackvector(sk, skb, offset,
&vector, &veclen)) != -1) {
/* go through this ack vector */
for (; avp->len--; avp->vec++) {
u64 ackno_end_rl = SUB48(ackno,
dccp_ackvec_runlen(avp->vec));
while (veclen--) {
const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
u64 ackno_end_rl = SUB48(ackno, rl);
ccid2_pr_debug("ackvec %llu |%u,%u|\n",
ccid2_pr_debug("ackvec start:%llu end:%llu\n",
(unsigned long long)ackno,
dccp_ackvec_state(avp->vec) >> 6,
dccp_ackvec_runlen(avp->vec));
(unsigned long long)ackno_end_rl);
/* if the seqno we are analyzing is larger than the
* current ackno, then move towards the tail of our
* seqnos.
*/
while (after48(seqp->ccid2s_seq, ackno)) {
if (seqp == hctx->seqt) {
if (seqp == hctx->ccid2hctx_seqt) {
done = 1;
break;
}
@@ -486,24 +607,26 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* run length
*/
while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
const u8 state = dccp_ackvec_state(avp->vec);
const u8 state = *vector &
DCCP_ACKVEC_STATE_MASK;
/* new packet received or marked */
if (state != DCCPAV_NOT_RECEIVED &&
if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED &&
!seqp->ccid2s_acked) {
if (state == DCCPAV_ECN_MARKED)
if (state ==
DCCP_ACKVEC_STATE_ECN_MARKED) {
ccid2_congestion_event(sk,
seqp);
else
} else
ccid2_new_ack(sk, seqp,
&maxincr);
seqp->ccid2s_acked = 1;
ccid2_pr_debug("Got ack for %llu\n",
(unsigned long long)seqp->ccid2s_seq);
hctx->pipe--;
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->seqt) {
if (seqp == hctx->ccid2hctx_seqt) {
done = 1;
break;
}
@@ -513,6 +636,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
break;
ackno = SUB48(ackno_end_rl, 1);
vector++;
}
if (done)
break;
@@ -521,11 +645,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
/* The state about what is acked should be correct now
* Check for NUMDUPACK
*/
seqp = hctx->seqt;
while (before48(seqp->ccid2s_seq, hctx->high_ack)) {
seqp = hctx->ccid2hctx_seqt;
while (before48(seqp->ccid2s_seq, hctx->ccid2hctx_high_ack)) {
seqp = seqp->ccid2s_next;
if (seqp == hctx->seqh) {
seqp = hctx->seqh->ccid2s_prev;
if (seqp == hctx->ccid2hctx_seqh) {
seqp = hctx->ccid2hctx_seqh->ccid2s_prev;
break;
}
}
@@ -536,7 +660,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
if (done == NUMDUPACK)
break;
}
if (seqp == hctx->seqt)
if (seqp == hctx->ccid2hctx_seqt)
break;
seqp = seqp->ccid2s_prev;
}
@@ -557,34 +681,25 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
* one ack vector.
*/
ccid2_congestion_event(sk, seqp);
hctx->pipe--;
ccid2_hc_tx_dec_pipe(sk);
}
if (seqp == hctx->seqt)
if (seqp == hctx->ccid2hctx_seqt)
break;
seqp = seqp->ccid2s_prev;
}
hctx->seqt = last_acked;
hctx->ccid2hctx_seqt = last_acked;
}
/* trim acked packets in tail */
while (hctx->seqt != hctx->seqh) {
if (!hctx->seqt->ccid2s_acked)
while (hctx->ccid2hctx_seqt != hctx->ccid2hctx_seqh) {
if (!hctx->ccid2hctx_seqt->ccid2s_acked)
break;
hctx->seqt = hctx->seqt->ccid2s_next;
hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next;
}
/* restart RTO timer if not all outstanding data has been acked */
if (hctx->pipe == 0)
sk_stop_timer(sk, &hctx->rtotimer);
else
sk_reset_timer(sk, &hctx->rtotimer, jiffies + hctx->rto);
done:
/* check if incoming Acks allow pending packets to be sent */
if (sender_was_blocked && !ccid2_cwnd_network_limited(hctx))
tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
dccp_ackvec_parsed_cleanup(&hctx->av_chunks);
ccid2_hc_tx_check_sanity(hctx);
}
static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -594,13 +709,17 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
u32 max_ratio;
/* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
hctx->ssthresh = ~0U;
hctx->ccid2hctx_ssthresh = ~0U;
/* Use larger initial windows (RFC 3390, rfc2581bis) */
hctx->cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
/*
* RFC 4341, 5: "The cwnd parameter is initialized to at most four
* packets for new connections, following the rules from [RFC3390]".
* We need to convert the bytes of RFC3390 into the packets of RFC 4341.
*/
hctx->ccid2hctx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
/* Make sure that Ack Ratio is enabled and within bounds. */
max_ratio = DIV_ROUND_UP(hctx->cwnd, 2);
max_ratio = DIV_ROUND_UP(hctx->ccid2hctx_cwnd, 2);
if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
dp->dccps_l_ack_ratio = max_ratio;
@@ -608,11 +727,15 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
if (ccid2_hc_tx_alloc_seq(hctx))
return -ENOMEM;
hctx->rto = DCCP_TIMEOUT_INIT;
hctx->rpdupack = -1;
hctx->last_cong = jiffies;
setup_timer(&hctx->rtotimer, ccid2_hc_tx_rto_expire, (unsigned long)sk);
INIT_LIST_HEAD(&hctx->av_chunks);
hctx->ccid2hctx_rto = 3 * HZ;
ccid2_change_srtt(hctx, -1);
hctx->ccid2hctx_rttvar = -1;
hctx->ccid2hctx_rpdupack = -1;
hctx->ccid2hctx_last_cong = jiffies;
setup_timer(&hctx->ccid2hctx_rtotimer, ccid2_hc_tx_rto_expire,
(unsigned long)sk);
ccid2_hc_tx_check_sanity(hctx);
return 0;
}
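
Worked examples of the byte-to-packet conversion in the restored initialisation above, `clamp(4380U / dp->dccps_mss_cache, 2U, 4U)`, where 4380 bytes is the RFC 3390 upper bound min(4*MSS, max(2*MSS, 4380)); the MSS values below are made up:

```c
#include <stdio.h>

static unsigned int initial_cwnd(unsigned int mss)
{
	unsigned int w = 4380 / mss;		/* bytes -> packets */

	if (w < 2)				/* clamp(..., 2U, 4U) */
		w = 2;
	if (w > 4)
		w = 4;
	return w;
}

int main(void)
{
	printf("MSS  536 -> cwnd %u\n", initial_cwnd(536));	/* 8, clamped to 4 */
	printf("MSS 1460 -> cwnd %u\n", initial_cwnd(1460));	/* 3 */
	printf("MSS 4096 -> cwnd %u\n", initial_cwnd(4096));	/* 1, clamped to 2 */
	return 0;
}
```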
@@ -621,11 +744,11 @@ static void ccid2_hc_tx_exit(struct sock *sk)
struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk);
int i;
sk_stop_timer(sk, &hctx->rtotimer);
ccid2_hc_tx_kill_rto_timer(sk);
for (i = 0; i < hctx->seqbufc; i++)
kfree(hctx->seqbuf[i]);
hctx->seqbufc = 0;
for (i = 0; i < hctx->ccid2hctx_seqbufc; i++)
kfree(hctx->ccid2hctx_seqbuf[i]);
hctx->ccid2hctx_seqbufc = 0;
}
static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
@@ -636,28 +759,27 @@ static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
switch (DCCP_SKB_CB(skb)->dccpd_type) {
case DCCP_PKT_DATA:
case DCCP_PKT_DATAACK:
hcrx->data++;
if (hcrx->data >= dp->dccps_r_ack_ratio) {
hcrx->ccid2hcrx_data++;
if (hcrx->ccid2hcrx_data >= dp->dccps_r_ack_ratio) {
dccp_send_ack(sk);
hcrx->data = 0;
hcrx->ccid2hcrx_data = 0;
}
break;
}
}
static struct ccid_operations ccid2 = {
.ccid_id = DCCPC_CCID2,
.ccid_name = "TCP-like",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
.ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
.ccid_id = DCCPC_CCID2,
.ccid_name = "TCP-like",
.ccid_owner = THIS_MODULE,
.ccid_hc_tx_obj_size = sizeof(struct ccid2_hc_tx_sock),
.ccid_hc_tx_init = ccid2_hc_tx_init,
.ccid_hc_tx_exit = ccid2_hc_tx_exit,
.ccid_hc_tx_send_packet = ccid2_hc_tx_send_packet,
.ccid_hc_tx_packet_sent = ccid2_hc_tx_packet_sent,
.ccid_hc_tx_packet_recv = ccid2_hc_tx_packet_recv,
.ccid_hc_rx_obj_size = sizeof(struct ccid2_hc_rx_sock),
.ccid_hc_rx_packet_recv = ccid2_hc_rx_packet_recv,
};
#ifdef CONFIG_IP_DCCP_CCID2_DEBUG

net/dccp/ccids/ccid2.h

@@ -42,49 +42,34 @@ struct ccid2_seq {
/** struct ccid2_hc_tx_sock - CCID2 TX half connection
*
* @{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
* @packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
* @srtt: smoothed RTT estimate, scaled by 2^3
* @mdev: smoothed RTT variation, scaled by 2^2
* @mdev_max: maximum of @mdev during one flight
* @rttvar: moving average/maximum of @mdev_max
* @rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
* @rtt_seq: to decay RTTVAR at most once per flight
* @rpseq: last consecutive seqno
* @rpdupack: dupacks since rpseq
* @av_chunks: list of Ack Vectors received on current skb
*/
* @ccid2hctx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
* @ccid2hctx_packets_acked - Ack counter for deriving cwnd growth (RFC 3465)
* @ccid2hctx_lastrtt - time RTT was last measured
* @ccid2hctx_rpseq - last consecutive seqno
* @ccid2hctx_rpdupack - dupacks since rpseq
*/
struct ccid2_hc_tx_sock {
u32 cwnd;
u32 ssthresh;
u32 pipe;
u32 packets_acked;
struct ccid2_seq *seqbuf[CCID2_SEQBUF_MAX];
int seqbufc;
struct ccid2_seq *seqh;
struct ccid2_seq *seqt;
/* RTT measurement: variables/principles are the same as in TCP */
u32 srtt,
mdev,
mdev_max,
rttvar,
rto;
u64 rtt_seq:48;
struct timer_list rtotimer;
u64 rpseq;
int rpdupack;
unsigned long last_cong;
u64 high_ack;
struct list_head av_chunks;
u32 ccid2hctx_cwnd;
u32 ccid2hctx_ssthresh;
u32 ccid2hctx_pipe;
u32 ccid2hctx_packets_acked;
struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX];
int ccid2hctx_seqbufc;
struct ccid2_seq *ccid2hctx_seqh;
struct ccid2_seq *ccid2hctx_seqt;
long ccid2hctx_rto;
long ccid2hctx_srtt;
long ccid2hctx_rttvar;
unsigned long ccid2hctx_lastrtt;
struct timer_list ccid2hctx_rtotimer;
u64 ccid2hctx_rpseq;
int ccid2hctx_rpdupack;
unsigned long ccid2hctx_last_cong;
u64 ccid2hctx_high_ack;
};
static inline bool ccid2_cwnd_network_limited(struct ccid2_hc_tx_sock *hctx)
{
return (hctx->pipe >= hctx->cwnd);
}
struct ccid2_hc_rx_sock {
int data;
int ccid2hcrx_data;
};
static inline struct ccid2_hc_tx_sock *ccid2_hc_tx_sk(const struct sock *sk)

net/dccp/ccids/ccid3.c (file diff suppressed because it is too large)

net/dccp/ccids/ccid3.h

@@ -47,22 +47,11 @@
/* Two seconds as per RFC 3448 4.2 */
#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
/* Maximum backoff interval t_mbi (RFC 3448, 4.3) */
#define TFRC_T_MBI (64 * USEC_PER_SEC)
/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
/*
* The t_delta parameter (RFC 3448, 4.6): delays of less than %USEC_PER_MSEC are
* rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
* Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
* resolution of HZ < 500 means that the error is below one timer tick (t_gran)
* when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
*/
#if (HZ >= 500)
# define TFRC_T_DELTA USEC_PER_MSEC
#else
# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
#warning Coarse CONFIG_HZ resolution -- higher value recommended for TFRC.
#endif
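
The arithmetic behind the HZ threshold in the removed comment, spelled out for a few hypothetical HZ values: the tick granularity is t_gran = 1/HZ seconds, and the constant t_delta = 1 ms stays within one tick of t_gran/2 once HZ >= 500:

```c
#include <stdio.h>

int main(void)
{
	const long USEC_PER_SEC = 1000000, USEC_PER_MSEC = 1000;
	long hz_values[] = { 100, 250, 500, 1000 };

	for (int i = 0; i < 4; i++) {
		long hz = hz_values[i];
		long half_gran = USEC_PER_SEC / (2 * hz);	/* t_gran / 2 */
		long t_delta = hz >= 500 ? USEC_PER_MSEC : half_gran;

		printf("HZ=%4ld: t_gran/2=%4ld us -> t_delta=%4ld us\n",
		       hz, half_gran, t_delta);
	}
	return 0;
}
```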
/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
#define TFRC_T_MBI 64
enum ccid3_options {
TFRC_OPT_LOSS_EVENT_RATE = 192,
@@ -70,43 +59,62 @@ enum ccid3_options {
TFRC_OPT_RECEIVE_RATE = 194,
};
struct ccid3_options_received {
u64 ccid3or_seqno:48,
ccid3or_loss_intervals_idx:16;
u16 ccid3or_loss_intervals_len;
u32 ccid3or_loss_event_rate;
u32 ccid3or_receive_rate;
};
/* TFRC sender states */
enum ccid3_hc_tx_states {
TFRC_SSTATE_NO_SENT = 1,
TFRC_SSTATE_NO_FBACK,
TFRC_SSTATE_FBACK,
TFRC_SSTATE_TERM,
};
/** struct ccid3_hc_tx_sock - CCID3 sender half-connection socket
*
* @x - Current sending rate in 64 * bytes per second
* @x_recv - Receive rate in 64 * bytes per second
* @x_calc - Calculated rate in bytes per second
* @rtt - Estimate of current round trip time in usecs
* @r_sqmean - Estimate of long-term RTT (RFC 3448, 4.5)
* @p - Current loss event rate (0-1) scaled by 1000000
* @s - Packet size in bytes
* @t_rto - Nofeedback Timer setting in usecs
* @t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
* @feedback - Whether feedback has been received or not
* @last_win_count - Last window counter sent
* @t_last_win_count - Timestamp of earliest packet with
* last_win_count value sent
* @no_feedback_timer - Handle to no feedback timer
* @t_ld - Time last doubled during slow start
* @t_nom - Nominal send time of next packet
* @hist - Packet history
* @ccid3hctx_x - Current sending rate in 64 * bytes per second
* @ccid3hctx_x_recv - Receive rate in 64 * bytes per second
* @ccid3hctx_x_calc - Calculated rate in bytes per second
* @ccid3hctx_rtt - Estimate of current round trip time in usecs
* @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
* @ccid3hctx_s - Packet size in bytes
* @ccid3hctx_t_rto - Nofeedback Timer setting in usecs
* @ccid3hctx_t_ipi - Interpacket (send) interval (RFC 3448, 4.6) in usecs
* @ccid3hctx_state - Sender state, one of %ccid3_hc_tx_states
* @ccid3hctx_last_win_count - Last window counter sent
* @ccid3hctx_t_last_win_count - Timestamp of earliest packet
* with last_win_count value sent
* @ccid3hctx_no_feedback_timer - Handle to no feedback timer
* @ccid3hctx_t_ld - Time last doubled during slow start
* @ccid3hctx_t_nom - Nominal send time of next packet
* @ccid3hctx_delta - Send timer delta (RFC 3448, 4.6) in usecs
* @ccid3hctx_hist - Packet history
* @ccid3hctx_options_received - Parsed set of retrieved options
*/
struct ccid3_hc_tx_sock {
u64 x;
u64 x_recv;
u32 x_calc;
u32 rtt;
u16 r_sqmean;
u32 p;
u32 t_rto;
u32 t_ipi;
u16 s;
bool feedback:1;
u8 last_win_count;
ktime_t t_last_win_count;
struct timer_list no_feedback_timer;
ktime_t t_ld;
ktime_t t_nom;
struct tfrc_tx_hist_entry *hist;
struct tfrc_tx_info ccid3hctx_tfrc;
#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x
#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv
#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc
#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt
#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p
#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto
#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi
u16 ccid3hctx_s;
enum ccid3_hc_tx_states ccid3hctx_state:8;
u8 ccid3hctx_last_win_count;
ktime_t ccid3hctx_t_last_win_count;
struct timer_list ccid3hctx_no_feedback_timer;
ktime_t ccid3hctx_t_ld;
ktime_t ccid3hctx_t_nom;
u32 ccid3hctx_delta;
struct tfrc_tx_hist_entry *ccid3hctx_hist;
struct ccid3_options_received ccid3hctx_options_received;
};
static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -116,32 +124,41 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
return hctx;
}
enum ccid3_fback_type {
CCID3_FBACK_NONE = 0,
CCID3_FBACK_INITIAL,
CCID3_FBACK_PERIODIC,
CCID3_FBACK_PARAM_CHANGE
/* TFRC receiver states */
enum ccid3_hc_rx_states {
TFRC_RSTATE_NO_DATA = 1,
TFRC_RSTATE_DATA,
TFRC_RSTATE_TERM = 127,
};
/** struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
*
* @last_counter - Tracks window counter (RFC 4342, 8.1)
* @feedback - The type of the feedback last sent
* @x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
* @tstamp_last_feedback - Time at which last feedback was sent
* @hist - Packet history (loss detection + RTT sampling)
* @li_hist - Loss Interval database
* @p_inverse - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
* @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
* @ccid3hcrx_p - Current loss event rate (RFC 3448 5.4)
* @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
* @ccid3hcrx_state - Receiver state, one of %ccid3_hc_rx_states
* @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
* @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448, sec. 4.3)
* @ccid3hcrx_rtt - Receiver estimate of RTT
* @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
* @ccid3hcrx_tstamp_last_ack - Time at which last feedback was sent
* @ccid3hcrx_hist - Packet history (loss detection + RTT sampling)
* @ccid3hcrx_li_hist - Loss Interval database
* @ccid3hcrx_s - Received packet size in bytes
* @ccid3hcrx_pinv - Inverse of Loss Event Rate (RFC 4342, sec. 8.5)
*/
struct ccid3_hc_rx_sock {
u8 last_counter:4;
enum ccid3_fback_type feedback:4;
u32 x_recv;
ktime_t tstamp_last_feedback;
struct tfrc_rx_hist hist;
struct tfrc_loss_hist li_hist;
#define p_inverse li_hist.i_mean
u8 ccid3hcrx_last_counter:4;
enum ccid3_hc_rx_states ccid3hcrx_state:8;
u32 ccid3hcrx_bytes_recv;
u32 ccid3hcrx_x_recv;
u32 ccid3hcrx_rtt;
ktime_t ccid3hcrx_tstamp_last_feedback;
struct tfrc_rx_hist ccid3hcrx_hist;
struct tfrc_loss_hist ccid3hcrx_li_hist;
u16 ccid3hcrx_s;
#define ccid3hcrx_pinv ccid3hcrx_li_hist.i_mean
};
static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)

net/dccp/ccids/lib/loss_interval.c

@@ -86,26 +86,21 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh)
/**
* tfrc_lh_update_i_mean - Update the `open' loss interval I_0
* This updates I_mean as the sequence numbers increase. As a consequence, the
* open loss interval I_0 increases, hence p = W_tot/max(I_tot0, I_tot1)
* decreases, and thus there is no need to send renewed feedback.
* For recomputing p: returns `true' if p > p_prev <=> 1/p < 1/p_prev
*/
void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
{
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh);
u32 old_i_mean = lh->i_mean;
s64 len;
if (cur == NULL) /* not initialised */
return;
/* FIXME: should probably also count non-data packets (RFC 4342, 6.1) */
if (!dccp_data_packet(skb))
return;
return 0;
len = dccp_delta_seqno(cur->li_seqno, DCCP_SKB_CB(skb)->dccpd_seq) + 1;
if (len - (s64)cur->li_length <= 0) /* duplicate or reordered */
return;
return 0;
if (SUB16(dccp_hdr(skb)->dccph_ccval, cur->li_ccval) > 4)
/*
@@ -119,11 +114,14 @@ void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
cur->li_is_closed = 1;
if (tfrc_lh_length(lh) == 1) /* due to RFC 3448, 6.3.1 */
return;
return 0;
cur->li_length = len;
tfrc_lh_calc_i_mean(lh);
return (lh->i_mean < old_i_mean);
}
EXPORT_SYMBOL_GPL(tfrc_lh_update_i_mean);
/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
@@ -140,18 +138,18 @@ static inline u8 tfrc_lh_is_new_loss(struct tfrc_loss_interval *cur,
* @sk: Used by @calc_first_li in caller-specific way (subtyping)
* Updates I_mean and returns 1 if a new interval has in fact been added to @lh.
*/
bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
u32 (*calc_first_li)(struct sock *), struct sock *sk)
int tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
struct tfrc_loss_interval *cur = tfrc_lh_peek(lh), *new;
if (cur != NULL && !tfrc_lh_is_new_loss(cur, tfrc_rx_hist_loss_prev(rh)))
return false;
return 0;
new = tfrc_lh_demand_next(lh);
if (unlikely(new == NULL)) {
DCCP_CRIT("Cannot allocate/add loss record.");
return false;
return 0;
}
new->li_seqno = tfrc_rx_hist_loss_prev(rh)->tfrchrx_seqno;
@@ -169,7 +167,7 @@ bool tfrc_lh_interval_add(struct tfrc_loss_hist *lh, struct tfrc_rx_hist *rh,
tfrc_lh_calc_i_mean(lh);
}
return true;
return 1;
}
EXPORT_SYMBOL_GPL(tfrc_lh_interval_add);

net/dccp/ccids/lib/loss_interval.h

@@ -67,9 +67,9 @@ static inline u8 tfrc_lh_length(struct tfrc_loss_hist *lh)
struct tfrc_rx_hist;
extern bool tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
extern int tfrc_lh_interval_add(struct tfrc_loss_hist *, struct tfrc_rx_hist *,
u32 (*first_li)(struct sock *), struct sock *);
extern void tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
extern u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *);
extern void tfrc_lh_cleanup(struct tfrc_loss_hist *lh);
#endif /* _DCCP_LI_HIST_ */

net/dccp/ccids/lib/packet_history.c

@@ -40,6 +40,18 @@
#include "packet_history.h"
#include "../../dccp.h"
/**
* tfrc_tx_hist_entry - Simple singly-linked TX history list
* @next: next oldest entry (LIFO order)
* @seqno: sequence number of this entry
* @stamp: send time of packet with sequence number @seqno
*/
struct tfrc_tx_hist_entry {
struct tfrc_tx_hist_entry *next;
u64 seqno;
ktime_t stamp;
};
/*
* Transmitter History Routines
*/
@@ -61,6 +73,15 @@ void tfrc_tx_packet_history_exit(void)
}
}
static struct tfrc_tx_hist_entry *
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
{
while (head != NULL && head->seqno != seqno)
head = head->next;
return head;
}
int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
{
struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -90,6 +111,25 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
}
EXPORT_SYMBOL_GPL(tfrc_tx_hist_purge);
u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
const ktime_t now)
{
u32 rtt = 0;
struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
if (packet != NULL) {
rtt = ktime_us_delta(now, packet->stamp);
/*
* Garbage-collect older (irrelevant) entries:
*/
tfrc_tx_hist_purge(&packet->next);
}
return rtt;
}
EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
/*
* Receiver History Routines
*/
@@ -151,31 +191,14 @@ int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
static void __tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
struct tfrc_rx_hist_entry *tmp = h->ring[a];
h->ring[a] = h->ring[b];
h->ring[b] = tmp;
}
static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b)
{
__tfrc_rx_hist_swap(h, tfrc_rx_hist_index(h, a),
tfrc_rx_hist_index(h, b));
}
const u8 idx_a = tfrc_rx_hist_index(h, a),
idx_b = tfrc_rx_hist_index(h, b);
struct tfrc_rx_hist_entry *tmp = h->ring[idx_a];
/**
* tfrc_rx_hist_resume_rtt_sampling - Prepare RX history for RTT sampling
* This is called after loss detection has finished, when the history entry
* with the index of `loss_count' holds the highest-received sequence number.
* RTT sampling requires this information at ring[0] (tfrc_rx_hist_sample_rtt).
*/
static inline void tfrc_rx_hist_resume_rtt_sampling(struct tfrc_rx_hist *h)
{
__tfrc_rx_hist_swap(h, 0, tfrc_rx_hist_index(h, h->loss_count));
h->loss_count = h->loss_start = 0;
h->ring[idx_a] = h->ring[idx_b];
h->ring[idx_b] = tmp;
}
/*
@@ -192,8 +215,10 @@ static void __do_track_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u64 n1)
u64 s0 = tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno,
s1 = DCCP_SKB_CB(skb)->dccpd_seq;
if (!dccp_loss_free(s0, s1, n1)) /* gap between S0 and S1 */
if (!dccp_loss_free(s0, s1, n1)) { /* gap between S0 and S1 */
h->loss_count = 1;
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_entry(h, 1), skb, n1);
}
}
static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2)
@@ -215,7 +240,8 @@ static void __one_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n2
if (dccp_loss_free(s2, s1, n1)) {
/* hole is filled: S0, S2, and S1 are consecutive */
tfrc_rx_hist_resume_rtt_sampling(h);
h->loss_count = 0;
h->loss_start = tfrc_rx_hist_index(h, 1);
} else
/* gap between S2 and S1: just update loss_prev */
tfrc_rx_hist_entry_from_skb(tfrc_rx_hist_loss_prev(h), skb, n2);
@@ -268,7 +294,8 @@ static int __two_after_loss(struct tfrc_rx_hist *h, struct sk_buff *skb, u32 n3)
if (dccp_loss_free(s1, s2, n2)) {
/* entire hole filled by S0, S3, S1, S2 */
tfrc_rx_hist_resume_rtt_sampling(h);
h->loss_start = tfrc_rx_hist_index(h, 2);
h->loss_count = 0;
} else {
/* gap remains between S1 and S2 */
h->loss_start = tfrc_rx_hist_index(h, 1);
@@ -312,7 +339,8 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
if (dccp_loss_free(s2, s3, n3)) {
/* no gap between S2 and S3: entire hole is filled */
tfrc_rx_hist_resume_rtt_sampling(h);
h->loss_start = tfrc_rx_hist_index(h, 3);
h->loss_count = 0;
} else {
/* gap between S2 and S3 */
h->loss_start = tfrc_rx_hist_index(h, 2);
@@ -326,13 +354,13 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
}
/**
* tfrc_rx_congestion_event - Loss detection and further processing
* @h: The non-empty RX history object
* @lh: Loss Intervals database to update
* @skb: Currently received packet
* @ndp: The NDP count belonging to @skb
* @first_li: Caller-dependent computation of first loss interval in @lh
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
* tfrc_rx_handle_loss - Loss detection and further processing
* @h: The non-empty RX history object
* @lh: Loss Intervals database to update
* @skb: Currently received packet
* @ndp: The NDP count belonging to @skb
* @calc_first_li: Caller-dependent computation of first loss interval in @lh
* @sk: Used by @calc_first_li (see tfrc_lh_interval_add)
* Chooses action according to pending loss, updates LI database when a new
* loss was detected, and does required post-processing. Returns 1 when caller
* should send feedback, 0 otherwise.
@@ -340,20 +368,15 @@ static void __three_after_loss(struct tfrc_rx_hist *h)
* records accordingly, the caller should not perform any more RX history
* operations when loss_count is greater than 0 after calling this function.
*/
bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
struct sk_buff *skb, const u64 ndp,
u32 (*first_li)(struct sock *), struct sock *sk)
int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
struct sk_buff *skb, const u64 ndp,
u32 (*calc_first_li)(struct sock *), struct sock *sk)
{
bool new_event = false;
if (tfrc_rx_hist_duplicate(h, skb))
return 0;
int is_new_loss = 0;
if (h->loss_count == 0) {
__do_track_loss(h, skb, ndp);
tfrc_rx_hist_sample_rtt(h, skb);
tfrc_rx_hist_add_packet(h, skb, ndp);
} else if (h->loss_count == 1) {
__one_after_loss(h, skb, ndp);
} else if (h->loss_count != 2) {
@@ -362,57 +385,34 @@ bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
/*
* Update Loss Interval database and recycle RX records
*/
new_event = tfrc_lh_interval_add(lh, h, first_li, sk);
is_new_loss = tfrc_lh_interval_add(lh, h, calc_first_li, sk);
__three_after_loss(h);
}
/*
* Update moving-average of `s' and the sum of received payload bytes.
*/
if (dccp_data_packet(skb)) {
const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
h->packet_size = tfrc_ewma(h->packet_size, payload, 9);
h->bytes_recvd += payload;
}
/* RFC 3448, 6.1: update I_0, whose growth implies p <= p_prev */
if (!new_event)
tfrc_lh_update_i_mean(lh, skb);
return new_event;
return is_new_loss;
}
EXPORT_SYMBOL_GPL(tfrc_rx_congestion_event);
EXPORT_SYMBOL_GPL(tfrc_rx_handle_loss);
/* Compute the sending rate X_recv measured between feedback intervals */
u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv)
int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
{
u64 bytes = h->bytes_recvd, last_rtt = h->rtt_estimate;
s64 delta = ktime_to_us(net_timedelta(h->bytes_start));
int i;
WARN_ON(delta <= 0);
/*
* Ensure that the sampling interval for X_recv is at least one RTT,
* by extending the sampling interval backwards in time, over the last
* R_(m-1) seconds, as per rfc3448bis-06, 6.2.
* To reduce noise (e.g. when the RTT changes often), this is only
* done when delta is smaller than RTT/2.
*/
if (last_x_recv > 0 && delta < last_rtt/2) {
tfrc_pr_debug("delta < RTT ==> %ld us < %u us\n",
(long)delta, (unsigned)last_rtt);
delta = (bytes ? delta : 0) + last_rtt;
bytes += div_u64((u64)last_x_recv * last_rtt, USEC_PER_SEC);
for (i = 0; i <= TFRC_NDUPACK; i++) {
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
if (h->ring[i] == NULL)
goto out_free;
}
if (unlikely(bytes == 0)) {
DCCP_WARN("X_recv == 0, using old value of %u\n", last_x_recv);
return last_x_recv;
h->loss_count = h->loss_start = 0;
return 0;
out_free:
while (i-- != 0) {
kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
h->ring[i] = NULL;
}
return scaled_div32(bytes, delta);
return -ENOBUFS;
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_x_recv);
EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
{
@@ -426,81 +426,73 @@ void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
static int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
/**
* tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
*/
static inline struct tfrc_rx_hist_entry *
tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
{
int i;
memset(h, 0, sizeof(*h));
for (i = 0; i <= TFRC_NDUPACK; i++) {
h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
if (h->ring[i] == NULL) {
tfrc_rx_hist_purge(h);
return -ENOBUFS;
}
}
return 0;
return h->ring[0];
}
int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk)
/**
* tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
*/
static inline struct tfrc_rx_hist_entry *
tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
{
if (tfrc_rx_hist_alloc(h))
return -ENOBUFS;
/*
* Initialise first entry with GSR to start loss detection as early as
* possible. Code using this must not use any other fields. The entry
* will be overwritten once the CCID updates its received packets.
*/
tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno = dccp_sk(sk)->dccps_gsr;
return 0;
return h->ring[h->rtt_sample_prev];
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_init);
/**
* tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
* Based on ideas presented in RFC 4342, 8.1. This function expects that no loss
* is pending and uses the following history entries (via rtt_sample_prev):
* - h->ring[0] contains the most recent history entry prior to @skb;
* - h->ring[1] is an unused `dummy' entry when the current difference is 0;
* Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
* to compute a sample with given data - calling function should check this.
*/
void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
{
struct tfrc_rx_hist_entry *last = h->ring[0];
u32 sample, delta_v;
u32 sample = 0,
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
/*
* When not to sample:
* - on non-data packets
* (RFC 4342, 8.1: CCVal only fully defined for data packets);
* - when no data packets have been received yet
* (FIXME: using sampled packet size as indicator here);
* - as long as there are gaps in the sequence space (pending loss).
*/
if (!dccp_data_packet(skb) || h->packet_size == 0 ||
tfrc_rx_hist_loss_pending(h))
return;
if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
if (h->rtt_sample_prev == 2) { /* previous candidate stored */
sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
if (sample)
sample = 4 / sample *
ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
else /*
* FIXME: This condition is in principle not
* possible but occurs when CCID is used for
* two-way data traffic. I have tried to trace
* it, but the cause does not seem to be here.
*/
DCCP_BUG("please report to dccp@vger.kernel.org"
" => prev = %u, last = %u",
tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
} else if (delta_v < 1) {
h->rtt_sample_prev = 1;
goto keep_ref_for_next_time;
}
h->rtt_sample_prev = 0; /* reset previous candidate */
delta_v = SUB16(dccp_hdr(skb)->dccph_ccval, last->tfrchrx_ccval);
if (delta_v == 0) { /* less than RTT/4 difference */
h->rtt_sample_prev = 1;
return;
} else if (delta_v == 4) /* optimal match */
sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
else { /* suboptimal match */
h->rtt_sample_prev = 2;
goto keep_ref_for_next_time;
}
sample = dccp_sane_rtt(ktime_to_us(net_timedelta(last->tfrchrx_tstamp)));
if (delta_v <= 4) /* between RTT/4 and RTT */
sample *= 4 / delta_v;
else if (!(sample < h->rtt_estimate && sample > h->rtt_estimate/2))
/*
* Optimisation: CCVal difference is greater than 1 RTT, yet the
* sample is less than the local RTT estimate; which means that
* the RTT estimate is too high.
* To avoid noise, it is not done if the sample is below RTT/2.
*/
return;
if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
DCCP_WARN("RTT sample %u too large, using max\n", sample);
sample = DCCP_SANE_RTT_MAX;
}
/* Use a lower weight than usual to increase responsiveness */
h->rtt_estimate = tfrc_ewma(h->rtt_estimate, sample, 5);
h->rtt_sample_prev = 0; /* use current entry as next reference */
keep_ref_for_next_time:
return sample;
}
EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
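
The core of the CCVal-based sampling in tfrc_rx_hist_sample_rtt() above is that the sender's window counter advances every quarter RTT (RFC 4342, 8.1), so two packets whose counters differ by delta_v in 1..4 are roughly delta_v/4 of an RTT apart. A stripped-down sketch with made-up inputs; the real code additionally keeps a previous candidate entry and clamps the sample:

```c
#include <stdio.h>

#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)	/* mod-16 counter delta */

static unsigned int rtt_from_ccval(unsigned int ccval_new, unsigned int ccval_old,
				   unsigned int elapsed_us)
{
	unsigned int delta_v = SUB16(ccval_new, ccval_old);

	if (delta_v < 1 || delta_v > 4)		/* unsuitable CCVal delta */
		return 0;
	return elapsed_us * 4 / delta_v;	/* scale elapsed time up to one RTT */
}

int main(void)
{
	/* counters differ by 1 (quarter RTT), packets 30 ms apart -> ~120 ms */
	printf("%u us\n", rtt_from_ccval(5, 4, 30000));
	/* counters differ by 4 across the mod-16 wrap -> elapsed is a full RTT */
	printf("%u us\n", rtt_from_ccval(2, 14, 120000));
	return 0;
}
```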

net/dccp/ccids/lib/packet_history.h

@@ -40,28 +40,12 @@
#include <linux/slab.h>
#include "tfrc.h"
/**
* tfrc_tx_hist_entry - Simple singly-linked TX history list
* @next: next oldest entry (LIFO order)
* @seqno: sequence number of this entry
* @stamp: send time of packet with sequence number @seqno
*/
struct tfrc_tx_hist_entry {
struct tfrc_tx_hist_entry *next;
u64 seqno;
ktime_t stamp;
};
static inline struct tfrc_tx_hist_entry *
tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
{
while (head != NULL && head->seqno != seqno)
head = head->next;
return head;
}
struct tfrc_tx_hist_entry;
extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
const u64 seqno, const ktime_t now);
/* Subtraction a-b modulo-16, respects circular wrap-around */
#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
@@ -91,22 +75,12 @@ struct tfrc_rx_hist_entry {
* @loss_count: Number of entries in circular history
* @loss_start: Movable index (for loss detection)
* @rtt_sample_prev: Used during RTT sampling, points to candidate entry
* @rtt_estimate: Receiver RTT estimate
* @packet_size: Packet size in bytes (as per RFC 3448, 3.1)
* @bytes_recvd: Number of bytes received since @bytes_start
* @bytes_start: Start time for counting @bytes_recvd
*/
struct tfrc_rx_hist {
struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
u8 loss_count:2,
loss_start:2;
/* Receiver RTT sampling */
#define rtt_sample_prev loss_start
u32 rtt_estimate;
/* Receiver sampling of application payload lengths */
u32 packet_size,
bytes_recvd;
ktime_t bytes_start;
};
/**
@@ -150,50 +124,20 @@ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
return h->loss_count > 0;
}
/*
* Accessor functions to retrieve parameters sampled by the RX history
*/
static inline u32 tfrc_rx_hist_packet_size(const struct tfrc_rx_hist *h)
{
if (h->packet_size == 0) {
DCCP_WARN("No sample for s, using fallback\n");
return TCP_MIN_RCVMSS;
}
return h->packet_size;
}
static inline u32 tfrc_rx_hist_rtt(const struct tfrc_rx_hist *h)
{
if (h->rtt_estimate == 0) {
DCCP_WARN("No RTT estimate available, using fallback RTT\n");
return DCCP_FALLBACK_RTT;
}
return h->rtt_estimate;
}
static inline void tfrc_rx_hist_restart_byte_counter(struct tfrc_rx_hist *h)
{
h->bytes_recvd = 0;
h->bytes_start = ktime_get_real();
}
extern u32 tfrc_rx_hist_x_recv(struct tfrc_rx_hist *h, const u32 last_x_recv);
extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
const struct sk_buff *skb, const u64 ndp);
extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
struct tfrc_loss_hist;
extern bool tfrc_rx_congestion_event(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
struct sk_buff *skb, const u64 ndp,
u32 (*first_li)(struct sock *sk),
struct sock *sk);
extern void tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
const struct sk_buff *skb);
extern int tfrc_rx_hist_init(struct tfrc_rx_hist *h, struct sock *sk);
extern int tfrc_rx_handle_loss(struct tfrc_rx_hist *h,
struct tfrc_loss_hist *lh,
struct sk_buff *skb, const u64 ndp,
u32 (*first_li)(struct sock *sk),
struct sock *sk);
extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
const struct sk_buff *skb);
extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
#endif /* _DCCP_PKT_HIST_ */

net/dccp/ccids/lib/tfrc.h

@@ -47,21 +47,6 @@ static inline u32 scaled_div32(u64 a, u64 b)
return result;
}
/**
* tfrc_scaled_sqrt - Compute scaled integer sqrt(x) for 0 < x < 2^22-1
* Uses scaling to improve accuracy of the integer approximation of sqrt(). The
* scaling factor of 2^10 limits the maximum @sample to 4e6; this is okay for
* clamped RTT samples (dccp_sample_rtt).
* Should best be used for expressions of type sqrt(x)/sqrt(y), since then the
* scaling factor is neutralised. For this purpose, it avoids returning zero.
*/
static inline u16 tfrc_scaled_sqrt(const u32 sample)
{
const unsigned long non_zero_sample = sample ? : 1;
return int_sqrt(non_zero_sample << 10);
}
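
Numerically, the removed helper returns int_sqrt(x << 10), which is about 32 * sqrt(x) since sqrt(2^10) = 32, so in a quotient such as sqrt(x)/sqrt(y) the factor of 32 cancels. A user-space check using floating-point sqrt() in place of the kernel's int_sqrt(), with made-up samples:

```c
#include <stdio.h>
#include <math.h>

static unsigned int scaled_sqrt(unsigned long sample)
{
	unsigned long nz = sample ? sample : 1;		/* avoid returning zero */

	return (unsigned int)sqrt((double)(nz << 10));	/* stand-in for int_sqrt() */
}

int main(void)
{
	unsigned long x = 40000, y = 10000;	/* e.g. two RTT samples in us */

	printf("scaled_sqrt(%lu) = %u (about 32 * %.0f)\n",
	       x, scaled_sqrt(x), sqrt((double)x));
	printf("scaled_sqrt(x) / scaled_sqrt(y) = %u\n",
	       scaled_sqrt(x) / scaled_sqrt(y));	/* -> 2 = sqrt(40000/10000) */
	return 0;
}
```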
/**
* tfrc_ewma - Exponentially weighted moving average
* @weight: Weight to be used as damping factor, in units of 1/10
@@ -73,7 +58,6 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
extern int tfrc_tx_packet_history_init(void);
extern void tfrc_tx_packet_history_exit(void);
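
To illustrate tfrc_ewma() referenced above, with @weight in units of 1/10 (weight = 9 keeps 90% of the old average): a sketch of the documented contract, seeded with made-up payload sizes. An empty average simply adopts the first sample:

```c
#include <stdio.h>

static unsigned int ewma(unsigned int avg, unsigned int newval, unsigned int weight)
{
	/* weight in units of 1/10; avg == 0 means "no sample yet" */
	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
}

int main(void)
{
	unsigned int avg = 0;
	unsigned int sizes[] = { 1000, 1000, 1400, 1400 };	/* made-up payloads */

	for (int i = 0; i < 4; i++) {
		avg = ewma(avg, sizes[i], 9);
		printf("sample %u -> avg %u\n", sizes[i], avg);
	}
	return 0;
}
```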

net/dccp/ccids/lib/tfrc_equation.c

@@ -632,16 +632,8 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
if (p <= TFRC_CALC_X_SPLIT) { /* 0.0000 < p <= 0.05 */
if (p < TFRC_SMALLEST_P) { /* 0.0000 < p < 0.0001 */
/*
* In the congestion-avoidance phase p decays towards 0
* when there are no further losses, so this case is
* natural. Truncating to p_min = 0.01% means that the
* maximum achievable throughput is limited to about
* X_calc_max = 122.4 * s/RTT (see RFC 3448, 3.1); e.g.
* with s=1500 bytes, RTT=0.01 s: X_calc_max = 147 Mbps.
*/
tfrc_pr_debug("Value of p (%d) below resolution. "
"Substituting %d\n", p, TFRC_SMALLEST_P);
DCCP_WARN("Value of p (%d) below resolution. "
"Substituting %d\n", p, TFRC_SMALLEST_P);
index = 0;
} else /* 0.0001 <= p <= 0.05 */
index = p/TFRC_SMALLEST_P - 1;
@@ -666,6 +658,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
result = scaled_div(s, R);
return scaled_div32(result, f);
}
EXPORT_SYMBOL_GPL(tfrc_calc_x);
/**
@@ -700,19 +693,5 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
index = tfrc_binsearch(fvalue, 0);
return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
}
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
/**
* tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
* When @loss_event_rate is large, there is a chance that p is truncated to 0.
* To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
*/
u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
{
if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
return 0;
if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
return 1000000;
return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
}
EXPORT_SYMBOL_GPL(tfrc_invert_loss_event_rate);
EXPORT_SYMBOL_GPL(tfrc_calc_x_reverse_lookup);
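
Finally, the removed tfrc_invert_loss_event_rate() maps a loss-interval length into a loss event rate p scaled so that 10^6 means 100%. A sketch of the three cases in its body; TFRC_SMALLEST_P is assumed to be 100 here (0.01% of 10^6, matching the p_min = 0.01% mentioned in the tfrc_calc_x comment above), and plain division stands in for scaled_div(1, rate):

```c
#include <stdio.h>
#include <limits.h>

#define SMALLEST_P 100	/* assumed value of TFRC_SMALLEST_P: 0.01% of 10^6 */

static unsigned int invert_loss_event_rate(unsigned int rate)
{
	if (rate == UINT_MAX)			/* no loss at all: RFC 4342, 8.5 */
		return 0;
	if (rate == 0)				/* map 1/0 into 100% */
		return 1000000;
	{
		unsigned int p = 1000000 / rate;	/* like scaled_div(1, rate) */

		return p > SMALLEST_P ? p : SMALLEST_P;	/* avoid truncating p to 0 */
	}
}

int main(void)
{
	printf("%u\n", invert_loss_event_rate(UINT_MAX));	/* 0 */
	printf("%u\n", invert_loss_event_rate(0));		/* 1000000 */
	printf("%u\n", invert_loss_event_rate(100));		/* 10000 = 1% loss */
	return 0;
}
```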