Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_sack.c
Show First 20 Lines • Show All 349 Lines • ▼ Show 20 Lines | |||||
* information which is slightly different than rfc6675. | * information which is slightly different than rfc6675. | ||||
*/ | */ | ||||
int | int | ||||
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) | tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) | ||||
{ | { | ||||
struct sackhole *cur, *temp; | struct sackhole *cur, *temp; | ||||
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; | struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; | ||||
int i, j, num_sack_blks, sack_changed; | int i, j, num_sack_blks, sack_changed; | ||||
int delivered_data, left_edge_delta; | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
num_sack_blks = 0; | num_sack_blks = 0; | ||||
sack_changed = 0; | sack_changed = 0; | ||||
delivered_data = 0; | |||||
left_edge_delta = 0; | |||||
/* | /* | ||||
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, | * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, | ||||
* treat [SND.UNA, SEG.ACK) as if it is a SACK block. | * treat [SND.UNA, SEG.ACK) as if it is a SACK block. | ||||
* Account changes to SND.UNA always in delivered data. | |||||
*/ | */ | ||||
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { | if SEQ_LT(tp->snd_una, th_ack) { | ||||
// delivered_data = th_ack - tp->snd_una; | |||||
left_edge_delta = th_ack - tp->snd_una; | |||||
if(!TAILQ_EMPTY(&tp->snd_holes)) { | |||||
sack_blocks[num_sack_blks].start = tp->snd_una; | sack_blocks[num_sack_blks].start = tp->snd_una; | ||||
sack_blocks[num_sack_blks++].end = th_ack; | sack_blocks[num_sack_blks++].end = th_ack; | ||||
} | } | ||||
} | |||||
/* | /* | ||||
* Append received valid SACK blocks to sack_blocks[], but only if we | * Append received valid SACK blocks to sack_blocks[], but only if we | ||||
* received new blocks from the other side. | * received new blocks from the other side. | ||||
*/ | */ | ||||
if (to->to_flags & TOF_SACK) { | if (to->to_flags & TOF_SACK) { | ||||
tp->sackhint.sacked_bytes = 0; /* reset */ | tp->sackhint.sacked_bytes_old = 0; /* reset */ | ||||
for (i = 0; i < to->to_nsacks; i++) { | for (i = 0; i < to->to_nsacks; i++) { | ||||
bcopy((to->to_sacks + i * TCPOLEN_SACK), | bcopy((to->to_sacks + i * TCPOLEN_SACK), | ||||
&sack, sizeof(sack)); | &sack, sizeof(sack)); | ||||
sack.start = ntohl(sack.start); | sack.start = ntohl(sack.start); | ||||
sack.end = ntohl(sack.end); | sack.end = ntohl(sack.end); | ||||
if (SEQ_GT(sack.end, sack.start) && | if (SEQ_GT(sack.end, sack.start) && | ||||
SEQ_GT(sack.start, tp->snd_una) && | SEQ_GT(sack.start, tp->snd_una) && | ||||
SEQ_GT(sack.start, th_ack) && | SEQ_GT(sack.start, th_ack) && | ||||
SEQ_LT(sack.start, tp->snd_max) && | SEQ_LT(sack.start, tp->snd_max) && | ||||
SEQ_GT(sack.end, tp->snd_una) && | SEQ_GT(sack.end, tp->snd_una) && | ||||
SEQ_LEQ(sack.end, tp->snd_max)) { | SEQ_LEQ(sack.end, tp->snd_max)) { | ||||
sack_blocks[num_sack_blks++] = sack; | sack_blocks[num_sack_blks++] = sack; | ||||
tp->sackhint.sacked_bytes += | tp->sackhint.sacked_bytes_old += | ||||
(sack.end-sack.start); | (sack.end - sack.start); | ||||
} | } | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Return if SND.UNA is not advanced and no valid SACK block is | * Return if SND.UNA is not advanced and no valid SACK block is | ||||
* received. | * received. | ||||
*/ | */ | ||||
if (num_sack_blks == 0) | if (num_sack_blks == 0) | ||||
return (sack_changed); | return (sack_changed); | ||||
/* | /* | ||||
* Sort the SACK blocks so we can update the scoreboard with just one | * Sort the SACK blocks so we can update the scoreboard with just one | ||||
* pass. The overhead of sorting up to 4+1 elements is less than | * pass. The overhead of sorting up to 4+1 elements is less than | ||||
* making up to 4+1 passes over the scoreboard. | * making up to 4+1 passes over the scoreboard. | ||||
*/ | */ | ||||
for (i = 0; i < num_sack_blks; i++) { | for (i = 0; i < num_sack_blks; i++) { | ||||
for (j = i + 1; j < num_sack_blks; j++) { | for (j = i + 1; j < num_sack_blks; j++) { | ||||
if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { | if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { | ||||
sack = sack_blocks[i]; | sack = sack_blocks[i]; | ||||
sack_blocks[i] = sack_blocks[j]; | sack_blocks[i] = sack_blocks[j]; | ||||
sack_blocks[j] = sack; | sack_blocks[j] = sack; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (TAILQ_EMPTY(&tp->snd_holes)) | if (TAILQ_EMPTY(&tp->snd_holes)) { | ||||
/* | /* | ||||
* Empty scoreboard. Need to initialize snd_fack (it may be | * Empty scoreboard. Need to initialize snd_fack (it may be | ||||
* uninitialized or have a bogus value). Scoreboard holes | * uninitialized or have a bogus value). Scoreboard holes | ||||
* (from the sack blocks received) are created later below | * (from the sack blocks received) are created later below | ||||
* (in the logic that adds holes to the tail of the | * (in the logic that adds holes to the tail of the | ||||
* scoreboard). | * scoreboard). | ||||
*/ | */ | ||||
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); | tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); | ||||
tp->sackhint.sacked_bytes = 0; /* reset */ | |||||
} | |||||
/* | /* | ||||
* In the while-loop below, incoming SACK blocks (sack_blocks[]) and | * In the while-loop below, incoming SACK blocks (sack_blocks[]) and | ||||
* SACK holes (snd_holes) are traversed from their tails with just | * SACK holes (snd_holes) are traversed from their tails with just | ||||
* one pass in order to reduce the number of compares especially when | * one pass in order to reduce the number of compares especially when | ||||
* the bandwidth-delay product is large. | * the bandwidth-delay product is large. | ||||
* | * | ||||
* Note: Typically, in the first RTT of SACK recovery, the highest | * Note: Typically, in the first RTT of SACK recovery, the highest | ||||
* three or four SACK blocks with the same ack number are received. | * three or four SACK blocks with the same ack number are received. | ||||
* In the second RTT, if retransmitted data segments are not lost, | * In the second RTT, if retransmitted data segments are not lost, | ||||
* the highest three or four SACK blocks with ack number advancing | * the highest three or four SACK blocks with ack number advancing | ||||
* are received. | * are received. | ||||
*/ | */ | ||||
sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ | sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ | ||||
tp->sackhint.last_sack_ack = sblkp->end; | tp->sackhint.last_sack_ack = sblkp->end; | ||||
if (SEQ_LT(tp->snd_fack, sblkp->start)) { | if (SEQ_LT(tp->snd_fack, sblkp->start)) { | ||||
/* | /* | ||||
* The highest SACK block is beyond fack. Append new SACK | * The highest SACK block is beyond fack. Append new SACK | ||||
* hole at the tail. If the second or later highest SACK | * hole at the tail. If the second or later highest SACK | ||||
* blocks are also beyond the current fack, they will be | * blocks are also beyond the current fack, they will be | ||||
* inserted by way of hole splitting in the while-loop below. | * inserted by way of hole splitting in the while-loop below. | ||||
*/ | */ | ||||
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); | temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); | ||||
if (temp != NULL) { | if (temp != NULL) { | ||||
delivered_data += sblkp->end - sblkp->start; | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
/* Go to the previous sack block. */ | /* Go to the previous sack block. */ | ||||
sblkp--; | sblkp--; | ||||
sack_changed = 1; | sack_changed = 1; | ||||
} else { | } else { | ||||
/* | /* | ||||
* We failed to add a new hole based on the current | * We failed to add a new hole based on the current | ||||
* sack block. Skip over all the sack blocks that | * sack block. Skip over all the sack blocks that | ||||
* fall completely to the right of snd_fack and | * fall completely to the right of snd_fack and | ||||
* proceed to trim the scoreboard based on the | * proceed to trim the scoreboard based on the | ||||
* remaining sack blocks. This also trims the | * remaining sack blocks. This also trims the | ||||
* scoreboard for th_ack (which is sack_blocks[0]). | * scoreboard for th_ack (which is sack_blocks[0]). | ||||
*/ | */ | ||||
while (sblkp >= sack_blocks && | while (sblkp >= sack_blocks && | ||||
SEQ_LT(tp->snd_fack, sblkp->start)) | SEQ_LT(tp->snd_fack, sblkp->start)) | ||||
sblkp--; | sblkp--; | ||||
if (sblkp >= sack_blocks && | if (sblkp >= sack_blocks && | ||||
SEQ_LT(tp->snd_fack, sblkp->end)) | SEQ_LT(tp->snd_fack, sblkp->end)) | ||||
delivered_data += sblkp->end - tp->snd_fack; | |||||
rscheff: brackets in the if clause missing, sack_changed needs to be set too. | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
} | } | ||||
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) { | } else if (SEQ_LT(tp->snd_fack, sblkp->end)) { | ||||
/* fack is advanced. */ | /* fack is advanced. */ | ||||
delivered_data += sblkp->end - tp->snd_fack; | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
sack_changed = 1; | sack_changed = 1; | ||||
} | } | ||||
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ | cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ | ||||
/* | /* | ||||
* Since the incoming sack blocks are sorted, we can process them | * Since the incoming sack blocks are sorted, we can process them | ||||
* making one sweep of the scoreboard. | * making one sweep of the scoreboard. | ||||
*/ | */ | ||||
Show All 17 Lines | while (sblkp >= sack_blocks && cur != NULL) { | ||||
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); | tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); | ||||
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, | KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, | ||||
("sackhint bytes rtx >= 0")); | ("sackhint bytes rtx >= 0")); | ||||
sack_changed = 1; | sack_changed = 1; | ||||
if (SEQ_LEQ(sblkp->start, cur->start)) { | if (SEQ_LEQ(sblkp->start, cur->start)) { | ||||
/* Data acks at least the beginning of hole. */ | /* Data acks at least the beginning of hole. */ | ||||
if (SEQ_GEQ(sblkp->end, cur->end)) { | if (SEQ_GEQ(sblkp->end, cur->end)) { | ||||
/* Acks entire hole, so delete hole. */ | /* Acks entire hole, so delete hole. */ | ||||
delivered_data += (cur->end - cur->start); | |||||
temp = cur; | temp = cur; | ||||
cur = TAILQ_PREV(cur, sackhole_head, scblink); | cur = TAILQ_PREV(cur, sackhole_head, scblink); | ||||
tcp_sackhole_remove(tp, temp); | tcp_sackhole_remove(tp, temp); | ||||
/* | /* | ||||
* The sack block may ack all or part of the | * The sack block may ack all or part of the | ||||
* next hole too, so continue onto the next | * next hole too, so continue onto the next | ||||
* hole. | * hole. | ||||
*/ | */ | ||||
continue; | continue; | ||||
} else { | } else { | ||||
/* Move start of hole forward. */ | /* Move start of hole forward. */ | ||||
delivered_data += (sblkp->end - cur->start); | |||||
cur->start = sblkp->end; | cur->start = sblkp->end; | ||||
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); | cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); | ||||
} | } | ||||
} else { | } else { | ||||
/* Data acks at least the end of hole. */ | /* Data acks at least the end of hole. */ | ||||
if (SEQ_GEQ(sblkp->end, cur->end)) { | if (SEQ_GEQ(sblkp->end, cur->end)) { | ||||
/* Move end of hole backward. */ | /* Move end of hole backward. */ | ||||
delivered_data += (cur->end - sblkp->start); | |||||
cur->end = sblkp->start; | cur->end = sblkp->start; | ||||
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); | cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); | ||||
} else { | } else { | ||||
/* | /* | ||||
* ACKs some data in middle of a hole; need | * ACKs some data in middle of a hole; need | ||||
* to split current hole | * to split current hole | ||||
*/ | */ | ||||
temp = tcp_sackhole_insert(tp, sblkp->end, | temp = tcp_sackhole_insert(tp, sblkp->end, | ||||
cur->end, cur); | cur->end, cur); | ||||
if (temp != NULL) { | if (temp != NULL) { | ||||
if (SEQ_GT(cur->rxmit, temp->rxmit)) { | if (SEQ_GT(cur->rxmit, temp->rxmit)) { | ||||
temp->rxmit = cur->rxmit; | temp->rxmit = cur->rxmit; | ||||
tp->sackhint.sack_bytes_rexmit | tp->sackhint.sack_bytes_rexmit | ||||
+= (temp->rxmit | += (temp->rxmit | ||||
- temp->start); | - temp->start); | ||||
} | } | ||||
cur->end = sblkp->start; | cur->end = sblkp->start; | ||||
cur->rxmit = SEQ_MIN(cur->rxmit, | cur->rxmit = SEQ_MIN(cur->rxmit, | ||||
cur->end); | cur->end); | ||||
delivered_data += (sblkp->end - sblkp->start); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); | tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); | ||||
/* | /* | ||||
* Testing sblkp->start against cur->start tells us whether | * Testing sblkp->start against cur->start tells us whether | ||||
* we're done with the sack block or the sack hole. | * we're done with the sack block or the sack hole. | ||||
* Accordingly, we advance one or the other. | * Accordingly, we advance one or the other. | ||||
*/ | */ | ||||
if (SEQ_LEQ(sblkp->start, cur->start)) | if (SEQ_LEQ(sblkp->start, cur->start)) | ||||
cur = TAILQ_PREV(cur, sackhole_head, scblink); | cur = TAILQ_PREV(cur, sackhole_head, scblink); | ||||
else | else | ||||
sblkp--; | sblkp--; | ||||
} | } | ||||
tp->sackhint.delivered_data = delivered_data; | |||||
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; | |||||
if (!(to->to_flags & TOF_SACK)) | |||||
Done Inline ActionsMove this to tcp_input:2670, as sack_changed should semantically caputer any changes to the scoreboard. Which does happen on partial / cumulative acks. rscheff: Move this to tcp_input:2670, as sack_changed should semantically caputer any changes to the… | |||||
/* | |||||
* If this ACK did not contain any | |||||
* SACK blocks, any only moved the | |||||
* left edge right, it is a pure | |||||
* cumulative ACK. Do not count | |||||
* DupAck for this. Also required | |||||
* for RFC6675 rescue retransmission. | |||||
*/ | |||||
sack_changed = 0; | |||||
return (sack_changed); | return (sack_changed); | ||||
} | } | ||||
/* | /* | ||||
* Free all SACK holes to clear the scoreboard. | * Free all SACK holes to clear the scoreboard. | ||||
*/ | */ | ||||
void | void | ||||
tcp_free_sackholes(struct tcpcb *tp) | tcp_free_sackholes(struct tcpcb *tp) | ||||
Show All 29 Lines | tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) | ||||
/* Send one or 2 segments based on how much new data was acked. */ | /* Send one or 2 segments based on how much new data was acked. */ | ||||
if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) | if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) | ||||
num_segs = 2; | num_segs = 2; | ||||
tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + | tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + | ||||
(tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); | (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); | ||||
if (tp->snd_cwnd > tp->snd_ssthresh) | if (tp->snd_cwnd > tp->snd_ssthresh) | ||||
tp->snd_cwnd = tp->snd_ssthresh; | tp->snd_cwnd = tp->snd_ssthresh; | ||||
tp->t_flags |= TF_ACKNOW; | tp->t_flags |= TF_ACKNOW; | ||||
/* | |||||
* RFC6675 rescue retransmission | |||||
* Add a hole between th_ack (una is not yet set) and snd_max, | |||||
* if this was a pure cumulative ACK and no data was send beyond | |||||
* recovery point. Since the data in the socket has not been freed | |||||
* at this point, this may still happen when more new data is ready to | |||||
* send. The rescue retransmission may be slightly premature | |||||
* compared to RFC6675. | |||||
*/ | |||||
if ((V_tcp_do_rfc6675_pipe) && | |||||
SEQ_LT(th->th_ack, tp->snd_recover) && | |||||
(tp->snd_recover == tp->snd_max) && | |||||
TAILQ_EMPTY(&tp->snd_holes) && | |||||
(tp->sackhint.delivered_data > 0)) { | |||||
struct sackhole *hole; | |||||
int maxseg = tcp_maxseg(tp); | |||||
hole = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack, tp->snd_max - maxseg), tp->snd_max, NULL); | |||||
// if ((tp->snd_max - th->th_ack) > maxseg) { // do this with PRR to avoid bursts. | |||||
/* | |||||
* have to insert lower hole after | |||||
* rescue retransmission, for | |||||
* sackhint updates to pick this up | |||||
*/ | |||||
// hole = tcp_sackhole_insert(tp, th->th_ack, tp->snd_max - maxseg, NULL); | |||||
// log(LOG_DEBUG,"low hole %u - %u <- %u\n", hole->start - tp->iss, hole->end - tp->iss, hole->rxmit - tp->iss); | |||||
// } | |||||
log(LOG_DEBUG,"high hole %u - %u <- %u\n", tp->sackhint.nexthole->start - tp->iss, tp->sackhint.nexthole->end - tp->iss, tp->sackhint.nexthole->rxmit - tp->iss); | |||||
log(LOG_DEBUG,"nexthole: %p (%u) hole: %p (%u)\n", | |||||
(void *)tp->sackhint.nexthole, tp->sackhint.nexthole->start - tp->iss, | |||||
(void *)hole, hole->start - tp->iss); | |||||
} | |||||
(void) tp->t_fb->tfb_tcp_output(tp); | (void) tp->t_fb->tfb_tcp_output(tp); | ||||
struct socket *so = tp->t_inpcb->inp_socket; | |||||
LOGTCPCBSTATE2; | |||||
} | } | ||||
#if 0 | #if 0 | ||||
/* | /* | ||||
* Debug version of tcp_sack_output() that walks the scoreboard. Used for | * Debug version of tcp_sack_output() that walks the scoreboard. Used for | ||||
* now to sanity check the hint. | * now to sanity check the hint. | ||||
*/ | */ | ||||
static struct sackhole * | static struct sackhole * | ||||
Show All 37 Lines | |||||
struct sackhole * | struct sackhole * | ||||
tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) | tcp_sack_output(struct tcpcb *tp, int *sack_bytes_rexmt) | ||||
{ | { | ||||
struct sackhole *hole = NULL; | struct sackhole *hole = NULL; | ||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
*sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit; | *sack_bytes_rexmt = tp->sackhint.sack_bytes_rexmit; | ||||
hole = tp->sackhint.nexthole; | hole = tp->sackhint.nexthole; | ||||
struct socket *so = tp->t_inpcb->inp_socket; | |||||
LOGTCPCBSTATE2; | |||||
if (hole == NULL || SEQ_LT(hole->rxmit, hole->end)) | if (hole == NULL || SEQ_LT(hole->rxmit, hole->end)) | ||||
goto out; | goto out; | ||||
while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) { | while ((hole = TAILQ_NEXT(hole, scblink)) != NULL) { | ||||
if (SEQ_LT(hole->rxmit, hole->end)) { | if (SEQ_LT(hole->rxmit, hole->end)) { | ||||
tp->sackhint.nexthole = hole; | tp->sackhint.nexthole = hole; | ||||
break; | break; | ||||
} | } | ||||
} | } | ||||
Show All 38 Lines |
brackets in the if clause missing, sack_changed needs to be set too.