Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/tcp_sack.c
Show First 20 Lines • Show All 339 Lines • ▼ Show 20 Lines | tcp_sackhole_remove(struct tcpcb *tp, struct sackhole *hole) | ||||
tcp_sackhole_free(tp, hole); | tcp_sackhole_free(tp, hole); | ||||
} | } | ||||
/* | /* | ||||
* Process cumulative ACK and the TCP SACK option to update the scoreboard. | * Process cumulative ACK and the TCP SACK option to update the scoreboard. | ||||
* tp->snd_holes is an ordered list of holes (oldest to newest, in terms of | * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of | ||||
* the sequence space). | * the sequence space). | ||||
* Returns 1 if incoming ACK has previously unknown SACK information, | * Returns 1 if incoming ACK has previously unknown SACK information, | ||||
* 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes | * 0 otherwise. | ||||
* to that (i.e. left edge moving) would also be considered a change in SACK | |||||
* information which is slightly different than rfc6675. | |||||
*/ | */ | ||||
int | int | ||||
tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) | tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) | ||||
{ | { | ||||
struct sackhole *cur, *temp; | struct sackhole *cur, *temp; | ||||
struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; | struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; | ||||
int i, j, num_sack_blks, sack_changed; | int i, j, num_sack_blks, sack_changed; | ||||
int delivered_data, left_edge_delta; | |||||
INP_WLOCK_ASSERT(tp->t_inpcb); | INP_WLOCK_ASSERT(tp->t_inpcb); | ||||
num_sack_blks = 0; | num_sack_blks = 0; | ||||
sack_changed = 0; | sack_changed = 0; | ||||
delivered_data = 0; | |||||
left_edge_delta = 0; | |||||
/* | /* | ||||
* If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, | * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, | ||||
* treat [SND.UNA, SEG.ACK) as if it is a SACK block. | * treat [SND.UNA, SEG.ACK) as if it is a SACK block. | ||||
* Account changes to SND.UNA always in delivered data. | |||||
*/ | */ | ||||
if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { | if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { | ||||
left_edge_delta = th_ack - tp->snd_una; | |||||
sack_blocks[num_sack_blks].start = tp->snd_una; | sack_blocks[num_sack_blks].start = tp->snd_una; | ||||
sack_blocks[num_sack_blks++].end = th_ack; | sack_blocks[num_sack_blks++].end = th_ack; | ||||
} | } | ||||
/* | /* | ||||
* Append received valid SACK blocks to sack_blocks[], but only if we | * Append received valid SACK blocks to sack_blocks[], but only if we | ||||
* received new blocks from the other side. | * received new blocks from the other side. | ||||
*/ | */ | ||||
if (to->to_flags & TOF_SACK) { | if (to->to_flags & TOF_SACK) { | ||||
tp->sackhint.sacked_bytes = 0; /* reset */ | |||||
for (i = 0; i < to->to_nsacks; i++) { | for (i = 0; i < to->to_nsacks; i++) { | ||||
bcopy((to->to_sacks + i * TCPOLEN_SACK), | bcopy((to->to_sacks + i * TCPOLEN_SACK), | ||||
&sack, sizeof(sack)); | &sack, sizeof(sack)); | ||||
sack.start = ntohl(sack.start); | sack.start = ntohl(sack.start); | ||||
sack.end = ntohl(sack.end); | sack.end = ntohl(sack.end); | ||||
if (SEQ_GT(sack.end, sack.start) && | if (SEQ_GT(sack.end, sack.start) && | ||||
SEQ_GT(sack.start, tp->snd_una) && | SEQ_GT(sack.start, tp->snd_una) && | ||||
SEQ_GT(sack.start, th_ack) && | SEQ_GT(sack.start, th_ack) && | ||||
SEQ_LT(sack.start, tp->snd_max) && | SEQ_LT(sack.start, tp->snd_max) && | ||||
SEQ_GT(sack.end, tp->snd_una) && | SEQ_GT(sack.end, tp->snd_una) && | ||||
SEQ_LEQ(sack.end, tp->snd_max)) { | SEQ_LEQ(sack.end, tp->snd_max)) { | ||||
sack_blocks[num_sack_blks++] = sack; | sack_blocks[num_sack_blks++] = sack; | ||||
tp->sackhint.sacked_bytes += | |||||
(sack.end-sack.start); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Return if SND.UNA is not advanced and no valid SACK block is | * Return if SND.UNA is not advanced and no valid SACK block is | ||||
* received. | * received. | ||||
*/ | */ | ||||
if (num_sack_blks == 0) | if (num_sack_blks == 0) | ||||
return (sack_changed); | return (sack_changed); | ||||
/* | /* | ||||
* Sort the SACK blocks so we can update the scoreboard with just one | * Sort the SACK blocks so we can update the scoreboard with just one | ||||
* pass. The overhead of sorting up to 4+1 elements is less than | * pass. The overhead of sorting up to 4+1 elements is less than | ||||
* making up to 4+1 passes over the scoreboard. | * making up to 4+1 passes over the scoreboard. | ||||
*/ | */ | ||||
for (i = 0; i < num_sack_blks; i++) { | for (i = 0; i < num_sack_blks; i++) { | ||||
for (j = i + 1; j < num_sack_blks; j++) { | for (j = i + 1; j < num_sack_blks; j++) { | ||||
if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { | if (SEQ_GT(sack_blocks[i].end, sack_blocks[j].end)) { | ||||
sack = sack_blocks[i]; | sack = sack_blocks[i]; | ||||
sack_blocks[i] = sack_blocks[j]; | sack_blocks[i] = sack_blocks[j]; | ||||
sack_blocks[j] = sack; | sack_blocks[j] = sack; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
if (TAILQ_EMPTY(&tp->snd_holes)) | if (TAILQ_EMPTY(&tp->snd_holes)) { | ||||
/* | /* | ||||
* Empty scoreboard. Need to initialize snd_fack (it may be | * Empty scoreboard. Need to initialize snd_fack (it may be | ||||
* uninitialized or have a bogus value). Scoreboard holes | * uninitialized or have a bogus value). Scoreboard holes | ||||
* (from the sack blocks received) are created later below | * (from the sack blocks received) are created later below | ||||
* (in the logic that adds holes to the tail of the | * (in the logic that adds holes to the tail of the | ||||
* scoreboard). | * scoreboard). | ||||
*/ | */ | ||||
tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); | tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack); | ||||
tp->sackhint.sacked_bytes = 0; /* reset */ | |||||
} | |||||
/* | /* | ||||
* In the while-loop below, incoming SACK blocks (sack_blocks[]) and | * In the while-loop below, incoming SACK blocks (sack_blocks[]) and | ||||
* SACK holes (snd_holes) are traversed from their tails with just | * SACK holes (snd_holes) are traversed from their tails with just | ||||
* one pass in order to reduce the number of compares especially when | * one pass in order to reduce the number of compares especially when | ||||
* the bandwidth-delay product is large. | * the bandwidth-delay product is large. | ||||
* | * | ||||
* Note: Typically, in the first RTT of SACK recovery, the highest | * Note: Typically, in the first RTT of SACK recovery, the highest | ||||
* three or four SACK blocks with the same ack number are received. | * three or four SACK blocks with the same ack number are received. | ||||
* In the second RTT, if retransmitted data segments are not lost, | * In the second RTT, if retransmitted data segments are not lost, | ||||
* the highest three or four SACK blocks with ack number advancing | * the highest three or four SACK blocks with ack number advancing | ||||
* are received. | * are received. | ||||
*/ | */ | ||||
sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ | sblkp = &sack_blocks[num_sack_blks - 1]; /* Last SACK block */ | ||||
tp->sackhint.last_sack_ack = sblkp->end; | tp->sackhint.last_sack_ack = sblkp->end; | ||||
if (SEQ_LT(tp->snd_fack, sblkp->start)) { | if (SEQ_LT(tp->snd_fack, sblkp->start)) { | ||||
/* | /* | ||||
* The highest SACK block is beyond fack. Append new SACK | * The highest SACK block is beyond fack. Append new SACK | ||||
* hole at the tail. If the second or later highest SACK | * hole at the tail. If the second or later highest SACK | ||||
* blocks are also beyond the current fack, they will be | * blocks are also beyond the current fack, they will be | ||||
* inserted by way of hole splitting in the while-loop below. | * inserted by way of hole splitting in the while-loop below. | ||||
*/ | */ | ||||
temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); | temp = tcp_sackhole_insert(tp, tp->snd_fack,sblkp->start,NULL); | ||||
if (temp != NULL) { | if (temp != NULL) { | ||||
delivered_data += sblkp->end - sblkp->start; | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
/* Go to the previous sack block. */ | /* Go to the previous sack block. */ | ||||
sblkp--; | sblkp--; | ||||
sack_changed = 1; | sack_changed = 1; | ||||
} else { | } else { | ||||
/* | /* | ||||
* We failed to add a new hole based on the current | * We failed to add a new hole based on the current | ||||
* sack block. Skip over all the sack blocks that | * sack block. Skip over all the sack blocks that | ||||
* fall completely to the right of snd_fack and | * fall completely to the right of snd_fack and | ||||
* proceed to trim the scoreboard based on the | * proceed to trim the scoreboard based on the | ||||
* remaining sack blocks. This also trims the | * remaining sack blocks. This also trims the | ||||
* scoreboard for th_ack (which is sack_blocks[0]). | * scoreboard for th_ack (which is sack_blocks[0]). | ||||
*/ | */ | ||||
while (sblkp >= sack_blocks && | while (sblkp >= sack_blocks && | ||||
SEQ_LT(tp->snd_fack, sblkp->start)) | SEQ_LT(tp->snd_fack, sblkp->start)) | ||||
sblkp--; | sblkp--; | ||||
if (sblkp >= sack_blocks && | if (sblkp >= sack_blocks && | ||||
SEQ_LT(tp->snd_fack, sblkp->end)) | SEQ_LT(tp->snd_fack, sblkp->end)) { | ||||
delivered_data += sblkp->end - tp->snd_fack; | |||||
rscheff: brackets in the if clause missing, sack_changed needs to be set too. | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
sack_changed = 1; | |||||
} | } | ||||
} | |||||
} else if (SEQ_LT(tp->snd_fack, sblkp->end)) { | } else if (SEQ_LT(tp->snd_fack, sblkp->end)) { | ||||
/* fack is advanced. */ | /* fack is advanced. */ | ||||
delivered_data += sblkp->end - tp->snd_fack; | |||||
tp->snd_fack = sblkp->end; | tp->snd_fack = sblkp->end; | ||||
sack_changed = 1; | sack_changed = 1; | ||||
} | } | ||||
cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ | cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */ | ||||
/* | /* | ||||
* Since the incoming sack blocks are sorted, we can process them | * Since the incoming sack blocks are sorted, we can process them | ||||
* making one sweep of the scoreboard. | * making one sweep of the scoreboard. | ||||
*/ | */ | ||||
Show All 17 Lines | while (sblkp >= sack_blocks && cur != NULL) { | ||||
tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); | tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start); | ||||
KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, | KASSERT(tp->sackhint.sack_bytes_rexmit >= 0, | ||||
("sackhint bytes rtx >= 0")); | ("sackhint bytes rtx >= 0")); | ||||
sack_changed = 1; | sack_changed = 1; | ||||
if (SEQ_LEQ(sblkp->start, cur->start)) { | if (SEQ_LEQ(sblkp->start, cur->start)) { | ||||
/* Data acks at least the beginning of hole. */ | /* Data acks at least the beginning of hole. */ | ||||
if (SEQ_GEQ(sblkp->end, cur->end)) { | if (SEQ_GEQ(sblkp->end, cur->end)) { | ||||
/* Acks entire hole, so delete hole. */ | /* Acks entire hole, so delete hole. */ | ||||
delivered_data += (cur->end - cur->start); | |||||
temp = cur; | temp = cur; | ||||
cur = TAILQ_PREV(cur, sackhole_head, scblink); | cur = TAILQ_PREV(cur, sackhole_head, scblink); | ||||
tcp_sackhole_remove(tp, temp); | tcp_sackhole_remove(tp, temp); | ||||
/* | /* | ||||
* The sack block may ack all or part of the | * The sack block may ack all or part of the | ||||
* next hole too, so continue onto the next | * next hole too, so continue onto the next | ||||
* hole. | * hole. | ||||
*/ | */ | ||||
continue; | continue; | ||||
} else { | } else { | ||||
/* Move start of hole forward. */ | /* Move start of hole forward. */ | ||||
delivered_data += (sblkp->end - cur->start); | |||||
cur->start = sblkp->end; | cur->start = sblkp->end; | ||||
cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); | cur->rxmit = SEQ_MAX(cur->rxmit, cur->start); | ||||
} | } | ||||
} else { | } else { | ||||
/* Data acks at least the end of hole. */ | /* Data acks at least the end of hole. */ | ||||
if (SEQ_GEQ(sblkp->end, cur->end)) { | if (SEQ_GEQ(sblkp->end, cur->end)) { | ||||
/* Move end of hole backward. */ | /* Move end of hole backward. */ | ||||
delivered_data += (cur->end - sblkp->start); | |||||
cur->end = sblkp->start; | cur->end = sblkp->start; | ||||
cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); | cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); | ||||
} else { | } else { | ||||
/* | /* | ||||
* ACKs some data in middle of a hole; need | * ACKs some data in middle of a hole; need | ||||
* to split current hole | * to split current hole | ||||
*/ | */ | ||||
temp = tcp_sackhole_insert(tp, sblkp->end, | temp = tcp_sackhole_insert(tp, sblkp->end, | ||||
cur->end, cur); | cur->end, cur); | ||||
if (temp != NULL) { | if (temp != NULL) { | ||||
if (SEQ_GT(cur->rxmit, temp->rxmit)) { | if (SEQ_GT(cur->rxmit, temp->rxmit)) { | ||||
temp->rxmit = cur->rxmit; | temp->rxmit = cur->rxmit; | ||||
tp->sackhint.sack_bytes_rexmit | tp->sackhint.sack_bytes_rexmit | ||||
+= (temp->rxmit | += (temp->rxmit | ||||
- temp->start); | - temp->start); | ||||
} | } | ||||
cur->end = sblkp->start; | cur->end = sblkp->start; | ||||
cur->rxmit = SEQ_MIN(cur->rxmit, | cur->rxmit = SEQ_MIN(cur->rxmit, | ||||
cur->end); | cur->end); | ||||
delivered_data += (sblkp->end - sblkp->start); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); | tp->sackhint.sack_bytes_rexmit += (cur->rxmit - cur->start); | ||||
/* | /* | ||||
* Testing sblkp->start against cur->start tells us whether | * Testing sblkp->start against cur->start tells us whether | ||||
* we're done with the sack block or the sack hole. | * we're done with the sack block or the sack hole. | ||||
* Accordingly, we advance one or the other. | * Accordingly, we advance one or the other. | ||||
*/ | */ | ||||
if (SEQ_LEQ(sblkp->start, cur->start)) | if (SEQ_LEQ(sblkp->start, cur->start)) | ||||
cur = TAILQ_PREV(cur, sackhole_head, scblink); | cur = TAILQ_PREV(cur, sackhole_head, scblink); | ||||
else | else | ||||
sblkp--; | sblkp--; | ||||
} | } | ||||
tp->sackhint.delivered_data = delivered_data; | |||||
tp->sackhint.sacked_bytes += delivered_data - left_edge_delta; | |||||
KASSERT((delivered_data >= 0), ("delivered_data < 0")); | |||||
Done Inline ActionsMove this to tcp_input:2670, as sack_changed should semantically caputer any changes to the scoreboard. Which does happen on partial / cumulative acks. rscheff: Move this to tcp_input:2670, as sack_changed should semantically caputer any changes to the… | |||||
KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0")); | |||||
return (sack_changed); | return (sack_changed); | ||||
} | } | ||||
/* | /* | ||||
* Free all SACK holes to clear the scoreboard. | * Free all SACK holes to clear the scoreboard. | ||||
*/ | */ | ||||
void | void | ||||
tcp_free_sackholes(struct tcpcb *tp) | tcp_free_sackholes(struct tcpcb *tp) | ||||
▲ Show 20 Lines • Show All 137 Lines • Show Last 20 Lines |
brackets in the if clause missing, sack_changed needs to be set too.