Dear sir,
When I used FullTCP with SACK, I found a problem that
the final one of SACK report cannot use due to non next one.
My description shows in the following:
The code location is
int
SackFullTcpAgent::nxt_tseq()
{
int in_recovery = (highest_ack_ < recover_);
int seq = h_seqno_;
if (!in_recovery) {
//if (int(t_seqno_) > 1)
printf("%f: non-recovery nxt_tseq called w/t_seqno:%d\n",
now(), int(t_seqno_));
sq_.dumplist();
return (t_seqno_);
}
int fcnt; // following count-- the
// count field in the block
// after the seq# we are about
// to send
int fbytes; // fcnt in bytes
//if (int(t_seqno_) > 1)
printf("%f: recovery nxt_tseq called w/t_seqno:%d, seq:%d, mode:%d\n",
now(), int(t_seqno_), seq, sack_rtx_threshmode_);
sq_.dumplist();
while ((seq = sq_.nexthole(seq, fcnt, fbytes)) > 0) {
// if we have a following block
// with a large enough count
// we should use the seq# we get
// from nexthole()
printf(" next hole seq %d, h_seqno_ %d, fcnt %d sack_rtx_cthresh_ %d, sack_rtx_threshmode_ %d \n", int(seq), h_seqno_, fcnt, sack_rtx_cthresh_, sack_rtx_threshmode_);
if (sack_rtx_threshmode_ == 0 ||
(sack_rtx_threshmode_ == 1 && fcnt >= sack_rtx_cthresh_) ||
(sack_rtx_threshmode_ == 2 && fbytes >= sack_rtx_bthresh_) ||
(sack_rtx_threshmode_ == 3 && (fcnt >= sack_rtx_cthresh_ || fbytes >= sack_rtx_bthresh_)) ||
(sack_rtx_threshmode_ == 4 && (fcnt >= sack_rtx_cthresh_ && fbytes >= sack_rtx_bthresh_))) {
//if (int(t_seqno_) > 1)
printf("%f: nxt_tseq<hole> returning %d\n", now(), int(seq));
// adjust h_seqno, as we may have
// been "jumped ahead" by learning
// about a filled hole
if (seq > h_seqno_)
h_seqno_ = seq;
return (seq);
} else if (fcnt <= 0)
break;
else {
seq += maxseg_;
}
}
//if (int(t_seqno_) > 1)
printf("%f: nxt_tseq<top> returning %d\n", now(), int(t_seqno_));
return (t_seqno_);
}</top></hole>
when the code execute "seq = sq_.nexthole(seq, fcnt, fbytes)"
It will call "rq.cc" to select nexthold()
int
ReassemblyQueue::nexthole(TcpSeq seq, int& nxtcnt, int& nxtbytes)
{
printf(" ReassemblyQueue::nexthole \n");
nxtbytes = nxtcnt = -1;
hint_ = head_;
seginfo* p;
for (p = hint_; p; p = p->next_) {
// seq# is prior to SACK region
// so seq# is a legit hole
printf("seq %d p->startseq_ %d p->endseq_ %d\n", seq, p->startseq_, p->endseq_);
if (p->startseq_ > seq) {
printf("p->startseq_ > seq\n");
cnts(p, nxtcnt, nxtbytes);
return (seq);
}
// seq# is covered by SACK region
// so the hole is at the end of the region
if ((p->startseq_ <= seq) && (p->endseq_ >= seq)) {
printf("p->startseq_ <= seq) && (p->endseq_ >= seq), p->next_ %d\n", p->next_);
if (p->next_) {
printf("p->next_\n");
cnts(p->next_, nxtcnt, nxtbytes);
}
return (p->endseq_);
}
}
return (-1);
}
For example, my sq_.dumplist() shows as follows:
FIFO [size:33878]: [->285358, 286661<-<f:0x0,c:1>]</f:0x0,c:1>[->287964, 289267<-<f:0x0,c:1>]</f:0x0,c:1>[->290570, 291873<-<f:0x0,c:1>]</f:0x0,c:1>[->293176, 294479<-<f:0x0,c:1>]</f:0x0,c:1>[->295782, 297085<-<f:0x0,c:1>]</f:0x0,c:1>[->298388, 299691<-<f:0x0,c:1>]</f:0x0,c:1>[->300994, 302297<-<f:0x0,c:1>]</f:0x0,c:1>[->303600, 304903<-<f:0x0,c:1>]</f:0x0,c:1>[->306206, 307509<-<f:0x0,c:1>]</f:0x0,c:1>[->308812, 310115<-<f:0x0,c:1>]</f:0x0,c:1>[->311418, 312721<-<f:0x0,c:1>]</f:0x0,c:1>[->314024, 315327<-<f:0x0,c:1>]</f:0x0,c:1>[->316630, 317933<-<f:0x0,c:1>]</f:0x0,c:1>[->319236, 320539<-<f:0x0,c:1>]</f:0x0,c:1>[->321842, 323145<-<f:0x0,c:1>]</f:0x0,c:1>[->324448, 325751<-<f:0x0,c:1>]</f:0x0,c:1>[->327054, 328357<-<f:0x0,c:1>]</f:0x0,c:1>[->329660, 330963<-<f:0x0,c:1>]</f:0x0,c:1>[->332266, 333569<-<f:0x0,c:1>]</f:0x0,c:1>[->334872, 336175<-<f:0x0,c:1>]</f:0x0,c:1>[->337478, 338781<-<f:0x0,c:1>]</f:0x0,c:1>[->340084, 341387<-<f:0x0,c:1>]</f:0x0,c:1>[->342690, 343993<-<f:0x0,c:1>]</f:0x0,c:1>[->345296, 346599<-<f:0x0,c:1>]</f:0x0,c:1>[->347902, 349205<-<f:0x0,c:1>]</f:0x0,c:1>[->350508, 351811<-<f:0x0,c:1>]</f:0x0,c:1>
[->350508, 351811<-][->347902, 349205<-][->345296, 346599<-][->342690, 343993<-][->340084, 341387<-][->337478, 338781<-][->334872, 336175<-][->332266, 333569<-][->329660, 330963<-][->327054, 328357<-][->324448, 325751<-][->321842, 323145<-][->319236, 320539<-][->316630, 317933<-][->314024, 315327<-][->311418, 312721<-][->308812, 310115<-][->306206, 307509<-][->303600, 304903<-][->300994, 302297<-][->298388, 299691<-][->295782, 297085<-][->293176, 294479<-][->290570, 291873<-][->287964, 289267<-][->285358, 286661<-]
LIFO: [->350508, 351811<-][->347902, 349205<-][->345296, 346599<-][->342690, 343993<-][->340084, 341387<-][->337478, 338781<-][->334872, 336175<-][->332266, 333569<-][->329660, 330963<-][->327054, 328357<-][->324448, 325751<-][->321842, 323145<-][->319236, 320539<-][->316630, 317933<-][->314024, 315327<-][->311418, 312721<-][->308812, 310115<-][->306206, 307509<-][->303600, 304903<-][->300994, 302297<-][->298388, 299691<-][->295782, 297085<-][->293176, 294479<-][->290570, 291873<-][->287964, 289267<-][->285358, 286661<-]
[->285358, 286661<-][->287964, 289267<-][->290570, 291873<-][->293176, 294479<-][->295782, 297085<-][->298388, 299691<-][->300994, 302297<-][->303600, 304903<-][->306206, 307509<-][->308812, 310115<-][->311418, 312721<-][->314024, 315327<-][->316630, 317933<-][->319236, 320539<-][->321842, 323145<-][->324448, 325751<-][->327054, 328357<-][->329660, 330963<-][->332266, 333569<-][->334872, 336175<-][->337478, 338781<-][->340084, 341387<-][->342690, 343993<-][->345296, 346599<-][->347902, 349205<-][->350508, 351811<-]
RCVNXT: 284055
ReassemblyQueue::nexthole
seq 350508 p->startseq_ 285358 p->endseq_ 286661
seq 350508 p->startseq_ 287964 p->endseq_ 289267
seq 350508 p->startseq_ 290570 p->endseq_ 291873
seq 350508 p->startseq_ 293176 p->endseq_ 294479
seq 350508 p->startseq_ 295782 p->endseq_ 297085
seq 350508 p->startseq_ 298388 p->endseq_ 299691
seq 350508 p->startseq_ 300994 p->endseq_ 302297
seq 350508 p->startseq_ 303600 p->endseq_ 304903
seq 350508 p->startseq_ 306206 p->endseq_ 307509
seq 350508 p->startseq_ 308812 p->endseq_ 310115
seq 350508 p->startseq_ 311418 p->endseq_ 312721
seq 350508 p->startseq_ 314024 p->endseq_ 315327
seq 350508 p->startseq_ 316630 p->endseq_ 317933
seq 350508 p->startseq_ 319236 p->endseq_ 320539
seq 350508 p->startseq_ 321842 p->endseq_ 323145
seq 350508 p->startseq_ 324448 p->endseq_ 325751
seq 350508 p->startseq_ 327054 p->endseq_ 328357
seq 350508 p->startseq_ 329660 p->endseq_ 330963
seq 350508 p->startseq_ 332266 p->endseq_ 333569
seq 350508 p->startseq_ 334872 p->endseq_ 336175
seq 350508 p->startseq_ 337478 p->endseq_ 338781
seq 350508 p->startseq_ 340084 p->endseq_ 341387
seq 350508 p->startseq_ 342690 p->endseq_ 343993
seq 350508 p->startseq_ 345296 p->endseq_ 346599
seq 350508 p->startseq_ 347902 p->endseq_ 349205
seq 350508 p->startseq_ 350508 p->endseq_ 351811 <-- target
p->startseq_ <= seq) && (p->endseq_ >= seq), p->next_ 0
next hole seq 351811, h_seqno_ 350508, fcnt -1 sack_rtx_cthresh_ 1, sack_rtx_threshmode_ 1
5.800364: nxt_tseq<top> returning 371356</top>
In this case, the nxt_tseq<top> returning should be "351811", but it is final one log in the SACK report. The cnts() will not execute in this case (due to final one so that p->next_ == NULL).</top>
if (p->next_) { <--- here!!!
printf("p->next_\n");
cnts(p->next_, nxtcnt, nxtbytes);
}
The fcnt value always is 0 in SackFullTcpAgent::nxt_tseq().
Therefore, the return value is return (t_seqno_), NOT return (seq);
The gap can only send when the sender got more SACK report (i.e. p->next_ of nexthole != 0).
In this period, it will happen unnecessary timeout due to the sender cannot retransmit the missing seq.
can you provide a patch that you believe will fix this without side effects?
Hi Tom,
According to my observation, current implementation of SACK report in ns2
only triggers when the number of missing packets (i.e., hold) are more than one.
There is a trick part
if the packet retransmission of missing packets triggers by only one SACK's hold,
the frequency of packet retransmission will be increased to generate some redundancy retransmission packets.
if the packet retransmission of missing packets trigger by at least two SACK's holds (i.e., current implementation in ns2), the packet retransmission might not trigger when the number of missing packets is only one.
I have no good solution and only force to trigger packet retransmission when the number of hold is not zero.
=== only insertion else if (p->next_ == NULL){} in ReassemblyQueue::nexthole() ===
Do you think this is a bug, or just a policy decision in how to implement the option? You mentioned that changing the threshold to one may increase the number of unnecessary retransmissions.
In such a case (if this is not strictly an error but an implementation choice) then the choices seem to be to leave it as it is, or to provide some kind of variable that controls whether two or one is the threshold value.
Hi
I think this is a policy decision in implementation of SACK report.
Perhaps, to provide some kind of variable can avoid unnecessary timeout due to the sender cannot re-transmit the missing seq when "p->next_ == NULL".