Is there a way to identify a delayed ACK in the TCP layer dtrace probes?
If I do
time dd if=/dev/zero of=foo bs=8k count=1
where the output file is on an NFS mount and dtrace the TCP send, receives
and look at the delta of the times, the last acknowledgement happens after
the dd has finished.
I''m imagining the the last ACK is a delayed acknowledgement.
I want to be able to measure the round trip speeds, but these slow ACKs
throw off the measurements . I want to be able to filter them out if
possible.
Here is an example
time dd if=/dev/zero of=foo bs=8k count=1
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 0.00193471 s, 4.2 MB/s
real 0m0.014s
user 0m0.001s
sys 0m0.006s
sudo nfslat.d 1 192.86.100.186
starting up ...
sbytes rbytes delta send recd flags
unackd unackd us bytes bytes
240 0 439 / 68 ACK|PUSH|
244 0 175 244 \ ACK|PUSH|
244 0 308 / 312 ACK|PUSH|
252 0 467 252 \ ACK|PUSH|
252 0 320 / 76 ACK|PUSH|
244 0 1717 244 \ ACK|PUSH|
244 0 268 / 312 ACK|PUSH|
168 0 208 168 \ ACK|PUSH|
168 0 250 / 388 ACK|PUSH|
244 0 199 244 \ ACK|PUSH|
244 0 260 / 312 ACK|PUSH|
176 0 149 176 \ ACK|PUSH|
176 0 250 / 248 ACK|PUSH|
280 0 159 280 \ ACK|PUSH|
280 0 606 / 532 ACK|PUSH|
176 0 173 176 \ ACK|PUSH|
176 0 245 / 84 ACK|PUSH|
0 84 380 1448 \ ACK|
0 84 58 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1448 \ ACK|
0 84 36 1448 \ ACK|
0 84 34 1140 \ ACK|PUSH|
8380 0 360 / 84 ACK|PUSH|
168 0 144 168 \ ACK|PUSH|
168 0 471 / 76 ACK|PUSH|
192 0 116 192 \ ACK|PUSH|
192 0 275 / 256 ACK|PUSH|
244 0 5125 244 \ ACK|PUSH|
244 0 382 / 68 ACK|PUSH|
0 0 50698 0 \ ACK| 0
So the total dd time was 14ms where as the last ACK alone was 50ms.
Code
#!/usr/sbin/dtrace -s
#pragma D option quiet
#pragma D option defaultargs
inline int TICKS=$1;
inline string ADDR=$$2;
dtrace:::BEGIN
{
TIMER = ( TICKS != NULL ) ? TICKS : 1 ;
ticks = TIMER;
TITLE = 10;
title = 0;
walltime=timestamp;
printf("starting up ...\n");
}
tcp:::send, tcp:::receive
/ title == 0 &&
( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
printf("%6s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s
\n",
"sbytes",
"rbytes",
"delta" ,
"send" ,
"recd" ,
"ssz" ,
"sscal" ,
"rsz",
"rscal",
"congw",
"conthr",
"retran"
);
}
tcp:::send
/ ( args[2]->ip_daddr == ADDR || ADDR == NULL ) /
{
nfs[args[1]->cs_cid]=1; /* this is an NFS thread */
delta= timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " :
""),flags);
printf("%6d %6d %6d %8d \ %-8s %8d %8d %8d %8d %8d %12d %s %d
\n",
/*
printf("%6d %6d %6d %8d \ %8s %8d %8d %8d %8d %8d %12d %12d %12d
%8d
%8d %s %d \n",
args[4]->tcp_seq %1000000,
args[4]->tcp_ack %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
args[2]->ip_plength - args[4]->tcp_offset,
"",
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
flags,
args[3]->tcps_retransmit
);
flags=0;
title--;
}
tcp:::receive
/ ( args[2]->ip_saddr == ADDR || ADDR == NULL ) &&
nfs[args[1]->cs_cid] /
{
delta=timestamp-walltime;
walltime=timestamp;
flags="";
flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ?
"PUSH|" :
""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|"
: ""),flags);
flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " :
""),flags);
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %s %d
\n",
/*
printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %12d %12d
%8d %8d %s %d \n",
args[4]->tcp_ack %1000000,
args[4]->tcp_seq %1000000,
*/
args[3]->tcps_snxt - args[3]->tcps_suna ,
args[3]->tcps_rnxt - args[3]->tcps_rack,
delta/1000,
"",
args[2]->ip_plength - args[4]->tcp_offset,
args[3]->tcps_swnd,
args[3]->tcps_snd_ws,
args[3]->tcps_rwnd,
args[3]->tcps_rcv_ws,
args[3]->tcps_cwnd,
args[3]->tcps_cwnd_ssthresh,
flags,
/*
args[3]->tcps_sack_fack,
args[3]->tcps_sack_snxt,
args[3]->tcps_rto,
args[3]->tcps_mss,
*/
args[3]->tcps_retransmit
);
flags=0;
title--;
}
--
- Kyle
O: +1.415.341.3430
F: +1.650.494.1676
275 Middlefield Road, Suite 50
Menlo Park, CA 94025
http://www.delphix.com
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
<http://mail.opensolaris.org/pipermail/dtrace-discuss/attachments/20110517/c06a9ff0/attachment-0001.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: image/png
Size: 821 bytes
Desc: not available
URL:
<http://mail.opensolaris.org/pipermail/dtrace-discuss/attachments/20110517/c06a9ff0/attachment-0001.png>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 2xsgwmdx.bmp
Type: image/bmp
Size: 2189614 bytes
Desc: not available
URL:
<http://mail.opensolaris.org/pipermail/dtrace-discuss/attachments/20110517/c06a9ff0/attachment-0001.bin>
hi Kyle I discuss delayed ACKs and the tcp provider at http://blogs.oracle.com/amaguire/entry/measuring_rtt_and_handling_tcp One filtering approach would be to only to measure RTT for segments which are directly ACKed - i.e. ACK value received == tcp->tcps_sndnxt value The script at http://blogs.oracle.com/amaguire/entry/improved_dtrace_tcp_acknowledgement_rtt ...does this, so give it a try. Hope this helps, Alan On 17/05/2011 20:07, Kyle Hailey wrote:> > Is there a way to identify a delayed ACK in the TCP layer dtrace probes? > > If I do > > time dd if=/dev/zero of=foo bs=8k count=1 > > where the output file is on an NFS mount and dtrace the TCP send, > receives and look at the delta of the times, the last acknowledgement > happens after the dd has finished. > I''m imagining the the last ACK is a delayed acknowledgement. > I want to be able to measure the round trip speeds, but these slow > ACKs throw off the measurements . I want to be able to filter them out > if possible. > > Here is an example > > time dd if=/dev/zero of=foo bs=8k count=1 > 1+0 records in > 1+0 records out > 8192 bytes (8.2 kB) copied, 0.00193471 s, 4.2 MB/s > > real 0m0.014s > user 0m0.001s > sys 0m0.006s > > > sudo nfslat.d 1 192.86.100.186 > starting up ... > sbytes rbytes delta send recd flags > unackd unackd us bytes bytes > 240 0 439 / 68 ACK|PUSH| > 244 0 175 244 \ ACK|PUSH| > 244 0 308 / 312 ACK|PUSH| > 252 0 467 252 \ ACK|PUSH| > 252 0 320 / 76 ACK|PUSH| > 244 0 1717 244 \ ACK|PUSH| > 244 0 268 / 312 ACK|PUSH| > 168 0 208 168 \ ACK|PUSH| > 168 0 250 / 388 ACK|PUSH| > 244 0 199 244 \ ACK|PUSH| > 244 0 260 / 312 ACK|PUSH| > 176 0 149 176 \ ACK|PUSH| > 176 0 250 / 248 ACK|PUSH| > 280 0 159 280 \ ACK|PUSH| > 280 0 606 / 532 ACK|PUSH| > 176 0 173 176 \ ACK|PUSH| > 176 0 245 / 84 ACK|PUSH| > 0 84 380 1448 \ ACK| > 0 84 58 1448 \ ACK| > 0 84 36 1448 \ ACK| > 0 84 34 1448 \ ACK| > 0 84 36 1448 \ ACK| > 0 84 34 1140 \ ACK|PUSH| > 8380 0 360 / 84 ACK|PUSH| > 168 0 144 168 \ ACK|PUSH| > 168 0 471 / 76 ACK|PUSH| > 192 0 116 192 \ ACK|PUSH| > 192 0 275 / 256 ACK|PUSH| > 244 0 5125 244 \ ACK|PUSH| > 244 0 382 / 68 ACK|PUSH| > 0 0 50698 0 \ ACK| 0 > > So the total dd time was 14ms where as the last ACK alone was 50ms. > > > Code > > #!/usr/sbin/dtrace -s > #pragma D option quiet > #pragma D option defaultargs > > inline int TICKS=$1; > inline string ADDR=$$2; > > dtrace:::BEGIN > { > TIMER = ( TICKS != NULL ) ? TICKS : 1 ; > ticks = TIMER; > TITLE = 10; > title = 0; > walltime=timestamp; > printf("starting up ...\n"); > } > > tcp:::send, tcp:::receive > / title == 0 && > ( args[2]->ip_daddr == ADDR || ADDR == NULL ) / > { > printf("%6s %6s %6s %8s %8s %8s %8s %8s %8s %8s %8s %8s \n", > "sbytes", > "rbytes", > "delta" , > "send" , > "recd" , > "ssz" , > "sscal" , > "rsz", > "rscal", > "congw", > "conthr", > "retran" > ); > } > > tcp:::send > / ( args[2]->ip_daddr == ADDR || ADDR == NULL ) / > { > nfs[args[1]->cs_cid]=1; /* this is an NFS thread */ > delta= timestamp-walltime; > walltime=timestamp; > flags=""; > flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : ""),flags); > flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags); > > > printf("%6d %6d %6d %8d \ %-8s %8d %8d %8d %8d %8d %12d %s %d > \n", > /* > printf("%6d %6d %6d %8d \ %8s %8d %8d %8d %8d %8d %12d %12d > %12d %8d %8d %s %d \n", > args[4]->tcp_seq %1000000, > args[4]->tcp_ack %1000000, > */ > args[3]->tcps_snxt - args[3]->tcps_suna , > args[3]->tcps_rnxt - args[3]->tcps_rack, > delta/1000, > args[2]->ip_plength - args[4]->tcp_offset, > "", > args[3]->tcps_swnd, > args[3]->tcps_snd_ws, > args[3]->tcps_rwnd, > args[3]->tcps_rcv_ws, > args[3]->tcps_cwnd, > args[3]->tcps_cwnd_ssthresh, > /* > args[3]->tcps_sack_fack, > args[3]->tcps_sack_snxt, > args[3]->tcps_rto, > args[3]->tcps_mss, > */ > flags, > args[3]->tcps_retransmit > ); > flags=0; > title--; > } > > tcp:::receive > / ( args[2]->ip_saddr == ADDR || ADDR == NULL ) && nfs[args[1]->cs_cid] / > { > delta=timestamp-walltime; > walltime=timestamp; > > flags=""; > flags= strjoin((( args[4]->tcp_flags & TH_FIN ) ? "FIN|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_SYN ) ? "SYN|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_RST ) ? "RST|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_PUSH ) ? "PUSH|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_ACK ) ? "ACK|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_URG ) ? "URG|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_ECE ) ? "ECE|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags & TH_CWR ) ? "CWR|" : > ""),flags); > flags= strjoin((( args[4]->tcp_flags == 0 ) ? "null " : ""),flags); > > printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %s %d > \n", > /* > printf("%6d %6d %6d %8s / %-8d %8d %8d %8d %8d %8d %12d %12d > %12d %8d %8d %s %d \n", > args[4]->tcp_ack %1000000, > args[4]->tcp_seq %1000000, > */ > args[3]->tcps_snxt - args[3]->tcps_suna , > args[3]->tcps_rnxt - args[3]->tcps_rack, > delta/1000, > "", > args[2]->ip_plength - args[4]->tcp_offset, > args[3]->tcps_swnd, > args[3]->tcps_snd_ws, > args[3]->tcps_rwnd, > args[3]->tcps_rcv_ws, > args[3]->tcps_cwnd, > args[3]->tcps_cwnd_ssthresh, > flags, > /* > args[3]->tcps_sack_fack, > args[3]->tcps_sack_snxt, > args[3]->tcps_rto, > args[3]->tcps_mss, > */ > args[3]->tcps_retransmit > ); > flags=0; > title--; > } > > > > -- > - Kyle > > O: +1.415.341.3430 > F: +1.650.494.1676 > 275 Middlefield Road, Suite 50 > Menlo Park, CA 94025 > http://www.delphix.com <http://www.delphix.com/> > > > _______________________________________________ > dtrace-discuss mailing list > dtrace-discuss at opensolaris.org