Page MenuHomeFreeBSD

cxgbe: Compute timestamps via sbintime_t.
ClosedPublic

Authored by jhb on Sep 21 2022, 11:36 PM.
Tags
None
Referenced Files
Unknown Object (File)
Fri, Apr 26, 4:52 PM
Unknown Object (File)
Fri, Apr 26, 4:52 PM
Unknown Object (File)
Fri, Apr 26, 3:20 PM
Unknown Object (File)
Fri, Apr 26, 11:55 AM
Unknown Object (File)
Mar 22 2024, 6:12 AM
Unknown Object (File)
Mar 22 2024, 6:11 AM
Unknown Object (File)
Mar 22 2024, 5:54 AM
Unknown Object (File)
Dec 23 2023, 8:48 AM
Subscribers

Details

Summary

This uses fixed-point math already used elsewhere in the kernel for
sub-second time values. To avoid overflows this does require updating
the calibration once a second rather than once every 30 seconds.
Note that the cxgbe driver already queries multiple registers once a
second for the statistics timers. Also, this version uses 15
instructions with no branches in the per-packet fast path vs 44
instructions with approximately 5 branches in the previous version.

Diff Detail

Repository
rG FreeBSD src repository
Lint
Lint Not Applicable
Unit
Tests Not Applicable

Event Timeline

jhb requested review of this revision.Sep 21 2022, 11:36 PM

I ran both versions side by side in the same driver (both versions compiled in) and the timestamps were never off by more than 2 nanoseconds from each other. For reference, a disassembly of the previous version starting after the loop to fetch the values up to the store of the timestamp in the mbuf:

0x000000000004d119 <+1561>:  imul   $0x3e8,0x740(%r12),%r8d
0x000000000004d125 <+1573>:  sub    %rsi,%rax
0x000000000004d128 <+1576>:  mov    %rax,%rdx
0x000000000004d12b <+1579>:  shr    $0x20,%rdx
0x000000000004d12f <+1583>:  je     0x4d146 <service_iq_fl+1606>
0x000000000004d131 <+1585>:  xor    %edx,%edx
0x000000000004d133 <+1587>:  div    %r8
0x000000000004d136 <+1590>:  jmp    0x4d14b <service_iq_fl+1611>
0x000000000004d138 <+1592>:  movq   $0x0,0x48(%r15)
0x000000000004d140 <+1600>:  mov    -0x50(%rbp),%rdi
0x000000000004d144 <+1604>:  jmp    0x4d1b4 <service_iq_fl+1716>
0x000000000004d146 <+1606>:  xor    %edx,%edx
0x000000000004d148 <+1608>:  div    %r8d
0x000000000004d14b <+1611>:  sub    %rdi,%rcx
0x000000000004d14e <+1614>:  imul   %rcx,%rax
0x000000000004d152 <+1618>:  imul   %rdx,%rcx
0x000000000004d156 <+1622>:  sub    %rsi,%r11
0x000000000004d159 <+1625>:  mov    %rax,%rdx
0x000000000004d15c <+1628>:  or     %r11,%rdx
0x000000000004d15f <+1631>:  shr    $0x20,%rdx
0x000000000004d163 <+1635>:  je     0x4d16f <service_iq_fl+1647>
0x000000000004d165 <+1637>:  xor    %edx,%edx
0x000000000004d167 <+1639>:  div    %r11
0x000000000004d16a <+1642>:  mov    %rax,%r9
0x000000000004d16d <+1645>:  jmp    0x4d177 <service_iq_fl+1655>
0x000000000004d16f <+1647>:  xor    %edx,%edx
0x000000000004d171 <+1649>:  div    %r11d
0x000000000004d174 <+1652>:  mov    %eax,%r9d
0x000000000004d177 <+1655>:  imul   %r8,%rdx
0x000000000004d17b <+1659>:  add    %rcx,%rdx
0x000000000004d17e <+1662>:  mov    %rdx,%rax
0x000000000004d181 <+1665>:  or     %r11,%rax
0x000000000004d184 <+1668>:  shr    $0x20,%rax
0x000000000004d188 <+1672>:  je     0x4d194 <service_iq_fl+1684>
0x000000000004d18a <+1674>:  mov    %rdx,%rax
0x000000000004d18d <+1677>:  xor    %edx,%edx
0x000000000004d18f <+1679>:  div    %r11
0x000000000004d192 <+1682>:  jmp    0x4d19b <service_iq_fl+1691>
0x000000000004d194 <+1684>:  mov    %edx,%eax
0x000000000004d196 <+1686>:  xor    %edx,%edx
0x000000000004d198 <+1688>:  div    %r11d
0x000000000004d19b <+1691>:  imul   %r8,%r9
0x000000000004d19f <+1695>:  add    %rdi,%rax
0x000000000004d1a2 <+1698>:  add    %r9,%rax
0x000000000004d1a5 <+1701>:  mov    %rax,0x48(%r15)

and this version:

0x000000000004c6f9 <+1529>:  sub    %rbx,%r9
0x000000000004c6fc <+1532>:  sub    %rsi,%rax
0x000000000004c6ff <+1535>:  imul   %r9,%rax
0x000000000004c703 <+1539>:  sub    %rbx,%rdi
0x000000000004c706 <+1542>:  xor    %edx,%edx
0x000000000004c708 <+1544>:  div    %rdi
0x000000000004c70b <+1547>:  add    %rsi,%rax
0x000000000004c70e <+1550>:  mov    %eax,%ecx
0x000000000004c710 <+1552>:  shr    $0x20,%rax
0x000000000004c714 <+1556>:  imul   $0x3b9aca00,%rax,%rdx
0x000000000004c71b <+1563>:  test   %rax,%rax
0x000000000004c71e <+1566>:  cmove  %rax,%rdx
0x000000000004c722 <+1570>:  imul   $0x3b9aca00,%rcx,%rax
0x000000000004c729 <+1577>:  shr    $0x20,%rax
0x000000000004c72d <+1581>:  add    %rdx,%rax
0x000000000004c730 <+1584>:  mov    %rax,0x48(%r15)
This revision is now accepted and ready to land.Sep 21 2022, 11:44 PM
sys/dev/cxgbe/t4_main.c
1163

BTW, I find this management of the generation a bit odd. It means there is a window where you will not do timestamps since all structures have gen == 0. The timehands structures used for nanouptime(), etc. do not do this, instead they just permit reading the previous entry if the writes to update the equivalent of sc->cal_current isn't visible yet.

Also, a more typical way to handle updating a structure like this is to or 1 into the generation count while a structure is being modified and then increment it back up to the next even value after the structure update is done. <sys/seqc.h> has an API to do this (documented in secq(9)).

This revision was automatically updated to reflect the committed changes.