I've only lightly tested this so far and have only been able to test RDTSCP
since I don't currently have Intel HW recent enough to support RDPID. I also
don't currently have AMD HW so I wasn't yet able to sanity-check that, on AMD,
these changes have no effect and calls to the libvmmapi
`vm_{get,set}_capability()` API have the desired outcome (returning the expected
`errno` values).
That said, in light of the recent request for this feature on
freebsd-virtualization, I thought it'd be useful to post this with what testing
I've gotten to so far.
My simple test has just been running a 4-CPU RELEASE guest on a patched 4-CPU
CURRENT host with the guest's vCPUs pinned to the host's CPUs in opposing CPU
numbering order, and then running the below simple test program on both the host
and the guest (usually via `gnu-watch`). (The script I used to launch this guest
is also below.)
Since, in this scenario, both the host and the guest write
`PCPU_GET(cpuid)` to each `TSC_AUX` register, I figure the reverse pinning should
ensure that every guest vCPU's `TSC_AUX` value differs from that of the host CPU it is pinned to.
This seemed like a simple way to confirm that the guest and host do, indeed,
effectively have their own `TSC_AUX` registers---guest values //do// show up on
the guest, guest values //don't// show up on the host, and vice versa from the
host's perspective.
I also made the following one-off change to the host's `initcpu.c` to give each
host TSC a visibly different value so that I could eyeball that the guest's
vCPUs were, indeed, running on host CPUs with different `TSC_AUX` values:
```
@@ -284,6 +284,8 @@ initializecpu(void)
if ((amd_feature & AMDID_RDTSCP) != 0 ||
(cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+
+ wrmsr(MSR_TSC, ((uint64_t)PCPU_GET(cpuid)) << 48);
}
```
(Many thanks to @grehan for his help with this work!)
```
lang=c, name=rdtscp.c, lines=24
#include <sys/types.h>
#include <sys/sysctl.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
/*
 * Per-thread control block: one per test thread, holding the thread handle
 * and the values that thread sampled in run_testthr().
 */
struct testthrcb {
/* Thread handle; filled in by pthread_create() and later passed to pthread_join(). */
pthread_t thr;
#ifdef HAVE_RDPID
/* Value stored by rdpid(); only present when built with HAVE_RDPID. */
uint64_t aux_rdpid;
#endif
/* Auxiliary (ECX) value stored by rdtscp(). */
uint32_t aux_rdtscp;
/* Counter value stored by rdtsc(). */
uint64_t tsc_rdtsc;
/* Counter value stored by rdtscp(). */
uint64_t tsc_rdtscp;
};
/* Start flag: test threads wait until main() sets this to true. */
static bool testthrs_enabled;
/* Held while reading or writing testthrs_enabled. */
static pthread_mutex_t testthrs_enabled_lock;
/* Broadcast by main() after it sets testthrs_enabled to true. */
static pthread_cond_t testthrs_enabled_cond;
/* Thread entry point passed to pthread_create(); defined further down. */
static void *run_testthr(void *arg);
#ifdef HAVE_RDPID
/*
 * Execute RDPID and store the register value it produces in *pid.
 *
 * Only compiled when HAVE_RDPID is defined.  Uses the __asm__ spelling so
 * the file also builds in strict ISO C modes, where the bare `asm` keyword
 * is not recognized.
 */
static inline void
rdpid(uint64_t *pid)
{
	uint64_t aux;

	__asm__ __volatile__ ("rdpid %0" : "=r" (aux));
	*pid = aux;
}
#endif
/*
 * Execute RDTSC and store the resulting 64-bit counter value in *tsc.
 *
 * The instruction returns the low half in EAX and the high half in EDX.  The
 * halves are captured in locals and combined with a shift/or instead of being
 * written through uint32_t aliases of *tsc, which would violate the strict
 * aliasing rules and hard-code the byte order.
 */
static inline void
rdtsc(uint64_t *tsc)
{
	uint32_t tsc_high, tsc_low;

	__asm__ __volatile__ ("rdtsc" : "=a" (tsc_low), "=d" (tsc_high));
	*tsc = ((uint64_t)tsc_high << 32) | tsc_low;
}
/*
 * Execute RDTSCP, storing the 64-bit counter value in *tsc and the auxiliary
 * value returned in ECX in *pid.
 *
 * As with rdtsc(), the EDX:EAX halves are combined arithmetically from locals
 * rather than written through uint32_t aliases of *tsc.
 */
static inline void
rdtscp(uint64_t *tsc, uint32_t *pid)
{
	uint32_t aux, tsc_high, tsc_low;

	__asm__ __volatile__ ("rdtscp"
	    : "=a" (tsc_low), "=d" (tsc_high), "=c" (aux));
	*tsc = ((uint64_t)tsc_high << 32) | tsc_low;
	*pid = aux;
}
/*
 * Test thread body.
 *
 * Blocks until main() sets testthrs_enabled, then samples RDPID (when built
 * with HAVE_RDPID), RDTSCP and RDTSC in that order into the control block
 * passed as `arg`.  Always returns NULL.
 */
static void *
run_testthr(void *arg)
{
	struct testthrcb *cb = (struct testthrcb *)arg;
	int rc;

	/* Wait for the start signal under the flag's lock. */
	rc = pthread_mutex_lock(&testthrs_enabled_lock);
	assert(rc == 0);
	for (;;) {
		if (testthrs_enabled)
			break;
		rc = pthread_cond_wait(&testthrs_enabled_cond,
		    &testthrs_enabled_lock);
		assert(rc == 0);
	}
	rc = pthread_mutex_unlock(&testthrs_enabled_lock);
	assert(rc == 0);

	/* Take the samples back to back. */
#ifdef HAVE_RDPID
	rdpid(&cb->aux_rdpid);
#endif
	rdtscp(&cb->tsc_rdtscp, &cb->aux_rdtscp);
	rdtsc(&cb->tsc_rdtsc);
	__asm__ __volatile__ ("mfence");

	return (NULL);
}
/*
 * Spawn one thread per CPU reported by the hw.ncpu sysctl, give each thread
 * an affinity set containing only its CPU, release all threads at once, and
 * print what each one sampled: first with the TSC values in hexadecimal, then
 * again in decimal.
 *
 * Any failing library call trips an assert().  Returns 0 on success.
 */
int
main(int argc, const char **argv)
{
	pthread_attr_t attr;
	cpuset_t cpuset;
	struct testthrcb *tcbs;
	size_t len;
	int err, i, ncpus;

	(void)argc;
	(void)argv;

	testthrs_enabled = false;
	err = pthread_mutex_init(&testthrs_enabled_lock, NULL);
	assert(err == 0);
	err = pthread_cond_init(&testthrs_enabled_cond, NULL);
	assert(err == 0);

	len = sizeof(ncpus);
	err = sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0);
	assert(err == 0);
	/* Guard the conversion to size_t below. */
	assert(ncpus > 0);

	/* calloc() zero-fills and checks the count * size multiplication. */
	tcbs = calloc((size_t)ncpus, sizeof(*tcbs));
	assert(tcbs != NULL);

	/* Create the threads; each one blocks until the broadcast below. */
	for (i = 0; i < ncpus; i++) {
		err = pthread_attr_init(&attr);
		assert(err == 0);
		CPU_ZERO(&cpuset);
		CPU_SET(i, &cpuset);
		err = pthread_attr_setaffinity_np(&attr, sizeof(cpuset),
		    &cpuset);
		assert(err == 0);
		err = pthread_create(&tcbs[i].thr, &attr, run_testthr,
		    &tcbs[i]);
		assert(err == 0);
		err = pthread_attr_destroy(&attr);
		assert(err == 0);
	}

	/* Release every thread at once. */
	err = pthread_mutex_lock(&testthrs_enabled_lock);
	assert(err == 0);
	testthrs_enabled = true;
	err = pthread_cond_broadcast(&testthrs_enabled_cond);
	assert(err == 0);
	err = pthread_mutex_unlock(&testthrs_enabled_lock);
	assert(err == 0);

	for (i = 0; i < ncpus; i++) {
		err = pthread_join(tcbs[i].thr, NULL);
		assert(err == 0);
	}

	/*
	 * First pass: TSC values in hexadecimal.  The 'j' length modifier
	 * takes (u)intmax_t, so the fixed-width values are cast explicitly.
	 */
	for (i = 0; i < ncpus; i++) {
#ifdef HAVE_RDPID
		printf("CPU %d: aux_rdpid %ju, aux_rdtscp %u, tsc_rdtscp "
		    "%#0jx, tsc_rdtsc %#0jx, delta %jd\n", i,
		    (uintmax_t)tcbs[i].aux_rdpid, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#else
		printf("CPU %d: aux %u, tsc_rdtscp %#0jx, tsc_rdtsc "
		    "%#0jx, delta %jd\n", i, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#endif
	}
	printf("\n");

	/* Second pass: the same data with TSC values in decimal. */
	for (i = 0; i < ncpus; i++) {
#ifdef HAVE_RDPID
		printf("CPU %d: aux_rdpid %ju, aux_rdtscp %u, tsc_rdtscp "
		    "%ju, tsc_rdtsc %ju, delta %jd\n", i,
		    (uintmax_t)tcbs[i].aux_rdpid, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#else
		printf("CPU %d: aux %u, tsc_rdtscp %ju, tsc_rdtsc %ju, "
		    "delta %jd\n", i, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#endif
	}

	free(tcbs);
	err = pthread_cond_destroy(&testthrs_enabled_cond);
	assert(err == 0);
	err = pthread_mutex_destroy(&testthrs_enabled_lock);
	assert(err == 0);
	return (0);
}
```
```
lang=sh, name=runrdtscp, lines=24
#!/bin/sh
#
# Launch the 'rdtscp' bhyve guest with its vCPUs pinned to the host CPUs in
# reverse numbering order, report why bhyve exited, and destroy the VM
# afterwards unless bhyve exited because of an error.
#
# References:
# - 'bhyve(8)'
# - 'bhyvectl(8)'
# - '/usr/share/examples/bhyve/vmrun.sh'

VMNAME=rdtscp
# Must match the number of '-p' pinning entries passed to bhyve below.
NCPUS=4
RAMSIZE=128M
DISKIMAGE="${HOME}/rdtscp.raw"
TAP_DEV=tap0
TAP_MAC="00:a0:98:df:46:01"
CONS_DEV=stdio

# Remove any stale instance left over from a previous run; failure is fine.
sudo bhyvectl --vm="${VMNAME}" --destroy > /dev/null 2>&1

# Load the guest; there is nothing for bhyve to run if this fails.
if ! sudo bhyveload \
	-c "${CONS_DEV}" \
	-m "${RAMSIZE}" \
	-d "${DISKIMAGE}" \
	"${VMNAME}"; then
	echo "bhyveload failed" >&2
	exit 4
fi

# '-p vcpu:hostcpu' pins guest vCPU N to host CPU (3 - N).
sudo bhyve \
	-A \
	-H \
	-P \
	-c "${NCPUS}" \
	-p 0:3 \
	-p 1:2 \
	-p 2:1 \
	-p 3:0 \
	-m "${RAMSIZE}" \
	-s 0,hostbridge \
	-s 1,lpc \
	-s "2,virtio-blk,${DISKIMAGE}" \
	-s "3,virtio-net,${TAP_DEV},mac=${TAP_MAC}" \
	-l "com1,${CONS_DEV}" \
	"${VMNAME}"
BHYVE_EXIT=$?

echo -n "bhyve exited: "
case ${BHYVE_EXIT} in
0)
	echo "reboot (0)"
	;;
1)
	echo "powered off (1)"
	;;
2)
	echo "halted (2)"
	;;
3)
	echo "TRIPLE-FAULT (3)"
	;;
4)
	echo "an error occurred (4)"
	;;
*)
	echo "UNKNOWN REASON: ${BHYVE_EXIT}"
	;;
esac

case ${BHYVE_EXIT} in
0|1|2)
	# Cleanup /dev/vmm entry when bhyve did not exit
	# due to an error.
	echo "Destroying VM..."
	sudo bhyvectl --vm="${VMNAME}" --destroy > /dev/null 2>&1
	echo done
	;;
esac

exit ${BHYVE_EXIT}
```