I've only lightly tested this so far and have only been able to test RDTSCP
since I don't currently have Intel HW recent enough to support RDPID. I also
don't currently have AMD HW, so I wasn't yet able to sanity-check that, on
AMD, these changes have no effect and calls to the libvmmapi
`vm_{get,set}_capability()` API have the desired outcome (returning the expected
`errno` values).
That said, in light of the recent request for this feature on
freebsd-virtualization, I thought it'd be useful enough to post this with what
testing I've gotten to so far.
My simple test has just been running a 4-CPU RELEASE guest on a patched 4-CPU
CURRENT host with the guest's vCPUs pinned to the host's CPUs in opposing CPU
numbering order, and then running the simple test program below on both the
host and the guest (usually via `gnu-watch`). (The script I used to launch this
guest is also included below.)
Since, in this scenario, both the host and the guest are writing
`PCPU_GET(cpuid)` to each `TSC_AUX` register, I figure the reverse pinning should
ensure that each guest vCPU writes a different value to its `TSC_AUX` register
than the host wrote on the CPU that vCPU is pinned to. This seemed like a simple
way to confirm that the guest and host do, indeed, effectively have their own
`TSC_AUX` registers---guest values //do// show up on the guest, guest values
//don't// show up on the host, and vice versa from the host's perspective.
I also made the following one-off change to the host's `initcpu.c` to give each
host TSC a visibly different value so that I could eyeball that the guest's
vCPUs were, indeed, running on host CPUs with different `TSC_AUX` values:
```
@@ -284,6 +284,8 @@ initializecpu(void)
if ((amd_feature & AMDID_RDTSCP) != 0 ||
(cpu_stdext_feature2 & CPUID_STDEXT2_RDPID) != 0)
wrmsr(MSR_TSC_AUX, PCPU_GET(cpuid));
+
+ wrmsr(MSR_TSC, ((uint64_t)PCPU_GET(cpuid)) << 48);
}
```
(Many thanks to @grehan for his help with this work!)
```
lang=c, name=rdtscp.c, lines=24
#include <sys/types.h>
#include <sys/sysctl.h>
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <pthread.h>
#include <pthread_np.h>
/*
 * Per-thread control block.  main() allocates one per CPU; the matching test
 * thread fills in the sampled values and main() prints them after joining.
 */
struct testthrcb {
/* Thread handle, filled in by pthread_create() in main(). */
pthread_t thr;
#ifdef HAVE_RDPID
/* Value returned by the RDPID instruction (only when HAVE_RDPID is set). */
uint64_t aux_rdpid;
#endif
/* Auxiliary value RDTSCP returned in ECX. */
uint32_t aux_rdtscp;
/* Time-stamp counter as read by RDTSC (sampled after RDTSCP). */
uint64_t tsc_rdtsc;
/* Time-stamp counter as read by RDTSCP. */
uint64_t tsc_rdtscp;
};
/*
 * Start gate for the test threads.  Each thread waits on the condition
 * variable until testthrs_enabled becomes true; main() sets the flag and
 * broadcasts while holding testthrs_enabled_lock.  The mutex and condition
 * variable are initialized and destroyed at run time in main().
 */
static bool testthrs_enabled;
static pthread_mutex_t testthrs_enabled_lock;
static pthread_cond_t testthrs_enabled_cond;
/* Thread entry point passed to pthread_create(); defined further down. */
static void *run_testthr(void *arg);
#ifdef HAVE_RDPID
static inline void
rdpid(uint64_t *pid)
{
asm volatile ("rdpid %0" : "=r" (*pid));
}
#endif
/*
 * Read the time-stamp counter with the RDTSC instruction and store the full
 * 64-bit value in *tsc.
 *
 * RDTSC returns the counter split across EDX (high half) and EAX (low half).
 * The halves are captured in local 32-bit variables and combined with a
 * shift, instead of being written through uint32_t pointers aliased onto
 * *tsc, which broke C's strict-aliasing rules.  __asm__/__volatile__ are used
 * so the function also builds in strict ISO C modes where the plain "asm"
 * keyword is not available.
 */
static inline void
rdtsc(uint64_t *tsc)
{
	uint32_t tsc_high, tsc_low;

	__asm__ __volatile__ ("rdtsc" : "=a" (tsc_low), "=d" (tsc_high));
	*tsc = ((uint64_t)tsc_high << 32) | tsc_low;
}
/*
 * Read the time-stamp counter and the processor's auxiliary value with the
 * RDTSCP instruction.
 *
 * tsc: receives the 64-bit counter (EDX:EAX).
 * pid: receives the 32-bit auxiliary value returned in ECX.
 *
 * The register outputs are captured in locals and combined with a shift,
 * instead of being written through uint32_t pointers aliased onto *tsc, which
 * broke C's strict-aliasing rules.  __asm__/__volatile__ are used so the
 * function also builds in strict ISO C modes where the plain "asm" keyword is
 * not available.
 */
static inline void
rdtscp(uint64_t *tsc, uint32_t *pid)
{
	uint32_t aux, tsc_high, tsc_low;

	__asm__ __volatile__ ("rdtscp"
	    : "=a" (tsc_low), "=d" (tsc_high), "=c" (aux));
	*tsc = ((uint64_t)tsc_high << 32) | tsc_low;
	*pid = aux;
}
/*
 * Test thread body.  Waits at the start gate until main() enables the test
 * threads, then samples RDPID (when built with HAVE_RDPID), RDTSCP and RDTSC,
 * in that order, into the control block passed in as arg.  Always returns
 * NULL.
 */
static void *
run_testthr(void *arg)
{
	struct testthrcb *const cb = (struct testthrcb *)arg;
	int rc;

	/* Hold here until main() flips the flag and broadcasts. */
	rc = pthread_mutex_lock(&testthrs_enabled_lock);
	assert(rc == 0);
	for (;;) {
		if (testthrs_enabled)
			break;
		rc = pthread_cond_wait(&testthrs_enabled_cond,
		    &testthrs_enabled_lock);
		assert(rc == 0);
	}
	rc = pthread_mutex_unlock(&testthrs_enabled_lock);
	assert(rc == 0);

#ifdef HAVE_RDPID
	rdpid(&cb->aux_rdpid);
#endif
	rdtscp(&cb->tsc_rdtscp, &cb->aux_rdtscp);
	rdtsc(&cb->tsc_rdtsc);
	__asm__ __volatile__ ("mfence");

	return (NULL);
}
/*
 * Spawn one test thread per CPU reported by the hw.ncpu sysctl, with each
 * thread's affinity set to a single CPU, release them all at once through the
 * start gate, wait for them to finish, and print what each one sampled.  The
 * samples are printed twice: first with the TSC values in hexadecimal, then
 * in decimal.
 *
 * Every library call is checked with assert(), so any failure aborts the
 * program.  Returns 0 on success.  argc and argv are unused.
 */
int
main(int argc, const char **argv)
{
	pthread_attr_t attr;
	cpuset_t cpuset;
	struct testthrcb *tcbs;
	size_t len;
	int err, i, ncpus;

	(void)argc;
	(void)argv;

	/* Set up the start gate before any thread can look at it. */
	testthrs_enabled = false;
	err = pthread_mutex_init(&testthrs_enabled_lock, NULL);
	assert(err == 0);
	err = pthread_cond_init(&testthrs_enabled_cond, NULL);
	assert(err == 0);

	len = sizeof(ncpus);
	err = sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0);
	assert(err == 0);
	/* Guard the int -> size_t conversion used for the allocation below. */
	assert(ncpus > 0);

	/* calloc() zero-fills and checks the count * size multiplication. */
	tcbs = calloc((size_t)ncpus, sizeof(*tcbs));
	assert(tcbs != NULL);

	/* One thread per CPU, each restricted to exactly that CPU. */
	for (i = 0; i < ncpus; i++) {
		err = pthread_attr_init(&attr);
		assert(err == 0);
		CPU_ZERO(&cpuset);
		CPU_SET(i, &cpuset);
		err = pthread_attr_setaffinity_np(&attr, sizeof(cpuset),
		    &cpuset);
		assert(err == 0);
		err = pthread_create(&tcbs[i].thr, &attr, run_testthr,
		    &tcbs[i]);
		assert(err == 0);
		err = pthread_attr_destroy(&attr);
		assert(err == 0);
	}

	/* Open the gate: every waiting thread takes its samples now. */
	err = pthread_mutex_lock(&testthrs_enabled_lock);
	assert(err == 0);
	testthrs_enabled = true;
	err = pthread_cond_broadcast(&testthrs_enabled_cond);
	assert(err == 0);
	err = pthread_mutex_unlock(&testthrs_enabled_lock);
	assert(err == 0);

	for (i = 0; i < ncpus; i++) {
		err = pthread_join(tcbs[i].thr, NULL);
		assert(err == 0);
	}

	/*
	 * First pass: TSC values in hex.  The j length modifier expects
	 * (u)intmax_t arguments, so the fixed-width values are cast.
	 */
	for (i = 0; i < ncpus; i++) {
#ifdef HAVE_RDPID
		printf("CPU %d: aux_rdpid %ju, aux_rdtscp %u, tsc_rdtscp "
		    "%#0jx, tsc_rdtsc %#0jx, delta %jd\n", i,
		    (uintmax_t)tcbs[i].aux_rdpid, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#else
		printf("CPU %d: aux %u, tsc_rdtscp %#0jx, tsc_rdtsc "
		    "%#0jx, delta %jd\n", i, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#endif
	}
	printf("\n");

	/* Second pass: the same samples with TSC values in decimal. */
	for (i = 0; i < ncpus; i++) {
#ifdef HAVE_RDPID
		printf("CPU %d: aux_rdpid %ju, aux_rdtscp %u, tsc_rdtscp "
		    "%ju, tsc_rdtsc %ju, delta %jd\n", i,
		    (uintmax_t)tcbs[i].aux_rdpid, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#else
		printf("CPU %d: aux %u, tsc_rdtscp %ju, tsc_rdtsc %ju, "
		    "delta %jd\n", i, tcbs[i].aux_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtscp,
		    (uintmax_t)tcbs[i].tsc_rdtsc,
		    (intmax_t)(int64_t)(tcbs[i].tsc_rdtsc -
		    tcbs[i].tsc_rdtscp));
#endif
	}

	free(tcbs);
	err = pthread_cond_destroy(&testthrs_enabled_cond);
	assert(err == 0);
	err = pthread_mutex_destroy(&testthrs_enabled_lock);
	assert(err == 0);
	return (0);
}
```
```
lang=sh, name=runrdtscp, lines=24
#!/bin/sh
# References:
# - 'bhyve(8)'
# - 'bhyvectl(8)'
# - '/usr/share/examples/bhyve/vmrun.sh'
# Guest configuration.
VMNAME=rdtscp
# Number of guest vCPUs; each one is pinned to its own host CPU below, so the
# host needs at least this many CPUs.
NCPUS=4
RAMSIZE=128M
DISKIMAGE="${HOME}/rdtscp.raw"
TAP_DEV=tap0
TAP_MAC="00:a0:98:df:46:01"
CONS_DEV=stdio

# Build the '-p vcpu:hostcpu' flags so that the vCPUs are pinned to host CPUs
# in opposing numbering order (vCPU 0 -> host CPU NCPUS-1, ..., vCPU NCPUS-1
# -> host CPU 0).
PINNING=""
vcpu=0
while [ "${vcpu}" -lt "${NCPUS}" ]; do
	PINNING="${PINNING} -p ${vcpu}:$((NCPUS - 1 - vcpu))"
	vcpu=$((vcpu + 1))
done

# Destroy any instance left over from a previous run; output and failure
# are deliberately ignored.
sudo bhyvectl --vm="${VMNAME}" --destroy > /dev/null 2>&1

sudo bhyveload \
	-c "${CONS_DEV}" \
	-m "${RAMSIZE}" \
	-d "${DISKIMAGE}" \
	"${VMNAME}"

# ${PINNING} is intentionally unquoted so it splits into separate arguments.
sudo bhyve \
	-A \
	-H \
	-P \
	-c "${NCPUS}" \
	${PINNING} \
	-m "${RAMSIZE}" \
	-s 0,hostbridge \
	-s 1,lpc \
	-s 2,virtio-blk,"${DISKIMAGE}" \
	-s 3,virtio-net,"${TAP_DEV}",mac="${TAP_MAC}" \
	-l com1,"${CONS_DEV}" \
	"${VMNAME}"
# Must directly follow the bhyve invocation so $? is bhyve's exit status.
BHYVE_EXIT=$?
# Report why bhyve exited.  printf is used for the prefix because
# 'echo -n' is not portable across sh implementations.
printf 'bhyve exited: '
case ${BHYVE_EXIT} in
0)
	echo "reboot (0)"
	;;
1)
	echo "powered off (1)"
	;;
2)
	echo "halted (2)"
	;;
3)
	echo "TRIPLE-FAULT (3)"
	;;
4)
	echo "an error occurred (4)"
	;;
*)
	echo "UNKNOWN REASON: ${BHYVE_EXIT}"
	;;
esac

case ${BHYVE_EXIT} in
0|1|2)
	# Cleanup /dev/vmm entry when bhyve did not exit
	# due to an error.
	echo "Destroying VM..."
	sudo bhyvectl --vm=${VMNAME} --destroy > /dev/null 2>&1
	echo done
	;;
esac

# Propagate bhyve's exit status to the caller.
exit ${BHYVE_EXIT}
```