commit 442f94ec0bd6cdeaefbdda627f08036be4378849 Author: Mike Gerdts <[email protected]> Date: Thu Feb 22 04:10:22 2018 +0000 avoid false sharing in vcpu array diff --git a/usr/src/uts/i86pc/io/vmm/vmm.c b/usr/src/uts/i86pc/io/vmm/vmm.c index 0e55799..7dd48be 100644 --- a/usr/src/uts/i86pc/io/vmm/vmm.c +++ b/usr/src/uts/i86pc/io/vmm/vmm.c @@ -124,6 +124,7 @@ struct vcpu { void *stats; /* (a,i) statistics */ struct vm_exit exitinfo; /* (x) exit reason and collateral */ uint64_t nextrip; /* (x) next instruction to execute */ + uint64_t pad; /* avoid false sharing */ }; #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx)) @@ -180,7 +181,7 @@ struct vm { struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ - struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ + struct vcpu *vcpu; /* (i) guest vcpus */ #ifndef __FreeBSD__ krwlock_t ioport_rwlock; list_t ioport_hooks; @@ -306,6 +307,9 @@ vcpu_init(struct vm *vm, int vcpu_id, bool create) vcpu = &vm->vcpu[vcpu_id]; + KASSERT(((uintptr_t)vcpu & (CPU_CACHE_COHERENCE_SIZE - 1)) == 0, + ("vcpu_init: cpu %d not aligned on cache line", vcpu_id)); + if (create) { #ifdef __FreeBSD__ KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already " @@ -504,6 +508,10 @@ vm_init(struct vm *vm, bool create) vm->suspend = 0; CPU_ZERO(&vm->suspended_cpus); + if (create) { + vm->vcpu = kmem_zalloc(VM_MAXCPU * sizeof (*vm->vcpu), + KM_SLEEP); + } for (i = 0; i < VM_MAXCPU; i++) vcpu_init(vm, i, create); }

[root@emy-17 /root]# mdb -k Loading modules: [ unix genunix specfs dtrace mac cpu.generic uppc apix scsi_vhci ufs vmm ip hook neti sockfs arp usba xhci stmf_sbd stmf zfs mm lofs idm sata crypto random cpc logindmux ptm kvm sppp nsmb smbsrv nfs ] > ::sizeof struct vcpu sizeof (struct vcpu) = 0x100 > ::sizeof struct vcpu |=D 256

Strategies

In svm_softc.h I found something that makes alignment on page boundaries easy. We could do the same for cache line boundaries too.

struct svm_vcpu {
        struct vmcb     vmcb;    /* hardware saved vcpu context */
        struct svm_regctx swctx; /* software saved vcpu context */
        uint64_t        vmcb_pa; /* VMCB physical address */
        uint64_t        nextrip; /* next instruction to be executed by guest */
        int             lastcpu; /* host cpu that the vcpu last ran on */
        uint32_t        dirty;   /* state cache bits that must be cleared */
        long            eptgen;  /* pmap->pm_eptgen when the vcpu last ran */
        struct asid     asid;
} __aligned(PAGE_SIZE);

In mdb we can see that almost an entire page of pad is added.

> ::sizeof struct svm_vcpu    
sizeof (struct svm_vcpu) = 0x2000
> ::print -ta struct svm_vcpu
0 struct svm_vcpu {
    0 struct vmcb vmcb {
...
    1090 struct asid asid {
        1090 uint64_t gen 
        1098 uint32_t num 
    }
}

`usr/src/cmd/bhyve/bhyverun.c`

This only includes those that are not in #ifdef __FreeBSD__.

`vmexit` array

struct vm_exit should be padded to 192 bytes (add uint64_t pad[7])

static struct vm_exit vmexit[VM_MAXCPU];

> ::sizeof struct vm_exit
sizeof (struct vm_exit) = 0x88
> ::sizeof struct vm_exit |=D
                136

`mt_vmm_info` array

Ugh. We need 40 bytes of pad on a 24 byte structure. Maybe this structure could be combined with something else.

struct mt_vmm_info {
        pthread_t       mt_thr;
        struct vmctx    *mt_ctx;
        int             mt_vcpu;        
} mt_vmm_info[VM_MAXCPU];

> ::sizeof struct mt_vmm_info
sizeof (struct mt_vmm_info) = 0x18
> ::sizeof struct mt_vmm_info |=D
                24

`struct vmx`

/* virtual machine softc */
struct vmx {
        struct vmcs     vmcs[VM_MAXCPU];        /* one vmcs per virtual cpu */
        struct apic_page apic_page[VM_MAXCPU];  /* one apic page per vcpu */
        char            msr_bitmap[PAGE_SIZE];
        struct pir_desc pir_desc[VM_MAXCPU];
        uint64_t        guest_msrs[VM_MAXCPU][GUEST_MSR_NUM];
#ifndef __FreeBSD__
        uint64_t        host_msrs[VM_MAXCPU][GUEST_MSR_NUM];
        uint64_t        tsc_offset[VM_MAXCPU];
#endif
        struct vmxctx   ctx[VM_MAXCPU];
        struct vmxcap   cap[VM_MAXCPU];
        struct vmxstate state[VM_MAXCPU];
        uint64_t        eptp;
        struct vm       *vm;
        long            eptgen[MAXCPU];         /* cached pmap->pm_eptgen */
};

We can see here that each vcpu's guest_msrs and host_msrs share cache lines with neighboring vcpus.

> ::print -ta struct vmx
0 struct vmx {
    0 struct vmcs [16] vmcs 
    20000 struct apic_page [16] apic_page 
    30000 char [4096] msr_bitmap 
    31000 struct pir_desc [16] pir_desc 
    31400 uint64_t [16][6] guest_msrs 
    31700 uint64_t [16][6] host_msrs 
    31a00 uint64_t [16] tsc_offset 
    31a80 struct vmxctx [16] ctx 
    32380 struct vmxcap [16] cap 
    32440 struct vmxstate [16] state 
    32540 uint64_t eptp 
    32548 struct vm *vm 
    32550 long [256] eptgen 
}

This could probably be greatly improved as:

struct vmx_vcpu {
        struct vmcs     vmcs;        /* one vmcs per virtual cpu */
        struct apic_page apic_page;  /* one apic page per vcpu */
        struct pir_desc pir_desc;
        uint64_t        guest_msrs[GUEST_MSR_NUM];
#ifndef __FreeBSD__
        uint64_t        host_msrs[GUEST_MSR_NUM];
        uint64_t        tsc_offset;
#endif
        struct vmxctx   ctx;
        struct vmxcap   cap;
        struct vmxstate state;
        long            eptgen;         /* cached pmap->pm_eptgen */
} __aligned(CPU_CACHE_COHERENCE_SIZE);

struct vmx {
        char            msr_bitmap[PAGE_SIZE];
        struct vmx_vcpu vmx_vcpu[VM_MAXCPU];
        uint64_t        eptp;
        struct vm       *vm;
}

mgerdts/README.md

Baseline

`struct vcpu` alignment

Strategies

`usr/src/cmd/bhyve/bhyverun.c`

`vmexit` array

`mt_vmm_info` array

`struct vmx`

mgerdts/README.md

Baseline

struct vcpu alignment

Strategies

usr/src/cmd/bhyve/bhyverun.c

vmexit array

mt_vmm_info array

struct vmx

`struct vcpu` alignment

`usr/src/cmd/bhyve/bhyverun.c`

`vmexit` array

`mt_vmm_info` array

`struct vmx`