@Pacifist117
Created January 16, 2014 14:30
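
What follows is the raw output of a recursive `diff -bur` run from inside a modified linux-3.11-rc1 tree (`.`, carrying the Nitro hooks plus assorted newer KVM changes) against a pristine copy (`../linux-3.11-rc1`). Note the direction: `-` lines belong to the modified tree and `+` lines to stock 3.11-rc1, so each hunk reads as "what removing the modifications would change". A sketch of the invocation, inferred from the per-file headers and the `Only in .: nitro_diff` entry near the end:

```sh
# Run from the top of the modified tree; the pristine tree sits alongside it.
# -b ignores whitespace-only changes, -u emits unified hunks, -r recurses.
diff -bur . ../linux-3.11-rc1 > nitro_diff
```
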
diff -bur ./arch/arm/kvm/arm.c ../linux-3.11-rc1/arch/arm/kvm/arm.c
--- ./arch/arm/kvm/arm.c 2014-01-09 19:34:09.113458259 -0500
+++ ../linux-3.11-rc1/arch/arm/kvm/arm.c 2013-07-14 18:18:27.000000000 -0400
@@ -219,10 +219,6 @@
return -EINVAL;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/ia64/kvm/kvm-ia64.c ../linux-3.11-rc1/arch/ia64/kvm/kvm-ia64.c
--- ./arch/ia64/kvm/kvm-ia64.c 2014-01-09 19:34:09.523458276 -0500
+++ ../linux-3.11-rc1/arch/ia64/kvm/kvm-ia64.c 2013-07-14 18:18:27.000000000 -0400
@@ -1560,10 +1560,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/mips/kvm/kvm_mips.c ../linux-3.11-rc1/arch/mips/kvm/kvm_mips.c
--- ./arch/mips/kvm/kvm_mips.c 2014-01-09 19:34:09.723458441 -0500
+++ ../linux-3.11-rc1/arch/mips/kvm/kvm_mips.c 2013-07-14 18:18:27.000000000 -0400
@@ -208,10 +208,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/powerpc/kvm/powerpc.c ../linux-3.11-rc1/arch/powerpc/kvm/powerpc.c
--- ./arch/powerpc/kvm/powerpc.c 2014-01-09 19:34:09.903464521 -0500
+++ ../linux-3.11-rc1/arch/powerpc/kvm/powerpc.c 2013-07-14 18:18:27.000000000 -0400
@@ -420,10 +420,6 @@
return kvmppc_core_create_memslot(slot, npages);
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/s390/include/asm/kvm_host.h ../linux-3.11-rc1/arch/s390/include/asm/kvm_host.h
--- ./arch/s390/include/asm/kvm_host.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -274,14 +274,6 @@
int css_support;
};
-#define KVM_HVA_ERR_BAD (-1UL)
-#define KVM_HVA_ERR_RO_BAD (-2UL)
-
-static inline bool kvm_is_error_hva(unsigned long addr)
-{
- return IS_ERR_VALUE(addr);
-}
-
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
diff -bur ./arch/s390/include/asm/mmu_context.h ../linux-3.11-rc1/arch/s390/include/asm/mmu_context.h
--- ./arch/s390/include/asm/mmu_context.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/mmu_context.h 2013-07-14 18:18:27.000000000 -0400
@@ -21,7 +21,24 @@
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
+ if (current->mm && current->mm->context.alloc_pgste) {
+ /*
+ * alloc_pgste indicates, that any NEW context will be created
+ * with extended page tables. The old context is unchanged. The
+ * page table allocation and the page table operations will
+ * look at has_pgste to distinguish normal and extended page
+ * tables. The only way to create extended page tables is to
+ * set alloc_pgste and then create a new context (e.g. dup_mm).
+ * The page table allocation is called after init_new_context
+ * and if has_pgste is set, it will create extended page
+ * tables.
+ */
+ mm->context.has_pgste = 1;
+ mm->context.alloc_pgste = 1;
+ } else {
mm->context.has_pgste = 0;
+ mm->context.alloc_pgste = 0;
+ }
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
diff -bur ./arch/s390/include/asm/mmu.h ../linux-3.11-rc1/arch/s390/include/asm/mmu.h
--- ./arch/s390/include/asm/mmu.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/mmu.h 2013-07-14 18:18:27.000000000 -0400
@@ -12,6 +12,8 @@
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* Cloned contexts will be created with extended page tables. */
+ unsigned int alloc_pgste:1;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
} mm_context_t;
diff -bur ./arch/s390/include/asm/pgtable.h ../linux-3.11-rc1/arch/s390/include/asm/pgtable.h
--- ./arch/s390/include/asm/pgtable.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/pgtable.h 2013-07-14 18:18:27.000000000 -0400
@@ -1361,17 +1361,6 @@
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
-{
- int active = (mm == current->active_mm) ? 1 : 0;
-
- if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
- __pmd_idte(address, pmdp);
- else
- mm->context.flush_mm = 1;
-}
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PGTABLE_DEPOSIT
diff -bur ./arch/s390/include/asm/processor.h ../linux-3.11-rc1/arch/s390/include/asm/processor.h
--- ./arch/s390/include/asm/processor.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/processor.h 2013-07-14 18:18:27.000000000 -0400
@@ -43,7 +43,6 @@
#ifndef CONFIG_64BIT
#define TASK_SIZE (1UL << 31)
-#define TASK_MAX_SIZE (1UL << 31)
#define TASK_UNMAPPED_BASE (1UL << 30)
#else /* CONFIG_64BIT */
@@ -52,7 +51,6 @@
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
-#define TASK_MAX_SIZE (1UL << 53)
#endif /* CONFIG_64BIT */
diff -bur ./arch/s390/kvm/diag.c ../linux-3.11-rc1/arch/s390/kvm/diag.c
--- ./arch/s390/kvm/diag.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/diag.c 2013-07-14 18:18:27.000000000 -0400
@@ -119,21 +119,12 @@
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
* - gpr 3 contains the virtqueue index (passed as datamatch)
- * - gpr 4 contains the index on the bus (optionally)
*/
- ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
vcpu->run->s.regs.gprs[2],
- 8, &vcpu->run->s.regs.gprs[3],
- vcpu->run->s.regs.gprs[4]);
+ 8, &vcpu->run->s.regs.gprs[3]);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
- /*
- * Return cookie in gpr 2, but don't overwrite the register if the
- * diagnose will be handled by userspace.
- */
- if (ret != -EOPNOTSUPP)
- vcpu->run->s.regs.gprs[2] = ret;
- /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+ /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
return ret < 0 ? ret : 0;
}
diff -bur ./arch/s390/kvm/kvm-s390.c ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.c
--- ./arch/s390/kvm/kvm-s390.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.c 2013-07-14 18:18:27.000000000 -0400
@@ -28,7 +28,6 @@
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
-#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -85,15 +84,9 @@
{ NULL }
};
-unsigned long *vfacilities;
+static unsigned long long *facilities;
static struct gmap_notifier gmap_notifier;
-/* test availability of vfacility */
-static inline int test_vfacility(unsigned long nr)
-{
- return __test_facility(nr, (void *) vfacilities);
-}
-
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
@@ -394,7 +387,7 @@
vcpu->arch.sie_block->ecb = 6;
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
- vcpu->arch.sie_block->fac = (int) (long) vfacilities;
+ vcpu->arch.sie_block->fac = (int) (long) facilities;
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
@@ -1063,10 +1056,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1133,20 +1122,20 @@
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
*/
- vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
- if (!vfacilities) {
+ facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!facilities) {
kvm_exit();
return -ENOMEM;
}
- memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
- vfacilities[0] &= 0xff82fff3f47c0000UL;
- vfacilities[1] &= 0x001c000000000000UL;
+ memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
+ facilities[0] &= 0xff82fff3f47c0000ULL;
+ facilities[1] &= 0x001c000000000000ULL;
return 0;
}
static void __exit kvm_s390_exit(void)
{
- free_page((unsigned long) vfacilities);
+ free_page((unsigned long) facilities);
kvm_exit();
}
diff -bur ./arch/s390/kvm/kvm-s390.h ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.h
--- ./arch/s390/kvm/kvm-s390.h 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.h 2013-07-14 18:18:27.000000000 -0400
@@ -24,9 +24,6 @@
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-/* declare vfacilities extern */
-extern unsigned long *vfacilities;
-
/* negativ values are error codes, positive values for internal conditions */
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
#define SIE_INTERCEPT_UCONTROL (1<<1)
@@ -115,13 +112,6 @@
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
-/* Set the condition code in the guest program status word */
-static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
-{
- vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
- vcpu->arch.sie_block->gpsw.mask |= cc << 44;
-}
-
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
diff -bur ./arch/s390/kvm/priv.c ../linux-3.11-rc1/arch/s390/kvm/priv.c
--- ./arch/s390/kvm/priv.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/priv.c 2013-07-14 18:18:27.000000000 -0400
@@ -163,7 +163,8 @@
kfree(inti);
no_interrupt:
/* Set condition code and we're done. */
- kvm_s390_set_psw_cc(vcpu, cc);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
return 0;
}
@@ -218,13 +219,15 @@
* Set condition code 3 to stop the guest from issueing channel
* I/O instructions.
*/
- kvm_s390_set_psw_cc(vcpu, 3);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
return 0;
}
}
static int handle_stfl(struct kvm_vcpu *vcpu)
{
+ unsigned int facility_list;
int rc;
vcpu->stat.instruction_stfl++;
@@ -232,13 +235,15 @@
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ /* only pass the facility bits, which we can handle */
+ facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
+
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
- vfacilities, 4);
+ &facility_list, sizeof(facility_list));
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- VCPU_EVENT(vcpu, 5, "store facility list value %x",
- *(unsigned int *) vfacilities);
- trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
+ VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
+ trace_kvm_s390_handle_stfl(vcpu, facility_list);
return 0;
}
@@ -381,7 +386,7 @@
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
if (fc > 3) {
- kvm_s390_set_psw_cc(vcpu, 3);
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */
return 0;
}
@@ -391,7 +396,7 @@
if (fc == 0) {
vcpu->run->s.regs.gprs[0] = 3 << 28;
- kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */
return 0;
}
@@ -425,11 +430,12 @@
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
- kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
out_no_data:
- kvm_s390_set_psw_cc(vcpu, 3);
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
out_exception:
free_page(mem);
return rc;
@@ -487,12 +493,12 @@
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
- vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
if (reg2) {
- vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000;
vcpu->run->s.regs.gprs[reg2] |=
- vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff;
}
return 0;
}
diff -bur ./arch/s390/mm/pgtable.c ../linux-3.11-rc1/arch/s390/mm/pgtable.c
--- ./arch/s390/mm/pgtable.c 2014-01-09 19:34:10.003464567 -0500
+++ ../linux-3.11-rc1/arch/s390/mm/pgtable.c 2013-07-14 18:18:27.000000000 -0400
@@ -335,7 +335,7 @@
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > TASK_MAX_SIZE ||
+ if (len == 0 || from + len > PGDIR_SIZE ||
from + len < from || to + len < to)
return -EINVAL;
@@ -731,11 +731,6 @@
spin_unlock(&gmap_notifier_lock);
}
-static inline int page_table_with_pgste(struct page *page)
-{
- return atomic_read(&page->_mapcount) == 0;
-}
-
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -755,7 +750,7 @@
mp->vmaddr = vmaddr & PMD_MASK;
INIT_LIST_HEAD(&mp->mapper);
page->index = (unsigned long) mp;
- atomic_set(&page->_mapcount, 0);
+ atomic_set(&page->_mapcount, 3);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
@@ -826,11 +821,6 @@
#else /* CONFIG_PGSTE */
-static inline int page_table_with_pgste(struct page *page)
-{
- return 0;
-}
-
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -907,12 +897,12 @@
struct page *page;
unsigned int bit, mask;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_has_pgste(mm)) {
gmap_disconnect_pgtable(mm, table);
return page_table_free_pgste(table);
}
/* Free 1K/2K page table fragment of a 4K page */
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -950,14 +940,14 @@
unsigned int bit, mask;
mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_has_pgste(mm)) {
gmap_disconnect_pgtable(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
list_del(&page->lru);
@@ -1043,120 +1033,36 @@
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
+void thp_split_vma(struct vm_area_struct *vma)
{
unsigned long addr;
+ struct page *page;
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ page = follow_page(vma, addr, FOLL_SPLIT);
+ }
}
-static inline void thp_split_mm(struct mm_struct *mm)
+void thp_split_mm(struct mm_struct *mm)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = mm->mmap;
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ while (vma != NULL) {
thp_split_vma(vma);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ vma = vma->vm_next;
}
- mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
- struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end)
-{
- unsigned long next, *table, *new;
- struct page *page;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
-again:
- if (pmd_none_or_clear_bad(pmd))
- continue;
- table = (unsigned long *) pmd_deref(*pmd);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- continue;
- /* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm, addr);
- if (!new) {
- mm->context.has_pgste = 0;
- continue;
- }
- spin_lock(&mm->page_table_lock);
- if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
- /* Nuke pmd entry pointing to the "short" page table */
- pmdp_flush_lazy(mm, addr, pmd);
- pmd_clear(pmd);
- /* Copy ptes from old table to new table */
- memcpy(new, table, PAGE_SIZE/2);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- /* Establish new table */
- pmd_populate(mm, pmd, (pte_t *) new);
- /* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table);
- new = NULL;
- }
- spin_unlock(&mm->page_table_lock);
- if (new) {
- page_table_free_pgste(new);
- goto again;
- }
- } while (pmd++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
- struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pud_t *pud;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
- } while (pud++, addr = next, addr != end);
-
- return addr;
-}
-
-static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pgd_t *pgd;
-
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
-}
-
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- struct mmu_gather tlb;
+ struct mm_struct *mm, *old_mm;
/* Do we have switched amode? If no, we cannot do sie */
if (s390_user_mode == HOME_SPACE_MODE)
@@ -1166,16 +1072,57 @@
if (mm_has_pgste(tsk->mm))
return 0;
- down_write(&mm->mmap_sem);
+ /* lets check if we are allowed to replace the mm */
+ task_lock(tsk);
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+#ifdef CONFIG_AIO
+ !hlist_empty(&tsk->mm->ioctx_list) ||
+#endif
+ tsk->mm != tsk->active_mm) {
+ task_unlock(tsk);
+ return -EINVAL;
+ }
+ task_unlock(tsk);
+
+ /* we copy the mm and let dup_mm create the page tables with_pgstes */
+ tsk->mm->context.alloc_pgste = 1;
+ /* make sure that both mms have a correct rss state */
+ sync_mm_rss(tsk->mm);
+ mm = dup_mm(tsk);
+ tsk->mm->context.alloc_pgste = 0;
+ if (!mm)
+ return -ENOMEM;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- /* Reallocate the page tables with pgstes */
- mm->context.has_pgste = 1;
- tlb_gather_mmu(&tlb, mm, 0);
- page_table_realloc(&tlb, mm, 0, TASK_SIZE);
- tlb_finish_mmu(&tlb, 0, -1);
- up_write(&mm->mmap_sem);
- return mm->context.has_pgste ? 0 : -ENOMEM;
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
+ /* Now lets check again if something happened */
+ task_lock(tsk);
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+#ifdef CONFIG_AIO
+ !hlist_empty(&tsk->mm->ioctx_list) ||
+#endif
+ tsk->mm != tsk->active_mm) {
+ mmput(mm);
+ task_unlock(tsk);
+ return -EINVAL;
+ }
+
+ /* ok, we are alone. No ptrace, no threads, etc. */
+ old_mm = tsk->mm;
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ atomic_inc(&mm->context.attach_count);
+ atomic_dec(&old_mm->context.attach_count);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ preempt_enable();
+ task_unlock(tsk);
+ mmput(old_mm);
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
diff -bur ./arch/x86/include/asm/kvm_host.h ../linux-3.11-rc1/arch/x86/include/asm/kvm_host.h
--- ./arch/x86/include/asm/kvm_host.h 2014-01-09 19:34:10.183464607 -0500
+++ ../linux-3.11-rc1/arch/x86/include/asm/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -323,7 +323,6 @@
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
- u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
@@ -804,7 +803,7 @@
enum emulation_result {
EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
+ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
EMULATE_FAIL, /* can't emulate this instruction */
};
diff -bur ./arch/x86/include/asm/pvclock.h ../linux-3.11-rc1/arch/x86/include/asm/pvclock.h
--- ./arch/x86/include/asm/pvclock.h 2014-01-09 19:34:10.193458186 -0500
+++ ../linux-3.11-rc1/arch/x86/include/asm/pvclock.h 2013-07-14 18:18:27.000000000 -0400
@@ -93,6 +93,7 @@
struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
Only in ./arch/x86/include: generated
diff -bur ./arch/x86/kernel/pvclock.c ../linux-3.11-rc1/arch/x86/kernel/pvclock.c
--- ./arch/x86/kernel/pvclock.c 2014-01-09 19:34:10.223458452 -0500
+++ ../linux-3.11-rc1/arch/x86/kernel/pvclock.c 2013-07-14 18:18:27.000000000 -0400
@@ -128,7 +128,46 @@
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -142,12 +181,17 @@
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+ pvclock_vdso_info = i;
+
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
+
+ register_task_migration_notifier(&pvclock_migrate);
+
return 0;
}
#endif
diff -bur ./arch/x86/kvm/lapic.c ../linux-3.11-rc1/arch/x86/kvm/lapic.c
--- ./arch/x86/kvm/lapic.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/lapic.c 2013-07-14 18:18:27.000000000 -0400
@@ -79,6 +79,16 @@
*((u32 *) (apic->regs + reg_off)) = val;
}
+static inline int apic_test_and_set_vector(int vec, void *bitmap)
+{
+ return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_test_and_clear_vector(int vec, void *bitmap)
+{
+ return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -321,10 +331,10 @@
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
-static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
+static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
apic->irr_pending = true;
- apic_set_vector(vec, apic->regs + APIC_IRR);
+ return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}
static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -671,21 +681,28 @@
if (unlikely(!apic_enabled(apic)))
break;
- result = 1;
-
if (dest_map)
__set_bit(vcpu->vcpu_id, dest_map);
- if (kvm_x86_ops->deliver_posted_interrupt)
+ if (kvm_x86_ops->deliver_posted_interrupt) {
+ result = 1;
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
- apic_set_irr(vector, apic);
+ } else {
+ result = !apic_test_and_set_irr(vector, apic);
+
+ if (!result) {
+ if (trig_mode)
+ apic_debug("level trig mode repeatedly "
+ "for vector %d", vector);
+ goto out;
+ }
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
+out:
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector, false);
+ trig_mode, vector, !result);
break;
case APIC_DM_REMRD:
diff -bur ./arch/x86/kvm/Makefile ../linux-3.11-rc1/arch/x86/kvm/Makefile
--- ./arch/x86/kvm/Makefile 2014-01-10 15:40:08.885432013 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/Makefile 2013-07-14 18:18:27.000000000 -0400
@@ -15,7 +15,7 @@
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o cpuid.o pmu.o
-kvm-intel-y += vmx.o nitro_main.o
+kvm-intel-y += vmx.o
kvm-amd-y += svm.o
obj-$(CONFIG_KVM) += kvm.o
diff -bur ./arch/x86/kvm/mmu.c ../linux-3.11-rc1/arch/x86/kvm/mmu.c
--- ./arch/x86/kvm/mmu.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/mmu.c 2013-07-14 18:18:27.000000000 -0400
@@ -2811,13 +2811,6 @@
static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
{
/*
- * Do not fix the mmio spte with invalid generation number which
- * need to be updated by slow page fault path.
- */
- if (unlikely(error_code & PFERR_RSVD_MASK))
- return false;
-
- /*
* #PF can be fast only if the shadow page table is present and it
* is caused by write-protect, that means we just need change the
* W bit of the spte which can be done out of mmu-lock.
@@ -4182,7 +4175,7 @@
switch (er) {
case EMULATE_DONE:
return 1;
- case EMULATE_USER_EXIT:
+ case EMULATE_DO_MMIO:
++vcpu->stat.mmio_exits;
/* fall through */
case EMULATE_FAIL:
@@ -4390,8 +4383,11 @@
/*
* The very rare case: if the generation-number is round,
* zap all shadow pages.
+ *
+ * The max value is MMIO_MAX_GEN - 1 since it is not called
+ * when mark memslot invalid.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+ if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
Only in ./arch/x86/kvm: nitro_main.c
diff -bur ./arch/x86/kvm/pmu.c ../linux-3.11-rc1/arch/x86/kvm/pmu.c
--- ./arch/x86/kvm/pmu.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/pmu.c 2013-07-14 18:18:27.000000000 -0400
@@ -160,7 +160,7 @@
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
unsigned config, bool exclude_user, bool exclude_kernel,
- bool intr, bool in_tx, bool in_tx_cp)
+ bool intr)
{
struct perf_event *event;
struct perf_event_attr attr = {
@@ -173,10 +173,6 @@
.exclude_kernel = exclude_kernel,
.config = config,
};
- if (in_tx)
- attr.config |= HSW_IN_TX;
- if (in_tx_cp)
- attr.config |= HSW_IN_TX_CHECKPOINTED;
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
@@ -230,9 +226,7 @@
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- HSW_IN_TX |
- HSW_IN_TX_CHECKPOINTED))) {
+ ARCH_PERFMON_EVENTSEL_CMASK))) {
config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
unit_mask);
if (config != PERF_COUNT_HW_MAX)
@@ -245,9 +239,7 @@
reprogram_counter(pmc, type, config,
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT,
- (eventsel & HSW_IN_TX),
- (eventsel & HSW_IN_TX_CHECKPOINTED));
+ eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
@@ -264,7 +256,7 @@
arch_events[fixed_pmc_events[idx]].event_type,
!(en & 0x2), /* exclude user */
!(en & 0x1), /* exclude kernel */
- pmi, false, false);
+ pmi);
}
static inline u8 fixed_en_pmi(u64 ctrl, int idx)
@@ -416,7 +408,7 @@
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
return 0;
- if (!(data & pmu->reserved_bits)) {
+ if (!(data & 0xffffffff00200000ull)) {
reprogram_gp_counter(pmc, data);
return 0;
}
@@ -458,7 +450,6 @@
pmu->counter_bitmask[KVM_PMC_GP] = 0;
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
- pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
@@ -487,12 +478,6 @@
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
-
- entry = kvm_find_cpuid_entry(vcpu, 7, 0);
- if (entry &&
- (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
- pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff -bur ./arch/x86/kvm/vmx.c ../linux-3.11-rc1/arch/x86/kvm/vmx.c
--- ./arch/x86/kvm/vmx.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/vmx.c 2013-07-14 18:18:27.000000000 -0400
@@ -373,7 +373,6 @@
* we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
- u64 msr_ia32_feature_control;
};
#define POSTED_INTR_ON 0
@@ -2283,11 +2282,8 @@
switch (msr_index) {
case MSR_IA32_FEATURE_CONTROL:
- if (nested_vmx_allowed(vcpu)) {
- *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+ *pdata = 0;
break;
- }
- return 0;
case MSR_IA32_VMX_BASIC:
/*
* This MSR reports some information about VMX support. We
@@ -2360,24 +2356,14 @@
return 1;
}
-static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
- u32 msr_index = msr_info->index;
- u64 data = msr_info->data;
- bool host_initialized = msr_info->host_initiated;
-
if (!nested_vmx_allowed(vcpu))
return 0;
- if (msr_index == MSR_IA32_FEATURE_CONTROL) {
- if (!host_initialized &&
- to_vmx(vcpu)->nested.msr_ia32_feature_control
- & FEATURE_CONTROL_LOCKED)
- return 0;
- to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
+ if (msr_index == MSR_IA32_FEATURE_CONTROL)
+ /* TODO: the right thing. */
return 1;
- }
-
/*
* No need to treat VMX capability MSRs specially: If we don't handle
* them, handle_wrmsr will #GP(0), which is correct (they are readonly)
@@ -2508,7 +2494,7 @@
return 1;
/* Otherwise falls through */
default:
- if (vmx_set_vmx_msr(vcpu, msr_info))
+ if (vmx_set_vmx_msr(vcpu, msr_index, data))
break;
msr = find_msr_entry(vmx, msr_index);
if (msr) {
@@ -5452,7 +5438,7 @@
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
- if (err == EMULATE_USER_EXIT) {
+ if (err == EMULATE_DO_MMIO) {
ret = 0;
goto out;
}
@@ -5581,47 +5567,8 @@
free_loaded_vmcs(&vmx->vmcs01);
}
-/*
- * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
- * set the success or error code of an emulated VMX instruction, as specified
- * by Vol 2B, VMX Instruction Reference, "Conventions".
- */
-static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
-{
- vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
- X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
-}
-
-static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
-{
- vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
- X86_EFLAGS_SF | X86_EFLAGS_OF))
- | X86_EFLAGS_CF);
-}
-
static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
- u32 vm_instruction_error)
-{
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
- /*
- * failValid writes the error number to the current VMCS, which
- * can't be done there isn't a current VMCS.
- */
- nested_vmx_failInvalid(vcpu);
- return;
- }
- vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
- X86_EFLAGS_SF | X86_EFLAGS_OF))
- | X86_EFLAGS_ZF);
- get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
- /*
- * We don't need to force a shadow sync because
- * VM_INSTRUCTION_ERROR is not shadowed
- */
-}
+ u32 vm_instruction_error);
/*
* Emulate the VMXON instruction.
@@ -5636,8 +5583,6 @@
struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs *shadow_vmcs;
- const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
- | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
/* The Intel VMX Instruction Reference lists a bunch of bits that
* are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -5666,13 +5611,6 @@
skip_emulated_instruction(vcpu);
return 1;
}
-
- if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
- != VMXON_NEEDED_FEATURES) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs)
@@ -5690,7 +5628,6 @@
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
- nested_vmx_succeed(vcpu);
return 1;
}
@@ -5775,7 +5712,6 @@
return 1;
free_nested(to_vmx(vcpu));
skip_emulated_instruction(vcpu);
- nested_vmx_succeed(vcpu);
return 1;
}
@@ -5832,6 +5768,48 @@
return 0;
}
+/*
+ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
+ * set the success or error code of an emulated VMX instruction, as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions".
+ */
+static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
+{
+ vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
+}
+
+static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
+{
+ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+ X86_EFLAGS_SF | X86_EFLAGS_OF))
+ | X86_EFLAGS_CF);
+}
+
+static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
+ u32 vm_instruction_error)
+{
+ if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
+ /*
+ * failValid writes the error number to the current VMCS, which
+ * can't be done there isn't a current VMCS.
+ */
+ nested_vmx_failInvalid(vcpu);
+ return;
+ }
+ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_SF | X86_EFLAGS_OF))
+ | X86_EFLAGS_ZF);
+ get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+ /*
+ * We don't need to force a shadow sync because
+ * VM_INSTRUCTION_ERROR is not shadowed
+ */
+}
+
/* Emulate the VMCLEAR instruction */
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
@@ -5994,8 +5972,8 @@
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
- const int num_fields = max_shadow_read_write_fields;
+ unsigned long *fields = (unsigned long *)shadow_read_write_fields;
+ int num_fields = max_shadow_read_write_fields;
vmcs_load(shadow_vmcs);
@@ -6024,11 +6002,12 @@
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
- shadow_read_write_fields,
- shadow_read_only_fields
+ unsigned long *fields[] = {
+ (unsigned long *)shadow_read_write_fields,
+ (unsigned long *)shadow_read_only_fields
};
- const int max_fields[] = {
+ int num_lists = ARRAY_SIZE(fields);
+ int max_fields[] = {
max_shadow_read_write_fields,
max_shadow_read_only_fields
};
@@ -6039,7 +6018,7 @@
vmcs_load(shadow_vmcs);
- for (q = 0; q < ARRAY_SIZE(fields); q++) {
+ for (q = 0; q < num_lists; q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
@@ -7969,8 +7948,6 @@
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- struct kvm_segment seg;
-
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
@@ -8024,6 +8001,16 @@
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base);
+ vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base);
+ vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base);
+ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector);
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector);
+ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector);
+ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector);
+ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector);
+ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector);
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
@@ -8031,52 +8018,6 @@
vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl);
- /* Set L1 segment info according to Intel SDM
- 27.5.2 Loading Host Segment and Descriptor-Table Registers */
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0xFFFFFFFF,
- .selector = vmcs12->host_cs_selector,
- .type = 11,
- .present = 1,
- .s = 1,
- .g = 1
- };
- if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
- seg.l = 1;
- else
- seg.db = 1;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0xFFFFFFFF,
- .type = 3,
- .present = 1,
- .s = 1,
- .db = 1,
- .g = 1
- };
- seg.selector = vmcs12->host_ds_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
- seg.selector = vmcs12->host_es_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
- seg.selector = vmcs12->host_ss_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
- seg.selector = vmcs12->host_fs_selector;
- seg.base = vmcs12->host_fs_base;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
- seg.selector = vmcs12->host_gs_selector;
- seg.base = vmcs12->host_gs_base;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0x67,
- .selector = vmcs12->host_tr_selector,
- .type = 11,
- .present = 1
- };
- vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
-
kvm_set_dr(vcpu, 7, 0x400);
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
diff -bur ./arch/x86/kvm/x86.c ../linux-3.11-rc1/arch/x86/kvm/x86.c
--- ./arch/x86/kvm/x86.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/x86.c 2013-07-14 18:18:27.000000000 -0400
@@ -850,8 +850,7 @@
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -4956,97 +4955,6 @@
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
-static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
- unsigned long *db)
-{
- u32 dr6 = 0;
- int i;
- u32 enable, rwlen;
-
- enable = dr7;
- rwlen = dr7 >> 16;
- for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
- if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
- dr6 |= (1 << i);
- return dr6;
-}
-
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
-{
- struct kvm_run *kvm_run = vcpu->run;
-
- /*
- * Use the "raw" value to see if TF was passed to the processor.
- * Note that the new value of the flags has not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
- }
-}
-
-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
-{
- struct kvm_run *kvm_run = vcpu->run;
- unsigned long eip = vcpu->arch.emulate_ctxt.eip;
- u32 dr6 = 0;
-
- if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
- (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
- dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.guest_debug_dr7,
- vcpu->arch.eff_db);
-
- if (dr6 != 0) {
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
- get_segment_base(vcpu, VCPU_SREG_CS);
-
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- return true;
- }
- }
-
- if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) {
- dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.dr7,
- vcpu->arch.db);
-
- if (dr6 != 0) {
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= dr6;
- kvm_queue_exception(vcpu, DB_VECTOR);
- *r = EMULATE_DONE;
- return true;
- }
- }
-
- return false;
-}
-
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
@@ -5067,16 +4975,6 @@
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
init_emulate_ctxt(vcpu);
-
- /*
- * We will reenter on the same instruction since
- * we do not set complete_userspace_io. This does not
- * handle watchpoints yet, those would be handled in
- * the emulate_ops.
- */
- if (kvm_vcpu_check_breakpoint(vcpu, &r))
- return r;
-
ctxt->interruptibility = 0;
ctxt->have_exception = false;
ctxt->perm_ok = false;
@@ -5139,11 +5037,11 @@
writeback = false;
vcpu->arch.complete_userspace_io = complete_emulated_pio;
}
- r = EMULATE_USER_EXIT;
+ r = EMULATE_DO_MMIO;
} else if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
writeback = false;
- r = EMULATE_USER_EXIT;
+ r = EMULATE_DO_MMIO;
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
} else if (r == EMULATION_RESTART)
goto restart;
@@ -5152,12 +5050,10 @@
if (writeback) {
toggle_interruptibility(vcpu, ctxt->interruptibility);
+ kvm_set_rflags(vcpu, ctxt->eflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, &r);
- kvm_set_rflags(vcpu, ctxt->eflags);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
@@ -5451,7 +5347,7 @@
int kvm_arch_init(void *opaque)
{
int r;
- struct kvm_x86_ops *ops = opaque;
+ struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
@@ -7123,15 +7019,6 @@
return -ENOMEM;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
- /*
- * memslots->generation has been incremented.
- * mmio generation may have reached its maximum value.
- */
- kvm_mmu_invalidate_mmio_sptes(kvm);
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -7192,6 +7079,11 @@
*/
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+ /*
+ * If memory slot is created, or moved, we need to clear all
+ * mmio sptes.
+ */
+ kvm_mmu_invalidate_mmio_sptes(kvm);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff -bur ./arch/x86/vdso/vclock_gettime.c ../linux-3.11-rc1/arch/x86/vdso/vclock_gettime.c
--- ./arch/x86/vdso/vclock_gettime.c 2014-01-09 19:34:10.263464776 -0500
+++ ../linux-3.11-rc1/arch/x86/vdso/vclock_gettime.c 2013-07-14 18:18:27.000000000 -0400
@@ -85,18 +85,15 @@
cycle_t ret;
u64 last;
u32 version;
+ u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
+ * When looping to get a consistent (time-info, tsc) pair, we
+ * also need to deal with the possibility we can switch vcpus,
+ * so make sure we always re-fetch time-info for the current vcpu.
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -107,6 +104,8 @@
pvti = get_pvti(cpu);
+ migrate_count = pvti->migrate_count;
+
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
@@ -118,7 +117,8 @@
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
+ pvti->pvti.version != version ||
+ pvti->migrate_count != migrate_count));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
Only in .: .config
Only in .: .config.old
Only in .: .git
Only in ./include: config
Only in ./include: generated
diff -bur ./include/linux/kvm_host.h ../linux-3.11-rc1/include/linux/kvm_host.h
--- ./include/linux/kvm_host.h 2014-01-09 19:34:13.253451268 -0500
+++ ../linux-3.11-rc1/include/linux/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -33,8 +33,6 @@
#include <asm/kvm_host.h>
-#include <linux/nitro_main.h>
-
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif
@@ -87,12 +85,6 @@
return pfn == KVM_PFN_NOSLOT;
}
-/*
- * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390)
- * provide own defines and kvm_is_error_hva
- */
-#ifndef KVM_HVA_ERR_BAD
-
#define KVM_HVA_ERR_BAD (PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
@@ -101,8 +93,6 @@
return addr >= PAGE_OFFSET;
}
-#endif
-
#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
static inline bool is_error_page(struct page *page)
@@ -170,12 +160,8 @@
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val);
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, const void *val, long cookie);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
void *val);
-int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, void *val, long cookie);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -408,8 +394,6 @@
#endif
long tlbs_dirty;
struct list_head devices;
-
- struct nitro_kvm *nitro_kvm;
};
#define kvm_err(fmt, ...) \
@@ -515,7 +499,6 @@
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
Only in ./include/linux: nitro.h
Only in ./include/linux: nitro_main.h
diff -bur ./include/linux/sched.h ../linux-3.11-rc1/include/linux/sched.h
--- ./include/linux/sched.h 2014-01-09 19:34:13.323458542 -0500
+++ ../linux-3.11-rc1/include/linux/sched.h 2013-07-14 18:18:27.000000000 -0400
@@ -107,6 +107,14 @@
extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+ struct task_struct *task;
+ int from_cpu;
+ int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
extern unsigned long get_parent_ip(unsigned long addr);
extern void dump_cpu_task(int cpu);
diff -bur ./kernel/sched/core.c ../linux-3.11-rc1/kernel/sched/core.c
--- ./kernel/sched/core.c 2014-01-09 19:34:13.503458398 -0500
+++ ../linux-3.11-rc1/kernel/sched/core.c 2013-07-14 18:18:27.000000000 -0400
@@ -976,6 +976,13 @@
rq->skip_clock_update = 1;
}
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1006,10 +1013,18 @@
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
+ struct task_migration_notifier tmn;
+
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+ tmn.task = p;
+ tmn.from_cpu = task_cpu(p);
+ tmn.to_cpu = new_cpu;
+
+ atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
Only in .: kinst.sh
Only in .: Module.symvers
Only in .: nitro_diff
Only in ./scripts/basic: fixdep
Only in ./scripts: conmakehash
Only in ./scripts/genksyms: genksyms
Only in ./scripts/genksyms: keywords.hash.c
Only in ./scripts/genksyms: lex.lex.c
Only in ./scripts/genksyms: parse.tab.c
Only in ./scripts/genksyms: parse.tab.h
Only in ./scripts: kallsyms
Only in ./scripts/kconfig: conf
Only in ./scripts/kconfig: mconf
Only in ./scripts/kconfig: zconf.hash.c
Only in ./scripts/kconfig: zconf.lex.c
Only in ./scripts/kconfig: zconf.tab.c
Only in ./scripts/mod: devicetable-offsets.h
Only in ./scripts/mod: elfconfig.h
Only in ./scripts/mod: mk_elfconfig
Only in ./scripts/mod: modpost
Only in ./scripts: recordmcount
Only in ./scripts/selinux/genheaders: genheaders
Only in ./scripts/selinux/mdp: mdp
Only in ./scripts: sortextable
Only in ./scripts: unifdef
Only in ./security/tomoyo: builtin-policy.h
Only in ./security/tomoyo: policy
Only in .: .version
diff -bur ./virt/kvm/kvm_main.c ../linux-3.11-rc1/virt/kvm/kvm_main.c
--- ./virt/kvm/kvm_main.c 2014-01-09 19:34:14.283464649 -0500
+++ ../linux-3.11-rc1/virt/kvm/kvm_main.c 2013-07-14 18:18:27.000000000 -0400
@@ -61,9 +61,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
-#include <linux/nitro.h>
-#include <linux/nitro_main.h>
-
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -603,8 +600,6 @@
int i;
struct mm_struct *mm = kvm->mm;
- nitro_destroy_vm_hook(kvm);
-
kvm_arch_sync_events(kvm);
raw_spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
@@ -736,9 +731,6 @@
update_memslots(slots, new, kvm->memslots->generation);
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
-
- kvm_arch_memslots_updated(kvm);
-
return old_memslots;
}
@@ -1899,7 +1891,7 @@
/*
* Allocates an inode for the vcpu.
*/
-int create_vcpu_fd(struct kvm_vcpu *vcpu)
+static int create_vcpu_fd(struct kvm_vcpu *vcpu)
{
return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR);
}
@@ -1983,8 +1975,8 @@
struct kvm_fpu *fpu = NULL;
struct kvm_sregs *kvm_sregs = NULL;
- //if (vcpu->kvm->mm != current->mm)
- // return -EIO;
+ if (vcpu->kvm->mm != current->mm)
+ return -EIO;
#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
@@ -2329,8 +2321,8 @@
void __user *argp = (void __user *)arg;
int r;
- //if (kvm->mm != current->mm)
- // return -EIO;
+ if (kvm->mm != current->mm)
+ return -EIO;
switch (ioctl) {
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
@@ -2485,24 +2477,6 @@
r = 0;
break;
}
- case KVM_NITRO_ATTACH_VCPUS: {
- int i;
- struct nitro_vcpus nvcpus;
-
- r = nitro_iotcl_attach_vcpus(kvm,&nvcpus);
- if (r)
- goto out;
-
- r = -EFAULT;
- if (copy_to_user(argp, &nvcpus, sizeof(nvcpus))){
- for(i=0;i<nvcpus.num_vcpus;i++)
- kvm_put_kvm(kvm);
- goto out;
- }
-
- r = 0;
- break;
- }
default:
r = kvm_arch_vm_ioctl(filp, ioctl, arg);
if (r == -ENOTTY)
@@ -2612,8 +2586,6 @@
return r;
}
#endif
- nitro_create_vm_hook(kvm);
-
r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (r < 0)
kvm_put_kvm(kvm);
@@ -2651,7 +2623,6 @@
static long kvm_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
- void __user *argp = (void __user *)arg;
long r = -EINVAL;
switch (ioctl) {
@@ -2684,28 +2655,6 @@
case KVM_TRACE_DISABLE:
r = -EOPNOTSUPP;
break;
- case KVM_NITRO_NUM_VMS:
- r = nitro_iotcl_num_vms();
- break;
- case KVM_NITRO_ATTACH_VM: {
- pid_t creator;
- struct kvm *kvm;
-
- r = -EFAULT;
- if (copy_from_user(&creator, argp, sizeof(pid_t)))
- goto out;
-
- r = -ESRCH;
- kvm = nitro_get_vm_by_creator(creator);
- if(kvm == NULL)
- goto out;
-
- kvm_get_kvm(kvm);
- r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
- if(r<0)
- kvm_put_kvm(kvm);
- break;
- }
default:
return kvm_arch_dev_ioctl(filp, ioctl, arg);
}
@@ -2863,9 +2812,11 @@
kfree(bus);
}
-static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1,
- const struct kvm_io_range *r2)
+static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
{
+ const struct kvm_io_range *r1 = p1;
+ const struct kvm_io_range *r2 = p2;
+
if (r1->addr < r2->addr)
return -1;
if (r1->addr + r1->len > r2->addr + r2->len)
@@ -2873,11 +2824,6 @@
return 0;
}
-static int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
-{
- return __kvm_io_bus_sort_cmp(p1, p2);
-}
-
static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
gpa_t addr, int len)
{
@@ -2911,54 +2857,17 @@
off = range - bus->range;
- while (off > 0 && __kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+ while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
off--;
return off;
}
-static int __kvm_io_bus_write(struct kvm_io_bus *bus,
- struct kvm_io_range *range, const void *val)
-{
- int idx;
-
- idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len);
- if (idx < 0)
- return -EOPNOTSUPP;
-
- while (idx < bus->dev_count &&
- __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) {
- if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
- range->len, val))
- return idx;
- idx++;
- }
-
- return -EOPNOTSUPP;
-}
-
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
- struct kvm_io_bus *bus;
- struct kvm_io_range range;
- int r;
-
- range = (struct kvm_io_range) {
- .addr = addr,
- .len = len,
- };
-
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- r = __kvm_io_bus_write(bus, &range, val);
- return r < 0 ? r : 0;
-}
-
-/* kvm_io_bus_write_cookie - called under kvm->slots_lock */
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, const void *val, long cookie)
-{
+ int idx;
struct kvm_io_bus *bus;
struct kvm_io_range range;
@@ -2968,35 +2877,14 @@
};
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-
- /* First try the device referenced by cookie. */
- if ((cookie >= 0) && (cookie < bus->dev_count) &&
- (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0))
- if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
- val))
- return cookie;
-
- /*
- * cookie contained garbage; fall back to search and return the
- * correct cookie value.
- */
- return __kvm_io_bus_write(bus, &range, val);
-}
-
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
- void *val)
-{
- int idx;
-
- idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len);
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
if (idx < 0)
return -EOPNOTSUPP;
while (idx < bus->dev_count &&
- __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) {
- if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
- range->len, val))
- return idx;
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
+ return 0;
idx++;
}
@@ -3007,24 +2895,7 @@
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
- struct kvm_io_bus *bus;
- struct kvm_io_range range;
- int r;
-
- range = (struct kvm_io_range) {
- .addr = addr,
- .len = len,
- };
-
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- r = __kvm_io_bus_read(bus, &range, val);
- return r < 0 ? r : 0;
-}
-
-/* kvm_io_bus_read_cookie - called under kvm->slots_lock */
-int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, void *val, long cookie)
-{
+ int idx;
struct kvm_io_bus *bus;
struct kvm_io_range range;
@@ -3034,19 +2905,18 @@
};
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
- /* First try the device referenced by cookie. */
- if ((cookie >= 0) && (cookie < bus->dev_count) &&
- (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0))
- if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len,
- val))
- return cookie;
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
+ return 0;
+ idx++;
+ }
- /*
- * cookie contained garbage; fall back to search and return the
- * correct cookie value.
- */
- return __kvm_io_bus_read(bus, &range, val);
+ return -EOPNOTSUPP;
}
/* Caller must hold slots_lock. */
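
Because the modified tree is the left-hand (`---`) side of every hunk, applying this diff forward strips the modifications; applying it in reverse onto a pristine 3.11-rc1 tree restores them. A rough sketch, assuming GNU patch semantics; the files listed as `Only in .` (e.g. `nitro_main.c`, `nitro.h`, `nitro_main.h`) carry no content in the diff body and would have to be copied in separately:

```sh
cd linux-3.11-rc1            # pristine source tree
patch -p1 -R < nitro_diff    # reverse-apply: re-adds the modifications
# "Only in ." entries (new files, build artifacts) never appear as hunks,
# so nitro_main.c and the nitro headers must be added by hand afterwards.
```
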