diff -bur ./arch/arm/kvm/arm.c ../linux-3.11-rc1/arch/arm/kvm/arm.c
--- ./arch/arm/kvm/arm.c 2014-01-09 19:34:09.113458259 -0500
+++ ../linux-3.11-rc1/arch/arm/kvm/arm.c 2013-07-14 18:18:27.000000000 -0400
@@ -219,10 +219,6 @@
return -EINVAL;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/ia64/kvm/kvm-ia64.c ../linux-3.11-rc1/arch/ia64/kvm/kvm-ia64.c
--- ./arch/ia64/kvm/kvm-ia64.c 2014-01-09 19:34:09.523458276 -0500
+++ ../linux-3.11-rc1/arch/ia64/kvm/kvm-ia64.c 2013-07-14 18:18:27.000000000 -0400
@@ -1560,10 +1560,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/mips/kvm/kvm_mips.c ../linux-3.11-rc1/arch/mips/kvm/kvm_mips.c
--- ./arch/mips/kvm/kvm_mips.c 2014-01-09 19:34:09.723458441 -0500
+++ ../linux-3.11-rc1/arch/mips/kvm/kvm_mips.c 2013-07-14 18:18:27.000000000 -0400
@@ -208,10 +208,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
diff -bur ./arch/powerpc/kvm/powerpc.c ../linux-3.11-rc1/arch/powerpc/kvm/powerpc.c
--- ./arch/powerpc/kvm/powerpc.c 2014-01-09 19:34:09.903464521 -0500
+++ ../linux-3.11-rc1/arch/powerpc/kvm/powerpc.c 2013-07-14 18:18:27.000000000 -0400
@@ -420,10 +420,6 @@
return kvmppc_core_create_memslot(slot, npages);
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
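Note: the four hunks above differ only in file — the patched tree (the `---` side) carries an empty kvm_arch_memslots_updated() stub on each architecture that vanilla 3.11-rc1 lacks; the generic caller shows up in the virt/kvm/kvm_main.c hunk near the end of this diff. A minimal standalone sketch of that hook pattern follows; the reduced struct kvm, install_new_memslots() and main() are invented scaffolding for illustration, not kernel code:

#include <stdio.h>

struct kvm { unsigned long memslots_generation; };

/* Most architectures: an empty stub, as in the hunks above. */
void kvm_arch_memslots_updated(struct kvm *kvm)
{
    /* x86 invalidates cached MMIO sptes here; other arches do nothing. */
    (void)kvm;
}

static void install_new_memslots(struct kvm *kvm)
{
    kvm->memslots_generation++;     /* publish the new slot array ... */
    kvm_arch_memslots_updated(kvm); /* ... then let the arch react    */
}

int main(void)
{
    struct kvm vm = { 0 };

    install_new_memslots(&vm);
    printf("generation=%lu\n", vm.memslots_generation);
    return 0;
}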
diff -bur ./arch/s390/include/asm/kvm_host.h ../linux-3.11-rc1/arch/s390/include/asm/kvm_host.h
--- ./arch/s390/include/asm/kvm_host.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -274,14 +274,6 @@
int css_support;
};
-#define KVM_HVA_ERR_BAD (-1UL)
-#define KVM_HVA_ERR_RO_BAD (-2UL)
-
-static inline bool kvm_is_error_hva(unsigned long addr)
-{
- return IS_ERR_VALUE(addr);
-}
-
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
diff -bur ./arch/s390/include/asm/mmu_context.h ../linux-3.11-rc1/arch/s390/include/asm/mmu_context.h
--- ./arch/s390/include/asm/mmu_context.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/mmu_context.h 2013-07-14 18:18:27.000000000 -0400
@@ -21,7 +21,24 @@
#ifdef CONFIG_64BIT
mm->context.asce_bits |= _ASCE_TYPE_REGION3;
#endif
+ if (current->mm && current->mm->context.alloc_pgste) {
+ /*
+ * alloc_pgste indicates, that any NEW context will be created
+ * with extended page tables. The old context is unchanged. The
+ * page table allocation and the page table operations will
+ * look at has_pgste to distinguish normal and extended page
+ * tables. The only way to create extended page tables is to
+ * set alloc_pgste and then create a new context (e.g. dup_mm).
+ * The page table allocation is called after init_new_context
+ * and if has_pgste is set, it will create extended page
+ * tables.
+ */
+ mm->context.has_pgste = 1;
+ mm->context.alloc_pgste = 1;
+ } else {
mm->context.has_pgste = 0;
+ mm->context.alloc_pgste = 0;
+ }
mm->context.asce_limit = STACK_TOP_MAX;
crst_table_init((unsigned long *) mm->pgd, pgd_entry_type(mm));
return 0;
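The init_new_context() hunk above is the consumer of the alloc_pgste bit that s390_enable_sie() sets around dup_mm() (see the arch/s390/mm/pgtable.c hunk further down). A toy userspace model of that handshake follows — struct mm, current_mm and main() are hypothetical scaffolding; only the flag logic mirrors the hunk:

#include <assert.h>

struct mm_context { unsigned int alloc_pgste:1; unsigned int has_pgste:1; };
struct mm { struct mm_context context; };

static struct mm *current_mm;   /* stands in for current->mm */

static void init_new_context(struct mm *mm)
{
    if (current_mm && current_mm->context.alloc_pgste) {
        mm->context.has_pgste = 1;   /* new context: extended page tables */
        mm->context.alloc_pgste = 1;
    } else {
        mm->context.has_pgste = 0;
        mm->context.alloc_pgste = 0;
    }
}

int main(void)
{
    struct mm old = { { 0, 0 } }, dup1 = { { 0, 0 } }, dup2 = { { 0, 0 } };

    current_mm = &old;
    init_new_context(&dup1);         /* plain clone */
    assert(!dup1.context.has_pgste);

    old.context.alloc_pgste = 1;     /* as s390_enable_sie() does ... */
    init_new_context(&dup2);         /* ... right before dup_mm()     */
    assert(dup2.context.has_pgste);
    return 0;
}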
diff -bur ./arch/s390/include/asm/mmu.h ../linux-3.11-rc1/arch/s390/include/asm/mmu.h
--- ./arch/s390/include/asm/mmu.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/mmu.h 2013-07-14 18:18:27.000000000 -0400
@@ -12,6 +12,8 @@
unsigned long asce_bits;
unsigned long asce_limit;
unsigned long vdso_base;
+ /* Cloned contexts will be created with extended page tables. */
+ unsigned int alloc_pgste:1;
/* The mmu context has extended page tables. */
unsigned int has_pgste:1;
} mm_context_t;
diff -bur ./arch/s390/include/asm/pgtable.h ../linux-3.11-rc1/arch/s390/include/asm/pgtable.h
--- ./arch/s390/include/asm/pgtable.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/pgtable.h 2013-07-14 18:18:27.000000000 -0400
@@ -1361,17 +1361,6 @@
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
-static inline void pmdp_flush_lazy(struct mm_struct *mm,
- unsigned long address, pmd_t *pmdp)
-{
- int active = (mm == current->active_mm) ? 1 : 0;
-
- if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
- __pmd_idte(address, pmdp);
- else
- mm->context.flush_mm = 1;
-}
-
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PGTABLE_DEPOSIT
diff -bur ./arch/s390/include/asm/processor.h ../linux-3.11-rc1/arch/s390/include/asm/processor.h
--- ./arch/s390/include/asm/processor.h 2014-01-09 19:34:09.983466699 -0500
+++ ../linux-3.11-rc1/arch/s390/include/asm/processor.h 2013-07-14 18:18:27.000000000 -0400
@@ -43,7 +43,6 @@
#ifndef CONFIG_64BIT
#define TASK_SIZE (1UL << 31)
-#define TASK_MAX_SIZE (1UL << 31)
#define TASK_UNMAPPED_BASE (1UL << 30)
#else /* CONFIG_64BIT */
@@ -52,7 +51,6 @@
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
-#define TASK_MAX_SIZE (1UL << 53)
#endif /* CONFIG_64BIT */
diff -bur ./arch/s390/kvm/diag.c ../linux-3.11-rc1/arch/s390/kvm/diag.c
--- ./arch/s390/kvm/diag.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/diag.c 2013-07-14 18:18:27.000000000 -0400
@@ -119,21 +119,12 @@
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
* - gpr 3 contains the virtqueue index (passed as datamatch)
- * - gpr 4 contains the index on the bus (optionally)
*/
- ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ ret = kvm_io_bus_write(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
vcpu->run->s.regs.gprs[2],
- 8, &vcpu->run->s.regs.gprs[3],
- vcpu->run->s.regs.gprs[4]);
+ 8, &vcpu->run->s.regs.gprs[3]);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
- /*
- * Return cookie in gpr 2, but don't overwrite the register if the
- * diagnose will be handled by userspace.
- */
- if (ret != -EOPNOTSUPP)
- vcpu->run->s.regs.gprs[2] = ret;
- /* kvm_io_bus_write_cookie returns -EOPNOTSUPP if it found no match. */
+ /* kvm_io_bus_write returns -EOPNOTSUPP if it found no match. */
return ret < 0 ? ret : 0;
}
diff -bur ./arch/s390/kvm/kvm-s390.c ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.c
--- ./arch/s390/kvm/kvm-s390.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.c 2013-07-14 18:18:27.000000000 -0400
@@ -28,7 +28,6 @@
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
-#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -85,15 +84,9 @@
{ NULL }
};
-unsigned long *vfacilities;
+static unsigned long long *facilities;
static struct gmap_notifier gmap_notifier;
-/* test availability of vfacility */
-static inline int test_vfacility(unsigned long nr)
-{
- return __test_facility(nr, (void *) vfacilities);
-}
-
/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
@@ -394,7 +387,7 @@
vcpu->arch.sie_block->ecb = 6;
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
- vcpu->arch.sie_block->fac = (int) (long) vfacilities;
+ vcpu->arch.sie_block->fac = (int) (long) facilities;
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
@@ -1063,10 +1056,6 @@
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1133,20 +1122,20 @@
* to hold the maximum amount of facilities. On the other hand, we
* only set facilities that are known to work in KVM.
*/
- vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
- if (!vfacilities) {
+ facilities = (unsigned long long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
+ if (!facilities) {
kvm_exit();
return -ENOMEM;
}
- memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
- vfacilities[0] &= 0xff82fff3f47c0000UL;
- vfacilities[1] &= 0x001c000000000000UL;
+ memcpy(facilities, S390_lowcore.stfle_fac_list, 16);
+ facilities[0] &= 0xff82fff3f47c0000ULL;
+ facilities[1] &= 0x001c000000000000ULL;
return 0;
}
static void __exit kvm_s390_exit(void)
{
- free_page((unsigned long) vfacilities);
+ free_page((unsigned long) facilities);
kvm_exit();
}
diff -bur ./arch/s390/kvm/kvm-s390.h ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.h
--- ./arch/s390/kvm/kvm-s390.h 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/kvm-s390.h 2013-07-14 18:18:27.000000000 -0400
@@ -24,9 +24,6 @@
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
-/* declare vfacilities extern */
-extern unsigned long *vfacilities;
-
/* negativ values are error codes, positive values for internal conditions */
#define SIE_INTERCEPT_RERUNVCPU (1<<0)
#define SIE_INTERCEPT_UCONTROL (1<<1)
@@ -115,13 +112,6 @@
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
-/* Set the condition code in the guest program status word */
-static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
-{
- vcpu->arch.sie_block->gpsw.mask &= ~(3UL << 44);
- vcpu->arch.sie_block->gpsw.mask |= cc << 44;
-}
-
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);
diff -bur ./arch/s390/kvm/priv.c ../linux-3.11-rc1/arch/s390/kvm/priv.c
--- ./arch/s390/kvm/priv.c 2014-01-09 19:34:09.993464527 -0500
+++ ../linux-3.11-rc1/arch/s390/kvm/priv.c 2013-07-14 18:18:27.000000000 -0400
@@ -163,7 +163,8 @@
kfree(inti);
no_interrupt:
/* Set condition code and we're done. */
- kvm_s390_set_psw_cc(vcpu, cc);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (cc & 3ul) << 44;
return 0;
}
@@ -218,13 +219,15 @@
* Set condition code 3 to stop the guest from issueing channel
* I/O instructions.
*/
- kvm_s390_set_psw_cc(vcpu, 3);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
+ vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44;
return 0;
}
}
static int handle_stfl(struct kvm_vcpu *vcpu)
{
+ unsigned int facility_list;
int rc;
vcpu->stat.instruction_stfl++;
@@ -232,13 +235,15 @@
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ /* only pass the facility bits, which we can handle */
+ facility_list = S390_lowcore.stfl_fac_list & 0xff82fff3;
+
rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list),
- vfacilities, 4);
+ &facility_list, sizeof(facility_list));
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- VCPU_EVENT(vcpu, 5, "store facility list value %x",
- *(unsigned int *) vfacilities);
- trace_kvm_s390_handle_stfl(vcpu, *(unsigned int *) vfacilities);
+ VCPU_EVENT(vcpu, 5, "store facility list value %x", facility_list);
+ trace_kvm_s390_handle_stfl(vcpu, facility_list);
return 0;
}
@@ -381,7 +386,7 @@
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
if (fc > 3) {
- kvm_s390_set_psw_cc(vcpu, 3);
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; /* cc 3 */
return 0;
}
@@ -391,7 +396,7 @@
if (fc == 0) {
vcpu->run->s.regs.gprs[0] = 3 << 28;
- kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); /* cc 0 */
return 0;
}
@@ -425,11 +430,12 @@
}
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
- kvm_s390_set_psw_cc(vcpu, 0);
+ vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
vcpu->run->s.regs.gprs[0] = 0;
return 0;
out_no_data:
- kvm_s390_set_psw_cc(vcpu, 3);
+ /* condition code 3 */
+ vcpu->arch.sie_block->gpsw.mask |= 3ul << 44;
out_exception:
free_page(mem);
return rc;
@@ -487,12 +493,12 @@
kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
/* This basically extracts the mask half of the psw. */
- vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg1] &= 0xffffffff00000000;
vcpu->run->s.regs.gprs[reg1] |= vcpu->arch.sie_block->gpsw.mask >> 32;
if (reg2) {
- vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000UL;
+ vcpu->run->s.regs.gprs[reg2] &= 0xffffffff00000000;
vcpu->run->s.regs.gprs[reg2] |=
- vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffffUL;
+ vcpu->arch.sie_block->gpsw.mask & 0x00000000ffffffff;
}
return 0;
}
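Every condition-code update in the priv.c hunks above manipulates the same two PSW bits; the patched tree merely funnels them through the kvm_s390_set_psw_cc() helper removed from kvm-s390.h further up. A self-contained sketch of that bit manipulation, assuming only what the hunks show (the 2-bit condition code sits at bits 44-45 of the PSW mask):

#include <assert.h>
#include <stdint.h>

static void set_psw_cc(uint64_t *psw_mask, unsigned int cc)
{
    *psw_mask &= ~((uint64_t)3 << 44);      /* clear the old cc  */
    *psw_mask |= (uint64_t)(cc & 3) << 44;  /* insert the new cc */
}

int main(void)
{
    uint64_t mask = 0;

    set_psw_cc(&mask, 3);                   /* e.g. the "no data" paths above */
    assert(((mask >> 44) & 3) == 3);
    set_psw_cc(&mask, 0);                   /* the success paths */
    assert(((mask >> 44) & 3) == 0);
    return 0;
}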
diff -bur ./arch/s390/mm/pgtable.c ../linux-3.11-rc1/arch/s390/mm/pgtable.c
--- ./arch/s390/mm/pgtable.c 2014-01-09 19:34:10.003464567 -0500
+++ ../linux-3.11-rc1/arch/s390/mm/pgtable.c 2013-07-14 18:18:27.000000000 -0400
@@ -335,7 +335,7 @@
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > TASK_MAX_SIZE ||
+ if (len == 0 || from + len > PGDIR_SIZE ||
from + len < from || to + len < to)
return -EINVAL;
@@ -731,11 +731,6 @@
spin_unlock(&gmap_notifier_lock);
}
-static inline int page_table_with_pgste(struct page *page)
-{
- return atomic_read(&page->_mapcount) == 0;
-}
-
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -755,7 +750,7 @@
mp->vmaddr = vmaddr & PMD_MASK;
INIT_LIST_HEAD(&mp->mapper);
page->index = (unsigned long) mp;
- atomic_set(&page->_mapcount, 0);
+ atomic_set(&page->_mapcount, 3);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
@@ -826,11 +821,6 @@
#else /* CONFIG_PGSTE */
-static inline int page_table_with_pgste(struct page *page)
-{
- return 0;
-}
-
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -907,12 +897,12 @@
struct page *page;
unsigned int bit, mask;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_has_pgste(mm)) {
gmap_disconnect_pgtable(mm, table);
return page_table_free_pgste(table);
}
/* Free 1K/2K page table fragment of a 4K page */
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -950,14 +940,14 @@
unsigned int bit, mask;
mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
+ if (mm_has_pgste(mm)) {
gmap_disconnect_pgtable(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
list_del(&page->lru);
@@ -1043,120 +1033,36 @@
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static inline void thp_split_vma(struct vm_area_struct *vma)
+void thp_split_vma(struct vm_area_struct *vma)
{
unsigned long addr;
+ struct page *page;
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
- follow_page(vma, addr, FOLL_SPLIT);
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ page = follow_page(vma, addr, FOLL_SPLIT);
+ }
}
-static inline void thp_split_mm(struct mm_struct *mm)
+void thp_split_mm(struct mm_struct *mm)
{
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = mm->mmap;
- for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ while (vma != NULL) {
thp_split_vma(vma);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
+ vma = vma->vm_next;
}
- mm->def_flags |= VM_NOHUGEPAGE;
-}
-#else
-static inline void thp_split_mm(struct mm_struct *mm)
-{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
- struct mm_struct *mm, pud_t *pud,
- unsigned long addr, unsigned long end)
-{
- unsigned long next, *table, *new;
- struct page *page;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
-again:
- if (pmd_none_or_clear_bad(pmd))
- continue;
- table = (unsigned long *) pmd_deref(*pmd);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page))
- continue;
- /* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm, addr);
- if (!new) {
- mm->context.has_pgste = 0;
- continue;
- }
- spin_lock(&mm->page_table_lock);
- if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
- /* Nuke pmd entry pointing to the "short" page table */
- pmdp_flush_lazy(mm, addr, pmd);
- pmd_clear(pmd);
- /* Copy ptes from old table to new table */
- memcpy(new, table, PAGE_SIZE/2);
- clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
- /* Establish new table */
- pmd_populate(mm, pmd, (pte_t *) new);
- /* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table);
- new = NULL;
- }
- spin_unlock(&mm->page_table_lock);
- if (new) {
- page_table_free_pgste(new);
- goto again;
- }
- } while (pmd++, addr = next, addr != end);
-
- return addr;
-}
-
-static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
- struct mm_struct *mm, pgd_t *pgd,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pud_t *pud;
-
- pud = pud_offset(pgd, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
- continue;
- next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
- } while (pud++, addr = next, addr != end);
-
- return addr;
-}
-
-static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long addr, unsigned long end)
-{
- unsigned long next;
- pgd_t *pgd;
-
- pgd = pgd_offset(mm, addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
- continue;
- next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
- } while (pgd++, addr = next, addr != end);
-}
-
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- struct mmu_gather tlb;
+ struct mm_struct *mm, *old_mm;
/* Do we have switched amode? If no, we cannot do sie */
if (s390_user_mode == HOME_SPACE_MODE)
@@ -1166,16 +1072,57 @@
if (mm_has_pgste(tsk->mm))
return 0;
- down_write(&mm->mmap_sem);
+ /* lets check if we are allowed to replace the mm */
+ task_lock(tsk);
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+#ifdef CONFIG_AIO
+ !hlist_empty(&tsk->mm->ioctx_list) ||
+#endif
+ tsk->mm != tsk->active_mm) {
+ task_unlock(tsk);
+ return -EINVAL;
+ }
+ task_unlock(tsk);
+
+ /* we copy the mm and let dup_mm create the page tables with_pgstes */
+ tsk->mm->context.alloc_pgste = 1;
+ /* make sure that both mms have a correct rss state */
+ sync_mm_rss(tsk->mm);
+ mm = dup_mm(tsk);
+ tsk->mm->context.alloc_pgste = 0;
+ if (!mm)
+ return -ENOMEM;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- /* Reallocate the page tables with pgstes */
- mm->context.has_pgste = 1;
- tlb_gather_mmu(&tlb, mm, 0);
- page_table_realloc(&tlb, mm, 0, TASK_SIZE);
- tlb_finish_mmu(&tlb, 0, -1);
- up_write(&mm->mmap_sem);
- return mm->context.has_pgste ? 0 : -ENOMEM;
+ mm->def_flags |= VM_NOHUGEPAGE;
+#endif
+
+ /* Now lets check again if something happened */
+ task_lock(tsk);
+ if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
+#ifdef CONFIG_AIO
+ !hlist_empty(&tsk->mm->ioctx_list) ||
+#endif
+ tsk->mm != tsk->active_mm) {
+ mmput(mm);
+ task_unlock(tsk);
+ return -EINVAL;
+ }
+
+ /* ok, we are alone. No ptrace, no threads, etc. */
+ old_mm = tsk->mm;
+ tsk->mm = tsk->active_mm = mm;
+ preempt_disable();
+ update_mm(mm, tsk);
+ atomic_inc(&mm->context.attach_count);
+ atomic_dec(&old_mm->context.attach_count);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ preempt_enable();
+ task_unlock(tsk);
+ mmput(old_mm);
+ return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
diff -bur ./arch/x86/include/asm/kvm_host.h ../linux-3.11-rc1/arch/x86/include/asm/kvm_host.h
--- ./arch/x86/include/asm/kvm_host.h 2014-01-09 19:34:10.183464607 -0500
+++ ../linux-3.11-rc1/arch/x86/include/asm/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -323,7 +323,6 @@
u64 global_ovf_ctrl;
u64 counter_bitmask[2];
u64 global_ctrl_mask;
- u64 reserved_bits;
u8 version;
struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED];
@@ -804,7 +803,7 @@
enum emulation_result {
EMULATE_DONE, /* no further processing */
- EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
+ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */
EMULATE_FAIL, /* can't emulate this instruction */
};
diff -bur ./arch/x86/include/asm/pvclock.h ../linux-3.11-rc1/arch/x86/include/asm/pvclock.h
--- ./arch/x86/include/asm/pvclock.h 2014-01-09 19:34:10.193458186 -0500
+++ ../linux-3.11-rc1/arch/x86/include/asm/pvclock.h 2013-07-14 18:18:27.000000000 -0400
@@ -93,6 +93,7 @@
struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
Only in ./arch/x86/include: generated
diff -bur ./arch/x86/kernel/pvclock.c ../linux-3.11-rc1/arch/x86/kernel/pvclock.c
--- ./arch/x86/kernel/pvclock.c 2014-01-09 19:34:10.223458452 -0500
+++ ../linux-3.11-rc1/arch/x86/kernel/pvclock.c 2013-07-14 18:18:27.000000000 -0400
@@ -128,7 +128,46 @@
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -142,12 +181,17 @@
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+ pvclock_vdso_info = i;
+
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
+
+ register_task_migration_notifier(&pvclock_migrate);
+
return 0;
}
#endif
diff -bur ./arch/x86/kvm/lapic.c ../linux-3.11-rc1/arch/x86/kvm/lapic.c
--- ./arch/x86/kvm/lapic.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/lapic.c 2013-07-14 18:18:27.000000000 -0400
@@ -79,6 +79,16 @@
*((u32 *) (apic->regs + reg_off)) = val;
}
+static inline int apic_test_and_set_vector(int vec, void *bitmap)
+{
+ return test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
+static inline int apic_test_and_clear_vector(int vec, void *bitmap)
+{
+ return test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
+}
+
static inline int apic_test_vector(int vec, void *bitmap)
{
return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@@ -321,10 +331,10 @@
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
-static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
+static inline int apic_test_and_set_irr(int vec, struct kvm_lapic *apic)
{
apic->irr_pending = true;
- apic_set_vector(vec, apic->regs + APIC_IRR);
+ return apic_test_and_set_vector(vec, apic->regs + APIC_IRR);
}
static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -671,21 +681,28 @@
if (unlikely(!apic_enabled(apic)))
break;
- result = 1;
-
if (dest_map)
__set_bit(vcpu->vcpu_id, dest_map);
- if (kvm_x86_ops->deliver_posted_interrupt)
+ if (kvm_x86_ops->deliver_posted_interrupt) {
+ result = 1;
kvm_x86_ops->deliver_posted_interrupt(vcpu, vector);
- else {
- apic_set_irr(vector, apic);
+ } else {
+ result = !apic_test_and_set_irr(vector, apic);
+
+ if (!result) {
+ if (trig_mode)
+ apic_debug("level trig mode repeatedly "
+ "for vector %d", vector);
+ goto out;
+ }
kvm_make_request(KVM_REQ_EVENT, vcpu);
kvm_vcpu_kick(vcpu);
}
+out:
trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
- trig_mode, vector, false);
+ trig_mode, vector, !result);
break;
case APIC_DM_REMRD:
diff -bur ./arch/x86/kvm/Makefile ../linux-3.11-rc1/arch/x86/kvm/Makefile
--- ./arch/x86/kvm/Makefile 2014-01-10 15:40:08.885432013 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/Makefile 2013-07-14 18:18:27.000000000 -0400
@@ -15,7 +15,7 @@
kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o cpuid.o pmu.o
-kvm-intel-y += vmx.o nitro_main.o
+kvm-intel-y += vmx.o
kvm-amd-y += svm.o
obj-$(CONFIG_KVM) += kvm.o
diff -bur ./arch/x86/kvm/mmu.c ../linux-3.11-rc1/arch/x86/kvm/mmu.c
--- ./arch/x86/kvm/mmu.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/mmu.c 2013-07-14 18:18:27.000000000 -0400
@@ -2811,13 +2811,6 @@
static bool page_fault_can_be_fast(struct kvm_vcpu *vcpu, u32 error_code)
{
/*
- * Do not fix the mmio spte with invalid generation number which
- * need to be updated by slow page fault path.
- */
- if (unlikely(error_code & PFERR_RSVD_MASK))
- return false;
-
- /*
* #PF can be fast only if the shadow page table is present and it
* is caused by write-protect, that means we just need change the
* W bit of the spte which can be done out of mmu-lock.
@@ -4182,7 +4175,7 @@
switch (er) {
case EMULATE_DONE:
return 1;
- case EMULATE_USER_EXIT:
+ case EMULATE_DO_MMIO:
++vcpu->stat.mmio_exits;
/* fall through */
case EMULATE_FAIL:
@@ -4390,8 +4383,11 @@
/*
* The very rare case: if the generation-number is round,
* zap all shadow pages.
+ *
+ * The max value is MMIO_MAX_GEN - 1 since it is not called
+ * when mark memslot invalid.
*/
- if (unlikely(kvm_current_mmio_generation(kvm) >= MMIO_MAX_GEN)) {
+ if (unlikely(kvm_current_mmio_generation(kvm) >= (MMIO_MAX_GEN - 1))) {
printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
kvm_mmu_invalidate_zap_all_pages(kvm);
}
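The last mmu.c hunk only shifts the wraparound threshold by one: the patched tree zaps at MMIO_MAX_GEN, vanilla 3.11-rc1 at MMIO_MAX_GEN - 1, reserving the final value for the memslot-invalidation path. An illustrative standalone sketch — the constant's width here is invented, the real definition lives in arch/x86/kvm/mmu.c:

#include <stdbool.h>
#include <stdio.h>

#define MMIO_MAX_GEN ((1u << 19) - 1)   /* placeholder width, not the real one */

static bool mmio_generation_needs_zap(unsigned int gen)
{
    /* Keep the top value free for slots being marked invalid. */
    return gen >= (MMIO_MAX_GEN - 1);
}

int main(void)
{
    printf("%d %d\n",
           mmio_generation_needs_zap(MMIO_MAX_GEN - 2),  /* 0: still room */
           mmio_generation_needs_zap(MMIO_MAX_GEN - 1)); /* 1: zap now    */
    return 0;
}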
Only in ./arch/x86/kvm: nitro_main.c
diff -bur ./arch/x86/kvm/pmu.c ../linux-3.11-rc1/arch/x86/kvm/pmu.c
--- ./arch/x86/kvm/pmu.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/pmu.c 2013-07-14 18:18:27.000000000 -0400
@@ -160,7 +160,7 @@
static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
unsigned config, bool exclude_user, bool exclude_kernel,
- bool intr, bool in_tx, bool in_tx_cp)
+ bool intr)
{
struct perf_event *event;
struct perf_event_attr attr = {
@@ -173,10 +173,6 @@
.exclude_kernel = exclude_kernel,
.config = config,
};
- if (in_tx)
- attr.config |= HSW_IN_TX;
- if (in_tx_cp)
- attr.config |= HSW_IN_TX_CHECKPOINTED;
attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
@@ -230,9 +226,7 @@
if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
ARCH_PERFMON_EVENTSEL_INV |
- ARCH_PERFMON_EVENTSEL_CMASK |
- HSW_IN_TX |
- HSW_IN_TX_CHECKPOINTED))) {
+ ARCH_PERFMON_EVENTSEL_CMASK))) {
config = find_arch_event(&pmc->vcpu->arch.pmu, event_select,
unit_mask);
if (config != PERF_COUNT_HW_MAX)
@@ -245,9 +239,7 @@
reprogram_counter(pmc, type, config,
!(eventsel & ARCH_PERFMON_EVENTSEL_USR),
!(eventsel & ARCH_PERFMON_EVENTSEL_OS),
- eventsel & ARCH_PERFMON_EVENTSEL_INT,
- (eventsel & HSW_IN_TX),
- (eventsel & HSW_IN_TX_CHECKPOINTED));
+ eventsel & ARCH_PERFMON_EVENTSEL_INT);
}
static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
@@ -264,7 +256,7 @@
arch_events[fixed_pmc_events[idx]].event_type,
!(en & 0x2), /* exclude user */
!(en & 0x1), /* exclude kernel */
- pmi, false, false);
+ pmi);
}
static inline u8 fixed_en_pmi(u64 ctrl, int idx)
@@ -416,7 +408,7 @@
} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
if (data == pmc->eventsel)
return 0;
- if (!(data & pmu->reserved_bits)) {
+ if (!(data & 0xffffffff00200000ull)) {
reprogram_gp_counter(pmc, data);
return 0;
}
@@ -458,7 +450,6 @@
pmu->counter_bitmask[KVM_PMC_GP] = 0;
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
pmu->version = 0;
- pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
if (!entry)
@@ -487,12 +478,6 @@
pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
-
- entry = kvm_find_cpuid_entry(vcpu, 7, 0);
- if (entry &&
- (boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM)) &&
- (entry->ebx & (X86_FEATURE_HLE|X86_FEATURE_RTM)))
- pmu->reserved_bits ^= HSW_IN_TX|HSW_IN_TX_CHECKPOINTED;
}
void kvm_pmu_init(struct kvm_vcpu *vcpu)
diff -bur ./arch/x86/kvm/vmx.c ../linux-3.11-rc1/arch/x86/kvm/vmx.c
--- ./arch/x86/kvm/vmx.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/vmx.c 2013-07-14 18:18:27.000000000 -0400
@@ -373,7 +373,6 @@
* we must keep them pinned while L2 runs.
*/
struct page *apic_access_page;
- u64 msr_ia32_feature_control;
};
#define POSTED_INTR_ON 0
@@ -2283,11 +2282,8 @@
switch (msr_index) {
case MSR_IA32_FEATURE_CONTROL:
- if (nested_vmx_allowed(vcpu)) {
- *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+ *pdata = 0;
break;
- }
- return 0;
case MSR_IA32_VMX_BASIC:
/*
* This MSR reports some information about VMX support. We
@@ -2360,24 +2356,14 @@
return 1;
}
-static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
{
- u32 msr_index = msr_info->index;
- u64 data = msr_info->data;
- bool host_initialized = msr_info->host_initiated;
-
if (!nested_vmx_allowed(vcpu))
return 0;
- if (msr_index == MSR_IA32_FEATURE_CONTROL) {
- if (!host_initialized &&
- to_vmx(vcpu)->nested.msr_ia32_feature_control
- & FEATURE_CONTROL_LOCKED)
- return 0;
- to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
+ if (msr_index == MSR_IA32_FEATURE_CONTROL)
+ /* TODO: the right thing. */
return 1;
- }
-
/*
* No need to treat VMX capability MSRs specially: If we don't handle
* them, handle_wrmsr will #GP(0), which is correct (they are readonly)
@@ -2508,7 +2494,7 @@
return 1;
/* Otherwise falls through */
default:
- if (vmx_set_vmx_msr(vcpu, msr_info))
+ if (vmx_set_vmx_msr(vcpu, msr_index, data))
break;
msr = find_msr_entry(vmx, msr_index);
if (msr) {
@@ -5452,7 +5438,7 @@
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
- if (err == EMULATE_USER_EXIT) {
+ if (err == EMULATE_DO_MMIO) {
ret = 0;
goto out;
}
@@ -5581,47 +5567,8 @@
free_loaded_vmcs(&vmx->vmcs01);
}
-/*
- * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
- * set the success or error code of an emulated VMX instruction, as specified
- * by Vol 2B, VMX Instruction Reference, "Conventions".
- */
-static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
-{
- vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
- X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
-}
-
-static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
-{
- vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
- X86_EFLAGS_SF | X86_EFLAGS_OF))
- | X86_EFLAGS_CF);
-}
-
static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
- u32 vm_instruction_error)
-{
- if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
- /*
- * failValid writes the error number to the current VMCS, which
- * can't be done there isn't a current VMCS.
- */
- nested_vmx_failInvalid(vcpu);
- return;
- }
- vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
- & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
- X86_EFLAGS_SF | X86_EFLAGS_OF))
- | X86_EFLAGS_ZF);
- get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
- /*
- * We don't need to force a shadow sync because
- * VM_INSTRUCTION_ERROR is not shadowed
- */
-}
+ u32 vm_instruction_error);
/*
* Emulate the VMXON instruction.
@@ -5636,8 +5583,6 @@
struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs *shadow_vmcs;
- const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
- | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
/* The Intel VMX Instruction Reference lists a bunch of bits that
* are prerequisite to running VMXON, most notably cr4.VMXE must be
@@ -5666,13 +5611,6 @@
skip_emulated_instruction(vcpu);
return 1;
}
-
- if ((vmx->nested.msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
- != VMXON_NEEDED_FEATURES) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
if (enable_shadow_vmcs) {
shadow_vmcs = alloc_vmcs();
if (!shadow_vmcs)
@@ -5690,7 +5628,6 @@
vmx->nested.vmxon = true;
skip_emulated_instruction(vcpu);
- nested_vmx_succeed(vcpu);
return 1;
}
@@ -5775,7 +5712,6 @@
return 1;
free_nested(to_vmx(vcpu));
skip_emulated_instruction(vcpu);
- nested_vmx_succeed(vcpu);
return 1;
}
@@ -5832,6 +5768,48 @@
return 0;
}
+/*
+ * The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
+ * set the success or error code of an emulated VMX instruction, as specified
+ * by Vol 2B, VMX Instruction Reference, "Conventions".
+ */
+static void nested_vmx_succeed(struct kvm_vcpu *vcpu)
+{
+ vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
+}
+
+static void nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
+{
+ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+ X86_EFLAGS_SF | X86_EFLAGS_OF))
+ | X86_EFLAGS_CF);
+}
+
+static void nested_vmx_failValid(struct kvm_vcpu *vcpu,
+ u32 vm_instruction_error)
+{
+ if (to_vmx(vcpu)->nested.current_vmptr == -1ull) {
+ /*
+ * failValid writes the error number to the current VMCS, which
+ * can't be done there isn't a current VMCS.
+ */
+ nested_vmx_failInvalid(vcpu);
+ return;
+ }
+ vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
+ & ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_SF | X86_EFLAGS_OF))
+ | X86_EFLAGS_ZF);
+ get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
+ /*
+ * We don't need to force a shadow sync because
+ * VM_INSTRUCTION_ERROR is not shadowed
+ */
+}
+
/* Emulate the VMCLEAR instruction */
static int handle_vmclear(struct kvm_vcpu *vcpu)
{
@@ -5994,8 +5972,8 @@
unsigned long field;
u64 field_value;
struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs;
- const unsigned long *fields = shadow_read_write_fields;
- const int num_fields = max_shadow_read_write_fields;
+ unsigned long *fields = (unsigned long *)shadow_read_write_fields;
+ int num_fields = max_shadow_read_write_fields;
vmcs_load(shadow_vmcs);
@@ -6024,11 +6002,12 @@
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
{
- const unsigned long *fields[] = {
- shadow_read_write_fields,
- shadow_read_only_fields
+ unsigned long *fields[] = {
+ (unsigned long *)shadow_read_write_fields,
+ (unsigned long *)shadow_read_only_fields
};
- const int max_fields[] = {
+ int num_lists = ARRAY_SIZE(fields);
+ int max_fields[] = {
max_shadow_read_write_fields,
max_shadow_read_only_fields
};
@@ -6039,7 +6018,7 @@
vmcs_load(shadow_vmcs);
- for (q = 0; q < ARRAY_SIZE(fields); q++) {
+ for (q = 0; q < num_lists; q++) {
for (i = 0; i < max_fields[q]; i++) {
field = fields[q][i];
vmcs12_read_any(&vmx->vcpu, field, &field_value);
@@ -7969,8 +7948,6 @@
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- struct kvm_segment seg;
-
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
vcpu->arch.efer = vmcs12->host_ia32_efer;
else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
@@ -8024,6 +8001,16 @@
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
+ vmcs_writel(GUEST_TR_BASE, vmcs12->host_tr_base);
+ vmcs_writel(GUEST_GS_BASE, vmcs12->host_gs_base);
+ vmcs_writel(GUEST_FS_BASE, vmcs12->host_fs_base);
+ vmcs_write16(GUEST_ES_SELECTOR, vmcs12->host_es_selector);
+ vmcs_write16(GUEST_CS_SELECTOR, vmcs12->host_cs_selector);
+ vmcs_write16(GUEST_SS_SELECTOR, vmcs12->host_ss_selector);
+ vmcs_write16(GUEST_DS_SELECTOR, vmcs12->host_ds_selector);
+ vmcs_write16(GUEST_FS_SELECTOR, vmcs12->host_fs_selector);
+ vmcs_write16(GUEST_GS_SELECTOR, vmcs12->host_gs_selector);
+ vmcs_write16(GUEST_TR_SELECTOR, vmcs12->host_tr_selector);
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
@@ -8031,52 +8018,6 @@
vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL,
vmcs12->host_ia32_perf_global_ctrl);
- /* Set L1 segment info according to Intel SDM
- 27.5.2 Loading Host Segment and Descriptor-Table Registers */
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0xFFFFFFFF,
- .selector = vmcs12->host_cs_selector,
- .type = 11,
- .present = 1,
- .s = 1,
- .g = 1
- };
- if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
- seg.l = 1;
- else
- seg.db = 1;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0xFFFFFFFF,
- .type = 3,
- .present = 1,
- .s = 1,
- .db = 1,
- .g = 1
- };
- seg.selector = vmcs12->host_ds_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
- seg.selector = vmcs12->host_es_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
- seg.selector = vmcs12->host_ss_selector;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
- seg.selector = vmcs12->host_fs_selector;
- seg.base = vmcs12->host_fs_base;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
- seg.selector = vmcs12->host_gs_selector;
- seg.base = vmcs12->host_gs_base;
- vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
- seg = (struct kvm_segment) {
- .base = 0,
- .limit = 0x67,
- .selector = vmcs12->host_tr_selector,
- .type = 11,
- .present = 1
- };
- vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
-
kvm_set_dr(vcpu, 7, 0x400);
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
}
diff -bur ./arch/x86/kvm/x86.c ../linux-3.11-rc1/arch/x86/kvm/x86.c
--- ./arch/x86/kvm/x86.c 2014-01-09 19:34:10.233458290 -0500
+++ ../linux-3.11-rc1/arch/x86/kvm/x86.c 2013-07-14 18:18:27.000000000 -0400
@@ -850,8 +850,7 @@
#ifdef CONFIG_X86_64
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
- MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL
+ MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA
};
static unsigned num_msrs_to_save;
@@ -4956,97 +4955,6 @@
static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
static int complete_emulated_pio(struct kvm_vcpu *vcpu);
-static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
- unsigned long *db)
-{
- u32 dr6 = 0;
- int i;
- u32 enable, rwlen;
-
- enable = dr7;
- rwlen = dr7 >> 16;
- for (i = 0; i < 4; i++, enable >>= 2, rwlen >>= 4)
- if ((enable & 3) && (rwlen & 15) == type && db[i] == addr)
- dr6 |= (1 << i);
- return dr6;
-}
-
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, int *r)
-{
- struct kvm_run *kvm_run = vcpu->run;
-
- /*
- * Use the "raw" value to see if TF was passed to the processor.
- * Note that the new value of the flags has not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
-
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
- }
-}
-
-static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
-{
- struct kvm_run *kvm_run = vcpu->run;
- unsigned long eip = vcpu->arch.emulate_ctxt.eip;
- u32 dr6 = 0;
-
- if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
- (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
- dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.guest_debug_dr7,
- vcpu->arch.eff_db);
-
- if (dr6 != 0) {
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1;
- kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
- get_segment_base(vcpu, VCPU_SREG_CS);
-
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- return true;
- }
- }
-
- if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK)) {
- dr6 = kvm_vcpu_check_hw_bp(eip, 0,
- vcpu->arch.dr7,
- vcpu->arch.db);
-
- if (dr6 != 0) {
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= dr6;
- kvm_queue_exception(vcpu, DB_VECTOR);
- *r = EMULATE_DONE;
- return true;
- }
- }
-
- return false;
-}
-
int x86_emulate_instruction(struct kvm_vcpu *vcpu,
unsigned long cr2,
int emulation_type,
@@ -5067,16 +4975,6 @@
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
init_emulate_ctxt(vcpu);
-
- /*
- * We will reenter on the same instruction since
- * we do not set complete_userspace_io. This does not
- * handle watchpoints yet, those would be handled in
- * the emulate_ops.
- */
- if (kvm_vcpu_check_breakpoint(vcpu, &r))
- return r;
-
ctxt->interruptibility = 0;
ctxt->have_exception = false;
ctxt->perm_ok = false;
@@ -5139,11 +5037,11 @@
writeback = false;
vcpu->arch.complete_userspace_io = complete_emulated_pio;
}
- r = EMULATE_USER_EXIT;
+ r = EMULATE_DO_MMIO;
} else if (vcpu->mmio_needed) {
if (!vcpu->mmio_is_write)
writeback = false;
- r = EMULATE_USER_EXIT;
+ r = EMULATE_DO_MMIO;
vcpu->arch.complete_userspace_io = complete_emulated_mmio;
} else if (r == EMULATION_RESTART)
goto restart;
@@ -5152,12 +5050,10 @@
if (writeback) {
toggle_interruptibility(vcpu, ctxt->interruptibility);
+ kvm_set_rflags(vcpu, ctxt->eflags);
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, &r);
- kvm_set_rflags(vcpu, ctxt->eflags);
} else
vcpu->arch.emulate_regs_need_sync_to_vcpu = true;
@@ -5451,7 +5347,7 @@
int kvm_arch_init(void *opaque)
{
int r;
- struct kvm_x86_ops *ops = opaque;
+ struct kvm_x86_ops *ops = (struct kvm_x86_ops *)opaque;
if (kvm_x86_ops) {
printk(KERN_ERR "kvm: already loaded the other module\n");
@@ -7123,15 +7019,6 @@
return -ENOMEM;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
- /*
- * memslots->generation has been incremented.
- * mmio generation may have reached its maximum value.
- */
- kvm_mmu_invalidate_mmio_sptes(kvm);
-}
-
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
@@ -7192,6 +7079,11 @@
*/
if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
+ /*
+ * If memory slot is created, or moved, we need to clear all
+ * mmio sptes.
+ */
+ kvm_mmu_invalidate_mmio_sptes(kvm);
}
void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff -bur ./arch/x86/vdso/vclock_gettime.c ../linux-3.11-rc1/arch/x86/vdso/vclock_gettime.c
--- ./arch/x86/vdso/vclock_gettime.c 2014-01-09 19:34:10.263464776 -0500
+++ ../linux-3.11-rc1/arch/x86/vdso/vclock_gettime.c 2013-07-14 18:18:27.000000000 -0400
@@ -85,18 +85,15 @@
cycle_t ret;
u64 last;
u32 version;
+ u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
+ * When looping to get a consistent (time-info, tsc) pair, we
+ * also need to deal with the possibility we can switch vcpus,
+ * so make sure we always re-fetch time-info for the current vcpu.
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -107,6 +104,8 @@
pvti = get_pvti(cpu);
+ migrate_count = pvti->migrate_count;
+
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
@@ -118,7 +117,8 @@
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
+ pvti->pvti.version != version ||
+ pvti->migrate_count != migrate_count));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
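The vclock_gettime.c loop above is a seqlock-style retry extended with migrate_count. A reduced standalone sketch of its shape — names are illustrative, and it omits the memory barriers and the cpu/cpu1 re-check that the real vDSO code needs:

#include <stdint.h>

struct time_info {
    volatile uint32_t version;       /* odd while the host is updating */
    volatile uint32_t migrate_count; /* bumped when the task migrates  */
    volatile uint64_t ns;            /* the payload being read         */
};

static uint64_t read_clock(const struct time_info *ti)
{
    uint32_t version, migrate_count;
    uint64_t ns;

    do {
        version = ti->version;
        migrate_count = ti->migrate_count;
        ns = ti->ns;                          /* speculative payload read */
    } while ((version & 1) ||                 /* update was in progress   */
             version != ti->version ||        /* payload changed under us */
             migrate_count != ti->migrate_count); /* we switched CPUs     */

    return ns;
}

int main(void)
{
    struct time_info ti = { 2, 0, 123456789 };

    return read_clock(&ti) == 123456789 ? 0 : 1;
}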
Only in .: .config
Only in .: .config.old
Only in .: .git
Only in ./include: config
Only in ./include: generated
diff -bur ./include/linux/kvm_host.h ../linux-3.11-rc1/include/linux/kvm_host.h
--- ./include/linux/kvm_host.h 2014-01-09 19:34:13.253451268 -0500
+++ ../linux-3.11-rc1/include/linux/kvm_host.h 2013-07-14 18:18:27.000000000 -0400
@@ -33,8 +33,6 @@
#include <asm/kvm_host.h>
-#include <linux/nitro_main.h>
-
#ifndef KVM_MMIO_SIZE
#define KVM_MMIO_SIZE 8
#endif
@@ -87,12 +85,6 @@
return pfn == KVM_PFN_NOSLOT;
}
-/*
- * architectures with KVM_HVA_ERR_BAD other than PAGE_OFFSET (e.g. s390)
- * provide own defines and kvm_is_error_hva
- */
-#ifndef KVM_HVA_ERR_BAD
-
#define KVM_HVA_ERR_BAD (PAGE_OFFSET)
#define KVM_HVA_ERR_RO_BAD (PAGE_OFFSET + PAGE_SIZE)
@@ -101,8 +93,6 @@
return addr >= PAGE_OFFSET;
}
-#endif
-
#define KVM_ERR_PTR_BAD_PAGE (ERR_PTR(-ENOENT))
static inline bool is_error_page(struct page *page)
@@ -170,12 +160,8 @@
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val);
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, const void *val, long cookie);
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
void *val);
-int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
- int len, void *val, long cookie);
int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, struct kvm_io_device *dev);
int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -408,8 +394,6 @@
#endif
long tlbs_dirty;
struct list_head devices;
-
- struct nitro_kvm *nitro_kvm;
};
#define kvm_err(fmt, ...) \
@@ -515,7 +499,6 @@
void kvm_arch_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages);
-void kvm_arch_memslots_updated(struct kvm *kvm);
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem,
Only in ./include/linux: nitro.h
Only in ./include/linux: nitro_main.h
diff -bur ./include/linux/sched.h ../linux-3.11-rc1/include/linux/sched.h
--- ./include/linux/sched.h 2014-01-09 19:34:13.323458542 -0500
+++ ../linux-3.11-rc1/include/linux/sched.h 2013-07-14 18:18:27.000000000 -0400
@@ -107,6 +107,14 @@
extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+ struct task_struct *task;
+ int from_cpu;
+ int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
extern unsigned long get_parent_ip(unsigned long addr);
extern void dump_cpu_task(int cpu);
diff -bur ./kernel/sched/core.c ../linux-3.11-rc1/kernel/sched/core.c
--- ./kernel/sched/core.c 2014-01-09 19:34:13.503458398 -0500
+++ ../linux-3.11-rc1/kernel/sched/core.c 2013-07-14 18:18:27.000000000 -0400
@@ -976,6 +976,13 @@
rq->skip_clock_update = 1;
}
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1006,10 +1013,18 @@
trace_sched_migrate_task(p, new_cpu);
if (task_cpu(p) != new_cpu) {
+ struct task_migration_notifier tmn;
+
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0);
+
+ tmn.task = p;
+ tmn.from_cpu = task_cpu(p);
+ tmn.to_cpu = new_cpu;
+
+ atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}
__set_task_cpu(p, new_cpu);
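register_task_migration_notifier(), added to sched.h and core.c above, is what the arch/x86/kernel/pvclock.c hunk earlier in this diff hooks into. A self-contained userspace sketch of the registration/callback shape; the kernel's atomic notifier chain is modeled with a plain singly linked list, so only the shape matches core.c:

#include <stdio.h>

struct notifier_block {
    int (*notifier_call)(struct notifier_block *nb, unsigned long action,
                         void *data);
    struct notifier_block *next;
};

struct task_migration_notifier { int from_cpu, to_cpu; };

static struct notifier_block *task_migration_chain;

static void register_task_migration_notifier(struct notifier_block *n)
{
    n->next = task_migration_chain;   /* real code uses an atomic chain */
    task_migration_chain = n;
}

/* Stand-in for the atomic_notifier_call_chain() call in set_task_cpu(). */
static void fire_migration(int from, int to)
{
    struct task_migration_notifier tmn = { from, to };
    struct notifier_block *nb;

    for (nb = task_migration_chain; nb; nb = nb->next)
        nb->notifier_call(nb, 0, &tmn);
}

static int on_migrate(struct notifier_block *nb, unsigned long action,
                      void *data)
{
    struct task_migration_notifier *tmn = data;

    (void)nb;
    (void)action;
    printf("migrated %d -> %d\n", tmn->from_cpu, tmn->to_cpu);
    return 0;   /* NOTIFY_DONE */
}

static struct notifier_block mn = { .notifier_call = on_migrate };

int main(void)
{
    register_task_migration_notifier(&mn);
    fire_migration(0, 3);
    return 0;
}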
Only in .: kinst.sh | |
Only in .: Module.symvers | |
Only in .: nitro_diff | |
Only in ./scripts/basic: fixdep | |
Only in ./scripts: conmakehash | |
Only in ./scripts/genksyms: genksyms | |
Only in ./scripts/genksyms: keywords.hash.c | |
Only in ./scripts/genksyms: lex.lex.c | |
Only in ./scripts/genksyms: parse.tab.c | |
Only in ./scripts/genksyms: parse.tab.h | |
Only in ./scripts: kallsyms | |
Only in ./scripts/kconfig: conf | |
Only in ./scripts/kconfig: mconf | |
Only in ./scripts/kconfig: zconf.hash.c | |
Only in ./scripts/kconfig: zconf.lex.c | |
Only in ./scripts/kconfig: zconf.tab.c | |
Only in ./scripts/mod: devicetable-offsets.h | |
Only in ./scripts/mod: elfconfig.h | |
Only in ./scripts/mod: mk_elfconfig | |
Only in ./scripts/mod: modpost | |
Only in ./scripts: recordmcount | |
Only in ./scripts/selinux/genheaders: genheaders | |
Only in ./scripts/selinux/mdp: mdp | |
Only in ./scripts: sortextable | |
Only in ./scripts: unifdef | |
Only in ./security/tomoyo: builtin-policy.h | |
Only in ./security/tomoyo: policy | |
Only in .: .version | |
diff -bur ./virt/kvm/kvm_main.c ../linux-3.11-rc1/virt/kvm/kvm_main.c | |
--- ./virt/kvm/kvm_main.c 2014-01-09 19:34:14.283464649 -0500 | |
+++ ../linux-3.11-rc1/virt/kvm/kvm_main.c 2013-07-14 18:18:27.000000000 -0400 | |
@@ -61,9 +61,6 @@ | |
#define CREATE_TRACE_POINTS | |
#include <trace/events/kvm.h> | |
-#include <linux/nitro.h> | |
-#include <linux/nitro_main.h> | |
- | |
MODULE_AUTHOR("Qumranet"); | |
MODULE_LICENSE("GPL"); | |
@@ -603,8 +600,6 @@ | |
int i; | |
struct mm_struct *mm = kvm->mm; | |
- nitro_destroy_vm_hook(kvm); | |
- | |
kvm_arch_sync_events(kvm); | |
raw_spin_lock(&kvm_lock); | |
list_del(&kvm->vm_list); | |
@@ -736,9 +731,6 @@ | |
update_memslots(slots, new, kvm->memslots->generation); | |
rcu_assign_pointer(kvm->memslots, slots); | |
synchronize_srcu_expedited(&kvm->srcu); | |
- | |
- kvm_arch_memslots_updated(kvm); | |
- | |
return old_memslots; | |
} | |
@@ -1899,7 +1891,7 @@ | |
/* | |
* Allocates an inode for the vcpu. | |
*/ | |
-int create_vcpu_fd(struct kvm_vcpu *vcpu) | |
+static int create_vcpu_fd(struct kvm_vcpu *vcpu) | |
{ | |
return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, O_RDWR); | |
} | |
@@ -1983,8 +1975,8 @@ | |
struct kvm_fpu *fpu = NULL; | |
struct kvm_sregs *kvm_sregs = NULL; | |
- //if (vcpu->kvm->mm != current->mm) | |
- // return -EIO; | |
+ if (vcpu->kvm->mm != current->mm) | |
+ return -EIO; | |
#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS) | |
/* | |
@@ -2329,8 +2321,8 @@ | |
void __user *argp = (void __user *)arg; | |
int r; | |
- //if (kvm->mm != current->mm) | |
- // return -EIO; | |
+ if (kvm->mm != current->mm) | |
+ return -EIO; | |
switch (ioctl) { | |
case KVM_CREATE_VCPU: | |
r = kvm_vm_ioctl_create_vcpu(kvm, arg); | |
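
The two re-enabled guards above are a key piece of the nitro changes in this file: vanilla rejects vcpu and vm ioctls from any process whose mm is not the VM's own with -EIO, while the nitro tree comments the checks out — presumably so that a separate introspection process that obtained a VM fd via KVM_NITRO_ATTACH_VM (removed below) can issue those ioctls even though kvm->mm is not its mm.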
@@ -2485,24 +2477,6 @@ | |
r = 0; | |
break; | |
} | |
- case KVM_NITRO_ATTACH_VCPUS: { | |
- int i; | |
- struct nitro_vcpus nvcpus; | |
- | |
- r = nitro_iotcl_attach_vcpus(kvm,&nvcpus); | |
- if (r) | |
- goto out; | |
- | |
- r = -EFAULT; | |
- if (copy_to_user(argp, &nvcpus, sizeof(nvcpus))){ | |
- for(i=0;i<nvcpus.num_vcpus;i++) | |
- kvm_put_kvm(kvm); | |
- goto out; | |
- } | |
- | |
- r = 0; | |
- break; | |
- } | |
default: | |
r = kvm_arch_vm_ioctl(filp, ioctl, arg); | |
if (r == -ENOTTY) | |
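
The removed KVM_NITRO_ATTACH_VCPUS case hands per-vcpu handles back to the caller, and its error path drops one kvm reference per vcpu, implying nitro_iotcl_attach_vcpus() takes one per vcpu; the earlier hunk making create_vcpu_fd() non-static in the nitro tree presumably exists so that code outside this file can mint vcpu fds. A hypothetical userspace sketch — struct nitro_vcpus and the ioctl number come from the out-of-tree nitro headers, and only the num_vcpus member is visible in this diff:

/* Hypothetical usage; KVM_NITRO_ATTACH_VCPUS and struct nitro_vcpus are
 * defined by the out-of-tree nitro headers, not by mainline KVM. */
#include <stdio.h>
#include <sys/ioctl.h>

static int attach_vcpus(int vm_fd)
{
	struct nitro_vcpus nvcpus;

	if (ioctl(vm_fd, KVM_NITRO_ATTACH_VCPUS, &nvcpus) < 0)
		return -1;
	printf("attached %d vcpus\n", nvcpus.num_vcpus);
	return 0;
}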
@@ -2612,8 +2586,6 @@ | |
return r; | |
} | |
#endif | |
- nitro_create_vm_hook(kvm); | |
- | |
r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | |
if (r < 0) | |
kvm_put_kvm(kvm); | |
@@ -2651,7 +2623,6 @@ | |
static long kvm_dev_ioctl(struct file *filp, | |
unsigned int ioctl, unsigned long arg) | |
{ | |
- void __user *argp = (void __user *)arg; | |
long r = -EINVAL; | |
switch (ioctl) { | |
@@ -2684,28 +2655,6 @@ | |
case KVM_TRACE_DISABLE: | |
r = -EOPNOTSUPP; | |
break; | |
- case KVM_NITRO_NUM_VMS: | |
- r = nitro_iotcl_num_vms(); | |
- break; | |
- case KVM_NITRO_ATTACH_VM: { | |
- pid_t creator; | |
- struct kvm *kvm; | |
- | |
- r = -EFAULT; | |
- if (copy_from_user(&creator, argp, sizeof(pid_t))) | |
- goto out; | |
- | |
- r = -ESRCH; | |
- kvm = nitro_get_vm_by_creator(creator); | |
- if(kvm == NULL) | |
- goto out; | |
- | |
- kvm_get_kvm(kvm); | |
- r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR); | |
- if(r<0) | |
- kvm_put_kvm(kvm); | |
- break; | |
- } | |
default: | |
return kvm_arch_dev_ioctl(filp, ioctl, arg); | |
} | |
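
These removed device-level ioctls are the attach side of the nitro interface: KVM_NITRO_NUM_VMS reports how many VMs exist, and KVM_NITRO_ATTACH_VM looks a VM up by its creator's pid, takes a reference, and returns a fresh "kvm-vm" file descriptor. A hypothetical userspace sketch reconstructed from the removed kernel code (the ioctl number itself comes from the out-of-tree nitro headers):

/* Hypothetical usage of the removed KVM_NITRO_ATTACH_VM ioctl. */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int nitro_attach_vm(pid_t creator)
{
	int dev_fd, vm_fd;

	dev_fd = open("/dev/kvm", O_RDWR);
	if (dev_fd < 0)
		return -1;

	/* The kernel side copies a pid_t in and, on success, returns a
	 * new kvm-vm fd; -ESRCH means no VM with that creator exists. */
	vm_fd = ioctl(dev_fd, KVM_NITRO_ATTACH_VM, &creator);
	close(dev_fd);
	return vm_fd;
}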
@@ -2863,9 +2812,11 @@ | |
kfree(bus); | |
} | |
-static inline int __kvm_io_bus_sort_cmp(const struct kvm_io_range *r1, | |
- const struct kvm_io_range *r2) | |
+static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | |
{ | |
+ const struct kvm_io_range *r1 = p1; | |
+ const struct kvm_io_range *r2 = p2; | |
+ | |
if (r1->addr < r2->addr) | |
return -1; | |
if (r1->addr + r1->len > r2->addr + r2->len) | |
@@ -2873,11 +2824,6 @@ | |
return 0; | |
} | |
-static int kvm_io_bus_sort_cmp(const void *p1, const void *p2) | |
-{ | |
- return __kvm_io_bus_sort_cmp(p1, p2); | |
-} | |
- | |
static int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, | |
gpa_t addr, int len) | |
{ | |
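
The two hunks above collapse the nitro tree's typed __kvm_io_bus_sort_cmp() plus its void-pointer wrapper back into the single sort()-compatible comparator that vanilla uses. The const void * signature exists because lib/sort.c's sort() is type-erased; in vanilla 3.11 the comparator is passed to sort() from kvm_io_bus_insert_dev(), roughly as sketched here (context fragment, not new code):

/* lib/sort.c requires int (*cmp)(const void *, const void *), which is
 * why the single-comparator form casts inside rather than outside. */
sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
     kvm_io_bus_sort_cmp, NULL);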
@@ -2911,54 +2857,17 @@ | |
off = range - bus->range; | |
- while (off > 0 && __kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | |
+ while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) | |
off--; | |
return off; | |
} | |
-static int __kvm_io_bus_write(struct kvm_io_bus *bus, | |
- struct kvm_io_range *range, const void *val) | |
-{ | |
- int idx; | |
- | |
- idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); | |
- if (idx < 0) | |
- return -EOPNOTSUPP; | |
- | |
- while (idx < bus->dev_count && | |
- __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { | |
- if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, | |
- range->len, val)) | |
- return idx; | |
- idx++; | |
- } | |
- | |
- return -EOPNOTSUPP; | |
-} | |
- | |
/* kvm_io_bus_write - called under kvm->slots_lock */ | |
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |
int len, const void *val) | |
{ | |
- struct kvm_io_bus *bus; | |
- struct kvm_io_range range; | |
- int r; | |
- | |
- range = (struct kvm_io_range) { | |
- .addr = addr, | |
- .len = len, | |
- }; | |
- | |
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | |
- r = __kvm_io_bus_write(bus, &range, val); | |
- return r < 0 ? r : 0; | |
-} | |
- | |
-/* kvm_io_bus_write_cookie - called under kvm->slots_lock */ | |
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |
- int len, const void *val, long cookie) | |
-{ | |
+ int idx; | |
struct kvm_io_bus *bus; | |
struct kvm_io_range range; | |
@@ -2968,35 +2877,14 @@ | |
}; | |
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | |
- | |
- /* First try the device referenced by cookie. */ | |
- if ((cookie >= 0) && (cookie < bus->dev_count) && | |
- (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) | |
- if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, | |
- val)) | |
- return cookie; | |
- | |
- /* | |
- * cookie contained garbage; fall back to search and return the | |
- * correct cookie value. | |
- */ | |
- return __kvm_io_bus_write(bus, &range, val); | |
-} | |
- | |
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, | |
- void *val) | |
-{ | |
- int idx; | |
- | |
- idx = kvm_io_bus_get_first_dev(bus, range->addr, range->len); | |
+ idx = kvm_io_bus_get_first_dev(bus, addr, len); | |
if (idx < 0) | |
return -EOPNOTSUPP; | |
while (idx < bus->dev_count && | |
- __kvm_io_bus_sort_cmp(range, &bus->range[idx]) == 0) { | |
- if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, | |
- range->len, val)) | |
- return idx; | |
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | |
+ if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) | |
+ return 0; | |
idx++; | |
} | |
@@ -3007,24 +2895,7 @@ | |
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |
int len, void *val) | |
{ | |
- struct kvm_io_bus *bus; | |
- struct kvm_io_range range; | |
- int r; | |
- | |
- range = (struct kvm_io_range) { | |
- .addr = addr, | |
- .len = len, | |
- }; | |
- | |
- bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | |
- r = __kvm_io_bus_read(bus, &range, val); | |
- return r < 0 ? r : 0; | |
-} | |
- | |
-/* kvm_io_bus_read_cookie - called under kvm->slots_lock */ | |
-int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, | |
- int len, void *val, long cookie) | |
-{ | |
+ int idx; | |
struct kvm_io_bus *bus; | |
struct kvm_io_range range; | |
@@ -3034,19 +2905,18 @@ | |
}; | |
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); | |
+ idx = kvm_io_bus_get_first_dev(bus, addr, len); | |
+ if (idx < 0) | |
+ return -EOPNOTSUPP; | |
- /* First try the device referenced by cookie. */ | |
- if ((cookie >= 0) && (cookie < bus->dev_count) && | |
- (__kvm_io_bus_sort_cmp(&range, &bus->range[cookie]) == 0)) | |
- if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len, | |
- val)) | |
- return cookie; | |
+ while (idx < bus->dev_count && | |
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { | |
+ if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) | |
+ return 0; | |
+ idx++; | |
+ } | |
- /* | |
- * cookie contained garbage; fall back to search and return the | |
- * correct cookie value. | |
- */ | |
- return __kvm_io_bus_read(bus, &range, val); | |
+ return -EOPNOTSUPP; | |
} | |
/* Caller must hold slots_lock. */ |
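
The last few hunks replace the nitro tree's cookie-based I/O bus helpers with vanilla's direct search: the _cookie variants first probe bus->range[cookie] (the index returned by a previous call) and only fall back to the full lookup when that entry no longer matches. A hypothetical caller-side sketch of the removed fast path, using the signature shown in the diff:

/* Sketch: cache the index returned by the cookie variant and pass it
 * back on the next access to skip kvm_io_bus_get_first_dev(). A stale
 * or negative cookie simply falls back to the search. */
long cookie = -1;

cookie = kvm_io_bus_write_cookie(kvm, KVM_MMIO_BUS, addr, len, val, cookie);
if (cookie < 0)
	return cookie;	/* no device claimed the access */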