Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 395
1 file changed, 183 insertions, 212 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e3394c839dea..9a6664886f2e 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1057,6 +1057,12 @@ static unsigned long segment_base(u16 selector)
 }
 #endif
 
+static inline bool pt_can_write_msr(struct vcpu_vmx *vmx)
+{
+	return (pt_mode == PT_MODE_HOST_GUEST) &&
+	       !(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN);
+}
+
 static inline void pt_load_msr(struct pt_ctx *ctx, u32 addr_range)
 {
 	u32 i;
@@ -1422,8 +1428,6 @@ static bool emulation_required(struct kvm_vcpu *vcpu)
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
 }
 
-static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
-
 unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -1716,7 +1720,7 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 
 	if (is_guest_mode(vcpu) &&
-	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
 		return vcpu->arch.tsc_offset - vmcs12->tsc_offset;
 
 	return vcpu->arch.tsc_offset;
@@ -1734,7 +1738,7 @@ static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 	 * to the newly set TSC to get L2's TSC.
 	 */
 	if (is_guest_mode(vcpu) &&
-	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING))
+	    (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING))
 		g_tsc_offset = vmcs12->tsc_offset;
 
 	trace_kvm_write_tsc_offset(vcpu->vcpu_id,
@@ -1773,8 +1777,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 	default:
 		return 1;
 	}
-
-	return 0;
 }
 
 /*
@@ -1839,18 +1841,30 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MCG_EXT_CTL:
 		if (!msr_info->host_initiated &&
 		    !(vmx->msr_ia32_feature_control &
-		      FEATURE_CONTROL_LMCE))
+		      FEAT_CTL_LMCE_ENABLED))
 			return 1;
 		msr_info->data = vcpu->arch.mcg_ext_ctl;
 		break;
-	case MSR_IA32_FEATURE_CONTROL:
+	case MSR_IA32_FEAT_CTL:
 		msr_info->data = vmx->msr_ia32_feature_control;
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
-		return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
-				       &msr_info->data);
+		if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
+				    &msr_info->data))
+			return 1;
+		/*
+		 * Enlightened VMCS v1 doesn't have certain fields, but buggy
+		 * Hyper-V versions are still trying to use corresponding
+		 * features when they are exposed. Filter out the essential
+		 * minimum.
+		 */
+		if (!msr_info->host_initiated &&
+		    vmx->nested.enlightened_vmcs_enabled)
+			nested_evmcs_filter_control_msr(msr_info->index,
+							&msr_info->data);
+		break;
 	case MSR_IA32_RTIT_CTL:
 		if (pt_mode != PT_MODE_HOST_GUEST)
 			return 1;
@@ -1916,7 +1930,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 }
 
 /*
- * Writes msr value into into the appropriate "register".
+ * Writes msr value into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
 */
@@ -1994,12 +2008,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		    !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
 			return 1;
 
-		/* The STIBP bit doesn't fault even if it's not advertised */
-		if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
+		if (data & ~kvm_spec_ctrl_valid_bits(vcpu))
 			return 1;
 
 		vmx->spec_ctrl = data;
-
 		if (!data)
 			break;
 
@@ -2010,7 +2022,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 *
 		 * For nested:
 		 * The handling of the MSR bitmap for L2 guests is done in
-		 * nested_vmx_merge_msr_bitmap. We should not touch the
+		 * nested_vmx_prepare_msr_bitmap. We should not touch the
 		 * vmcs02.msr_bitmap here since it gets completely overwritten
 		 * in the merging. We update the vmcs01 here for L1 as well
 		 * since it will end up touching the MSR anyway now.
@@ -2033,7 +2045,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 
 		if (data & ~PRED_CMD_IBPB)
 			return 1;
-
+		if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL))
+			return 1;
 		if (!data)
 			break;
 
@@ -2046,7 +2059,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		 *
 		 * For nested:
 		 * The handling of the MSR bitmap for L2 guests is done in
-		 * nested_vmx_merge_msr_bitmap. We should not touch the
+		 * nested_vmx_prepare_msr_bitmap. We should not touch the
 		 * vmcs02.msr_bitmap here since it gets completely overwritten
 		 * in the merging.
 		 */
@@ -2074,15 +2087,15 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_MCG_EXT_CTL:
 		if ((!msr_info->host_initiated &&
 		     !(to_vmx(vcpu)->msr_ia32_feature_control &
-		       FEATURE_CONTROL_LMCE)) ||
+		       FEAT_CTL_LMCE_ENABLED)) ||
 		    (data & ~MCG_EXT_CTL_LMCE_EN))
 			return 1;
 		vcpu->arch.mcg_ext_ctl = data;
 		break;
-	case MSR_IA32_FEATURE_CONTROL:
+	case MSR_IA32_FEAT_CTL:
 		if (!vmx_feature_control_msr_valid(vcpu, data) ||
 		    (to_vmx(vcpu)->msr_ia32_feature_control &
-		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
+		     FEAT_CTL_LOCKED && !msr_info->host_initiated))
 			return 1;
 		vmx->msr_ia32_feature_control = data;
 		if (msr_info->host_initiated && data == 0)
@@ -2104,47 +2117,50 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			pt_update_intercept_for_msr(vmx);
 		break;
 	case MSR_IA32_RTIT_STATUS:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-			(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-			(data & MSR_IA32_RTIT_STATUS_MASK))
+		if (!pt_can_write_msr(vmx))
+			return 1;
+		if (data & MSR_IA32_RTIT_STATUS_MASK)
 			return 1;
 		vmx->pt_desc.guest.status = data;
 		break;
 	case MSR_IA32_RTIT_CR3_MATCH:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-			(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-			!intel_pt_validate_cap(vmx->pt_desc.caps,
-						PT_CAP_cr3_filtering))
+		if (!pt_can_write_msr(vmx))
+			return 1;
+		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+					   PT_CAP_cr3_filtering))
 			return 1;
 		vmx->pt_desc.guest.cr3_match = data;
 		break;
 	case MSR_IA32_RTIT_OUTPUT_BASE:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-			(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-			(!intel_pt_validate_cap(vmx->pt_desc.caps,
-					PT_CAP_topa_output) &&
-			 !intel_pt_validate_cap(vmx->pt_desc.caps,
-					PT_CAP_single_range_output)) ||
-			(data & MSR_IA32_RTIT_OUTPUT_BASE_MASK))
+		if (!pt_can_write_msr(vmx))
+			return 1;
+		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+					   PT_CAP_topa_output) &&
+		    !intel_pt_validate_cap(vmx->pt_desc.caps,
+					   PT_CAP_single_range_output))
+			return 1;
+		if (data & MSR_IA32_RTIT_OUTPUT_BASE_MASK)
 			return 1;
 		vmx->pt_desc.guest.output_base = data;
 		break;
 	case MSR_IA32_RTIT_OUTPUT_MASK:
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-			(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-			(!intel_pt_validate_cap(vmx->pt_desc.caps,
-					PT_CAP_topa_output) &&
-			 !intel_pt_validate_cap(vmx->pt_desc.caps,
-					PT_CAP_single_range_output)))
+		if (!pt_can_write_msr(vmx))
+			return 1;
+		if (!intel_pt_validate_cap(vmx->pt_desc.caps,
+					   PT_CAP_topa_output) &&
+		    !intel_pt_validate_cap(vmx->pt_desc.caps,
+					   PT_CAP_single_range_output))
			return 1;
 		vmx->pt_desc.guest.output_mask = data;
 		break;
 	case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
+		if (!pt_can_write_msr(vmx))
+			return 1;
 		index = msr_info->index - MSR_IA32_RTIT_ADDR0_A;
-		if ((pt_mode != PT_MODE_HOST_GUEST) ||
-			(vmx->pt_desc.guest.ctl & RTIT_CTL_TRACEEN) ||
-			(index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
-					PT_CAP_num_address_ranges)))
+		if (index >= 2 * intel_pt_validate_cap(vmx->pt_desc.caps,
+						       PT_CAP_num_address_ranges))
+			return 1;
+		if (is_noncanonical_address(data, vcpu))
 			return 1;
 		if (index % 2)
 			vmx->pt_desc.guest.addr_b[index / 2] = data;
@@ -2204,29 +2220,8 @@ static __init int cpu_has_kvm_support(void)
 
 static __init int vmx_disabled_by_bios(void)
 {
-	u64 msr;
-
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-	if (msr & FEATURE_CONTROL_LOCKED) {
-		/* launched w/ TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& tboot_enabled())
-			return 1;
-		/* launched w/o TXT and VMX only enabled w/ TXT */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& (msr & FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX)
-			&& !tboot_enabled()) {
-			printk(KERN_WARNING "kvm: disable TXT in the BIOS or "
-				"activate TXT before enabling KVM\n");
-			return 1;
-		}
-		/* launched w/o TXT and VMX disabled */
-		if (!(msr & FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX)
-			&& !tboot_enabled())
-			return 1;
-	}
-
-	return 0;
+	return !boot_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	       !boot_cpu_has(X86_FEATURE_VMX);
 }
 
 static void kvm_cpu_vmxon(u64 addr)
@@ -2241,7 +2236,6 @@ static int hardware_enable(void)
 {
 	int cpu = raw_smp_processor_id();
 	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
-	u64 old, test_bits;
 
 	if (cr4_read_shadow() & X86_CR4_VMXE)
 		return -EBUSY;
@@ -2269,17 +2263,6 @@ static int hardware_enable(void)
 	 */
 	crash_enable_local_vmclear(cpu);
 
-	rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-
-	test_bits = FEATURE_CONTROL_LOCKED;
-	test_bits |= FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
-	if (tboot_enabled())
-		test_bits |= FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX;
-
-	if ((old & test_bits) != test_bits) {
-		/* enable and lock */
-		wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
-	}
 	kvm_cpu_vmxon(phys_addr);
 	if (enable_ept)
 		ept_sync_global();
@@ -2355,7 +2338,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
 	      CPU_BASED_CR3_STORE_EXITING |
 	      CPU_BASED_UNCOND_IO_EXITING |
 	      CPU_BASED_MOV_DR_EXITING |
-	      CPU_BASED_USE_TSC_OFFSETING |
+	      CPU_BASED_USE_TSC_OFFSETTING |
 	      CPU_BASED_MWAIT_EXITING |
 	      CPU_BASED_MONITOR_EXITING |
 	      CPU_BASED_INVLPG_EXITING |
@@ -2690,8 +2673,6 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx->rmode.vm86_active = 0;
 
-	vmx_segment_cache_clear(vmx);
-
 	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
@@ -3477,7 +3458,7 @@ out:
 static int init_rmode_identity_map(struct kvm *kvm)
 {
 	struct kvm_vmx *kvm_vmx = to_kvm_vmx(kvm);
-	int i, idx, r = 0;
+	int i, r = 0;
 	kvm_pfn_t identity_map_pfn;
 	u32 tmp;
@@ -3485,7 +3466,7 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	mutex_lock(&kvm->slots_lock);
 
 	if (likely(kvm_vmx->ept_identity_pagetable_done))
-		goto out2;
+		goto out;
 
 	if (!kvm_vmx->ept_identity_map_addr)
 		kvm_vmx->ept_identity_map_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
@@ -3494,9 +3475,8 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
 				    kvm_vmx->ept_identity_map_addr, PAGE_SIZE);
 	if (r < 0)
-		goto out2;
+		goto out;
 
-	idx = srcu_read_lock(&kvm->srcu);
 	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
 	if (r < 0)
 		goto out;
@@ -3512,9 +3492,6 @@ static int init_rmode_identity_map(struct kvm *kvm)
 	kvm_vmx->ept_identity_pagetable_done = true;
 
 out:
-	srcu_read_unlock(&kvm->srcu, idx);
-
-out2:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
 }
@@ -3752,11 +3729,6 @@ void pt_update_intercept_for_msr(struct vcpu_vmx *vmx)
 	}
 }
 
-static bool vmx_get_enable_apicv(struct kvm *kvm)
-{
-	return enable_apicv;
-}
-
 static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -4042,6 +4014,7 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
 	if (vmx_xsaves_supported()) {
 		/* Exposing XSAVES only when XSAVE is exposed */
 		bool xsaves_enabled =
+			boot_cpu_has(X86_FEATURE_XSAVE) &&
 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
 			guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
 
@@ -4352,7 +4325,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 static void enable_irq_window(struct kvm_vcpu *vcpu)
 {
-	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
+	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
 }
 
 static void enable_nmi_window(struct kvm_vcpu *vcpu)
@@ -4363,7 +4336,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 		return;
 	}
 
-	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
+	exec_controls_setbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
 }
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@@ -4488,8 +4461,11 @@ static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
 	if (enable_unrestricted_guest)
 		return 0;
 
-	ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
-				    PAGE_SIZE * 3);
+	mutex_lock(&kvm->slots_lock);
+	ret = __x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+				      PAGE_SIZE * 3);
+	mutex_unlock(&kvm->slots_lock);
+
 	if (ret)
 		return ret;
 	to_kvm_vmx(kvm)->tss_addr = addr;
@@ -4971,7 +4947,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
 
 static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 {
-	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
+	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_INTR_WINDOW_EXITING);
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
@@ -5184,7 +5160,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 static int handle_nmi_window(struct kvm_vcpu *vcpu)
 {
 	WARN_ON_ONCE(!enable_vnmi);
-	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
+	exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_NMI_WINDOW_EXITING);
 	++vcpu->stat.nmi_window_exits;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
@@ -5205,7 +5181,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 	WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
 
 	intr_window_requested = exec_controls_get(vmx) &
-				CPU_BASED_VIRTUAL_INTR_PENDING;
+				CPU_BASED_INTR_WINDOW_EXITING;
 
 	while (vmx->emulation_required && count-- != 0) {
 		if (intr_window_requested && vmx_interrupt_allowed(vcpu))
@@ -5529,7 +5505,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_CPUID] = kvm_emulate_cpuid,
 	[EXIT_REASON_MSR_READ] = kvm_emulate_rdmsr,
 	[EXIT_REASON_MSR_WRITE] = kvm_emulate_wrmsr,
-	[EXIT_REASON_PENDING_INTERRUPT] = handle_interrupt_window,
+	[EXIT_REASON_INTERRUPT_WINDOW] = handle_interrupt_window,
 	[EXIT_REASON_HLT] = kvm_emulate_halt,
 	[EXIT_REASON_INVD] = handle_invd,
 	[EXIT_REASON_INVLPG] = handle_invlpg,
@@ -5816,7 +5792,8 @@ void dump_vmcs(void)
 * The guest has exited.  See if we can fix it or if we need userspace
 * assistance.
 */
-static int vmx_handle_exit(struct kvm_vcpu *vcpu)
+static int vmx_handle_exit(struct kvm_vcpu *vcpu,
+	enum exit_fastpath_completion exit_fastpath)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 exit_reason = vmx->exit_reason;
@@ -5902,34 +5879,44 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	if (exit_reason < kvm_vmx_max_exit_handlers
-	    && kvm_vmx_exit_handlers[exit_reason]) {
+	if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) {
+		kvm_skip_emulated_instruction(vcpu);
+		return 1;
+	}
+
+	if (exit_reason >= kvm_vmx_max_exit_handlers)
+		goto unexpected_vmexit;
 #ifdef CONFIG_RETPOLINE
-		if (exit_reason == EXIT_REASON_MSR_WRITE)
-			return kvm_emulate_wrmsr(vcpu);
-		else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
-			return handle_preemption_timer(vcpu);
-		else if (exit_reason == EXIT_REASON_PENDING_INTERRUPT)
-			return handle_interrupt_window(vcpu);
-		else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
-			return handle_external_interrupt(vcpu);
-		else if (exit_reason == EXIT_REASON_HLT)
-			return kvm_emulate_halt(vcpu);
-		else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
-			return handle_ept_misconfig(vcpu);
+	if (exit_reason == EXIT_REASON_MSR_WRITE)
+		return kvm_emulate_wrmsr(vcpu);
+	else if (exit_reason == EXIT_REASON_PREEMPTION_TIMER)
+		return handle_preemption_timer(vcpu);
+	else if (exit_reason == EXIT_REASON_INTERRUPT_WINDOW)
+		return handle_interrupt_window(vcpu);
+	else if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
+		return handle_external_interrupt(vcpu);
+	else if (exit_reason == EXIT_REASON_HLT)
+		return kvm_emulate_halt(vcpu);
+	else if (exit_reason == EXIT_REASON_EPT_MISCONFIG)
+		return handle_ept_misconfig(vcpu);
 #endif
-		return kvm_vmx_exit_handlers[exit_reason](vcpu);
-	} else {
-		vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
-				exit_reason);
-		dump_vmcs();
-		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-		vcpu->run->internal.suberror =
+
+	exit_reason = array_index_nospec(exit_reason,
+					 kvm_vmx_max_exit_handlers);
+	if (!kvm_vmx_exit_handlers[exit_reason])
+		goto unexpected_vmexit;
+
+	return kvm_vmx_exit_handlers[exit_reason](vcpu);
+
+unexpected_vmexit:
+	vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+	dump_vmcs();
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror =
 			KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
-		vcpu->run->internal.ndata = 1;
-		vcpu->run->internal.data[0] = exit_reason;
-		return 0;
-	}
+	vcpu->run->internal.ndata = 1;
+	vcpu->run->internal.data[0] = exit_reason;
+	return 0;
 }
 
 /*
@@ -6250,7 +6237,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 }
 STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
 
-static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu,
+	enum exit_fastpath_completion *exit_fastpath)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -6258,6 +6246,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
 		handle_external_interrupt_irqoff(vcpu);
 	else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
 		handle_exception_nmi_irqoff(vmx);
+	else if (!is_guest_mode(vcpu) &&
+		vmx->exit_reason == EXIT_REASON_MSR_WRITE)
+		*exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu);
 }
 
 static bool vmx_has_emulated_msr(int index)
@@ -6666,60 +6657,31 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	free_vpid(vmx->vpid);
 	nested_vmx_free_vcpu(vcpu);
 	free_loaded_vmcs(vmx->loaded_vmcs);
-	kvm_vcpu_uninit(vcpu);
-	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
-	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
-	kmem_cache_free(kvm_vcpu_cache, vmx);
 }
 
-static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
-	int err;
 	struct vcpu_vmx *vmx;
 	unsigned long *msr_bitmap;
-	int i, cpu;
-
-	BUILD_BUG_ON_MSG(offsetof(struct vcpu_vmx, vcpu) != 0,
-		"struct kvm_vcpu must be at offset 0 for arch usercopy region");
+	int i, cpu, err;
 
-	vmx = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL_ACCOUNT);
-	if (!vmx)
-		return ERR_PTR(-ENOMEM);
-
-	vmx->vcpu.arch.user_fpu = kmem_cache_zalloc(x86_fpu_cache,
-			GFP_KERNEL_ACCOUNT);
-	if (!vmx->vcpu.arch.user_fpu) {
-		printk(KERN_ERR "kvm: failed to allocate kvm userspace's fpu\n");
-		err = -ENOMEM;
-		goto free_partial_vcpu;
-	}
+	BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0);
+	vmx = to_vmx(vcpu);
 
-	vmx->vcpu.arch.guest_fpu = kmem_cache_zalloc(x86_fpu_cache,
-			GFP_KERNEL_ACCOUNT);
-	if (!vmx->vcpu.arch.guest_fpu) {
-		printk(KERN_ERR "kvm: failed to allocate vcpu's fpu\n");
-		err = -ENOMEM;
-		goto free_user_fpu;
-	}
+	err = -ENOMEM;
 
 	vmx->vpid = allocate_vpid();
 
-	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
-	if (err)
-		goto free_vcpu;
-
-	err = -ENOMEM;
-
 	/*
 	 * If PML is turned on, failure on enabling PML just results in failure
 	 * of creating the vcpu, therefore we can simplify PML logic (by
 	 * avoiding dealing with cases, such as enabling PML partially on vcpus
-	 * for the guest, etc.
+	 * for the guest), etc.
 	 */
 	if (enable_pml) {
 		vmx->pml_pg = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
 		if (!vmx->pml_pg)
-			goto uninit_vcpu;
+			goto free_vpid;
 	}
 
 	BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
@@ -6764,7 +6726,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
 	vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
-	if (kvm_cstate_in_guest(kvm)) {
+	if (kvm_cstate_in_guest(vcpu->kvm)) {
 		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
 		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
 		vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
@@ -6774,19 +6736,19 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	cpu = get_cpu();
-	vmx_vcpu_load(&vmx->vcpu, cpu);
-	vmx->vcpu.cpu = cpu;
+	vmx_vcpu_load(vcpu, cpu);
+	vcpu->cpu = cpu;
 	init_vmcs(vmx);
-	vmx_vcpu_put(&vmx->vcpu);
+	vmx_vcpu_put(vcpu);
 	put_cpu();
-	if (cpu_need_virtualize_apic_accesses(&vmx->vcpu)) {
-		err = alloc_apic_access_page(kvm);
+	if (cpu_need_virtualize_apic_accesses(vcpu)) {
+		err = alloc_apic_access_page(vcpu->kvm);
 		if (err)
 			goto free_vmcs;
 	}
 
 	if (enable_ept && !enable_unrestricted_guest) {
-		err = init_rmode_identity_map(kvm);
+		err = init_rmode_identity_map(vcpu->kvm);
 		if (err)
 			goto free_vmcs;
 	}
@@ -6794,14 +6756,14 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (nested)
 		nested_vmx_setup_ctls_msrs(&vmx->nested.msrs,
 					   vmx_capability.ept,
-					   kvm_vcpu_apicv_active(&vmx->vcpu));
+					   kvm_vcpu_apicv_active(vcpu));
 	else
 		memset(&vmx->nested.msrs, 0, sizeof(vmx->nested.msrs));
 
 	vmx->nested.posted_intr_nv = -1;
 	vmx->nested.current_vmptr = -1ull;
 
-	vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+	vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
 
 	/*
 	 * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
@@ -6812,22 +6774,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 
 	vmx->ept_pointer = INVALID_PAGE;
 
-	return &vmx->vcpu;
+	return 0;
 
 free_vmcs:
 	free_loaded_vmcs(vmx->loaded_vmcs);
 free_pml:
 	vmx_destroy_pml_buffer(vmx);
-uninit_vcpu:
-	kvm_vcpu_uninit(&vmx->vcpu);
-free_vcpu:
+free_vpid:
 	free_vpid(vmx->vpid);
-	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
-free_user_fpu:
-	kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
-free_partial_vcpu:
-	kmem_cache_free(kvm_vcpu_cache, vmx);
-	return ERR_PTR(err);
+	return err;
 }
 
 #define L1TF_MSG_SMT "L1TF CPU bug present and SMT on, data leak possible. See CVE-2018-3646 and https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/l1tf.html for details.\n"
@@ -6863,6 +6818,7 @@ static int vmx_vm_init(struct kvm *kvm)
 			break;
 		}
 	}
+	kvm_apicv_init(kvm, enable_apicv);
 	return 0;
 }
 
@@ -6871,6 +6827,12 @@ static int __init vmx_check_processor_compat(void)
 	struct vmcs_config vmcs_conf;
 	struct vmx_capability vmx_cap;
 
+	if (!this_cpu_has(X86_FEATURE_MSR_IA32_FEAT_CTL) ||
+	    !this_cpu_has(X86_FEATURE_VMX)) {
+		pr_err("kvm: VMX is disabled on CPU %d\n", smp_processor_id());
+		return -EIO;
+	}
+
 	if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
 		return -EIO;
 	if (nested)
@@ -6973,28 +6935,28 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	} while (0)
 
 	entry = kvm_find_cpuid_entry(vcpu, 0x1, 0);
-	cr4_fixed1_update(X86_CR4_VME, edx, bit(X86_FEATURE_VME));
-	cr4_fixed1_update(X86_CR4_PVI, edx, bit(X86_FEATURE_VME));
-	cr4_fixed1_update(X86_CR4_TSD, edx, bit(X86_FEATURE_TSC));
-	cr4_fixed1_update(X86_CR4_DE, edx, bit(X86_FEATURE_DE));
-	cr4_fixed1_update(X86_CR4_PSE, edx, bit(X86_FEATURE_PSE));
-	cr4_fixed1_update(X86_CR4_PAE, edx, bit(X86_FEATURE_PAE));
-	cr4_fixed1_update(X86_CR4_MCE, edx, bit(X86_FEATURE_MCE));
-	cr4_fixed1_update(X86_CR4_PGE, edx, bit(X86_FEATURE_PGE));
-	cr4_fixed1_update(X86_CR4_OSFXSR, edx, bit(X86_FEATURE_FXSR));
-	cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, bit(X86_FEATURE_XMM));
-	cr4_fixed1_update(X86_CR4_VMXE, ecx, bit(X86_FEATURE_VMX));
-	cr4_fixed1_update(X86_CR4_SMXE, ecx, bit(X86_FEATURE_SMX));
-	cr4_fixed1_update(X86_CR4_PCIDE, ecx, bit(X86_FEATURE_PCID));
-	cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, bit(X86_FEATURE_XSAVE));
+	cr4_fixed1_update(X86_CR4_VME, edx, feature_bit(VME));
+	cr4_fixed1_update(X86_CR4_PVI, edx, feature_bit(VME));
+	cr4_fixed1_update(X86_CR4_TSD, edx, feature_bit(TSC));
+	cr4_fixed1_update(X86_CR4_DE, edx, feature_bit(DE));
+	cr4_fixed1_update(X86_CR4_PSE, edx, feature_bit(PSE));
+	cr4_fixed1_update(X86_CR4_PAE, edx, feature_bit(PAE));
+	cr4_fixed1_update(X86_CR4_MCE, edx, feature_bit(MCE));
+	cr4_fixed1_update(X86_CR4_PGE, edx, feature_bit(PGE));
+	cr4_fixed1_update(X86_CR4_OSFXSR, edx, feature_bit(FXSR));
+	cr4_fixed1_update(X86_CR4_OSXMMEXCPT, edx, feature_bit(XMM));
+	cr4_fixed1_update(X86_CR4_VMXE, ecx, feature_bit(VMX));
+	cr4_fixed1_update(X86_CR4_SMXE, ecx, feature_bit(SMX));
+	cr4_fixed1_update(X86_CR4_PCIDE, ecx, feature_bit(PCID));
+	cr4_fixed1_update(X86_CR4_OSXSAVE, ecx, feature_bit(XSAVE));
 
 	entry = kvm_find_cpuid_entry(vcpu, 0x7, 0);
-	cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, bit(X86_FEATURE_FSGSBASE));
-	cr4_fixed1_update(X86_CR4_SMEP, ebx, bit(X86_FEATURE_SMEP));
-	cr4_fixed1_update(X86_CR4_SMAP, ebx, bit(X86_FEATURE_SMAP));
-	cr4_fixed1_update(X86_CR4_PKE, ecx, bit(X86_FEATURE_PKU));
-	cr4_fixed1_update(X86_CR4_UMIP, ecx, bit(X86_FEATURE_UMIP));
-	cr4_fixed1_update(X86_CR4_LA57, ecx, bit(X86_FEATURE_LA57));
+	cr4_fixed1_update(X86_CR4_FSGSBASE, ebx, feature_bit(FSGSBASE));
+	cr4_fixed1_update(X86_CR4_SMEP, ebx, feature_bit(SMEP));
+	cr4_fixed1_update(X86_CR4_SMAP, ebx, feature_bit(SMAP));
+	cr4_fixed1_update(X86_CR4_PKE, ecx, feature_bit(PKU));
+	cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP));
+	cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57));
 
 #undef cr4_fixed1_update
 }
@@ -7099,12 +7061,12 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
 	if (nested_vmx_allowed(vcpu))
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
-			FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
-			FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+			FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+			FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 	else
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
-			~(FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX |
-			  FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX);
+			~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+			  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
 
 	if (nested_vmx_allowed(vcpu)) {
 		nested_vmx_cr_fixed1_bits_update(vcpu);
@@ -7128,7 +7090,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 {
 	if (func == 1 && nested)
-		entry->ecx |= bit(X86_FEATURE_VMX);
+		entry->ecx |= feature_bit(VMX);
 }
 
 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -7523,10 +7485,10 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.mcg_cap & MCG_LMCE_P)
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits |=
-			FEATURE_CONTROL_LMCE;
+			FEAT_CTL_LMCE_ENABLED;
 	else
 		to_vmx(vcpu)->msr_ia32_feature_control_valid_bits &=
-			~FEATURE_CONTROL_LMCE;
+			~FEAT_CTL_LMCE_ENABLED;
 }
 
 static int vmx_smi_allowed(struct kvm_vcpu *vcpu)
@@ -7758,6 +7720,14 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
+static bool vmx_check_apicv_inhibit_reasons(ulong bit)
+{
+	ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
+			  BIT(APICV_INHIBIT_REASON_HYPERV);
+
+	return supported & BIT(bit);
+}
+
 static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.cpu_has_kvm_support = cpu_has_kvm_support,
 	.disabled_by_bios = vmx_disabled_by_bios,
@@ -7830,10 +7800,10 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.update_cr8_intercept = update_cr8_intercept,
 	.set_virtual_apic_mode = vmx_set_virtual_apic_mode,
 	.set_apic_access_page_addr = vmx_set_apic_access_page_addr,
-	.get_enable_apicv = vmx_get_enable_apicv,
 	.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
 	.load_eoi_exitmap = vmx_load_eoi_exitmap,
 	.apicv_post_state_restore = vmx_apicv_post_state_restore,
+	.check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
 	.hwapic_irr_update = vmx_hwapic_irr_update,
 	.hwapic_isr_update = vmx_hwapic_isr_update,
 	.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
@@ -7870,6 +7840,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
 	.xsaves_supported = vmx_xsaves_supported,
 	.umip_emulated = vmx_umip_emulated,
 	.pt_supported = vmx_pt_supported,
+	.pku_supported = vmx_pku_supported,
 
 	.request_immediate_exit = vmx_request_immediate_exit,
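Note on the Intel PT hunks above: they all apply the same refactoring. The compound precondition that used to be duplicated in every RTIT case of vmx_set_msr() (tracing must be in host/guest mode and the guest's RTIT_CTL.TraceEn must be clear) is hoisted into the new pt_can_write_msr() helper, and each case is left with only its MSR-specific validation. A minimal standalone sketch of that pattern is shown below; the struct, the PT_MODE_* stand-ins and the RTIT_STATUS_MASK value are illustrative simplifications, not the kernel's definitions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the guest PT state touched by the patch. */
#define RTIT_CTL_TRACEEN  (1ULL << 0)
#define RTIT_STATUS_MASK  0xffffffff00000000ULL  /* hypothetical reserved bits */

enum pt_mode { PT_MODE_SYSTEM, PT_MODE_HOST_GUEST };

struct pt_guest {
	enum pt_mode mode;
	uint64_t ctl;     /* guest RTIT_CTL */
	uint64_t status;  /* guest RTIT_STATUS */
};

/* Common precondition, checked once instead of repeated in every case. */
static bool pt_can_write_msr(const struct pt_guest *pt)
{
	return pt->mode == PT_MODE_HOST_GUEST &&
	       !(pt->ctl & RTIT_CTL_TRACEEN);
}

/* A per-MSR handler now only adds its MSR-specific validation. */
static int set_rtit_status(struct pt_guest *pt, uint64_t data)
{
	if (!pt_can_write_msr(pt))
		return 1;            /* common reject path */
	if (data & RTIT_STATUS_MASK)
		return 1;            /* MSR-specific reject */
	pt->status = data;
	return 0;
}

int main(void)
{
	struct pt_guest pt = { .mode = PT_MODE_HOST_GUEST, .ctl = 0, .status = 0 };

	printf("write while tracing off: %d\n", set_rtit_status(&pt, 0x1));
	pt.ctl |= RTIT_CTL_TRACEEN;
	printf("write while tracing on:  %d\n", set_rtit_status(&pt, 0x1));
	return 0;
}

Splitting the checks this way keeps each rejection reason on its own line, which is what lets the patch drop the long parenthesised OR-chains without changing which writes are refused.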