From af8b3cd3934ec60f4c2a420d19a9d416554f140b Mon Sep 17 00:00:00 2001
From: Kyle Huey
Date: Tue, 14 Feb 2017 00:11:02 -0800
Subject: x86/process: Optimize TIF checks in __switch_to_xtra()

Help the compiler to avoid reevaluating the thread flags for each checked
bit by reordering the bit checks and providing an explicit xor for
evaluation.

With default defconfigs for each arch:

x86_64: arch/x86/kernel/process.o
text    data    bss     dec     hex
3056    8577    16      11649   2d81    Before
3024    8577    16      11617   2d61    After

i386: arch/x86/kernel/process.o
text    data    bss     dec     hex
2957    8673    8       11638   2d76    Before
2925    8673    8       11606   2d56    After

Originally-by: Thomas Gleixner
Signed-off-by: Kyle Huey
Cc: Peter Zijlstra
Cc: Andy Lutomirski
Link: http://lkml.kernel.org/r/20170214081104.9244-2-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/process.c | 65 ++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f67591561711..ea9ea2582dab 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -182,54 +182,61 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
+static inline void switch_to_bitmap(struct tss_struct *tss,
+				    struct thread_struct *prev,
+				    struct thread_struct *next,
+				    unsigned long tifp, unsigned long tifn)
+{
+	if (tifn & _TIF_IO_BITMAP) {
+		/*
+		 * Copy the relevant range of the IO bitmap.
+		 * Normally this is 128 bytes or less:
+		 */
+		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
+		       max(prev->io_bitmap_max, next->io_bitmap_max));
+		/*
+		 * Make sure that the TSS limit is correct for the CPU
+		 * to notice the IO bitmap.
+		 */
+		refresh_tss_limit();
+	} else if (tifp & _TIF_IO_BITMAP) {
+		/*
+		 * Clear any possible leftover bits:
+		 */
+		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+	}
+}
+
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss)
 {
 	struct thread_struct *prev, *next;
+	unsigned long tifp, tifn;
 
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
-	    test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
+	tifn = READ_ONCE(task_thread_info(next_p)->flags);
+	tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+	switch_to_bitmap(tss, prev, next, tifp, tifn);
+
+	propagate_user_return_notify(prev_p, next_p);
+
+	if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
 		unsigned long debugctl = get_debugctlmsr();
 
 		debugctl &= ~DEBUGCTLMSR_BTF;
-		if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
+		if (tifn & _TIF_BLOCKSTEP)
 			debugctl |= DEBUGCTLMSR_BTF;
-
 		update_debugctlmsr(debugctl);
 	}
 
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
+	if ((tifp ^ tifn) & _TIF_NOTSC) {
+		if (tifn & _TIF_NOTSC)
 			hard_disable_TSC();
 		else
 			hard_enable_TSC();
 	}
-
-	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
-		/*
-		 * Copy the relevant range of the IO bitmap.
-		 * Normally this is 128 bytes or less:
-		 */
-		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
-		       max(prev->io_bitmap_max, next->io_bitmap_max));
-
-		/*
-		 * Make sure that the TSS limit is correct for the CPU
-		 * to notice the IO bitmap.
-		 */
-		refresh_tss_limit();
-	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
-		/*
-		 * Clear any possible leftover bits:
-		 */
-		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
-	}
-	propagate_user_return_notify(prev_p, next_p);
 }
 
 /*
-- cgit v1.2.3
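A note on the pattern this patch relies on: XOR-ing the two tasks' flag
words yields a word whose set bits are exactly the flags that differ across
the switch, so each transition test becomes a single AND against a cached
value instead of two test_tsk_thread_flag() reads. A minimal stand-alone
sketch of that idea (the flag values below are made up for illustration;
they are not the kernel's real _TIF_* constants):

    #include <stdio.h>

    /* Illustrative flag bits only; not the kernel's _TIF_* values. */
    #define FLAG_BLOCKSTEP (1UL << 0)
    #define FLAG_NOTSC     (1UL << 1)

    int main(void)
    {
            unsigned long tifp = FLAG_BLOCKSTEP;              /* prev task */
            unsigned long tifn = FLAG_BLOCKSTEP | FLAG_NOTSC; /* next task */

            /* Set bits mark flags that differ between prev and next. */
            unsigned long changed = tifp ^ tifn;

            if (changed & FLAG_BLOCKSTEP)
                    printf("BLOCKSTEP changed\n"); /* not printed: set in both */
            if (changed & FLAG_NOTSC)
                    printf("NOTSC changed\n");     /* printed: set only in next */
            return 0;
    }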
From b9894a2f5bd18b1691cb6872c9afe32b148d0132 Mon Sep 17 00:00:00 2001
From: Kyle Huey
Date: Tue, 14 Feb 2017 00:11:03 -0800
Subject: x86/process: Correct and optimize TIF_BLOCKSTEP switch

The debug control MSR is "highly magical" as the blockstep bit can be
cleared by hardware under not well documented circumstances. So a task
switch that relies on the bit being set by the previous task (according to
the previous task's thread flags) can trip over this and fail to update
the flag for the next task.

To fix this, DEBUGCTLMSR_BTF must be handled when either the previous or
the next task (or both) has the TIF_BLOCKSTEP flag set.

While at it, avoid branching within the TIF_BLOCKSTEP case and evaluating
boot_cpu_data twice in kernels without CONFIG_X86_DEBUGCTLMSR.

x86_64: arch/x86/kernel/process.o
text    data    bss     dec     hex
3024    8577    16      11617   2d61    Before
3008    8577    16      11601   2d51    After

i386: No change

[ tglx: Made the shift value explicit, used a local variable to make the
  code readable and massaged the changelog ]

Originally-by: Thomas Gleixner
Signed-off-by: Kyle Huey
Cc: Peter Zijlstra
Cc: Andy Lutomirski
Link: http://lkml.kernel.org/r/20170214081104.9244-3-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/process.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ea9ea2582dab..83fa3cb4f8f0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -222,13 +222,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	propagate_user_return_notify(prev_p, next_p);
 
-	if ((tifp ^ tifn) & _TIF_BLOCKSTEP) {
-		unsigned long debugctl = get_debugctlmsr();
+	if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+	    arch_has_block_step()) {
+		unsigned long debugctl, msk;
 
+		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 		debugctl &= ~DEBUGCTLMSR_BTF;
-		if (tifn & _TIF_BLOCKSTEP)
-			debugctl |= DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
+		msk = tifn & _TIF_BLOCKSTEP;
+		debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	}
 
 	if ((tifp ^ tifn) & _TIF_NOTSC) {
-- cgit v1.2.3
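The branch-free replacement works by relocating a bit rather than testing
it: msk = tifn & _TIF_BLOCKSTEP isolates the BLOCKSTEP bit in place, and
the shift pair (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT moves it
into the BTF position before it is OR-ed into the MSR value. A stand-alone
sketch of the trick (the bit positions below are chosen for illustration
and are not guaranteed to match the kernel's definitions):

    #include <assert.h>

    /* Illustrative bit positions; not necessarily the kernel's values. */
    #define TIF_BLOCKSTEP_BIT     25
    #define DEBUGCTLMSR_BTF_SHIFT  1

    /* Move the BLOCKSTEP bit of tifn into the BTF position, branch-free. */
    static unsigned long btf_bit(unsigned long tifn)
    {
            unsigned long msk = tifn & (1UL << TIF_BLOCKSTEP_BIT);

            return (msk >> TIF_BLOCKSTEP_BIT) << DEBUGCTLMSR_BTF_SHIFT;
    }

    int main(void)
    {
            assert(btf_bit(0) == 0);
            assert(btf_bit(1UL << TIF_BLOCKSTEP_BIT) ==
                   (1UL << DEBUGCTLMSR_BTF_SHIFT));
            return 0;
    }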
From 5a920155e388ec22a22e0532fb695b9215c9b34d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Tue, 14 Feb 2017 00:11:04 -0800
Subject: x86/process: Optimize TIF_NOTSC switch

Provide and use a toggle helper instead of doing it with a branch.

x86_64: arch/x86/kernel/process.o
text    data    bss     dec     hex
3008    8577    16      11601   2d51    Before
2976    8577    16      11569   2d31    After

i386: arch/x86/kernel/process.o
text    data    bss     dec     hex
2925    8673    8       11606   2d56    Before
2893    8673    8       11574   2d36    After

Signed-off-by: Thomas Gleixner
Cc: Peter Zijlstra
Cc: Andy Lutomirski
Link: http://lkml.kernel.org/r/20170214081104.9244-4-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/process.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 83fa3cb4f8f0..366db7782fc6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -124,11 +124,6 @@ void flush_thread(void)
 	fpu__clear(&tsk->thread.fpu);
 }
 
-static void hard_disable_TSC(void)
-{
-	cr4_set_bits(X86_CR4_TSD);
-}
-
 void disable_TSC(void)
 {
 	preempt_disable();
@@ -137,15 +132,10 @@ void disable_TSC(void)
 	 * Must flip the CPU state synchronously with
 	 * TIF_NOTSC in the current running context.
 	 */
-	hard_disable_TSC();
+	cr4_set_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
-static void hard_enable_TSC(void)
-{
-	cr4_clear_bits(X86_CR4_TSD);
-}
-
 static void enable_TSC(void)
 {
 	preempt_disable();
@@ -154,7 +144,7 @@ static void enable_TSC(void)
 	 * Must flip the CPU state synchronously with
 	 * TIF_NOTSC in the current running context.
 	 */
-	hard_enable_TSC();
+	cr4_clear_bits(X86_CR4_TSD);
 	preempt_enable();
 }
 
@@ -233,12 +223,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
 	}
 
-	if ((tifp ^ tifn) & _TIF_NOTSC) {
-		if (tifn & _TIF_NOTSC)
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
+	if ((tifp ^ tifn) & _TIF_NOTSC)
+		cr4_toggle_bits(X86_CR4_TSD);
 }
 
 /*
-- cgit v1.2.3
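cr4_toggle_bits() is defined outside this file, so its body does not appear
in the diff. Toggling is safe here only because the helper is reached
exclusively when the XOR test has already established that prev and next
disagree on TIF_NOTSC. A sketch of the shape such a helper plausibly takes,
assuming a per-CPU shadow copy of CR4 (the cpu_tlbstate.cr4 field below is
an assumption, not taken from this diff):

    static inline void cr4_toggle_bits(unsigned long mask)
    {
            unsigned long cr4;

            /* Read the cached CR4 value instead of the register itself. */
            cr4 = this_cpu_read(cpu_tlbstate.cr4);  /* field name assumed */
            cr4 ^= mask;                            /* flip the bits */
            this_cpu_write(cpu_tlbstate.cr4, cr4);
            __write_cr4(cr4);
    }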
Wysocki" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Len Brown Cc: Shuah Khan Cc: user-mode-linux-devel@lists.sourceforge.net Cc: Jeff Dike Cc: Alexander Viro Cc: user-mode-linux-user@lists.sourceforge.net Cc: David Matlack Cc: Boris Ostrovsky Cc: Dmitry Safonov Cc: linux-fsdevel@vger.kernel.org Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/20170320081628.18952-6-khuey@kylehuey.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 366db7782fc6..b12e95eceb83 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -545,3 +545,9 @@ out: put_task_stack(p); return ret; } + +long do_arch_prctl_common(struct task_struct *task, int option, + unsigned long cpuid_enabled) +{ + return -EINVAL; +} -- cgit v1.2.3 From e9ea1e7f53b852147cbd568b0568c7ad97ec21a3 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 20 Mar 2017 01:16:26 -0700 Subject: x86/arch_prctl: Add ARCH_[GET|SET]_CPUID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. Exposing this feature to userspace will allow a ptracer to trap and emulate the CPUID instruction. When supported, this feature is controlled by toggling bit 0 of MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of https://bugzilla.kernel.org/attachment.cgi?id=243991 Implement a new pair of arch_prctls, available on both x86-32 and x86-64. ARCH_GET_CPUID: Returns the current CPUID state, either 0 if CPUID faulting is enabled (and thus the CPUID instruction is not available) or 1 if CPUID faulting is not enabled. ARCH_SET_CPUID: Set the CPUID state to the second argument. If cpuid_enabled is 0 CPUID faulting will be activated, otherwise it will be deactivated. Returns ENODEV if CPUID faulting is not supported on this system. The state of the CPUID faulting flag is propagated across forks, but reset upon exec. Signed-off-by: Kyle Huey Cc: Grzegorz Andrejczuk Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Peter Zijlstra Cc: Dave Hansen Cc: Andi Kleen Cc: linux-kselftest@vger.kernel.org Cc: Nadav Amit Cc: Robert O'Callahan Cc: Richard Weinberger Cc: "Rafael J. Wysocki" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Len Brown Cc: Shuah Khan Cc: user-mode-linux-devel@lists.sourceforge.net Cc: Jeff Dike Cc: Alexander Viro Cc: user-mode-linux-user@lists.sourceforge.net Cc: David Matlack Cc: Boris Ostrovsky Cc: Dmitry Safonov Cc: linux-fsdevel@vger.kernel.org Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/20170320081628.18952-9-khuey@kylehuey.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/process.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'arch/x86/kernel/process.c') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b12e95eceb83..0bb88428cbf2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -37,6 +37,7 @@ #include #include #include +#include /* * per-CPU TSS segments. 
From e9ea1e7f53b852147cbd568b0568c7ad97ec21a3 Mon Sep 17 00:00:00 2001
From: Kyle Huey
Date: Mon, 20 Mar 2017 01:16:26 -0700
Subject: x86/arch_prctl: Add ARCH_[GET|SET]_CPUID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Intel supports faulting on the CPUID instruction beginning with Ivy Bridge.
When enabled, the processor will fault on attempts to execute the CPUID
instruction with CPL>0. Exposing this feature to userspace will allow a
ptracer to trap and emulate the CPUID instruction.

When supported, this feature is controlled by toggling bit 0 of
MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of
https://bugzilla.kernel.org/attachment.cgi?id=243991

Implement a new pair of arch_prctls, available on both x86-32 and x86-64.

ARCH_GET_CPUID: Returns the current CPUID state, either 0 if CPUID faulting
                is enabled (and thus the CPUID instruction is not available)
                or 1 if CPUID faulting is not enabled.

ARCH_SET_CPUID: Set the CPUID state to the second argument. If
                cpuid_enabled is 0, CPUID faulting will be activated,
                otherwise it will be deactivated. Returns -ENODEV if CPUID
                faulting is not supported on this system.

The state of the CPUID faulting flag is propagated across forks, but reset
upon exec.

Signed-off-by: Kyle Huey
Cc: Grzegorz Andrejczuk
Cc: kvm@vger.kernel.org
Cc: Radim Krčmář
Cc: Peter Zijlstra
Cc: Dave Hansen
Cc: Andi Kleen
Cc: linux-kselftest@vger.kernel.org
Cc: Nadav Amit
Cc: Robert O'Callahan
Cc: Richard Weinberger
Cc: "Rafael J. Wysocki"
Cc: Borislav Petkov
Cc: Andy Lutomirski
Cc: Len Brown
Cc: Shuah Khan
Cc: user-mode-linux-devel@lists.sourceforge.net
Cc: Jeff Dike
Cc: Alexander Viro
Cc: user-mode-linux-user@lists.sourceforge.net
Cc: David Matlack
Cc: Boris Ostrovsky
Cc: Dmitry Safonov
Cc: linux-fsdevel@vger.kernel.org
Cc: Paolo Bonzini
Link: http://lkml.kernel.org/r/20170320081628.18952-9-khuey@kylehuey.com
Signed-off-by: Thomas Gleixner
---
 arch/x86/kernel/process.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b12e95eceb83..0bb88428cbf2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -37,6 +37,7 @@
 #include <asm/vm86.h>
 #include <asm/switch_to.h>
 #include <asm/desc.h>
+#include <asm/prctl.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -172,6 +173,73 @@ int set_tsc_mode(unsigned int val)
 	return 0;
 }
 
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
+
+static void set_cpuid_faulting(bool on)
+{
+	u64 msrval;
+
+	msrval = this_cpu_read(msr_misc_features_shadow);
+	msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+	msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+	this_cpu_write(msr_misc_features_shadow, msrval);
+	wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
+
+static void disable_cpuid(void)
+{
+	preempt_disable();
+	if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		set_cpuid_faulting(true);
+	}
+	preempt_enable();
+}
+
+static void enable_cpuid(void)
+{
+	preempt_disable();
+	if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+		/*
+		 * Must flip the CPU state synchronously with
+		 * TIF_NOCPUID in the current running context.
+		 */
+		set_cpuid_faulting(false);
+	}
+	preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+	return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+	if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+		return -ENODEV;
+
+	if (cpuid_enabled)
+		enable_cpuid();
+	else
+		disable_cpuid();
+
+	return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+	/* If cpuid was previously disabled for this task, re-enable it. */
+	if (test_thread_flag(TIF_NOCPUID))
+		enable_cpuid();
+}
+
 static inline void switch_to_bitmap(struct tss_struct *tss,
 				    struct thread_struct *prev,
 				    struct thread_struct *next,
@@ -225,6 +293,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 
 	if ((tifp ^ tifn) & _TIF_NOTSC)
 		cr4_toggle_bits(X86_CR4_TSD);
+
+	if ((tifp ^ tifn) & _TIF_NOCPUID)
+		set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
 }
 
 /*
@@ -549,5 +620,12 @@ out:
 long do_arch_prctl_common(struct task_struct *task, int option,
 			  unsigned long cpuid_enabled)
 {
+	switch (option) {
+	case ARCH_GET_CPUID:
+		return get_cpuid_mode();
+	case ARCH_SET_CPUID:
+		return set_cpuid_mode(task, cpuid_enabled);
+	}
+
 	return -EINVAL;
 }
-- cgit v1.2.3
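From user space the new pair is reached through the arch_prctl() syscall. A
minimal x86-64 sketch (the ARCH_* option values are hardcoded here in case
the installed headers predate the feature; a current asm/prctl.h provides
them):

    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    #ifndef ARCH_GET_CPUID
    #define ARCH_GET_CPUID 0x1011   /* values from asm/prctl.h */
    #define ARCH_SET_CPUID 0x1012
    #endif

    int main(void)
    {
            /* 1: CPUID executes normally; 0: CPUID faulting is active. */
            long state = syscall(SYS_arch_prctl, ARCH_GET_CPUID, 0);

            printf("CPUID is %s\n", state == 1 ? "available" : "faulting");

            /* Request faulting; fails with ENODEV on unsupported CPUs. */
            if (syscall(SYS_arch_prctl, ARCH_SET_CPUID, 0) != 0)
                    perror("ARCH_SET_CPUID");
            return 0;
    }

Once ARCH_SET_CPUID with argument 0 succeeds, any CPUID instruction the
task executes faults until the state is flipped back or the task execs,
at which point arch_setup_new_exec() re-enables CPUID.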