diff options
author | Michael Ellerman <mpe@ellerman.id.au> | 2017-07-31 20:20:29 +1000 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2017-07-31 20:20:29 +1000 |
commit | bb272221e9db79f13d454e1f3fb6b05013be985e (patch) | |
tree | 36f4acc50e3fabac71fadd34c720c0a6011db470 /arch/powerpc/kernel | |
parent | 253fd51e2f533552ae35a0c661705da6c4842c1b (diff) | |
parent | 5771a8c08880cdca3bfb4a3fc6d309d6bba20877 (diff) | |
download | linux-bb272221e9db79f13d454e1f3fb6b05013be985e.tar.gz linux-bb272221e9db79f13d454e1f3fb6b05013be985e.tar.bz2 linux-bb272221e9db79f13d454e1f3fb6b05013be985e.zip |
Merge tag 'v4.13-rc1' into fixes
The fixes branch is based off a random pre-rc1 commit, because we had
some fixes that needed to go in before rc1 was released.
However we now need to fix some code that went in after that point, but
before rc1, so merge rc1 to get that code into fixes so we can fix it!
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/Makefile | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/exceptions-64s.S | 30 | ||||
-rw-r--r-- | arch/powerpc/kernel/fadump.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/kvm.c | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/prom_init.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_64.c | 19 | ||||
-rw-r--r-- | arch/powerpc/kernel/smp.c | 20 | ||||
-rw-r--r-- | arch/powerpc/kernel/watchdog.c | 386 |
8 files changed, 445 insertions, 24 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 0845eebc5af3..4aa7c147e447 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,6 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_VDSO32) += vdso32/ +obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 124091d306ff..9029afd1fa2a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1314,6 +1314,31 @@ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) #endif +#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH) + +#define MASKED_DEC_HANDLER_LABEL 3f + +#define MASKED_DEC_HANDLER(_H) \ +3: /* soft-nmi */ \ + std r12,PACA_EXGEN+EX_R12(r13); \ + GET_SCRATCH0(r10); \ + std r10,PACA_EXGEN+EX_R13(r13); \ + EXCEPTION_PROLOG_PSERIES_1(soft_nmi_common, _H) + +EXC_COMMON_BEGIN(soft_nmi_common) + mr r10,r1 + ld r1,PACAEMERGSP(r13) + ld r1,PACA_NMI_EMERG_SP(r13) + subi r1,r1,INT_FRAME_SIZE + EXCEPTION_COMMON_NORET_STACK(PACA_EXGEN, 0x900, + system_reset, soft_nmi_interrupt, + ADD_NVGPRS;ADD_RECONCILE) + b ret_from_except + +#else +#define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ +#define MASKED_DEC_HANDLER(_H) +#endif /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -1336,7 +1361,7 @@ masked_##_H##interrupt: \ lis r10,0x7fff; \ ori r10,r10,0xffff; \ mtspr SPRN_DEC,r10; \ - b 2f; \ + b MASKED_DEC_HANDLER_LABEL; \ 1: cmpwi r10,PACA_IRQ_DBELL; \ beq 2f; \ cmpwi r10,PACA_IRQ_HMI; \ @@ -1351,7 +1376,8 @@ masked_##_H##interrupt: \ ld r11,PACA_EXGEN+EX_R11(r13); \ GET_SCRATCH0(r13); \ ##_H##rfid; \ - b . + b .; \ + MASKED_DEC_HANDLER(_H) /* * Real mode exceptions actually use this too, but alternate diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 3079518f2245..dc0c49cfd90a 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -999,8 +999,7 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = vmcoreinfo_max_size; - phdr->p_filesz = vmcoreinfo_max_size; + phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; /* Increment number of program headers. */ (elf->e_phnum)++; diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 9ad37f827a97..1086ea37c832 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -25,6 +25,7 @@ #include <linux/kvm_para.h> #include <linux/slab.h> #include <linux/of.h> +#include <linux/nmi.h> /* hardlockup_detector_disable() */ #include <asm/reg.h> #include <asm/sections.h> @@ -718,6 +719,12 @@ static __init void kvm_free_tmp(void) static int __init kvm_guest_init(void) { + /* + * The hardlockup detector is likely to get false positives in + * KVM guests, so disable it by default. + */ + hardlockup_detector_disable(); + if (!kvm_para_available()) goto free_tmp; diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index dd8a04f3053a..613f79f03877 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -15,6 +15,9 @@ #undef DEBUG_PROM +/* we cannot use FORTIFY as it brings in new symbols */ +#define __NO_FORTIFY + #include <stdarg.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4640f6d64f8b..af23d4b576ec 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -751,22 +751,3 @@ unsigned long memory_block_size_bytes(void) struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif - -#ifdef CONFIG_HARDLOCKUP_DETECTOR -u64 hw_nmi_get_sample_period(int watchdog_thresh) -{ - return ppc_proc_freq * watchdog_thresh; -} - -/* - * The hardlockup detector breaks PMU event based branches and is likely - * to get false positives in KVM guests, so disable it by default. - */ -static int __init disable_hardlockup_detector(void) -{ - hardlockup_detector_disable(); - - return 0; -} -early_initcall(disable_hardlockup_detector); -#endif diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index b0ea6d4d4853..cf0e1245b8cc 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -435,13 +435,31 @@ static void do_smp_send_nmi_ipi(int cpu) } } +void smp_flush_nmi_ipi(u64 delay_us) +{ + unsigned long flags; + + nmi_ipi_lock_start(&flags); + while (nmi_ipi_busy_count) { + nmi_ipi_unlock_end(&flags); + udelay(1); + if (delay_us) { + delay_us--; + if (!delay_us) + return; + } + nmi_ipi_lock_start(&flags); + } + nmi_ipi_unlock_end(&flags); +} + /* * - cpu is the target CPU (must not be this CPU), or NMI_IPI_ALL_OTHERS. * - fn is the target callback function. * - delay_us > 0 is the delay before giving up waiting for targets to * enter the handler, == 0 specifies indefinite delay. */ -static int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) +int smp_send_nmi_ipi(int cpu, void (*fn)(struct pt_regs *), u64 delay_us) { unsigned long flags; int me = raw_smp_processor_id(); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c new file mode 100644 index 000000000000..b67f8b03a32d --- /dev/null +++ b/arch/powerpc/kernel/watchdog.c @@ -0,0 +1,386 @@ +/* + * Watchdog support on powerpc systems. + * + * Copyright 2017, IBM Corporation. + * + * This uses code from arch/sparc/kernel/nmi.c and kernel/watchdog.c + */ +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/cpu.h> +#include <linux/nmi.h> +#include <linux/module.h> +#include <linux/export.h> +#include <linux/kprobes.h> +#include <linux/hardirq.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/kdebug.h> +#include <linux/sched/debug.h> +#include <linux/delay.h> +#include <linux/smp.h> + +#include <asm/paca.h> + +/* + * The watchdog has a simple timer that runs on each CPU, once per timer + * period. This is the heartbeat. + * + * Then there are checks to see if the heartbeat has not triggered on a CPU + * for the panic timeout period. Currently the watchdog only supports an + * SMP check, so the heartbeat only turns on when we have 2 or more CPUs. + * + * This is not an NMI watchdog, but Linux uses that name for a generic + * watchdog in some cases, so NMI gets used in some places. + */ + +static cpumask_t wd_cpus_enabled __read_mostly; + +static u64 wd_panic_timeout_tb __read_mostly; /* timebase ticks until panic */ +static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */ + +static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */ + +static DEFINE_PER_CPU(struct timer_list, wd_timer); +static DEFINE_PER_CPU(u64, wd_timer_tb); + +/* + * These are for the SMP checker. CPUs clear their pending bit in their + * heartbeat. If the bitmask becomes empty, the time is noted and the + * bitmask is refilled. + * + * All CPUs clear their bit in the pending mask every timer period. + * Once all have cleared, the time is noted and the bits are reset. + * If the time since all clear was greater than the panic timeout, + * we can panic with the list of stuck CPUs. + * + * This will work best with NMI IPIs for crash code so the stuck CPUs + * can be pulled out to get their backtraces. + */ +static unsigned long __wd_smp_lock; +static cpumask_t wd_smp_cpus_pending; +static cpumask_t wd_smp_cpus_stuck; +static u64 wd_smp_last_reset_tb; + +static inline void wd_smp_lock(unsigned long *flags) +{ + /* + * Avoid locking layers if possible. + * This may be called from low level interrupt handlers at some + * point in future. + */ + local_irq_save(*flags); + while (unlikely(test_and_set_bit_lock(0, &__wd_smp_lock))) + cpu_relax(); +} + +static inline void wd_smp_unlock(unsigned long *flags) +{ + clear_bit_unlock(0, &__wd_smp_lock); + local_irq_restore(*flags); +} + +static void wd_lockup_ipi(struct pt_regs *regs) +{ + pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", raw_smp_processor_id()); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); +} + +static void set_cpu_stuck(int cpu, u64 tb) +{ + cpumask_set_cpu(cpu, &wd_smp_cpus_stuck); + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + if (cpumask_empty(&wd_smp_cpus_pending)) { + wd_smp_last_reset_tb = tb; + cpumask_andnot(&wd_smp_cpus_pending, + &wd_cpus_enabled, + &wd_smp_cpus_stuck); + } +} + +static void watchdog_smp_panic(int cpu, u64 tb) +{ + unsigned long flags; + int c; + + wd_smp_lock(&flags); + /* Double check some things under lock */ + if ((s64)(tb - wd_smp_last_reset_tb) < (s64)wd_smp_panic_timeout_tb) + goto out; + if (cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) + goto out; + if (cpumask_weight(&wd_smp_cpus_pending) == 0) + goto out; + + pr_emerg("Watchdog CPU:%d detected Hard LOCKUP other CPUS:%*pbl\n", + cpu, cpumask_pr_args(&wd_smp_cpus_pending)); + + /* + * Try to trigger the stuck CPUs. + */ + for_each_cpu(c, &wd_smp_cpus_pending) { + if (c == cpu) + continue; + smp_send_nmi_ipi(c, wd_lockup_ipi, 1000000); + } + smp_flush_nmi_ipi(1000000); + + /* Take the stuck CPU out of the watch group */ + for_each_cpu(c, &wd_smp_cpus_pending) + set_cpu_stuck(c, tb); + +out: + wd_smp_unlock(&flags); + + printk_safe_flush(); + /* + * printk_safe_flush() seems to require another print + * before anything actually goes out to console. + */ + if (sysctl_hardlockup_all_cpu_backtrace) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + nmi_panic(NULL, "Hard LOCKUP"); +} + +static void wd_smp_clear_cpu_pending(int cpu, u64 tb) +{ + if (!cpumask_test_cpu(cpu, &wd_smp_cpus_pending)) { + if (unlikely(cpumask_test_cpu(cpu, &wd_smp_cpus_stuck))) { + unsigned long flags; + + pr_emerg("Watchdog CPU:%d became unstuck\n", cpu); + wd_smp_lock(&flags); + cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); + wd_smp_unlock(&flags); + } + return; + } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + if (cpumask_empty(&wd_smp_cpus_pending)) { + unsigned long flags; + + wd_smp_lock(&flags); + if (cpumask_empty(&wd_smp_cpus_pending)) { + wd_smp_last_reset_tb = tb; + cpumask_andnot(&wd_smp_cpus_pending, + &wd_cpus_enabled, + &wd_smp_cpus_stuck); + } + wd_smp_unlock(&flags); + } +} + +static void watchdog_timer_interrupt(int cpu) +{ + u64 tb = get_tb(); + + per_cpu(wd_timer_tb, cpu) = tb; + + wd_smp_clear_cpu_pending(cpu, tb); + + if ((s64)(tb - wd_smp_last_reset_tb) >= (s64)wd_smp_panic_timeout_tb) + watchdog_smp_panic(cpu, tb); +} + +void soft_nmi_interrupt(struct pt_regs *regs) +{ + unsigned long flags; + int cpu = raw_smp_processor_id(); + u64 tb; + + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) + return; + + nmi_enter(); + tb = get_tb(); + if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { + per_cpu(wd_timer_tb, cpu) = tb; + + wd_smp_lock(&flags); + if (cpumask_test_cpu(cpu, &wd_smp_cpus_stuck)) { + wd_smp_unlock(&flags); + goto out; + } + set_cpu_stuck(cpu, tb); + + pr_emerg("Watchdog CPU:%d Hard LOCKUP\n", cpu); + print_modules(); + print_irqtrace_events(current); + if (regs) + show_regs(regs); + else + dump_stack(); + + wd_smp_unlock(&flags); + + if (sysctl_hardlockup_all_cpu_backtrace) + trigger_allbutself_cpu_backtrace(); + + if (hardlockup_panic) + nmi_panic(regs, "Hard LOCKUP"); + } + if (wd_panic_timeout_tb < 0x7fffffff) + mtspr(SPRN_DEC, wd_panic_timeout_tb); + +out: + nmi_exit(); +} + +static void wd_timer_reset(unsigned int cpu, struct timer_list *t) +{ + t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms); + if (wd_timer_period_ms > 1000) + t->expires = __round_jiffies_up(t->expires, cpu); + add_timer_on(t, cpu); +} + +static void wd_timer_fn(unsigned long data) +{ + struct timer_list *t = this_cpu_ptr(&wd_timer); + int cpu = smp_processor_id(); + + watchdog_timer_interrupt(cpu); + + wd_timer_reset(cpu, t); +} + +void arch_touch_nmi_watchdog(void) +{ + int cpu = smp_processor_id(); + + watchdog_timer_interrupt(cpu); +} +EXPORT_SYMBOL(arch_touch_nmi_watchdog); + +static void start_watchdog_timer_on(unsigned int cpu) +{ + struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); + + per_cpu(wd_timer_tb, cpu) = get_tb(); + + setup_pinned_timer(t, wd_timer_fn, 0); + wd_timer_reset(cpu, t); +} + +static void stop_watchdog_timer_on(unsigned int cpu) +{ + struct timer_list *t = per_cpu_ptr(&wd_timer, cpu); + + del_timer_sync(t); +} + +static int start_wd_on_cpu(unsigned int cpu) +{ + if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) { + WARN_ON(1); + return 0; + } + + if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED)) + return 0; + + if (watchdog_suspended) + return 0; + + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return 0; + + cpumask_set_cpu(cpu, &wd_cpus_enabled); + if (cpumask_weight(&wd_cpus_enabled) == 1) { + cpumask_set_cpu(cpu, &wd_smp_cpus_pending); + wd_smp_last_reset_tb = get_tb(); + } + smp_wmb(); + start_watchdog_timer_on(cpu); + + return 0; +} + +static int stop_wd_on_cpu(unsigned int cpu) +{ + if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) + return 0; /* Can happen in CPU unplug case */ + + stop_watchdog_timer_on(cpu); + + cpumask_clear_cpu(cpu, &wd_cpus_enabled); + wd_smp_clear_cpu_pending(cpu, get_tb()); + + return 0; +} + +static void watchdog_calc_timeouts(void) +{ + wd_panic_timeout_tb = watchdog_thresh * ppc_tb_freq; + + /* Have the SMP detector trigger a bit later */ + wd_smp_panic_timeout_tb = wd_panic_timeout_tb * 3 / 2; + + /* 2/5 is the factor that the perf based detector uses */ + wd_timer_period_ms = watchdog_thresh * 1000 * 2 / 5; +} + +void watchdog_nmi_reconfigure(void) +{ + int cpu; + + watchdog_calc_timeouts(); + + for_each_cpu(cpu, &wd_cpus_enabled) + stop_wd_on_cpu(cpu); + + for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) + start_wd_on_cpu(cpu); +} + +/* + * This runs after lockup_detector_init() which sets up watchdog_cpumask. + */ +static int __init powerpc_watchdog_init(void) +{ + int err; + + watchdog_calc_timeouts(); + + err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/watchdog:online", + start_wd_on_cpu, stop_wd_on_cpu); + if (err < 0) + pr_warn("Watchdog could not be initialized"); + + return 0; +} +arch_initcall(powerpc_watchdog_init); + +static void handle_backtrace_ipi(struct pt_regs *regs) +{ + nmi_cpu_backtrace(regs); +} + +static void raise_backtrace_ipi(cpumask_t *mask) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + if (cpu == smp_processor_id()) + handle_backtrace_ipi(NULL); + else + smp_send_nmi_ipi(cpu, handle_backtrace_ipi, 1000000); + } +} + +void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +{ + nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi); +} |