summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorMichael Ellerman <mpe@ellerman.id.au>2017-07-31 20:20:29 +1000
committerMichael Ellerman <mpe@ellerman.id.au>2017-07-31 20:20:29 +1000
commitbb272221e9db79f13d454e1f3fb6b05013be985e (patch)
tree36f4acc50e3fabac71fadd34c720c0a6011db470 /kernel
parent253fd51e2f533552ae35a0c661705da6c4842c1b (diff)
parent5771a8c08880cdca3bfb4a3fc6d309d6bba20877 (diff)
downloadlinux-bb272221e9db79f13d454e1f3fb6b05013be985e.tar.gz
linux-bb272221e9db79f13d454e1f3fb6b05013be985e.tar.bz2
linux-bb272221e9db79f13d454e1f3fb6b05013be985e.zip
Merge tag 'v4.13-rc1' into fixes
The fixes branch is based off a random pre-rc1 commit, because we had some fixes that needed to go in before rc1 was released. However we now need to fix some code that went in after that point, but before rc1, so merge rc1 to get that code into fixes so we can fix it!
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/bpf/inode.c16
-rw-r--r--kernel/crash_core.c44
-rw-r--r--kernel/exit.c4
-rw-r--r--kernel/extable.c3
-rw-r--r--kernel/fork.c22
-rw-r--r--kernel/groups.c35
-rw-r--r--kernel/kallsyms.c10
-rw-r--r--kernel/kcmp.c57
-rw-r--r--kernel/kexec.c8
-rw-r--r--kernel/kexec_core.c39
-rw-r--r--kernel/kexec_file.c15
-rw-r--r--kernel/kexec_internal.h2
-rw-r--r--kernel/kmod.c56
-rw-r--r--kernel/ksysfs.c4
-rw-r--r--kernel/module.c87
-rw-r--r--kernel/power/main.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c5
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sys.c8
-rw-r--r--kernel/sysctl.c335
-rw-r--r--kernel/sysctl_binary.c2
-rw-r--r--kernel/trace/ftrace.c6
-rw-r--r--kernel/trace/trace.c142
-rw-r--r--kernel/trace/trace_kprobe.c9
-rw-r--r--kernel/trace/trace_stack.c6
-rw-r--r--kernel/watchdog.c289
-rw-r--r--kernel/watchdog_hld.c37
28 files changed, 925 insertions, 324 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 72aa080f91f0..4cb8e8b23c6e 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -82,7 +82,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog_hld.o
+obj-$(CONFIG_HARDLOCKUP_DETECTOR_PERF) += watchdog_hld.o
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9bbd33497d3d..e833ed914358 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -377,10 +377,22 @@ static void bpf_evict_inode(struct inode *inode)
bpf_any_put(inode->i_private, type);
}
+/*
+ * Display the mount options in /proc/mounts.
+ */
+static int bpf_show_options(struct seq_file *m, struct dentry *root)
+{
+ umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX;
+
+ if (mode != S_IRWXUGO)
+ seq_printf(m, ",mode=%o", mode);
+ return 0;
+}
+
static const struct super_operations bpf_super_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
- .show_options = generic_show_options,
+ .show_options = bpf_show_options,
.evict_inode = bpf_evict_inode,
};
@@ -434,8 +446,6 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent)
struct inode *inode;
int ret;
- save_mount_options(sb, data);
-
ret = bpf_parse_options(data, &opts);
if (ret)
return ret;
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index fcbd568f1e95..6db80fc0810b 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -14,10 +14,12 @@
#include <asm/sections.h>
/* vmcoreinfo stuff */
-static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
-size_t vmcoreinfo_size;
-size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
+static unsigned char *vmcoreinfo_data;
+static size_t vmcoreinfo_size;
+u32 *vmcoreinfo_note;
+
+/* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
+static unsigned char *vmcoreinfo_data_safecopy;
/*
* parsing the "crashkernel" commandline
@@ -324,8 +326,23 @@ static void update_vmcoreinfo_note(void)
final_note(buf);
}
+void crash_update_vmcoreinfo_safecopy(void *ptr)
+{
+ if (ptr)
+ memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size);
+
+ vmcoreinfo_data_safecopy = ptr;
+}
+
void crash_save_vmcoreinfo(void)
{
+ if (!vmcoreinfo_note)
+ return;
+
+ /* Use the safe copy, if we have one, to generate the vmcoreinfo note */
+ if (vmcoreinfo_data_safecopy)
+ vmcoreinfo_data = vmcoreinfo_data_safecopy;
+
vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
update_vmcoreinfo_note();
}
@@ -340,7 +357,7 @@ void vmcoreinfo_append_str(const char *fmt, ...)
r = vscnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
+ r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size);
memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
@@ -356,11 +373,26 @@ void __weak arch_crash_save_vmcoreinfo(void)
phys_addr_t __weak paddr_vmcoreinfo_note(void)
{
- return __pa_symbol((unsigned long)(char *)&vmcoreinfo_note);
+ return __pa(vmcoreinfo_note);
}
static int __init crash_save_vmcoreinfo_init(void)
{
+ vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
+ if (!vmcoreinfo_data) {
+ pr_warn("Memory allocation for vmcoreinfo_data failed\n");
+ return -ENOMEM;
+ }
+
+ vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!vmcoreinfo_note) {
+ free_page((unsigned long)vmcoreinfo_data);
+ vmcoreinfo_data = NULL;
+ pr_warn("Memory allocation for vmcoreinfo_note failed\n");
+ return -ENOMEM;
+ }
+
VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
VMCOREINFO_PAGESIZE(PAGE_SIZE);
diff --git a/kernel/exit.c b/kernel/exit.c
index 608c9775a37b..c5548faa9f37 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1639,6 +1639,10 @@ long kernel_wait4(pid_t upid, int __user *stat_addr, int options,
__WNOTHREAD|__WCLONE|__WALL))
return -EINVAL;
+ /* -INT_MIN is not defined */
+ if (upid == INT_MIN)
+ return -ESRCH;
+
if (upid == -1)
type = PIDTYPE_MAX;
else if (upid < 0) {
diff --git a/kernel/extable.c b/kernel/extable.c
index 223df4a328a4..38c2412401a1 100644
--- a/kernel/extable.c
+++ b/kernel/extable.c
@@ -55,7 +55,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr)
{
const struct exception_table_entry *e;
- e = search_extable(__start___ex_table, __stop___ex_table-1, addr);
+ e = search_extable(__start___ex_table,
+ __stop___ex_table - __start___ex_table, addr);
if (!e)
e = search_module_extables(addr);
return e;
diff --git a/kernel/fork.c b/kernel/fork.c
index 0f69a3e5281e..17921b0390b4 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -205,19 +205,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
void *stack;
int i;
- local_irq_disable();
for (i = 0; i < NR_CACHED_STACKS; i++) {
- struct vm_struct *s = this_cpu_read(cached_stacks[i]);
+ struct vm_struct *s;
+
+ s = this_cpu_xchg(cached_stacks[i], NULL);
if (!s)
continue;
- this_cpu_write(cached_stacks[i], NULL);
tsk->stack_vm_area = s;
- local_irq_enable();
return s->addr;
}
- local_irq_enable();
stack = __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
VMALLOC_START, VMALLOC_END,
@@ -245,19 +243,15 @@ static inline void free_thread_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
if (task_stack_vm_area(tsk)) {
- unsigned long flags;
int i;
- local_irq_save(flags);
for (i = 0; i < NR_CACHED_STACKS; i++) {
- if (this_cpu_read(cached_stacks[i]))
+ if (this_cpu_cmpxchg(cached_stacks[i],
+ NULL, tsk->stack_vm_area) != NULL)
continue;
- this_cpu_write(cached_stacks[i], tsk->stack_vm_area);
- local_irq_restore(flags);
return;
}
- local_irq_restore(flags);
vfree_atomic(tsk->stack);
return;
@@ -560,7 +554,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
set_task_stack_end_magic(tsk);
#ifdef CONFIG_CC_STACKPROTECTOR
- tsk->stack_canary = get_random_long();
+ tsk->stack_canary = get_random_canary();
#endif
/*
@@ -579,6 +573,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
kcov_task_init(tsk);
+#ifdef CONFIG_FAULT_INJECTION
+ tsk->fail_nth = 0;
+#endif
+
return tsk;
free_stack:
diff --git a/kernel/groups.c b/kernel/groups.c
index d09727692a2a..434f6665f187 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -5,6 +5,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/security.h>
+#include <linux/sort.h>
#include <linux/syscalls.h>
#include <linux/user_namespace.h>
#include <linux/vmalloc.h>
@@ -76,32 +77,18 @@ static int groups_from_user(struct group_info *group_info,
return 0;
}
-/* a simple Shell sort */
+static int gid_cmp(const void *_a, const void *_b)
+{
+ kgid_t a = *(kgid_t *)_a;
+ kgid_t b = *(kgid_t *)_b;
+
+ return gid_gt(a, b) - gid_lt(a, b);
+}
+
static void groups_sort(struct group_info *group_info)
{
- int base, max, stride;
- int gidsetsize = group_info->ngroups;
-
- for (stride = 1; stride < gidsetsize; stride = 3 * stride + 1)
- ; /* nothing */
- stride /= 3;
-
- while (stride) {
- max = gidsetsize - stride;
- for (base = 0; base < max; base++) {
- int left = base;
- int right = left + stride;
- kgid_t tmp = group_info->gid[right];
-
- while (left >= 0 && gid_gt(group_info->gid[left], tmp)) {
- group_info->gid[right] = group_info->gid[left];
- right = left;
- left -= stride;
- }
- group_info->gid[right] = tmp;
- }
- stride /= 3;
- }
+ sort(group_info->gid, group_info->ngroups, sizeof(*group_info->gid),
+ gid_cmp, NULL);
}
/* a simple bsearch */
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 6a3b249a2ae1..127e7cfafa55 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -28,12 +28,6 @@
#include <asm/sections.h>
-#ifdef CONFIG_KALLSYMS_ALL
-#define all_var 1
-#else
-#define all_var 0
-#endif
-
/*
* These will be re-linked against their real values
* during the second link stage.
@@ -82,7 +76,7 @@ static inline int is_kernel(unsigned long addr)
static int is_ksym_addr(unsigned long addr)
{
- if (all_var)
+ if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
return is_kernel(addr);
return is_kernel_text(addr) || is_kernel_inittext(addr);
@@ -280,7 +274,7 @@ static unsigned long get_symbol_pos(unsigned long addr,
if (!symbol_end) {
if (is_kernel_inittext(addr))
symbol_end = (unsigned long)_einittext;
- else if (all_var)
+ else if (IS_ENABLED(CONFIG_KALLSYMS_ALL))
symbol_end = (unsigned long)_end;
else
symbol_end = (unsigned long)_etext;
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
index 3a47fa998fe0..ea34ed8bb952 100644
--- a/kernel/kcmp.c
+++ b/kernel/kcmp.c
@@ -11,6 +11,10 @@
#include <linux/bug.h>
#include <linux/err.h>
#include <linux/kcmp.h>
+#include <linux/capability.h>
+#include <linux/list.h>
+#include <linux/eventpoll.h>
+#include <linux/file.h>
#include <asm/unistd.h>
@@ -94,6 +98,56 @@ static int kcmp_lock(struct mutex *m1, struct mutex *m2)
return err;
}
+#ifdef CONFIG_EPOLL
+/* Compare task1's fd idx1 with the epoll target file that uslot names in task2. */
+static int kcmp_epoll_target(struct task_struct *task1,
+			     struct task_struct *task2,
+			     unsigned long idx1,
+			     struct kcmp_epoll_slot __user *uslot)
+{
+	struct file *filp, *filp_epoll, *filp_tgt;
+	struct kcmp_epoll_slot slot;
+	struct files_struct *files;
+
+	if (copy_from_user(&slot, uslot, sizeof(slot)))
+		return -EFAULT;
+
+	filp = get_file_raw_ptr(task1, idx1);
+	if (!filp)
+		return -EBADF;
+
+	files = get_files_struct(task2);
+	if (!files)
+		return -EBADF;
+
+	spin_lock(&files->file_lock);
+	filp_epoll = fcheck_files(files, slot.efd);
+	if (filp_epoll)
+		get_file(filp_epoll);
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+	if (!filp_epoll)
+		return -EBADF;
+
+	filp_tgt = get_epoll_tfile_raw_ptr(filp_epoll, slot.tfd, slot.toff);
+	fput(filp_epoll);
+
+	/* Check the lookup result on every path, not only when efd was missing. */
+	if (IS_ERR(filp_tgt))
+		return PTR_ERR(filp_tgt);
+
+	return kcmp_ptr(filp, filp_tgt, KCMP_FILE);
+}
+#else
+static int kcmp_epoll_target(struct task_struct *task1,
+ struct task_struct *task2,
+ unsigned long idx1,
+ struct kcmp_epoll_slot __user *uslot)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
unsigned long, idx1, unsigned long, idx2)
{
@@ -165,6 +219,9 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
ret = -EOPNOTSUPP;
#endif
break;
+ case KCMP_EPOLL_TFD:
+ ret = kcmp_epoll_target(task1, task2, idx1, (void *)idx2);
+ break;
default:
ret = -EINVAL;
break;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 980936a90ee6..e62ec4dc6620 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -144,6 +144,14 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
if (ret)
goto out;
+ /*
+ * Some architectures (like S390) may touch the crash memory before
+ * machine_kexec_prepare(), so we must copy vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
for (i = 0; i < nr_segments; i++) {
ret = kimage_load_segment(image, &image->segment[i]);
if (ret)
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 154ffb489b93..1ae7c41c33c1 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -482,6 +482,40 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
return pages;
}
+int kimage_crash_copy_vmcoreinfo(struct kimage *image)
+{
+ struct page *vmcoreinfo_page;
+ void *safecopy;
+
+ if (image->type != KEXEC_TYPE_CRASH)
+ return 0;
+
+ /*
+ * For kdump, allocate one vmcoreinfo safe copy from the
+ * crash memory. As we have arch_kexec_protect_crashkres()
+ * after the kexec syscall, it is naturally protected from
+ * write (even read) access under the kernel direct mapping.
+ * On the other hand, we still need to operate on it when a
+ * crash happens to generate the vmcoreinfo note, hence we
+ * rely on vmap for this purpose.
+ */
+ vmcoreinfo_page = kimage_alloc_control_pages(image, 0);
+ if (!vmcoreinfo_page) {
+ pr_warn("Could not allocate vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+ safecopy = vmap(&vmcoreinfo_page, 1, VM_MAP, PAGE_KERNEL);
+ if (!safecopy) {
+ pr_warn("Could not vmap vmcoreinfo buffer\n");
+ return -ENOMEM;
+ }
+
+ image->vmcoreinfo_data_copy = safecopy;
+ crash_update_vmcoreinfo_safecopy(safecopy);
+
+ return 0;
+}
+
static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
if (*image->entry != 0)
@@ -569,6 +603,11 @@ void kimage_free(struct kimage *image)
if (!image)
return;
+ if (image->vmcoreinfo_data_copy) {
+ crash_update_vmcoreinfo_safecopy(NULL);
+ vunmap(image->vmcoreinfo_data_copy);
+ }
+
kimage_free_extra_pages(image);
for_each_kimage_entry(image, ptr, entry) {
if (entry & IND_INDIRECTION) {
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 766e7e4d3ad9..9f48f4412297 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -26,13 +26,6 @@
#include <linux/vmalloc.h>
#include "kexec_internal.h"
-/*
- * Declare these symbols weak so that if architecture provides a purgatory,
- * these will be overridden.
- */
-char __weak kexec_purgatory[0];
-size_t __weak kexec_purgatory_size = 0;
-
static int kexec_calculate_store_digests(struct kimage *image);
/* Architectures can provide this probe function */
@@ -298,6 +291,14 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if (ret)
goto out;
+ /*
+ * Some architectures (like S390) may touch the crash memory before
+ * machine_kexec_prepare(), so we must copy vmcoreinfo data after it.
+ */
+ ret = kimage_crash_copy_vmcoreinfo(image);
+ if (ret)
+ goto out;
+
ret = kexec_calculate_store_digests(image);
if (ret)
goto out;
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 799a8a452187..50dfcb039a41 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -17,6 +17,8 @@ extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
#include <linux/purgatory.h>
void kimage_file_post_load_cleanup(struct kimage *image);
+extern char kexec_purgatory[];
+extern size_t kexec_purgatory_size;
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
#endif /* CONFIG_KEXEC_FILE */
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 563f97e2be36..6d016c5d97c8 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,8 +45,6 @@
#include <trace/events/module.h>
-extern int max_threads;
-
#define CAP_BSET (void *)1
#define CAP_PI (void *)2
@@ -56,6 +54,21 @@ static DEFINE_SPINLOCK(umh_sysctl_lock);
static DECLARE_RWSEM(umhelper_sem);
#ifdef CONFIG_MODULES
+/*
+ * Assuming:
+ *
+ * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE,
+ * (u64) THREAD_SIZE * 8UL);
+ *
+ * If you need less than 50 threads, that would mean we're dealing with systems
+ * smaller than 3200 pages. This assumes you are capable of having ~13M memory,
+ * and this would only be an upper limit, after which the OOM killer
+ * would take effect. Systems like these are very unlikely if modules are
+ * enabled.
+ */
+#define MAX_KMOD_CONCURRENT 50
+static atomic_t kmod_concurrent_max = ATOMIC_INIT(MAX_KMOD_CONCURRENT);
+static DECLARE_WAIT_QUEUE_HEAD(kmod_wq);
/*
modprobe_path is set via /proc/sys.
@@ -127,11 +140,7 @@ int __request_module(bool wait, const char *fmt, ...)
{
va_list args;
char module_name[MODULE_NAME_LEN];
- unsigned int max_modprobes;
int ret;
- static atomic_t kmod_concurrent = ATOMIC_INIT(0);
-#define MAX_KMOD_CONCURRENT 50 /* Completely arbitrary value - KAO */
- static int kmod_loop_msg;
/*
* We don't allow synchronous module loading from async. Module
@@ -154,40 +163,25 @@ int __request_module(bool wait, const char *fmt, ...)
if (ret)
return ret;
- /* If modprobe needs a service that is in a module, we get a recursive
- * loop. Limit the number of running kmod threads to max_threads/2 or
- * MAX_KMOD_CONCURRENT, whichever is the smaller. A cleaner method
- * would be to run the parents of this process, counting how many times
- * kmod was invoked. That would mean accessing the internals of the
- * process tables to get the command line, proc_pid_cmdline is static
- * and it is not worth changing the proc code just to handle this case.
- * KAO.
- *
- * "trace the ppid" is simple, but will fail if someone's
- * parent exits. I think this is as good as it gets. --RR
- */
- max_modprobes = min(max_threads/2, MAX_KMOD_CONCURRENT);
- atomic_inc(&kmod_concurrent);
- if (atomic_read(&kmod_concurrent) > max_modprobes) {
- /* We may be blaming an innocent here, but unlikely */
- if (kmod_loop_msg < 5) {
- printk(KERN_ERR
- "request_module: runaway loop modprobe %s\n",
- module_name);
- kmod_loop_msg++;
- }
- atomic_dec(&kmod_concurrent);
- return -ENOMEM;
+ if (atomic_dec_if_positive(&kmod_concurrent_max) < 0) {
+ pr_warn_ratelimited("request_module: kmod_concurrent_max (%u) close to 0 (max_modprobes: %u), for module %s, throttling...",
+ atomic_read(&kmod_concurrent_max),
+ MAX_KMOD_CONCURRENT, module_name);
+ wait_event_interruptible(kmod_wq,
+ atomic_dec_if_positive(&kmod_concurrent_max) >= 0);
}
trace_module_request(module_name, wait, _RET_IP_);
ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC);
- atomic_dec(&kmod_concurrent);
+ atomic_inc(&kmod_concurrent_max);
+ wake_up(&kmod_wq);
+
return ret;
}
EXPORT_SYMBOL(__request_module);
+
#endif /* CONFIG_MODULES */
static void call_usermodehelper_freeinfo(struct subprocess_info *info)
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 23cd70651238..46ba853656f6 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -134,7 +134,7 @@ static ssize_t vmcoreinfo_show(struct kobject *kobj,
{
phys_addr_t vmcore_base = paddr_vmcoreinfo_note();
return sprintf(buf, "%pa %x\n", &vmcore_base,
- (unsigned int)sizeof(vmcoreinfo_note));
+ (unsigned int)VMCOREINFO_NOTE_SIZE);
}
KERNEL_ATTR_RO(vmcoreinfo);
@@ -234,7 +234,7 @@ static struct attribute * kernel_attrs[] = {
NULL
};
-static struct attribute_group kernel_attr_group = {
+static const struct attribute_group kernel_attr_group = {
.attrs = kernel_attrs,
};
diff --git a/kernel/module.c b/kernel/module.c
index b3dbdde82e80..40f983cbea81 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -300,6 +300,7 @@ int unregister_module_notifier(struct notifier_block *nb)
EXPORT_SYMBOL(unregister_module_notifier);
struct load_info {
+ const char *name;
Elf_Ehdr *hdr;
unsigned long len;
Elf_Shdr *sechdrs;
@@ -600,7 +601,7 @@ static struct module *find_module_all(const char *name, size_t len,
module_assert_mutex_or_preempt();
- list_for_each_entry(mod, &modules, list) {
+ list_for_each_entry_rcu(mod, &modules, list) {
if (!even_unformed && mod->state == MODULE_STATE_UNFORMED)
continue;
if (strlen(mod->name) == len && !memcmp(mod->name, name, len))
@@ -1273,12 +1274,13 @@ static u32 resolve_rel_crc(const s32 *crc)
return *(u32 *)((void *)crc + *crc);
}
-static int check_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static int check_version(const struct load_info *info,
const char *symname,
struct module *mod,
const s32 *crc)
{
+ Elf_Shdr *sechdrs = info->sechdrs;
+ unsigned int versindex = info->index.vers;
unsigned int i, num_versions;
struct modversion_info *versions;
@@ -1312,17 +1314,16 @@ static int check_version(Elf_Shdr *sechdrs,
}
/* Broken toolchain. Warn once, then let it go.. */
- pr_warn_once("%s: no symbol version for %s\n", mod->name, symname);
+ pr_warn_once("%s: no symbol version for %s\n", info->name, symname);
return 1;
bad_version:
pr_warn("%s: disagrees about version of symbol %s\n",
- mod->name, symname);
+ info->name, symname);
return 0;
}
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
struct module *mod)
{
const s32 *crc;
@@ -1338,8 +1339,8 @@ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
BUG();
}
preempt_enable();
- return check_version(sechdrs, versindex,
- VMLINUX_SYMBOL_STR(module_layout), mod, crc);
+ return check_version(info, VMLINUX_SYMBOL_STR(module_layout),
+ mod, crc);
}
/* First part is kernel version, which we ignore if module has crcs. */
@@ -1353,8 +1354,7 @@ static inline int same_magic(const char *amagic, const char *bmagic,
return strcmp(amagic, bmagic) == 0;
}
#else
-static inline int check_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_version(const struct load_info *info,
const char *symname,
struct module *mod,
const s32 *crc)
@@ -1362,8 +1362,7 @@ static inline int check_version(Elf_Shdr *sechdrs,
return 1;
}
-static inline int check_modstruct_version(Elf_Shdr *sechdrs,
- unsigned int versindex,
+static inline int check_modstruct_version(const struct load_info *info,
struct module *mod)
{
return 1;
@@ -1399,7 +1398,7 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
if (!sym)
goto unlock;
- if (!check_version(info->sechdrs, info->index.vers, name, mod, crc)) {
+ if (!check_version(info, name, mod, crc)) {
sym = ERR_PTR(-EINVAL);
goto getname;
}
@@ -1662,31 +1661,36 @@ static inline void remove_notes_attrs(struct module *mod)
}
#endif /* CONFIG_KALLSYMS */
-static void add_usage_links(struct module *mod)
+static void del_usage_links(struct module *mod)
{
#ifdef CONFIG_MODULE_UNLOAD
struct module_use *use;
- int nowarn;
mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list) {
- nowarn = sysfs_create_link(use->target->holders_dir,
- &mod->mkobj.kobj, mod->name);
- }
+ list_for_each_entry(use, &mod->target_list, target_list)
+ sysfs_remove_link(use->target->holders_dir, mod->name);
mutex_unlock(&module_mutex);
#endif
}
-static void del_usage_links(struct module *mod)
+static int add_usage_links(struct module *mod)
{
+ int ret = 0;
#ifdef CONFIG_MODULE_UNLOAD
struct module_use *use;
mutex_lock(&module_mutex);
- list_for_each_entry(use, &mod->target_list, target_list)
- sysfs_remove_link(use->target->holders_dir, mod->name);
+ list_for_each_entry(use, &mod->target_list, target_list) {
+ ret = sysfs_create_link(use->target->holders_dir,
+ &mod->mkobj.kobj, mod->name);
+ if (ret)
+ break;
+ }
mutex_unlock(&module_mutex);
+ if (ret)
+ del_usage_links(mod);
#endif
+ return ret;
}
static int module_add_modinfo_attrs(struct module *mod)
@@ -1797,13 +1801,18 @@ static int mod_sysfs_setup(struct module *mod,
if (err)
goto out_unreg_param;
- add_usage_links(mod);
+ err = add_usage_links(mod);
+ if (err)
+ goto out_unreg_modinfo_attrs;
+
add_sect_attrs(mod, info);
add_notes_attrs(mod, info);
kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
return 0;
+out_unreg_modinfo_attrs:
+ module_remove_modinfo_attrs(mod);
out_unreg_param:
module_param_sysfs_remove(mod);
out_unreg_holders:
@@ -2910,9 +2919,15 @@ static int rewrite_section_headers(struct load_info *info, int flags)
info->index.vers = 0; /* Pretend no __versions section! */
else
info->index.vers = find_sec(info, "__versions");
+ info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
info->index.info = find_sec(info, ".modinfo");
+ if (!info->index.info)
+ info->name = "(missing .modinfo section)";
+ else
+ info->name = get_modinfo(info, "name");
info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC;
- info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC;
+
return 0;
}
@@ -2952,21 +2967,29 @@ static struct module *setup_load_info(struct load_info *info, int flags)
info->index.mod = find_sec(info, ".gnu.linkonce.this_module");
if (!info->index.mod) {
- pr_warn("No module found in object\n");
+ pr_warn("%s: No module found in object\n",
+ info->name ?: "(missing .modinfo name field)");
return ERR_PTR(-ENOEXEC);
}
/* This is temporary: point mod into copy of data. */
mod = (void *)info->sechdrs[info->index.mod].sh_addr;
+ /*
+ * If we didn't load the .modinfo 'name' field, fall back to
+ * on-disk struct mod 'name' field.
+ */
+ if (!info->name)
+ info->name = mod->name;
+
if (info->index.sym == 0) {
- pr_warn("%s: module has no symbols (stripped?)\n", mod->name);
+ pr_warn("%s: module has no symbols (stripped?)\n", info->name);
return ERR_PTR(-ENOEXEC);
}
info->index.pcpu = find_pcpusec(info);
/* Check module struct version now, before we try to use module. */
- if (!check_modstruct_version(info->sechdrs, info->index.vers, mod))
+ if (!check_modstruct_version(info, mod))
return ERR_PTR(-ENOEXEC);
return mod;
@@ -2987,7 +3010,7 @@ static int check_modinfo(struct module *mod, struct load_info *info, int flags)
return err;
} else if (!same_magic(modmagic, vermagic, info->index.vers)) {
pr_err("%s: version magic '%s' should be '%s'\n",
- mod->name, modmagic, vermagic);
+ info->name, modmagic, vermagic);
return -ENOEXEC;
}
@@ -3237,7 +3260,7 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
/* module_blacklist is a comma-separated list of module names */
static char *module_blacklist;
-static bool blacklisted(char *module_name)
+static bool blacklisted(const char *module_name)
{
const char *p;
size_t len;
@@ -3267,7 +3290,7 @@ static struct module *layout_and_allocate(struct load_info *info, int flags)
if (IS_ERR(mod))
return mod;
- if (blacklisted(mod->name))
+ if (blacklisted(info->name))
return ERR_PTR(-EPERM);
err = check_modinfo(mod, info, flags);
@@ -4196,7 +4219,7 @@ const struct exception_table_entry *search_module_extables(unsigned long addr)
goto out;
e = search_extable(mod->extable,
- mod->extable + mod->num_exentries - 1,
+ mod->num_exentries,
addr);
out:
preempt_enable();
diff --git a/kernel/power/main.c b/kernel/power/main.c
index d401c21136d1..42bd800a6755 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -705,7 +705,7 @@ static struct attribute * g[] = {
NULL,
};
-static struct attribute_group attr_group = {
+static const struct attribute_group attr_group = {
.attrs = g,
};
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 076a2e31951c..29a397067ffa 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -610,6 +610,11 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_cpu->sg_policy = sg_policy;
sg_cpu->flags = SCHED_CPUFREQ_RT;
sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
+ }
+
+ for_each_cpu(cpu, policy->cpus) {
+ struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
+
cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
policy_is_shared(policy) ?
sugov_update_shared :
diff --git a/kernel/signal.c b/kernel/signal.c
index 48a59eefd8ad..caed9133ae52 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1402,6 +1402,10 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
return ret;
}
+ /* -INT_MIN is undefined. Exclude this case to avoid a UBSAN warning */
+ if (pid == INT_MIN)
+ return -ESRCH;
+
read_lock(&tasklist_lock);
if (pid != -1) {
ret = __kill_pgrp_info(sig, info,
diff --git a/kernel/sys.c b/kernel/sys.c
index 47d901586b4e..2855ee73acd0 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1362,7 +1362,7 @@ COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource,
ret = do_prlimit(current, resource, NULL, &r);
if (!ret) {
- struct rlimit r32;
+ struct compat_rlimit r32;
if (r.rlim_cur > COMPAT_RLIM_INFINITY)
r32.rlim_cur = COMPAT_RLIM_INFINITY;
else
@@ -2360,7 +2360,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_THP_DISABLE:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
- error = !!(me->mm->def_flags & VM_NOHUGEPAGE);
+ error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags);
break;
case PR_SET_THP_DISABLE:
if (arg3 || arg4 || arg5)
@@ -2368,9 +2368,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
if (down_write_killable(&me->mm->mmap_sem))
return -EINTR;
if (arg2)
- me->mm->def_flags |= VM_NOHUGEPAGE;
+ set_bit(MMF_DISABLE_THP, &me->mm->flags);
else
- me->mm->def_flags &= ~VM_NOHUGEPAGE;
+ clear_bit(MMF_DISABLE_THP, &me->mm->flags);
up_write(&me->mm->mmap_sem);
break;
case PR_MPX_ENABLE_MANAGEMENT:
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4dfba1a76cc3..6648fbbb8157 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -174,11 +174,32 @@ extern int no_unaligned_warning;
#ifdef CONFIG_PROC_SYSCTL
-#define SYSCTL_WRITES_LEGACY -1
-#define SYSCTL_WRITES_WARN 0
-#define SYSCTL_WRITES_STRICT 1
+/**
+ * enum sysctl_writes_mode - supported sysctl write modes
+ *
+ * @SYSCTL_WRITES_LEGACY: each write syscall must fully contain the sysctl value
+ * to be written, and multiple writes on the same sysctl file descriptor
+ * will rewrite the sysctl value, regardless of file position. No warning
+ * is issued when the initial position is not 0.
+ * @SYSCTL_WRITES_WARN: same as above but warn when the initial file position is
+ * not 0.
+ * @SYSCTL_WRITES_STRICT: writes to numeric sysctl entries must always be at
+ * file position 0 and the value must be fully contained in the buffer
+ * sent to the write syscall. If dealing with strings, respect the file
+ * position, but restrict this to the max length of the buffer; anything
+ * past the max length will be ignored. Multiple writes will append
+ * to the buffer.
+ *
+ * These write modes control how current file position affects the behavior of
+ * updating sysctl values through the proc interface on each write.
+ */
+enum sysctl_writes_mode {
+ SYSCTL_WRITES_LEGACY = -1,
+ SYSCTL_WRITES_WARN = 0,
+ SYSCTL_WRITES_STRICT = 1,
+};
-static int sysctl_writes_strict = SYSCTL_WRITES_STRICT;
+static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
static int proc_do_cad_pid(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
@@ -880,6 +901,14 @@ static struct ctl_table kern_table[] = {
#endif
},
{
+ .procname = "watchdog_cpumask",
+ .data = &watchdog_cpumask_bits,
+ .maxlen = NR_CPUS,
+ .mode = 0644,
+ .proc_handler = proc_watchdog_cpumask,
+ },
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+ {
.procname = "soft_watchdog",
.data = &soft_watchdog_enabled,
.maxlen = sizeof (int),
@@ -889,13 +918,6 @@ static struct ctl_table kern_table[] = {
.extra2 = &one,
},
{
- .procname = "watchdog_cpumask",
- .data = &watchdog_cpumask_bits,
- .maxlen = NR_CPUS,
- .mode = 0644,
- .proc_handler = proc_watchdog_cpumask,
- },
- {
.procname = "softlockup_panic",
.data = &softlockup_panic,
.maxlen = sizeof(int),
@@ -904,27 +926,29 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one,
},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
+#ifdef CONFIG_SMP
{
- .procname = "hardlockup_panic",
- .data = &hardlockup_panic,
+ .procname = "softlockup_all_cpu_backtrace",
+ .data = &sysctl_softlockup_all_cpu_backtrace,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#endif /* CONFIG_SMP */
#endif
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
{
- .procname = "softlockup_all_cpu_backtrace",
- .data = &sysctl_softlockup_all_cpu_backtrace,
+ .procname = "hardlockup_panic",
+ .data = &hardlockup_panic,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
+#ifdef CONFIG_SMP
{
.procname = "hardlockup_all_cpu_backtrace",
.data = &sysctl_hardlockup_all_cpu_backtrace,
@@ -936,6 +960,8 @@ static struct ctl_table kern_table[] = {
},
#endif /* CONFIG_SMP */
#endif
+#endif
+
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
{
.procname = "unknown_nmi_panic",
@@ -1950,6 +1976,32 @@ static void warn_sysctl_write(struct ctl_table *table)
}
/**
+ * proc_first_pos_non_zero_ignore - tell whether a write has to be ignored
+ * @ppos: file position
+ * @table: the sysctl table
+ *
+ * Returns true only when the file position is non-zero and
+ * sysctl_writes_strict is SYSCTL_WRITES_STRICT, i.e. when a numeric write
+ * must not be applied. In SYSCTL_WRITES_WARN mode a non-zero position only
+ * triggers warn_sysctl_write() and false is returned. String proc handlers
+ * can ignore the return value.
+ */
+static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
+					   struct ctl_table *table)
+{
+	bool ignore = false;
+
+	/* A write at position zero is always acceptable */
+	if (*ppos) {
+		if (sysctl_writes_strict == SYSCTL_WRITES_STRICT)
+			ignore = true;
+		else if (sysctl_writes_strict == SYSCTL_WRITES_WARN)
+			warn_sysctl_write(table);
+	}
+
+	return ignore;
+}
+
+/**
* proc_dostring - read a string sysctl
* @table: the sysctl table
* @write: %TRUE if this is a write to the sysctl file
@@ -1969,8 +2021,8 @@ static void warn_sysctl_write(struct ctl_table *table)
int proc_dostring(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- if (write && *ppos && sysctl_writes_strict == SYSCTL_WRITES_WARN)
- warn_sysctl_write(table);
+ if (write)
+ proc_first_pos_non_zero_ignore(ppos, table);
return _proc_do_string((char *)(table->data), table->maxlen, write,
(char __user *)buffer, lenp, ppos);
@@ -2128,19 +2180,18 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
return 0;
}
-static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp,
- int *valp,
- int write, void *data)
+static int do_proc_douintvec_conv(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data)
{
if (write) {
- if (*negp)
+ if (*lvalp > UINT_MAX)
return -EINVAL;
if (*lvalp > UINT_MAX)
return -EINVAL;
*valp = *lvalp;
} else {
unsigned int val = *valp;
- *negp = false;
*lvalp = (unsigned long)val;
}
return 0;
@@ -2172,17 +2223,8 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
conv = do_proc_dointvec_conv;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
@@ -2249,6 +2291,146 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
buffer, lenp, ppos, conv, data);
}
+/*
+ * Write path for a single unsigned int sysctl: parse one number from the
+ * user buffer and pass it to @conv, which stores it through @tbl_data.
+ *
+ * Returns 0 on success and -EINVAL for every failure on the parsing path.
+ * When proc_first_pos_non_zero_ignore() reports that the write must be
+ * ignored, the file position is advanced by *lenp and 0 is returned
+ * without touching the value.
+ */
+static int do_proc_douintvec_w(unsigned int *tbl_data,
+ struct ctl_table *table,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos,
+ int (*conv)(unsigned long *lvalp,
+ unsigned int *valp,
+ int write, void *data),
+ void *data)
+{
+ unsigned long lval;
+ int err = 0;
+ size_t left;
+ bool neg;
+ char *kbuf = NULL, *p;
+
+ left = *lenp;
+
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto bail_early;
+
+ /* Only the first PAGE_SIZE - 1 bytes of the write are looked at */
+ if (left > PAGE_SIZE - 1)
+ left = PAGE_SIZE - 1;
+
+ /* kbuf keeps the allocation for kfree(); p is the parse cursor */
+ p = kbuf = memdup_user_nul(buffer, left);
+ if (IS_ERR(kbuf))
+ return -EINVAL;
+
+ left -= proc_skip_spaces(&p);
+ if (!left) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ err = proc_get_long(&p, &left, &lval, &neg,
+ proc_wspace_sep,
+ sizeof(proc_wspace_sep), NULL);
+ /* A parse error and a negative number are both rejected */
+ if (err || neg) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ if (conv(&lval, tbl_data, 1, data)) {
+ err = -EINVAL;
+ goto out_free;
+ }
+
+ /*
+ * NOTE(review): 'left' is not read again after this point, and *ppos
+ * is not advanced on the success path - confirm both are intended.
+ */
+ if (!err && left)
+ left -= proc_skip_spaces(&p);
+
+out_free:
+ kfree(kbuf);
+ if (err)
+ return -EINVAL;
+
+ return 0;
+
+ /* This is in keeping with old __do_proc_dointvec() */
+bail_early:
+ *ppos += *lenp;
+ return err;
+}
+
+/*
+ * Read path for a single unsigned int sysctl: fetch the value through
+ * @conv and print it, followed by a newline when room is left, into the
+ * user buffer. *lenp is reduced to the number of bytes produced and *ppos
+ * is advanced by that amount, on failure as well as on success.
+ */
+static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	size_t remaining = *lenp;
+	unsigned long value;
+	int ret;
+
+	if (conv(&value, tbl_data, 0, data)) {
+		ret = -EINVAL;
+	} else {
+		ret = proc_put_long(&buffer, &remaining, value, false);
+		/* The newline is only added if the number went out and fits */
+		if (!ret && remaining)
+			ret = proc_put_char(&buffer, &remaining, '\n');
+	}
+
+	*lenp -= remaining;
+	*ppos += *lenp;
+
+	return ret;
+}
+
+/*
+ * Common entry for the unsigned int handlers. Validates the request,
+ * falls back to do_proc_douintvec_conv() when no @conv is given and then
+ * dispatches to the write or the read helper.
+ *
+ * Returns 0 with *lenp set to 0 when there is nothing to do, -EINVAL with
+ * *lenp set to 0 when the table entry does not describe exactly one
+ * unsigned int, otherwise the result of the helper.
+ */
+static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
+			       int write, void __user *buffer,
+			       size_t *lenp, loff_t *ppos,
+			       int (*conv)(unsigned long *lvalp,
+					   unsigned int *valp,
+					   int write, void *data),
+			       void *data)
+{
+	unsigned int *valp = tbl_data;
+	unsigned int nr_vals;
+
+	/* No storage, no length, or a read that is not at position zero */
+	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	/*
+	 * Arrays are not supported, keep this simple. *Do not* add
+	 * support for them.
+	 */
+	nr_vals = table->maxlen / sizeof(*valp);
+	if (nr_vals != 1) {
+		*lenp = 0;
+		return -EINVAL;
+	}
+
+	if (!conv)
+		conv = do_proc_douintvec_conv;
+
+	if (!write)
+		return do_proc_douintvec_r(valp, buffer, lenp, ppos,
+					   conv, data);
+
+	return do_proc_douintvec_w(valp, table, buffer, lenp, ppos,
+				   conv, data);
+}
+
+/* Run __do_proc_douintvec() on the storage the table entry points at. */
+static int do_proc_douintvec(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos,
+			     int (*conv)(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data),
+			     void *data)
+{
+	void *storage = table->data;
+
+	return __do_proc_douintvec(storage, table, write, buffer, lenp, ppos,
+				   conv, data);
+}
+
/**
* proc_dointvec - read a vector of integers
* @table: the sysctl table
@@ -2284,8 +2466,8 @@ int proc_dointvec(struct ctl_table *table, int write,
int proc_douintvec(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- return do_proc_dointvec(table, write, buffer, lenp, ppos,
- do_proc_douintvec_conv, NULL);
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_conv, NULL);
}
/*
@@ -2390,6 +2572,65 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
do_proc_dointvec_minmax_conv, &param);
}
+/**
+ * struct do_proc_douintvec_minmax_conv_param - bounds for the minmax converter
+ * @min: pointer to the lowest accepted value, or NULL for no lower bound
+ * @max: pointer to the highest accepted value, or NULL for no upper bound
+ */
+struct do_proc_douintvec_minmax_conv_param {
+	unsigned int *min;
+	unsigned int *max;
+};
+
+/*
+ * Conversion callback that enforces the optional bounds in @data.
+ *
+ * On write, *lvalp is stored into *valp only if it fits in an unsigned int
+ * and lies within [*min, *max]; a value above UINT_MAX yields -EINVAL and
+ * an out-of-range value yields -ERANGE, leaving *valp untouched. On read,
+ * *valp is copied to *lvalp. Returns 0 on success.
+ */
+static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
+					 unsigned int *valp,
+					 int write, void *data)
+{
+	struct do_proc_douintvec_minmax_conv_param *param = data;
+
+	if (write) {
+		unsigned int val;
+
+		/*
+		 * Check the width before narrowing: otherwise the bounds
+		 * would be compared against the truncated low bits of a
+		 * value that is too large to be represented at all.
+		 */
+		if (*lvalp > UINT_MAX)
+			return -EINVAL;
+
+		val = *lvalp;
+		if ((param->min && *param->min > val) ||
+		    (param->max && *param->max < val))
+			return -ERANGE;
+
+		*valp = val;
+	} else {
+		unsigned int val = *valp;
+
+		*lvalp = (unsigned long) val;
+	}
+
+	return 0;
+}
+
+/**
+ * proc_douintvec_minmax - read/write an unsigned int with min/max values
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * Reads/writes one unsigned integer value from/to the user buffer, treated
+ * as an ASCII string. Arrays are not supported: __do_proc_douintvec() fails
+ * with -EINVAL unless table->maxlen / sizeof(unsigned int) is exactly 1.
+ * Negative strings are not allowed.
+ *
+ * This routine will ensure the value is within the range specified by
+ * table->extra1 (min) and table->extra2 (max); either pointer may be NULL
+ * to leave that side unbounded. Values above UINT_MAX are rejected as
+ * well, to avoid having to support wrap around uses from userspace.
+ *
+ * Returns 0 on success.
+ */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ /* The bounds live on this stack frame only for the duration of the call */
+ struct do_proc_douintvec_minmax_conv_param param = {
+ .min = (unsigned int *) table->extra1,
+ .max = (unsigned int *) table->extra2,
+ };
+ return do_proc_douintvec(table, write, buffer, lenp, ppos,
+ do_proc_douintvec_minmax_conv, &param);
+}
+
static void validate_coredump_safety(void)
{
#ifdef CONFIG_COREDUMP
@@ -2447,17 +2688,8 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
left = *lenp;
if (write) {
- if (*ppos) {
- switch (sysctl_writes_strict) {
- case SYSCTL_WRITES_STRICT:
- goto out;
- case SYSCTL_WRITES_WARN:
- warn_sysctl_write(table);
- break;
- default:
- break;
- }
- }
+ if (proc_first_pos_non_zero_ignore(ppos, table))
+ goto out;
if (left > PAGE_SIZE - 1)
left = PAGE_SIZE - 1;
@@ -2898,6 +3130,12 @@ int proc_dointvec_minmax(struct ctl_table *table, int write,
return -ENOSYS;
}
+/* Stub variant of proc_douintvec_minmax(): does no work, always -ENOSYS. */
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ return -ENOSYS;
+}
+
int proc_dointvec_jiffies(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
@@ -2940,6 +3178,7 @@ EXPORT_SYMBOL(proc_dointvec);
EXPORT_SYMBOL(proc_douintvec);
EXPORT_SYMBOL(proc_dointvec_jiffies);
EXPORT_SYMBOL(proc_dointvec_minmax);
+EXPORT_SYMBOL_GPL(proc_douintvec_minmax);
EXPORT_SYMBOL(proc_dointvec_userhz_jiffies);
EXPORT_SYMBOL(proc_dointvec_ms_jiffies);
EXPORT_SYMBOL(proc_dostring);
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 939a158eab11..02e1859f2ca8 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1346,7 +1346,7 @@ static void deprecated_sysctl_warning(const int *name, int nlen)
* CTL_KERN/KERN_VERSION is used by older glibc and cannot
* ever go away.
*/
- if (name[0] == CTL_KERN && name[1] == KERN_VERSION)
+ if (nlen >= 2 && name[0] == CTL_KERN && name[1] == KERN_VERSION)
return;
if (printk_ratelimit()) {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 2953d558bbee..53f6b6401cf0 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3816,7 +3816,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
int exclude_mod = 0;
int found = 0;
int ret;
- int clear_filter;
+ int clear_filter = 0;
if (func) {
func_g.type = filter_parse_regex(func, len, &func_g.search,
@@ -3950,7 +3950,7 @@ static int cache_mod(struct trace_array *tr,
continue;
/* no func matches all */
- if (!func || strcmp(func, "*") == 0 ||
+ if (strcmp(func, "*") == 0 ||
(ftrace_mod->func &&
strcmp(ftrace_mod->func, func) == 0)) {
ret = 0;
@@ -3978,6 +3978,7 @@ static int
ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
int reset, int enable);
+#ifdef CONFIG_MODULES
static void process_mod_list(struct list_head *head, struct ftrace_ops *ops,
char *mod, bool enable)
{
@@ -4068,6 +4069,7 @@ static void process_cached_mods(const char *mod_name)
kfree(mod);
}
+#endif
/*
* We register the module command as a template to show others how
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948ec32e0c27..2d0ffcc49dba 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1916,7 +1916,11 @@ static int trace_save_cmdline(struct task_struct *tsk)
{
unsigned pid, idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ if (unlikely(tsk->pid > PID_MAX_DEFAULT))
return 0;
/*
@@ -2002,7 +2006,11 @@ int trace_find_tgid(int pid)
static int trace_save_tgid(struct task_struct *tsk)
{
- if (unlikely(!tgid_map || !tsk->pid || tsk->pid > PID_MAX_DEFAULT))
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
return 0;
tgid_map[tsk->pid] = tsk->tgid;
@@ -2029,11 +2037,20 @@ static bool tracing_record_taskinfo_skip(int flags)
*/
void tracing_record_taskinfo(struct task_struct *task, int flags)
{
+ bool done;
+
if (tracing_record_taskinfo_skip(flags))
return;
- if ((flags & TRACE_RECORD_CMDLINE) && !trace_save_cmdline(task))
- return;
- if ((flags & TRACE_RECORD_TGID) && !trace_save_tgid(task))
+
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
+
+ /* If recording any information failed, retry again soon. */
+ if (!done)
return;
__this_cpu_write(trace_taskinfo_save, false);
@@ -2050,15 +2067,22 @@ void tracing_record_taskinfo(struct task_struct *task, int flags)
void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
struct task_struct *next, int flags)
{
+ bool done;
+
if (tracing_record_taskinfo_skip(flags))
return;
- if ((flags & TRACE_RECORD_CMDLINE) &&
- (!trace_save_cmdline(prev) || !trace_save_cmdline(next)))
- return;
+ /*
+ * Record as much task information as possible. If some fail, continue
+ * to try to record the others.
+ */
+ done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
+ done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
+ done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
- if ((flags & TRACE_RECORD_TGID) &&
- (!trace_save_tgid(prev) || !trace_save_tgid(next)))
+ /* If recording any information failed, retry again soon. */
+ if (!done)
return;
__this_cpu_write(trace_taskinfo_save, false);
@@ -3334,14 +3358,23 @@ static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file
unsigned int flags)
{
bool tgid = flags & TRACE_ITER_RECORD_TGID;
-
- seq_printf(m, "# %s _-----=> irqs-off\n", tgid ? " " : "");
- seq_printf(m, "# %s / _----=> need-resched\n", tgid ? " " : "");
- seq_printf(m, "# %s| / _---=> hardirq/softirq\n", tgid ? " " : "");
- seq_printf(m, "# %s|| / _--=> preempt-depth\n", tgid ? " " : "");
- seq_printf(m, "# %s||| / delay\n", tgid ? " " : "");
- seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
- seq_printf(m, "# | | | %s|||| | |\n", tgid ? " | " : "");
+ const char tgid_space[] = " ";
+ const char space[] = " ";
+
+ seq_printf(m, "# %s _-----=> irqs-off\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s / _----=> need-resched\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s| / _---=> hardirq/softirq\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s|| / _--=> preempt-depth\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# %s||| / delay\n",
+ tgid ? tgid_space : space);
+ seq_printf(m, "# TASK-PID CPU#%s|||| TIMESTAMP FUNCTION\n",
+ tgid ? " TGID " : space);
+ seq_printf(m, "# | | | %s|||| | |\n",
+ tgid ? " | " : space);
}
void
@@ -4689,6 +4722,76 @@ static const struct file_operations tracing_readme_fops = {
.llseek = generic_file_llseek,
};
+/*
+ * seq_file ->next() for "saved_tgids".
+ *
+ * @v points at a slot of tgid_map. Steps past it (except on the very first
+ * call, when neither *pos nor m->count is set), bumps *pos and returns the
+ * next slot for which a tgid is recorded, or NULL once the end of the map
+ * is reached.
+ */
+static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	int *ptr = v;
+
+	if (*pos || m->count)
+		ptr++;
+
+	(*pos)++;
+
+	for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
+		/*
+		 * tgid_map is indexed by pid and stores the tgid, so the
+		 * lookup key is the slot index, exactly as in
+		 * saved_tgids_show(). Passing *ptr here would look up the
+		 * stored tgid as if it were a pid.
+		 */
+		int pid = ptr - tgid_map;
+
+		if (trace_find_tgid(pid))
+			return ptr;
+	}
+
+	return NULL;
+}
+
+/*
+ * seq_file ->start() for "saved_tgids".
+ *
+ * Returns NULL when tgid_map is not allocated. Otherwise begins at the
+ * first slot and calls saved_tgids_next() until the local position counter
+ * has moved past *pos, returning the slot reached or NULL when the map
+ * runs out first.
+ */
+static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
+{
+ void *v;
+ loff_t l = 0;
+
+ if (!tgid_map)
+ return NULL;
+
+ v = &tgid_map[0];
+ /* 'l' is a private cursor; the caller's *pos is only read here */
+ while (l <= *pos) {
+ v = saved_tgids_next(m, v, &l);
+ if (!v)
+ return NULL;
+ }
+
+ return v;
+}
+
+/* seq_file ->stop(): saved_tgids_start() acquires nothing, so no cleanup. */
+static void saved_tgids_stop(struct seq_file *m, void *v)
+{
+}
+
+/* seq_file ->show(): print "<pid> <tgid>" for the map slot @v points at. */
+static int saved_tgids_show(struct seq_file *m, void *v)
+{
+	int *slot = v;
+	int pid = slot - tgid_map;	/* the slot index is the pid */
+
+	seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
+	return 0;
+}
+
+/* Iterator callbacks handed to seq_open() by tracing_saved_tgids_open(). */
+static const struct seq_operations tracing_saved_tgids_seq_ops = {
+ .start = saved_tgids_start,
+ .stop = saved_tgids_stop,
+ .next = saved_tgids_next,
+ .show = saved_tgids_show,
+};
+
+/*
+ * ->open() for "saved_tgids": refuse with -ENODEV while tracing is
+ * disabled, otherwise set up the seq_file iterator.
+ */
+static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
+{
+	return tracing_disabled ? -ENODEV :
+		seq_open(filp, &tracing_saved_tgids_seq_ops);
+}
+
+
+/* File operations for "saved_tgids"; reading goes through seq_file. */
+static const struct file_operations tracing_saved_tgids_fops = {
+ .open = tracing_saved_tgids_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
{
unsigned int *ptr = v;
@@ -7921,6 +8024,9 @@ static __init int tracer_init_tracefs(void)
trace_create_file("saved_cmdlines_size", 0644, d_tracer,
NULL, &tracing_saved_cmdlines_size_fops);
+ trace_create_file("saved_tgids", 0444, d_tracer,
+ NULL, &tracing_saved_tgids_fops);
+
trace_eval_init();
trace_create_eval_file(d_tracer);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 2c5221819be5..c9b5aa10fbf9 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -598,6 +598,14 @@ static struct notifier_block trace_kprobe_module_nb = {
.priority = 1 /* Invoked after kprobe module callback */
};
+/*
+ * Convert certain expected symbols into '_' when generating event names.
+ * Every ':' and '.' in the NUL-terminated string @name is replaced in
+ * place, including one in the very first position.
+ */
+static inline void sanitize_event_name(char *name)
+{
+	/* Test the current character before advancing so name[0] is covered */
+	for (; *name != '\0'; name++)
+		if (*name == ':' || *name == '.')
+			*name = '_';
+}
+
static int create_trace_kprobe(int argc, char **argv)
{
/*
@@ -736,6 +744,7 @@ static int create_trace_kprobe(int argc, char **argv)
else
snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
is_return ? 'r' : 'p', addr);
+ sanitize_event_name(buf);
event = buf;
}
tk = alloc_trace_kprobe(group, event, addr, symbol, offset, maxactive,
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index b4a751e8f9d6..a4df67cbc711 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -406,6 +406,8 @@ static const struct file_operations stack_trace_fops = {
.release = seq_release,
};
+#ifdef CONFIG_DYNAMIC_FTRACE
+
static int
stack_trace_filter_open(struct inode *inode, struct file *file)
{
@@ -423,6 +425,8 @@ static const struct file_operations stack_trace_filter_fops = {
.release = ftrace_regex_release,
};
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
int
stack_trace_sysctl(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -477,8 +481,10 @@ static __init int stack_trace_init(void)
trace_create_file("stack_trace", 0444, d_tracer,
NULL, &stack_trace_fops);
+#ifdef CONFIG_DYNAMIC_FTRACE
trace_create_file("stack_trace_filter", 0444, d_tracer,
&trace_ops, &stack_trace_filter_fops);
+#endif
if (stack_trace_filter_buf[0])
ftrace_set_early_filter(&trace_ops, stack_trace_filter_buf, 1);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 03e0b69bb5bf..06d3389bca0d 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -9,7 +9,7 @@
* to those contributors as well.
*/
-#define pr_fmt(fmt) "NMI watchdog: " fmt
+#define pr_fmt(fmt) "watchdog: " fmt
#include <linux/mm.h>
#include <linux/cpu.h>
@@ -29,15 +29,58 @@
#include <linux/kvm_para.h>
#include <linux/kthread.h>
+/* Watchdog configuration */
static DEFINE_MUTEX(watchdog_proc_mutex);
-#if defined(CONFIG_HAVE_NMI_WATCHDOG) || defined(CONFIG_HARDLOCKUP_DETECTOR)
-unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED|NMI_WATCHDOG_ENABLED;
+int __read_mostly nmi_watchdog_enabled;
+
+#if defined(CONFIG_HARDLOCKUP_DETECTOR) || defined(CONFIG_HAVE_NMI_WATCHDOG)
+unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED |
+ NMI_WATCHDOG_ENABLED;
#else
unsigned long __read_mostly watchdog_enabled = SOFT_WATCHDOG_ENABLED;
#endif
-int __read_mostly nmi_watchdog_enabled;
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+/* boot commands */
+/*
+ * Should we panic when a soft-lockup or hard-lockup occurs:
+ */
+unsigned int __read_mostly hardlockup_panic =
+ CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
+/*
+ * We may not want to enable hard lockup detection by default in all cases,
+ * for example when running the kernel as a guest on a hypervisor. In these
+ * cases this function can be called to disable hard lockup detection. This
+ * function should only be executed once by the boot processor before the
+ * kernel command line parameters are parsed, because otherwise it is not
+ * possible to override this in hardlockup_panic_setup().
+ */
+void hardlockup_detector_disable(void)
+{
+ /* Clears only the NMI bit; the other bits of watchdog_enabled are kept */
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+}
+
+/*
+ * Handler for the "nmi_watchdog=" boot parameter.
+ *
+ * "panic" and "nopanic" set and clear hardlockup_panic; "0" and "1" clear
+ * and set NMI_WATCHDOG_ENABLED in watchdog_enabled. The comparisons are
+ * prefix matches (strncmp() with the keyword length), so trailing
+ * characters after a keyword are not examined, and only the first matching
+ * keyword takes effect. Anything else is silently accepted without effect.
+ * Always returns 1.
+ */
+static int __init hardlockup_panic_setup(char *str)
+{
+ if (!strncmp(str, "panic", 5))
+ hardlockup_panic = 1;
+ else if (!strncmp(str, "nopanic", 7))
+ hardlockup_panic = 0;
+ else if (!strncmp(str, "0", 1))
+ watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
+ else if (!strncmp(str, "1", 1))
+ watchdog_enabled |= NMI_WATCHDOG_ENABLED;
+ return 1;
+}
+__setup("nmi_watchdog=", hardlockup_panic_setup);
+
+#endif
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
int __read_mostly soft_watchdog_enabled;
+#endif
+
int __read_mostly watchdog_user_enabled;
int __read_mostly watchdog_thresh = 10;
@@ -45,15 +88,9 @@ int __read_mostly watchdog_thresh = 10;
int __read_mostly sysctl_softlockup_all_cpu_backtrace;
int __read_mostly sysctl_hardlockup_all_cpu_backtrace;
#endif
-static struct cpumask watchdog_cpumask __read_mostly;
+struct cpumask watchdog_cpumask __read_mostly;
unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
- for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
-atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
-
/*
* The 'watchdog_running' variable is set to 1 when the watchdog threads
* are registered/started and is set to 0 when the watchdog threads are
@@ -72,7 +109,47 @@ static int __read_mostly watchdog_running;
* of 'watchdog_running' cannot change while the watchdog is deactivated
* temporarily (see related code in 'proc' handlers).
*/
-static int __read_mostly watchdog_suspended;
+int __read_mostly watchdog_suspended;
+
+/*
+ * These functions can be overridden if an architecture implements its
+ * own hardlockup detector.
+ *
+ * watchdog_nmi_enable/disable can be implemented to start and stop when
+ * softlockup watchdog threads start and stop. The arch must select the
+ * SOFTLOCKUP_DETECTOR Kconfig.
+ */
+int __weak watchdog_nmi_enable(unsigned int cpu)
+{
+ /* Weak default: does nothing and reports success */
+ return 0;
+}
+/* Weak default counterpart of watchdog_nmi_enable(): does nothing. */
+void __weak watchdog_nmi_disable(unsigned int cpu)
+{
+}
+
+/*
+ * watchdog_nmi_reconfigure can be implemented to be notified after any
+ * watchdog configuration change. The arch hardlockup watchdog should
+ * respond to the following variables:
+ * - nmi_watchdog_enabled
+ * - watchdog_thresh
+ * - watchdog_cpumask
+ * - sysctl_hardlockup_all_cpu_backtrace
+ * - hardlockup_panic
+ * - watchdog_suspended
+ */
+void __weak watchdog_nmi_reconfigure(void)
+{
+ /* Weak default: does nothing */
+}
+
+
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+
+/* Helper for online, unparked cpus. */
+#define for_each_watchdog_cpu(cpu) \
+ for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
+
+atomic_t watchdog_park_in_progress = ATOMIC_INIT(0);
static u64 __read_mostly sample_period;
@@ -120,6 +197,7 @@ static int __init softlockup_all_cpu_backtrace_setup(char *str)
return 1;
}
__setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
static int __init hardlockup_all_cpu_backtrace_setup(char *str)
{
sysctl_hardlockup_all_cpu_backtrace =
@@ -128,6 +206,7 @@ static int __init hardlockup_all_cpu_backtrace_setup(char *str)
}
__setup("hardlockup_all_cpu_backtrace=", hardlockup_all_cpu_backtrace_setup);
#endif
+#endif
/*
* Hard-lockup warnings should be triggered after just a few seconds. Soft-
@@ -213,18 +292,6 @@ void touch_softlockup_watchdog_sync(void)
__this_cpu_write(watchdog_touch_ts, 0);
}
-/* watchdog detector functions */
-bool is_hardlockup(void)
-{
- unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
- return true;
-
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
- return false;
-}
-
static int is_softlockup(unsigned long touch_ts)
{
unsigned long now = get_timestamp();
@@ -237,21 +304,21 @@ static int is_softlockup(unsigned long touch_ts)
return 0;
}
-static void watchdog_interrupt_count(void)
+/* watchdog detector functions */
+bool is_hardlockup(void)
{
- __this_cpu_inc(hrtimer_interrupts);
-}
+ unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
-/*
- * These two functions are mostly architecture specific
- * defining them as weak here.
- */
-int __weak watchdog_nmi_enable(unsigned int cpu)
-{
- return 0;
+ if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ return true;
+
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ return false;
}
-void __weak watchdog_nmi_disable(unsigned int cpu)
+
+static void watchdog_interrupt_count(void)
{
+ __this_cpu_inc(hrtimer_interrupts);
}
static int watchdog_enable_all_cpus(void);
@@ -502,57 +569,6 @@ static void watchdog_unpark_threads(void)
kthread_unpark(per_cpu(softlockup_watchdog, cpu));
}
-/*
- * Suspend the hard and soft lockup detector by parking the watchdog threads.
- */
-int lockup_detector_suspend(void)
-{
- int ret = 0;
-
- get_online_cpus();
- mutex_lock(&watchdog_proc_mutex);
- /*
- * Multiple suspend requests can be active in parallel (counted by
- * the 'watchdog_suspended' variable). If the watchdog threads are
- * running, the first caller takes care that they will be parked.
- * The state of 'watchdog_running' cannot change while a suspend
- * request is active (see related code in 'proc' handlers).
- */
- if (watchdog_running && !watchdog_suspended)
- ret = watchdog_park_threads();
-
- if (ret == 0)
- watchdog_suspended++;
- else {
- watchdog_disable_all_cpus();
- pr_err("Failed to suspend lockup detectors, disabled\n");
- watchdog_enabled = 0;
- }
-
- mutex_unlock(&watchdog_proc_mutex);
-
- return ret;
-}
-
-/*
- * Resume the hard and soft lockup detector by unparking the watchdog threads.
- */
-void lockup_detector_resume(void)
-{
- mutex_lock(&watchdog_proc_mutex);
-
- watchdog_suspended--;
- /*
- * The watchdog threads are unparked if they were previously running
- * and if there is no more active suspend request.
- */
- if (watchdog_running && !watchdog_suspended)
- watchdog_unpark_threads();
-
- mutex_unlock(&watchdog_proc_mutex);
- put_online_cpus();
-}
-
static int update_watchdog_all_cpus(void)
{
int ret;
@@ -605,6 +621,100 @@ static void watchdog_disable_all_cpus(void)
}
#ifdef CONFIG_SYSCTL
+/*
+ * Hand the current watchdog_cpumask to the smpboot helper for the
+ * watchdog threads and pass its return value back to the caller.
+ */
+static int watchdog_update_cpus(void)
+{
+ return smpboot_update_cpumask_percpu_thread(
+ &watchdog_threads, &watchdog_cpumask);
+}
+#endif
+
+#else /* SOFTLOCKUP */
+/*
+ * No-op replacements compiled in the "#else" (SOFTLOCKUP) branch: they do
+ * nothing, and the ones returning int report success.
+ */
+static int watchdog_park_threads(void)
+{
+ return 0;
+}
+
+static void watchdog_unpark_threads(void)
+{
+}
+
+static int watchdog_enable_all_cpus(void)
+{
+ return 0;
+}
+
+static void watchdog_disable_all_cpus(void)
+{
+}
+
+#ifdef CONFIG_SYSCTL
+static int watchdog_update_cpus(void)
+{
+ return 0;
+}
+#endif
+
+static void set_sample_period(void)
+{
+}
+#endif /* SOFTLOCKUP */
+
+/*
+ * Suspend the hard and soft lockup detector by parking the watchdog threads.
+ *
+ * Returns 0 on success, otherwise the error from watchdog_park_threads();
+ * in that case the detectors are disabled altogether. The hotplug
+ * reference taken with get_online_cpus() is dropped by
+ * lockup_detector_resume().
+ *
+ * NOTE(review): on the failure path this also returns with that reference
+ * held and without incrementing watchdog_suspended - confirm how callers
+ * are expected to balance it.
+ */
+int lockup_detector_suspend(void)
+{
+ int ret = 0;
+
+ get_online_cpus();
+ mutex_lock(&watchdog_proc_mutex);
+ /*
+ * Multiple suspend requests can be active in parallel (counted by
+ * the 'watchdog_suspended' variable). If the watchdog threads are
+ * running, the first caller takes care that they will be parked.
+ * The state of 'watchdog_running' cannot change while a suspend
+ * request is active (see related code in 'proc' handlers).
+ */
+ if (watchdog_running && !watchdog_suspended)
+ ret = watchdog_park_threads();
+
+ if (ret == 0)
+ watchdog_suspended++;
+ else {
+ watchdog_disable_all_cpus();
+ pr_err("Failed to suspend lockup detectors, disabled\n");
+ watchdog_enabled = 0;
+ }
+
+ /* Called on success and failure, still under watchdog_proc_mutex */
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+
+ return ret;
+}
+
+/*
+ * Resume the hard and soft lockup detector by unparking the watchdog threads.
+ *
+ * Drops one suspend request and releases the hotplug reference with
+ * put_online_cpus(); lockup_detector_suspend() is where the matching
+ * get_online_cpus() is taken.
+ */
+void lockup_detector_resume(void)
+{
+ mutex_lock(&watchdog_proc_mutex);
+
+ watchdog_suspended--;
+ /*
+ * The watchdog threads are unparked if they were previously running
+ * and if there is no more active suspend request.
+ */
+ if (watchdog_running && !watchdog_suspended)
+ watchdog_unpark_threads();
+
+ /* Called after the suspend count changed, still under the mutex */
+ watchdog_nmi_reconfigure();
+
+ mutex_unlock(&watchdog_proc_mutex);
+ put_online_cpus();
+}
+
+#ifdef CONFIG_SYSCTL
/*
* Update the run state of the lockup detectors.
@@ -625,6 +735,8 @@ static int proc_watchdog_update(void)
else
watchdog_disable_all_cpus();
+ watchdog_nmi_reconfigure();
+
return err;
}
@@ -810,10 +922,11 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
* a temporary cpumask, so we are likely not in a
* position to do much else to make things better.
*/
- if (smpboot_update_cpumask_percpu_thread(
- &watchdog_threads, &watchdog_cpumask) != 0)
+ if (watchdog_update_cpus() != 0)
pr_err("cpumask update failed\n");
}
+
+ watchdog_nmi_reconfigure();
}
out:
mutex_unlock(&watchdog_proc_mutex);
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 54a427d1f344..295a0d84934c 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -22,41 +22,9 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-/* boot commands */
-/*
- * Should we panic when a soft-lockup or hard-lockup occurs:
- */
-unsigned int __read_mostly hardlockup_panic =
- CONFIG_BOOTPARAM_HARDLOCKUP_PANIC_VALUE;
static unsigned long hardlockup_allcpu_dumped;
-/*
- * We may not want to enable hard lockup detection by default in all cases,
- * for example when running the kernel as a guest on a hypervisor. In these
- * cases this function can be called to disable hard lockup detection. This
- * function should only be executed once by the boot processor before the
- * kernel command line parameters are parsed, because otherwise it is not
- * possible to override this in hardlockup_panic_setup().
- */
-void hardlockup_detector_disable(void)
-{
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
-}
-
-static int __init hardlockup_panic_setup(char *str)
-{
- if (!strncmp(str, "panic", 5))
- hardlockup_panic = 1;
- else if (!strncmp(str, "nopanic", 7))
- hardlockup_panic = 0;
- else if (!strncmp(str, "0", 1))
- watchdog_enabled &= ~NMI_WATCHDOG_ENABLED;
- else if (!strncmp(str, "1", 1))
- watchdog_enabled |= NMI_WATCHDOG_ENABLED;
- return 1;
-}
-__setup("nmi_watchdog=", hardlockup_panic_setup);
-void touch_nmi_watchdog(void)
+void arch_touch_nmi_watchdog(void)
{
/*
* Using __raw here because some code paths have
@@ -66,9 +34,8 @@ void touch_nmi_watchdog(void)
* going off.
*/
raw_cpu_write(watchdog_nmi_touch, true);
- touch_softlockup_watchdog();
}
-EXPORT_SYMBOL(touch_nmi_watchdog);
+EXPORT_SYMBOL(arch_touch_nmi_watchdog);
static struct perf_event_attr wd_hw_attr = {
.type = PERF_TYPE_HARDWARE,