From 8bb7844286fb8c9fce6f65d8288aeb09d03a5e0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 9 May 2007 02:35:10 -0700 Subject: [PATCH] Add suspend-related notifications for CPU hotplug Since nonboot CPUs are now disabled after tasks and devices have been frozen and the CPU hotplug infrastructure is used for this purpose, we need special CPU hotplug notifications that will help the CPU-hotplug-aware subsystems distinguish normal CPU hotplug events from CPU hotplug events related to a system-wide suspend or resume operation in progress. This patch introduces such notifications and causes them to be used during suspend and resume transitions. It also changes all of the CPU-hotplug-aware subsystems to take these notifications into consideration (for now they are handled in the same way as the corresponding "normal" ones). [oleg@tv-sign.ru: cleanups] Signed-off-by: Rafael J. Wysocki Cc: Gautham R Shenoy Cc: Pavel Machek Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpu-hotplug.txt | 9 ++++-- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 ++ arch/i386/kernel/cpu/mcheck/therm_throt.c | 2 ++ arch/i386/kernel/cpuid.c | 2 ++ arch/i386/kernel/microcode.c | 3 ++ arch/i386/kernel/msr.c | 2 ++ arch/ia64/kernel/err_inject.c | 2 ++ arch/ia64/kernel/palinfo.c | 2 ++ arch/ia64/kernel/salinfo.c | 2 ++ arch/ia64/kernel/topology.c | 2 ++ arch/powerpc/kernel/sysfs.c | 2 ++ arch/powerpc/mm/numa.c | 3 ++ arch/s390/appldata/appldata_base.c | 2 ++ arch/s390/kernel/smp.c | 2 ++ arch/x86_64/kernel/mce.c | 2 ++ arch/x86_64/kernel/mce_amd.c | 2 ++ arch/x86_64/kernel/vsyscall.c | 2 +- block/ll_rw_blk.c | 2 +- drivers/base/topology.c | 3 ++ drivers/cpufreq/cpufreq.c | 3 ++ drivers/cpufreq/cpufreq_stats.c | 2 ++ drivers/hwmon/coretemp.c | 2 ++ drivers/infiniband/hw/ehca/ehca_irq.c | 6 ++++ drivers/kvm/kvm_main.c | 3 ++ fs/buffer.c | 2 +- fs/xfs/xfs_mount.c | 3 ++ include/linux/notifier.h | 12 ++++++++ kernel/cpu.c | 34 ++++++++++++----------- kernel/hrtimer.c | 2 ++ kernel/profile.c | 4 +++ kernel/rcupdate.c | 2 ++ kernel/relay.c | 2 ++ kernel/sched.c | 10 +++++++ kernel/softirq.c | 4 +++ kernel/softlockup.c | 4 +++ kernel/timer.c | 2 ++ kernel/workqueue.c | 2 ++ lib/radix-tree.c | 2 +- mm/page_alloc.c | 5 +++- mm/slab.c | 6 ++++ mm/slub.c | 2 ++ mm/swap.c | 2 +- mm/vmscan.c | 2 +- mm/vmstat.c | 3 ++ net/core/dev.c | 2 +- net/core/flow.c | 2 +- net/iucv/iucv.c | 6 ++++ 47 files changed, 152 insertions(+), 27 deletions(-) diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt index cc60d29b954cd..b6d24c22274b8 100644 --- a/Documentation/cpu-hotplug.txt +++ b/Documentation/cpu-hotplug.txt @@ -217,14 +217,17 @@ Q: What happens when a CPU is being logically offlined? A: The following happen, listed in no particular order :-) - A notification is sent to in-kernel registered modules by sending an event - CPU_DOWN_PREPARE + CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the + CPU is being offlined while tasks are frozen due to a suspend operation in + progress - All process is migrated away from this outgoing CPU to a new CPU - All interrupts targeted to this CPU is migrated to a new CPU - timers/bottom half/task lets are also migrated to a new CPU - Once all services are migrated, kernel calls an arch specific routine __cpu_disable() to perform arch specific cleanup. - Once this is successful, an event for successful cleanup is sent by an event - CPU_DEAD. + CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the + CPU is being offlined). "It is expected that each service cleans up when the CPU_DOWN_PREPARE notifier is called, when CPU_DEAD is called its expected there is nothing @@ -242,9 +245,11 @@ A: This is what you would need in your kernel code to receive notifications. switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: foobar_online_action(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: foobar_dead_action(cpu); break; } diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 80b4c5d421b13..e5be819492ef1 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -733,9 +733,11 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 065005c3f1687..5b0a040213c2f 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c @@ -137,10 +137,12 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, mutex_lock(&therm_cpu_lock); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: err = thermal_throttle_add_dev(sys_dev); WARN_ON(err); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: thermal_throttle_remove_dev(sys_dev); break; } diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index eeae0d992337d..5c2faa10e9fac 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -169,9 +169,11 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpuid_device_create(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); break; } diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index cbe7ec8dbb9f5..7d934e493e74c 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -775,10 +775,13 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: mc_sysdev_add(sys_dev); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: mc_sysdev_remove(sys_dev); break; } diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 8cd0a91ce107c..0c1069b8d6380 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -153,9 +153,11 @@ static int msr_class_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: msr_device_create(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu)); break; } diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index d3e9f33e8bddc..6a49600cf337f 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -236,9 +236,11 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: err_inject_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: err_inject_remove_dev(sys_dev); break; } diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index a71df9ae03976..85829e27785c7 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: create_palinfo_proc_entries(hotcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: remove_palinfo_proc_entries(hotcpu); break; } diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index a51f1d0bfb707..89f6b138a62cc 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -582,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu struct salinfo_data *data; switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); @@ -592,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu spin_unlock_irqrestore(&data_saved_lock, flags); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: spin_lock_irqsave(&data_saved_lock, flags); for (i = 0, data = salinfo_data; i < ARRAY_SIZE(salinfo_data); diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 687500ddb4b87..94ae3c87d828c 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb, sys_dev = get_cpu_sysdev(cpu); switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cache_add_dev(sys_dev); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cache_remove_dev(sys_dev); break; } diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index cae39d9dfe48a..68991c2d4a1b3 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -342,10 +342,12 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: register_cpu_online(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: unregister_cpu_online(cpu); break; #endif diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index b3a592b25ab3a..de45aa82d97b1 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: numa_setup_cpu(lcpu); ret = NOTIFY_OK; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: unmap_cpu_from_node(lcpu); break; ret = NOTIFY_OK; diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ee89b33145d58..81a2b92ab0c2b 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -567,9 +567,11 @@ appldata_cpu_notify(struct notifier_block *self, { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: appldata_online_cpu((long) hcpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: appldata_offline_cpu((long) hcpu); break; default: diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b7977027a28fa..09f028a3266ba 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -789,10 +789,12 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: if (sysdev_create_file(s, &attr_capability)) return NOTIFY_BAD; break; case CPU_DEAD: + case CPU_DEAD_FROZEN: sysdev_remove_file(s, &attr_capability); break; } diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 442169640e452..a14375dd54252 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c @@ -720,9 +720,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: mce_create_device(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: mce_remove_device(cpu); break; } diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index d0bd5d66e103d..03356e64f9c8c 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c @@ -654,9 +654,11 @@ static int threshold_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: threshold_create_device(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: threshold_remove_device(cpu); break; default: diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index dc32cef961950..51d4c6fa88c87 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -327,7 +327,7 @@ static int __cpuinit cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) { long cpu = (long)arg; - if (action == CPU_ONLINE) + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); return NOTIFY_DONE; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index df506571ed603..cd54672da99f2 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3507,7 +3507,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, * If a CPU goes away, splice its entries to the current CPU * and trigger a run of the softirq */ - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { int cpu = (unsigned long) hcpu; local_irq_disable(); diff --git a/drivers/base/topology.c b/drivers/base/topology.c index 067a9e8bc377f..8d8cdfec6529e 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -126,10 +126,13 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: rc = topology_add_dev(cpu); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: topology_remove_dev(cpu); break; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 893dbaf386fbf..eb37fba9b7efc 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -1685,9 +1685,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, if (sys_dev) { switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_add_dev(sys_dev); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: if (unlikely(lock_policy_rwsem_write(cpu))) BUG(); @@ -1699,6 +1701,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, __cpufreq_remove_dev(sys_dev); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: cpufreq_add_dev(sys_dev); break; } diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c index d1c7cac9316cc..d2f0cbd8b8f3f 100644 --- a/drivers/cpufreq/cpufreq_stats.c +++ b/drivers/cpufreq/cpufreq_stats.c @@ -313,9 +313,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpufreq_update_policy(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: cpufreq_stats_free_table(cpu); break; } diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 03b1f650d1c47..75e3911810a38 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -309,9 +309,11 @@ static int coretemp_cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_ONLINE: + case CPU_ONLINE_FROZEN: coretemp_device_add(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: coretemp_device_remove(cpu); break; } diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index f284be1c91664..82dda2faf4d0b 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -745,6 +745,7 @@ static int comp_pool_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu); if(!create_comp_task(pool, cpu)) { ehca_gen_err("Can't create comp_task for cpu: %x", cpu); @@ -752,24 +753,29 @@ static int comp_pool_callback(struct notifier_block *nfb, } break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); kthread_bind(cct->task, any_online_cpu(cpu_online_map)); destroy_comp_task(pool, cpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu); cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); kthread_bind(cct->task, cpu); wake_up_process(cct->task); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu); break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu); destroy_comp_task(pool, cpu); take_over_work(pool, cpu); diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c index c8b8cfa332bb9..0d892600ff00f 100644 --- a/drivers/kvm/kvm_main.c +++ b/drivers/kvm/kvm_main.c @@ -2889,7 +2889,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, switch (val) { case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", cpu); decache_vcpus_on_cpu(cpu); @@ -2897,6 +2899,7 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, NULL, 0, 1); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", cpu); smp_call_function_single(cpu, kvm_arch_ops->hardware_enable, diff --git a/fs/buffer.c b/fs/buffer.c index fc2d763a8d781..aecd057cd0e06 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2946,7 +2946,7 @@ static void buffer_exit_cpu(int cpu) static int buffer_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) buffer_exit_cpu((unsigned long)hcpu); return NOTIFY_OK; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f5aa3ef855fbb..a96bde6df96d1 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify( per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu); switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* Easy Case - initialize the area and locks, and * then rebalance when online does everything else for us. */ memset(cntp, 0, sizeof(xfs_icsb_cnts_t)); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: xfs_icsb_lock(mp); xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0); xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0); @@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify( xfs_icsb_unlock(mp); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* Disable all the counters, then fold the dead cpu's * count into the total on the global superblock and * re-enable the counters. */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index 1903e5490c04c..9431101bf8769 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -197,5 +197,17 @@ extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh, #define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */ #define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */ +/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend + * operation in progress + */ +#define CPU_TASKS_FROZEN 0x0010 + +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) + #endif /* __KERNEL__ */ #endif /* _LINUX_NOTIFIER_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 28cb6c71a47a1..369d2892687da 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -120,12 +120,13 @@ static int take_cpu_down(void *unused) } /* Requires cpu_add_remove_lock to be held */ -static int _cpu_down(unsigned int cpu) +static int _cpu_down(unsigned int cpu, int tasks_frozen) { int err, nr_calls = 0; struct task_struct *p; cpumask_t old_allowed, tmp; void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; if (num_online_cpus() == 1) return -EBUSY; @@ -134,11 +135,11 @@ static int _cpu_down(unsigned int cpu) return -EINVAL; raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); - err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE, + err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); if (err == NOTIFY_BAD) { - __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, hcpu, - nr_calls, NULL); + __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, + hcpu, nr_calls, NULL); printk("%s: attempt to take down CPU %u failed\n", __FUNCTION__, cpu); err = -EINVAL; @@ -157,7 +158,7 @@ static int _cpu_down(unsigned int cpu) if (IS_ERR(p) || cpu_online(cpu)) { /* CPU didn't die: tell everyone. Can't complain. */ - if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, + if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod, hcpu) == NOTIFY_BAD) BUG(); @@ -176,7 +177,8 @@ static int _cpu_down(unsigned int cpu) __cpu_die(cpu); /* CPU is completely dead: tell everyone. Too late to complain. */ - if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD, hcpu) == NOTIFY_BAD) + if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod, + hcpu) == NOTIFY_BAD) BUG(); check_for_tasks(cpu); @@ -186,8 +188,7 @@ static int _cpu_down(unsigned int cpu) out_allowed: set_cpus_allowed(current, old_allowed); out_release: - raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, - (void *)(long)cpu); + raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); return err; } @@ -199,7 +200,7 @@ int cpu_down(unsigned int cpu) if (cpu_hotplug_disabled) err = -EBUSY; else - err = _cpu_down(cpu); + err = _cpu_down(cpu, 0); mutex_unlock(&cpu_add_remove_lock); return err; @@ -207,16 +208,17 @@ int cpu_down(unsigned int cpu) #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -static int __cpuinit _cpu_up(unsigned int cpu) +static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) { int ret, nr_calls = 0; void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; if (cpu_online(cpu) || !cpu_present(cpu)) return -EINVAL; raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu); - ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu, + ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret == NOTIFY_BAD) { printk("%s: attempt to bring up CPU %u failed\n", @@ -234,12 +236,12 @@ static int __cpuinit _cpu_up(unsigned int cpu) BUG_ON(!cpu_online(cpu)); /* Now call notifier in preparation. */ - raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu); + raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu); out_notify: if (ret != 0) __raw_notifier_call_chain(&cpu_chain, - CPU_UP_CANCELED, hcpu, nr_calls, NULL); + CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu); return ret; @@ -253,7 +255,7 @@ int __cpuinit cpu_up(unsigned int cpu) if (cpu_hotplug_disabled) err = -EBUSY; else - err = _cpu_up(cpu); + err = _cpu_up(cpu, 0); mutex_unlock(&cpu_add_remove_lock); return err; @@ -283,7 +285,7 @@ int disable_nonboot_cpus(void) for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; - error = _cpu_down(cpu); + error = _cpu_down(cpu, 1); if (!error) { cpu_set(cpu, frozen_cpus); printk("CPU%d is down\n", cpu); @@ -318,7 +320,7 @@ void enable_nonboot_cpus(void) suspend_cpu_hotplug = 1; printk("Enabling non-boot CPUs ...\n"); for_each_cpu_mask(cpu, frozen_cpus) { - error = _cpu_up(cpu); + error = _cpu_up(cpu, 1); if (!error) { printk("CPU%d is up\n", cpu); continue; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index c9f4f044a8a8f..23c03f43e1962 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1411,11 +1411,13 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: init_hrtimers_cpu(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu); migrate_hrtimers(cpu); break; diff --git a/kernel/profile.c b/kernel/profile.c index 9bfadb248dd87..cc91b9bf759dc 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -340,6 +340,7 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: node = cpu_to_node(cpu); per_cpu(cpu_profile_flip, cpu) = 0; if (!per_cpu(cpu_profile_hits, cpu)[1]) { @@ -365,10 +366,13 @@ static int __devinit profile_cpu_callback(struct notifier_block *info, __free_page(page); return NOTIFY_BAD; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: cpu_set(cpu, prof_cpu_mask); break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: cpu_clear(cpu, prof_cpu_mask); if (per_cpu(cpu_profile_hits, cpu)[0]) { page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[0]); diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 3554b76da84cf..2c2dd8410dc4f 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -558,9 +558,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: rcu_online_cpu(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: rcu_offline_cpu(cpu); break; default: diff --git a/kernel/relay.c b/kernel/relay.c index e804589c863c1..61a504900eaa5 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -484,6 +484,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, switch(action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: mutex_lock(&relay_channels_mutex); list_for_each_entry(chan, &relay_channels, list) { if (chan->buf[hotcpu]) @@ -500,6 +501,7 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb, mutex_unlock(&relay_channels_mutex); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* No need to flush the cpu : will be flushed upon * final relay_flush() call. */ break; diff --git a/kernel/sched.c b/kernel/sched.c index fe1a9c2b855a9..799d23b4e35da 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5394,6 +5394,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: p = kthread_create(migration_thread, hcpu, "migration/%d",cpu); if (IS_ERR(p)) return NOTIFY_BAD; @@ -5407,12 +5408,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: /* Strictly unneccessary, as first user will wake it. */ wake_up_process(cpu_rq(cpu)->migration_thread); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!cpu_rq(cpu)->migration_thread) break; /* Unbind it from offline cpu so it can run. Fall thru. */ @@ -5423,6 +5426,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) break; case CPU_DEAD: + case CPU_DEAD_FROZEN: migrate_live_tasks(cpu); rq = cpu_rq(cpu); kthread_stop(rq->migration_thread); @@ -6912,14 +6916,20 @@ static int update_sched_domains(struct notifier_block *nfb, { switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: detach_destroy_domains(&cpu_online_map); return NOTIFY_OK; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: /* * Fall through and re-initialise the domains. */ diff --git a/kernel/softirq.c b/kernel/softirq.c index 8b75008e2bd84..0b9886a00e74b 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -593,6 +593,7 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); if (IS_ERR(p)) { printk("ksoftirqd for %i failed\n", hotcpu); @@ -602,16 +603,19 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, per_cpu(ksoftirqd, hotcpu) = p; break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: wake_up_process(per_cpu(ksoftirqd, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!per_cpu(ksoftirqd, hotcpu)) break; /* Unbind so it can run. Fall thru. */ kthread_bind(per_cpu(ksoftirqd, hotcpu), any_online_cpu(cpu_online_map)); case CPU_DEAD: + case CPU_DEAD_FROZEN: p = per_cpu(ksoftirqd, hotcpu); per_cpu(ksoftirqd, hotcpu) = NULL; kthread_stop(p); diff --git a/kernel/softlockup.c b/kernel/softlockup.c index 8fa7040247ad1..0131e296ffb41 100644 --- a/kernel/softlockup.c +++ b/kernel/softlockup.c @@ -146,6 +146,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: BUG_ON(per_cpu(watchdog_task, hotcpu)); p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); if (IS_ERR(p)) { @@ -157,16 +158,19 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) kthread_bind(p, hotcpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: wake_up_process(per_cpu(watchdog_task, hotcpu)); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: if (!per_cpu(watchdog_task, hotcpu)) break; /* Unbind so it can run. Fall thru. */ kthread_bind(per_cpu(watchdog_task, hotcpu), any_online_cpu(cpu_online_map)); case CPU_DEAD: + case CPU_DEAD_FROZEN: p = per_cpu(watchdog_task, hotcpu); per_cpu(watchdog_task, hotcpu) = NULL; kthread_stop(p); diff --git a/kernel/timer.c b/kernel/timer.c index 58f6dd07c80bd..de85f8491c1d0 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1293,11 +1293,13 @@ static int __cpuinit timer_cpu_notify(struct notifier_block *self, long cpu = (long)hcpu; switch(action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (init_timers_cpu(cpu) < 0) return NOTIFY_BAD; break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DEAD: + case CPU_DEAD_FROZEN: migrate_timers(cpu); break; #endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index b976ed87dd371..fb56fedd5c027 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -799,6 +799,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, struct cpu_workqueue_struct *cwq; struct workqueue_struct *wq; + action &= ~CPU_TASKS_FROZEN; + switch (action) { case CPU_LOCK_ACQUIRE: mutex_lock(&workqueue_mutex); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d69ddbe438655..402eb4eb6b236 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1004,7 +1004,7 @@ static int radix_tree_callback(struct notifier_block *nfb, struct radix_tree_preload *rtp; /* Free per-cpu pool of perloaded nodes */ - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { rtp = &per_cpu(radix_tree_preloads, cpu); while (rtp->nr) { kmem_cache_free(radix_tree_node_cachep, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fd0b7455b0be..d53cbf8acb8e1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2148,11 +2148,14 @@ static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (process_zones(cpu)) ret = NOTIFY_BAD; break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: free_zone_pagesets(cpu); break; default: @@ -3012,7 +3015,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self, { int cpu = (unsigned long)hcpu; - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { local_irq_disable(); __drain_pages(cpu); vm_events_fold_cpu(cpu); diff --git a/mm/slab.c b/mm/slab.c index 1a7a10de2a4d2..6f3d6e240c611 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1190,6 +1190,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, mutex_lock(&cache_chain_mutex); break; case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: /* * We need to do this right in the beginning since * alloc_arraycache's are going to use this list. @@ -1276,10 +1277,12 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, } break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: start_cpu_timer(cpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: /* * Shutdown cache reaper. Note that the cache_chain_mutex is * held so that if cache_reap() is invoked it cannot do @@ -1291,9 +1294,11 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, per_cpu(reap_work, cpu).work.func = NULL; break; case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: start_cpu_timer(cpu); break; case CPU_DEAD: + case CPU_DEAD_FROZEN: /* * Even if all the cpus of a node are down, we don't free the * kmem_list3 of any cache. This to avoid a race between @@ -1305,6 +1310,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, /* fall thru */ #endif case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; diff --git a/mm/slub.c b/mm/slub.c index f7c120b93c41f..a581fa8ae11aa 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2514,7 +2514,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: for_all_slabs(__flush_cpu_slab, cpu); break; default: diff --git a/mm/swap.c b/mm/swap.c index 218c52a24a216..d3cb966fe9920 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -488,7 +488,7 @@ static int cpu_swap_callback(struct notifier_block *nfb, long *committed; committed = &per_cpu(committed_space, (long)hcpu); - if (action == CPU_DEAD) { + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { atomic_add(*committed, &vm_committed_space); *committed = 0; __lru_add_drain((long)hcpu); diff --git a/mm/vmscan.c b/mm/vmscan.c index 1c8e75a1cfcd3..1be5a6376ef07 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1528,7 +1528,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb, pg_data_t *pgdat; cpumask_t mask; - if (action == CPU_ONLINE) { + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) { for_each_online_pgdat(pgdat) { mask = node_to_cpumask(pgdat->node_id); if (any_online_cpu(mask) != NR_CPUS) diff --git a/mm/vmstat.c b/mm/vmstat.c index 6c488d6ac425d..9a66dc4aed438 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -650,8 +650,11 @@ static int __cpuinit vmstat_cpuup_callback(struct notifier_block *nfb, { switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: refresh_zone_stat_thresholds(); break; default: diff --git a/net/core/dev.c b/net/core/dev.c index 4317c1be4d3ff..8301e2ac747fc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3450,7 +3450,7 @@ static int dev_cpu_callback(struct notifier_block *nfb, unsigned int cpu, oldcpu = (unsigned long)ocpu; struct softnet_data *sd, *oldsd; - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; local_irq_disable(); diff --git a/net/core/flow.c b/net/core/flow.c index 5d25697920b1a..051430545a053 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -338,7 +338,7 @@ static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - if (action == CPU_DEAD) + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) __flow_cache_shrink((unsigned long)hcpu, 0); return NOTIFY_OK; } diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index fb3faf72e8508..b7333061016df 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -556,6 +556,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: if (!percpu_populate(iucv_irq_data, sizeof(struct iucv_irq_data), GFP_KERNEL|GFP_DMA, cpu)) @@ -567,15 +568,20 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, } break; case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: case CPU_DEAD: + case CPU_DEAD_FROZEN: percpu_depopulate(iucv_param, cpu); percpu_depopulate(iucv_irq_data, cpu); break; case CPU_ONLINE: + case CPU_ONLINE_FROZEN: case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: smp_call_function_on(iucv_declare_cpu, NULL, 0, 1, cpu); break; case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: cpumask = iucv_buffer_cpumask; cpu_clear(cpu, cpumask); if (cpus_empty(cpumask)) -- GitLab