summary | refs | log | tree | commit | diff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/asm-offsets.c 2
-rw-r--r-- arch/powerpc/kernel/btext.c 2
-rw-r--r-- arch/powerpc/kernel/entry_32.S 23
-rw-r--r-- arch/powerpc/kernel/exceptions-64s.S 112
-rw-r--r-- arch/powerpc/kernel/head_64.S 133
-rw-r--r-- arch/powerpc/kernel/head_booke.h 1
-rw-r--r-- arch/powerpc/kernel/idle.c 10
-rw-r--r-- arch/powerpc/kernel/interrupt.c 2
-rw-r--r-- arch/powerpc/kernel/interrupt_64.S 56
-rw-r--r-- arch/powerpc/kernel/iommu.c 246
-rw-r--r-- arch/powerpc/kernel/irq.c 8
-rw-r--r-- arch/powerpc/kernel/irq_64.c 10
-rw-r--r-- arch/powerpc/kernel/isa-bridge.c 166
-rw-r--r-- arch/powerpc/kernel/legacy_serial.c 10
-rw-r--r-- arch/powerpc/kernel/misc_64.S 2
-rw-r--r-- arch/powerpc/kernel/module_32.c 7
-rw-r--r-- arch/powerpc/kernel/module_64.c 377
-rw-r--r-- arch/powerpc/kernel/paca.c 2
-rw-r--r-- arch/powerpc/kernel/pci-common.c 21
-rw-r--r-- arch/powerpc/kernel/pci_64.c 2
-rw-r--r-- arch/powerpc/kernel/process.c 126
-rw-r--r-- arch/powerpc/kernel/ptrace/ptrace-view.c 6
-rw-r--r-- arch/powerpc/kernel/rtas.c 54
-rw-r--r-- arch/powerpc/kernel/setup-common.c 13
-rw-r--r-- arch/powerpc/kernel/setup_64.c 2
-rw-r--r-- arch/powerpc/kernel/smp.c 10
-rw-r--r-- arch/powerpc/kernel/sysfs.c 14
-rw-r--r-- arch/powerpc/kernel/time.c 6
-rw-r--r-- arch/powerpc/kernel/trace/ftrace.c 50
-rw-r--r-- arch/powerpc/kernel/vdso/Makefile 2
-rw-r--r-- arch/powerpc/kernel/vdso/gettimeofday.S 6
-rw-r--r-- arch/powerpc/kernel/vector.S 6
-rw-r--r-- arch/powerpc/kernel/vmlinux.lds.S 6
33 files changed, 1054 insertions, 439 deletions
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d24a59a98c0c..9f14d95b8b32 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,7 +185,9 @@ int main(void)
offsetof(struct task_struct, thread_info));
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
OFFSET(PACAR1, paca_struct, saved_r1);
+#ifndef CONFIG_PPC_KERNEL_PCREL
OFFSET(PACATOC, paca_struct, kernel_toc);
+#endif
OFFSET(PACAKBASE, paca_struct, kernelbase);
OFFSET(PACAKMSR, paca_struct, kernel_msr);
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c
index 2769889219bf..19e46fd623b0 100644
--- a/arch/powerpc/kernel/btext.c
+++ b/arch/powerpc/kernel/btext.c
@@ -235,7 +235,7 @@ int __init btext_find_display(int allow_nonstdout)
return rc;
for_each_node_by_type(np, "display") {
- if (of_get_property(np, "linux,opened", NULL)) {
+ if (of_property_read_bool(np, "linux,opened")) {
printk("trying %pOF ...\n", np);
rc = btext_initialize(np);
printk("result: %d\n", rc);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 5604c9a1ac22..47f0dd9a45ad 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -183,12 +183,11 @@ syscall_exit_finish:
ret_from_fork:
REST_NVGPRS(r1)
bl schedule_tail
- li r3,0
+ li r3,0 /* fork() return value */
b ret_from_syscall
- .globl ret_from_kernel_thread
-ret_from_kernel_thread:
- REST_NVGPRS(r1)
+ .globl ret_from_kernel_user_thread
+ret_from_kernel_user_thread:
bl schedule_tail
mtctr r14
mr r3,r15
@@ -197,6 +196,22 @@ ret_from_kernel_thread:
li r3,0
b ret_from_syscall
+ .globl start_kernel_thread
+start_kernel_thread: /* calls schedule_tail, then the address held in r14 with r15 copied to r3 */
+ bl schedule_tail
+ mtctr r14 /* CTR = call target taken from r14 */
+ mr r3,r15 /* r3 = value from r15; NOTE(review): presumably the thread function's argument - confirm against the code that sets r14/r15 */
+ PPC440EP_ERR42
+ bctrl /* call through CTR; LR is set, so a return would fall into the trap below */
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap /* only reached if the call above returned */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 /* BUG entry for the trap at label 100 */
+
+
/*
* This routine switches between two different tasks. The process
* state of one is saved on its kernel stack. Then the state
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6441a1ba57ac..c33c8ebf8641 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common)
__GEN_COMMON_BODY system_reset
addi r3,r1,STACK_INT_FRAME_REGS
- bl system_reset_exception
+ bl CFUNC(system_reset_exception)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
@@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_FTR_SECTION
- bl machine_check_early_boot
+ bl CFUNC(machine_check_early_boot)
END_FTR_SECTION(0, 1) // nop out after boot
- bl machine_check_early
+ bl CFUNC(machine_check_early)
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
@@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* Queue up the MCE event so that we can log it later, while
* returning from kernel or opal call.
*/
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_KERNEL
@@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common)
*/
GEN_COMMON machine_check
addi r3,r1,STACK_INT_FRAME_REGS
- bl machine_check_exception_async
+ bl CFUNC(machine_check_exception_async)
b interrupt_return_srr
@@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common)
* done. Queue the event then call the idle code to do the wake up.
*/
EXC_COMMON_BEGIN(machine_check_idle_common)
- bl machine_check_queue_event
+ bl CFUNC(machine_check_queue_event)
/*
* GPR-loss wakeups are relatively straightforward, because the
@@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce)
BEGIN_FTR_SECTION
li r10,0 /* clear MSR_RI */
mtmsrd r10,1
- bl disable_machine_check
+ bl CFUNC(disable_machine_check)
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
li r3,MSR_ME
@@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* the early handler which is a true NMI.
*/
addi r3,r1,STACK_INT_FRAME_REGS
- bl machine_check_exception
+ bl CFUNC(machine_check_exception)
/*
* We will not reach here. Even if we did, there is no way out.
* Call unrecoverable_exception and die.
*/
addi r3,r1,STACK_INT_FRAME_REGS
- bl unrecoverable_exception
+ bl CFUNC(unrecoverable_exception)
b .
@@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common)
bne- 1f
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- bl do_hash_fault
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- bl do_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- bl do_page_fault
+ bl CFUNC(do_page_fault)
#endif
b interrupt_return_srr
-1: bl do_break
+1: bl CFUNC(do_break)
/*
* do_break() may have changed the NV GPRS while handling a breakpoint.
* If so, we need to restore them with their updated values.
@@ -1493,7 +1493,7 @@ EXC_COMMON_BEGIN(data_access_slb_common)
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_slb_fault
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
@@ -1507,7 +1507,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_bad_segment_interrupt
+ bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
@@ -1541,12 +1541,12 @@ EXC_COMMON_BEGIN(instruction_access_common)
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_64S_HASH_MMU
BEGIN_MMU_FTR_SECTION
- bl do_hash_fault
+ bl CFUNC(do_hash_fault)
MMU_FTR_SECTION_ELSE
- bl do_page_fault
+ bl CFUNC(do_page_fault)
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#else
- bl do_page_fault
+ bl CFUNC(do_page_fault)
#endif
b interrupt_return_srr
@@ -1581,7 +1581,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_slb_fault
+ bl CFUNC(do_slb_fault)
cmpdi r3,0
bne- 1f
b fast_interrupt_return_srr
@@ -1595,7 +1595,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
#endif
std r3,RESULT(r1)
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_bad_segment_interrupt
+ bl CFUNC(do_bad_segment_interrupt)
b interrupt_return_srr
@@ -1649,7 +1649,7 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
EXC_COMMON_BEGIN(hardware_interrupt_common)
GEN_COMMON hardware_interrupt
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_IRQ
+ bl CFUNC(do_IRQ)
BEGIN_FTR_SECTION
b interrupt_return_hsrr
FTR_SECTION_ELSE
@@ -1679,7 +1679,7 @@ EXC_VIRT_END(alignment, 0x4600, 0x100)
EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addi r3,r1,STACK_INT_FRAME_REGS
- bl alignment_exception
+ bl CFUNC(alignment_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1745,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)
.Ldo_program_check:
addi r3,r1,STACK_INT_FRAME_REGS
- bl program_check_exception
+ bl CFUNC(program_check_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -1777,7 +1777,7 @@ EXC_COMMON_BEGIN(fp_unavailable_common)
GEN_COMMON fp_unavailable
bne 1f /* if from user, just load it up */
addi r3,r1,STACK_INT_FRAME_REGS
- bl kernel_fp_unavailable_exception
+ bl CFUNC(kernel_fp_unavailable_exception)
0: trap
EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
@@ -1790,12 +1790,12 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- bl load_up_fpu
+ bl CFUNC(load_up_fpu)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl fp_unavailable_tm
+ bl CFUNC(fp_unavailable_tm)
b interrupt_return_srr
#endif
@@ -1839,7 +1839,7 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80)
EXC_COMMON_BEGIN(decrementer_common)
GEN_COMMON decrementer
addi r3,r1,STACK_INT_FRAME_REGS
- bl timer_interrupt
+ bl CFUNC(timer_interrupt)
b interrupt_return_srr
@@ -1925,9 +1925,9 @@ EXC_COMMON_BEGIN(doorbell_super_common)
GEN_COMMON doorbell_super
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
- bl doorbell_exception
+ bl CFUNC(doorbell_exception)
#else
- bl unknown_async_exception
+ bl CFUNC(unknown_async_exception)
#endif
b interrupt_return_srr
@@ -2091,7 +2091,7 @@ EXC_VIRT_END(single_step, 0x4d00, 0x100)
EXC_COMMON_BEGIN(single_step_common)
GEN_COMMON single_step
addi r3,r1,STACK_INT_FRAME_REGS
- bl single_step_exception
+ bl CFUNC(single_step_exception)
b interrupt_return_srr
@@ -2126,9 +2126,9 @@ EXC_COMMON_BEGIN(h_data_storage_common)
GEN_COMMON h_data_storage
addi r3,r1,STACK_INT_FRAME_REGS
BEGIN_MMU_FTR_SECTION
- bl do_bad_page_fault_segv
+ bl CFUNC(do_bad_page_fault_segv)
MMU_FTR_SECTION_ELSE
- bl unknown_exception
+ bl CFUNC(unknown_exception)
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b interrupt_return_hsrr
@@ -2154,7 +2154,7 @@ EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
EXC_COMMON_BEGIN(h_instr_storage_common)
GEN_COMMON h_instr_storage
addi r3,r1,STACK_INT_FRAME_REGS
- bl unknown_exception
+ bl CFUNC(unknown_exception)
b interrupt_return_hsrr
@@ -2177,7 +2177,7 @@ EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
addi r3,r1,STACK_INT_FRAME_REGS
- bl emulation_assist_interrupt
+ bl CFUNC(emulation_assist_interrupt)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_hsrr
@@ -2237,7 +2237,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
__GEN_COMMON_BODY hmi_exception_early
addi r3,r1,STACK_INT_FRAME_REGS
- bl hmi_exception_realmode
+ bl CFUNC(hmi_exception_realmode)
cmpdi cr0,r3,0
bne 1f
@@ -2255,7 +2255,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
EXC_COMMON_BEGIN(hmi_exception_common)
GEN_COMMON hmi_exception
addi r3,r1,STACK_INT_FRAME_REGS
- bl handle_hmi_exception
+ bl CFUNC(handle_hmi_exception)
b interrupt_return_hsrr
@@ -2290,9 +2290,9 @@ EXC_COMMON_BEGIN(h_doorbell_common)
GEN_COMMON h_doorbell
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_PPC_DOORBELL
- bl doorbell_exception
+ bl CFUNC(doorbell_exception)
#else
- bl unknown_async_exception
+ bl CFUNC(unknown_async_exception)
#endif
b interrupt_return_hsrr
@@ -2325,7 +2325,7 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
EXC_COMMON_BEGIN(h_virt_irq_common)
GEN_COMMON h_virt_irq
addi r3,r1,STACK_INT_FRAME_REGS
- bl do_IRQ
+ bl CFUNC(do_IRQ)
b interrupt_return_hsrr
@@ -2374,10 +2374,10 @@ EXC_COMMON_BEGIN(performance_monitor_common)
lbz r4,PACAIRQSOFTMASK(r13)
cmpdi r4,IRQS_ENABLED
bne 1f
- bl performance_monitor_exception_async
+ bl CFUNC(performance_monitor_exception_async)
b interrupt_return_srr
1:
- bl performance_monitor_exception_nmi
+ bl CFUNC(performance_monitor_exception_nmi)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
mtmsrd r9,1
@@ -2421,19 +2421,19 @@ BEGIN_FTR_SECTION
bne- 2f
END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
#endif
- bl load_up_altivec
+ bl CFUNC(load_up_altivec)
b fast_interrupt_return_srr
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl altivec_unavailable_tm
+ bl CFUNC(altivec_unavailable_tm)
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
addi r3,r1,STACK_INT_FRAME_REGS
- bl altivec_unavailable_exception
+ bl CFUNC(altivec_unavailable_exception)
b interrupt_return_srr
@@ -2475,14 +2475,14 @@ BEGIN_FTR_SECTION
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
2: /* User process was in a transaction */
addi r3,r1,STACK_INT_FRAME_REGS
- bl vsx_unavailable_tm
+ bl CFUNC(vsx_unavailable_tm)
b interrupt_return_srr
#endif
1:
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
addi r3,r1,STACK_INT_FRAME_REGS
- bl vsx_unavailable_exception
+ bl CFUNC(vsx_unavailable_exception)
b interrupt_return_srr
@@ -2509,7 +2509,7 @@ EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
EXC_COMMON_BEGIN(facility_unavailable_common)
GEN_COMMON facility_unavailable
addi r3,r1,STACK_INT_FRAME_REGS
- bl facility_unavailable_exception
+ bl CFUNC(facility_unavailable_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b interrupt_return_srr
@@ -2537,7 +2537,7 @@ EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
EXC_COMMON_BEGIN(h_facility_unavailable_common)
GEN_COMMON h_facility_unavailable
addi r3,r1,STACK_INT_FRAME_REGS
- bl facility_unavailable_exception
+ bl CFUNC(facility_unavailable_exception)
/* XXX Shouldn't be necessary in practice */
HANDLER_RESTORE_NVGPRS()
b interrupt_return_hsrr
@@ -2568,7 +2568,7 @@ EXC_VIRT_NONE(0x5200, 0x100)
EXC_COMMON_BEGIN(cbe_system_error_common)
GEN_COMMON cbe_system_error
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_system_error_exception
+ bl CFUNC(cbe_system_error_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2599,7 +2599,7 @@ EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
EXC_COMMON_BEGIN(instruction_breakpoint_common)
GEN_COMMON instruction_breakpoint
addi r3,r1,STACK_INT_FRAME_REGS
- bl instruction_breakpoint_exception
+ bl CFUNC(instruction_breakpoint_exception)
b interrupt_return_srr
@@ -2721,7 +2721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
EXC_COMMON_BEGIN(denorm_exception_common)
GEN_COMMON denorm_exception
addi r3,r1,STACK_INT_FRAME_REGS
- bl unknown_exception
+ bl CFUNC(unknown_exception)
b interrupt_return_hsrr
@@ -2738,7 +2738,7 @@ EXC_VIRT_NONE(0x5600, 0x100)
EXC_COMMON_BEGIN(cbe_maintenance_common)
GEN_COMMON cbe_maintenance
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_maintenance_exception
+ bl CFUNC(cbe_maintenance_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2764,10 +2764,10 @@ EXC_COMMON_BEGIN(altivec_assist_common)
GEN_COMMON altivec_assist
addi r3,r1,STACK_INT_FRAME_REGS
#ifdef CONFIG_ALTIVEC
- bl altivec_assist_exception
+ bl CFUNC(altivec_assist_exception)
HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
#else
- bl unknown_exception
+ bl CFUNC(unknown_exception)
#endif
b interrupt_return_srr
@@ -2785,7 +2785,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
EXC_COMMON_BEGIN(cbe_thermal_common)
GEN_COMMON cbe_thermal
addi r3,r1,STACK_INT_FRAME_REGS
- bl cbe_thermal_exception
+ bl CFUNC(cbe_thermal_exception)
b interrupt_return_hsrr
#else /* CONFIG_CBE_RAS */
@@ -2818,7 +2818,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
__GEN_COMMON_BODY soft_nmi
addi r3,r1,STACK_INT_FRAME_REGS
- bl soft_nmi_interrupt
+ bl CFUNC(soft_nmi_interrupt)
/* Clear MSR_RI before setting SRR0 and SRR1. */
li r9,0
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 1febb56ebaeb..f132d8704263 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -76,6 +76,13 @@
* 2. The kernel is entered at __start
*/
+/*
+ * boot_from_prom and prom_init run at the physical address. Everything
+ * after prom and kexec entry runs at the virtual address (PAGE_OFFSET).
+ * Secondaries run at the virtual address from generic_secondary_common_init
+ * onward.
+ */
+
OPEN_FIXED_SECTION(first_256B, 0x0, 0x100)
USE_FIXED_SECTION(first_256B)
/*
@@ -303,13 +310,11 @@ _GLOBAL(fsl_secondary_thread_init)
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
/* Book3E initialization */
mr r3,r24
bl book3e_secondary_thread_init
+ bl relative_toc
+
b generic_secondary_common_init
#endif /* CONFIG_PPC_BOOK3E_64 */
@@ -325,22 +330,24 @@ _GLOBAL(fsl_secondary_thread_init)
*/
_GLOBAL(generic_secondary_smp_init)
FIXUP_ENDIAN
+
+ li r13,0
+
+ /* Poison TOC */
+ li r2,-1
+
mr r24,r3
mr r25,r4
/* turn on 64-bit mode */
bl enable_64b_mode
- /* get a valid TOC pointer, wherever we're mapped at */
- bl relative_toc
- tovirt(r2,r2)
-
#ifdef CONFIG_PPC_BOOK3E_64
/* Book3E initialization */
mr r3,r24
mr r4,r25
bl book3e_secondary_core_init
-
+ /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */
/*
* After common core init has finished, check if the current thread is the
* one we wanted to boot. If not, start the specified thread and stop the
@@ -378,6 +385,16 @@ _GLOBAL(generic_secondary_smp_init)
10:
b 10b
20:
+#else
+ /* Now the MMU is off, can branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
+ bl relative_toc
#endif
generic_secondary_common_init:
@@ -492,6 +509,8 @@ SYM_FUNC_START_LOCAL(start_initialization_book3s)
/* Switch off MMU if not already off */
bl __mmu_off
+ /* Now the MMU is off, can return to our PAGE_OFFSET address */
+ tovirt(r25,r25)
mtlr r25
blr
SYM_FUNC_END(start_initialization_book3s)
@@ -515,14 +534,8 @@ __start_initialization_multiplatform:
/* Zero r13 (paca) so early program check / mce don't use it */
li r13,0
- /* Get TOC pointer (current runtime address) */
- bl relative_toc
-
- /* find out where we are now */
- bcl 20,31,$+4
-0: mflr r26 /* r26 = runtime addr here */
- addis r26,r26,(_stext - 0b)@ha
- addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+ /* Poison TOC */
+ li r2,-1
/*
* Are we booted from a PROM Of-type client-interface ?
@@ -540,16 +553,41 @@ __start_initialization_multiplatform:
mr r29,r9
#endif
+ /* Get TOC pointer (current runtime address) */
+ bl relative_toc
+
+ /* These functions return to the virtual (PAGE_OFFSET) address */
#ifdef CONFIG_PPC_BOOK3E_64
bl start_initialization_book3e
#else
bl start_initialization_book3s
#endif /* CONFIG_PPC_BOOK3E_64 */
+
+ /* Get TOC pointer, virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+
+ /* OPAL doesn't pass base address in r4, have to derive it. */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
b __after_prom_start
__REF
__boot_from_prom:
#ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
+ /* Get TOC pointer, non-virtual */
+ bl relative_toc
+
+ /* find out where we are now */
+ bcl 20,31,$+4
+0: mflr r26 /* r26 = runtime addr here */
+ addis r26,r26,(_stext - 0b)@ha
+ addi r26,r26,(_stext - 0b)@l /* current runtime base addr */
+
/* Save parameters */
mr r31,r3
mr r30,r4
@@ -579,7 +617,7 @@ __boot_from_prom:
/* Do all of the interaction with OF client interface */
mr r8,r26
- bl prom_init
+ bl CFUNC(prom_init)
#endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */
/* We never return. We also hit that trap if trying to boot
@@ -590,18 +628,11 @@ __boot_from_prom:
__after_prom_start:
#ifdef CONFIG_RELOCATABLE
/* process relocations for the final address of the kernel */
- lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */
- sldi r25,r25,32
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
-#if defined(CONFIG_PPC_BOOK3E_64)
- tophys(r26,r26)
-#endif
cmplwi cr0,r7,1 /* flagged to stay where we are ? */
- bne 1f
- add r25,r25,r26
+ mr r25,r26 /* then use current kernel base */
+ beq 1f
+ LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */
1: mr r3,r25
bl relocate
#if defined(CONFIG_PPC_BOOK3E_64)
@@ -617,14 +648,8 @@ __after_prom_start:
*
* Note: This process overwrites the OF exception vectors.
*/
- li r3,0 /* target addr */
-#ifdef CONFIG_PPC_BOOK3E_64
- tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */
-#endif
+ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET)
mr. r4,r26 /* In some cases the loader may */
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r4,r4)
-#endif
beq 9f /* have already put us at zero */
li r6,0x100 /* Start offset, the first 0x100 */
/* bytes were copied earlier. */
@@ -635,9 +660,6 @@ __after_prom_start:
* variable __run_at_load, if it is set the kernel is treated as relocatable
* kernel, otherwise it will be moved to PHYSICAL_START
*/
-#if defined(CONFIG_PPC_BOOK3E_64)
- tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */
-#endif
lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26)
cmplwi cr0,r7,1
bne 3f
@@ -756,9 +778,15 @@ _GLOBAL(pmac_secondary_start)
sync
slbia
- /* get TOC pointer (real address) */
+ /* Branch to our PAGE_OFFSET address */
+ bcl 20,31,$+4
+1: mflr r11
+ addi r11,r11,(2f - 1b)
+ tovirt(r11, r11)
+ mtctr r11
+ bctr
+2:
bl relative_toc
- tovirt(r2,r2)
/* Copy some CPU settings from CPU 0 */
bl __restore_cpu_ppc970
@@ -817,7 +845,7 @@ __secondary_start:
* can turn it on below. This is a call to C, which is OK, we're still
* running on the emergency stack.
*/
- bl early_setup_secondary
+ bl CFUNC(early_setup_secondary)
/*
* The primary has initialized our kernel stack for us in the paca, grab
@@ -856,7 +884,7 @@ start_secondary_prolog:
LOAD_PACA_TOC()
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
/*
* Reset stack pointer and call start_secondary
@@ -867,7 +895,7 @@ _GLOBAL(start_secondary_resume)
ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */
li r3,0
std r3,0(r1) /* Zero the stack frame pointer */
- bl start_secondary
+ bl CFUNC(start_secondary)
b .
#endif
@@ -897,10 +925,15 @@ SYM_FUNC_END(enable_64b_mode)
* TOC in -mcmodel=medium mode. After we relocate to 0 but before
* the MMU is on we need our TOC to be a virtual address otherwise
* these pointers will be real addresses which may get stored and
- * accessed later with the MMU on. We use tovirt() at the call
- * sites to handle this.
+ * accessed later with the MMU on. We branch to the virtual address
+ * while still in real mode then call relative_toc again to handle
+ * this.
*/
_GLOBAL(relative_toc)
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ tdnei r2,-1
+ blr
+#else
mflr r0
bcl 20,31,$+4
0: mflr r11
@@ -911,15 +944,15 @@ _GLOBAL(relative_toc)
.balign 8
p_toc: .8byte .TOC. - 0b
+#endif
/*
* This is where the main kernel code starts.
*/
__REF
start_here_multiplatform:
- /* set up the TOC */
- bl relative_toc
- tovirt(r2,r2)
+ /* Adjust TOC for moved kernel. Could adjust when moving it instead. */
+ bl relative_toc
/* Clear out the BSS. It may have been done in prom_init,
* already but that's irrelevant since prom_init will soon
@@ -972,7 +1005,7 @@ start_here_multiplatform:
*/
#ifdef CONFIG_KASAN
- bl kasan_early_init
+ bl CFUNC(kasan_early_init)
#endif
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
@@ -1005,7 +1038,7 @@ start_here_common:
stb r0,PACAIRQHAPPENED(r13)
/* Generic kernel entry */
- bl start_kernel
+ bl CFUNC(start_kernel)
/* Not reached */
0: trap
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 37d43c172676..b6b5b01a173c 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -5,6 +5,7 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#include <asm/thread_info.h> /* for THREAD_SHIFT */
#ifdef __ASSEMBLY__
diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c
index b9a725abc596..b1c0418b25c8 100644
--- a/arch/powerpc/kernel/idle.c
+++ b/arch/powerpc/kernel/idle.c
@@ -107,19 +107,11 @@ static struct ctl_table powersave_nap_ctl_table[] = {
},
{}
};
-static struct ctl_table powersave_nap_sysctl_root[] = {
- {
- .procname = "kernel",
- .mode = 0555,
- .child = powersave_nap_ctl_table,
- },
- {}
-};
static int __init
register_powersave_nap_sysctl(void)
{
- register_sysctl_table(powersave_nap_sysctl_root);
+ register_sysctl("kernel", powersave_nap_ctl_table);
return 0;
}
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 0ec1581619db..e34c72285b4e 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -95,7 +95,7 @@ static notrace void booke_load_dbcr0(void)
#endif
}
-static void check_return_regs_valid(struct pt_regs *regs)
+static notrace void check_return_regs_valid(struct pt_regs *regs)
{
#ifdef CONFIG_PPC_BOOK3S_64
unsigned long trap, srr0, srr1;
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index fccc34489add..bd863702d812 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -101,12 +101,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
- bl system_call_exception
+ bl CFUNC(system_call_exception)
.Lsyscall_vectored_\name\()_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,1 /* scv */
- bl syscall_exit_prepare
+ bl CFUNC(syscall_exit_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Lsyscall_vectored_\name\()_rst_start:
lbz r11,PACAIRQHAPPENED(r13)
@@ -185,7 +185,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl syscall_exit_restart
+ bl CFUNC(syscall_exit_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_vectored_\name\()_rst_start
1:
@@ -286,12 +286,12 @@ END_BTB_FLUSH_SECTION
* state of kernel code.
*/
SANITIZE_SYSCALL_GPRS()
- bl system_call_exception
+ bl CFUNC(system_call_exception)
.Lsyscall_exit:
addi r4,r1,STACK_INT_FRAME_REGS
li r5,0 /* !scv */
- bl syscall_exit_prepare
+ bl CFUNC(syscall_exit_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
#ifdef CONFIG_PPC_BOOK3S
.Lsyscall_rst_start:
@@ -372,7 +372,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart)
addi r4,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl syscall_exit_restart
+ bl CFUNC(syscall_exit_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Lsyscall_rst_start
1:
@@ -401,7 +401,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
li r3,0 /* 0 return value, no EMULATE_STACK_STORE */
bne+ .Lfast_kernel_interrupt_return_srr
addi r3,r1,STACK_INT_FRAME_REGS
- bl unrecoverable_exception
+ bl CFUNC(unrecoverable_exception)
b . /* should not get here */
#else
bne .Lfast_user_interrupt_return_srr
@@ -419,7 +419,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
addi r3,r1,STACK_INT_FRAME_REGS
- bl interrupt_exit_user_prepare
+ bl CFUNC(interrupt_exit_user_prepare)
#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi r3,0
bne- .Lrestore_nvgprs_\srr
@@ -523,7 +523,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl interrupt_exit_user_restart
+ bl CFUNC(interrupt_exit_user_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_user_rst_start
1:
@@ -536,7 +536,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr
interrupt_return_\srr\()_kernel:
_ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
addi r3,r1,STACK_INT_FRAME_REGS
- bl interrupt_exit_kernel_prepare
+ bl CFUNC(interrupt_exit_kernel_prepare)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
.Linterrupt_return_\srr\()_kernel_rst_start:
@@ -705,7 +705,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
addi r3,r1,STACK_INT_FRAME_REGS
li r11,IRQS_ALL_DISABLED
stb r11,PACAIRQSOFTMASK(r13)
- bl interrupt_exit_kernel_restart
+ bl CFUNC(interrupt_exit_kernel_restart)
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
b .Linterrupt_return_\srr\()_kernel_rst_start
1:
@@ -727,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text)
#ifdef CONFIG_PPC_BOOK3S
_GLOBAL(ret_from_fork_scv)
- bl schedule_tail
- REST_NVGPRS(r1)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
li r3,0 /* fork() return value */
b .Lsyscall_vectored_common_exit
#endif
_GLOBAL(ret_from_fork)
- bl schedule_tail
- REST_NVGPRS(r1)
+ bl CFUNC(schedule_tail)
+ HANDLER_RESTORE_NVGPRS()
li r3,0 /* fork() return value */
b .Lsyscall_exit
-_GLOBAL(ret_from_kernel_thread)
- bl schedule_tail
- REST_NVGPRS(r1)
+_GLOBAL(ret_from_kernel_user_thread)
+ bl CFUNC(schedule_tail)
mtctr r14
mr r3,r15
#ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -749,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread)
#endif
bctrl
li r3,0
+ /*
+ * It does not matter whether this returns via the scv or sc path
+ * because it returns as execve() and therefore has no calling ABI
+ * (i.e., it sets registers according to the exec()ed entry point).
+ */
b .Lsyscall_exit
+
+_GLOBAL(start_kernel_thread) /* calls schedule_tail, then the address held in r14 with r15 copied to r3 */
+ bl CFUNC(schedule_tail)
+ mtctr r14 /* CTR = call target taken from r14 */
+ mr r3,r15 /* r3 = value from r15; NOTE(review): presumably the thread function's argument - confirm against the code that sets r14/r15 */
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+ mr r12,r14 /* r12 = same address as CTR; NOTE(review): presumably expected by the callee's ELFv2 global entry point - confirm */
+#endif
+ bctrl /* call through CTR; LR is set, so a return would fall into the trap below */
+ /*
+ * This must not return. We actually want to BUG here, not WARN,
+ * because BUG will exit the process which is what the kernel thread
+ * should have done, which may give some hope of continuing.
+ */
+100: trap /* only reached if the call above returned */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 /* BUG entry for the trap at label 100 */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index ee95937bdaf1..0089dd49b4cb 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -35,6 +35,7 @@
#include <asm/vio.h>
#include <asm/tce.h>
#include <asm/mmu_context.h>
+#include <asm/ppc-pci.h>
#define DBG(...)
@@ -1086,7 +1087,7 @@ void iommu_tce_kill(struct iommu_table *tbl,
}
EXPORT_SYMBOL_GPL(iommu_tce_kill);
-int iommu_take_ownership(struct iommu_table *tbl)
+static int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
int ret = 0;
@@ -1118,9 +1119,8 @@ int iommu_take_ownership(struct iommu_table *tbl)
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_take_ownership);
-void iommu_release_ownership(struct iommu_table *tbl)
+static void iommu_release_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
@@ -1137,7 +1137,6 @@ void iommu_release_ownership(struct iommu_table *tbl)
spin_unlock(&tbl->pools[i].lock);
spin_unlock_irqrestore(&tbl->large_pool.lock, flags);
}
-EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
{
@@ -1158,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev)
pr_debug("%s: Adding %s to iommu group %d\n",
__func__, dev_name(dev), iommu_group_id(table_group->group));
-
- return iommu_group_add_device(table_group->group, dev);
+ /*
+ * This is still not adding devices via the IOMMU bus notifier because
+ * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls
+ * pcibios_scan_phb() first (and this guy adds devices and triggers
+ * the notifier) and only then it calls pci_bus_add_devices() which
+ * configures DMA for buses which also creates PEs and IOMMU groups.
+ */
+ return iommu_probe_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
@@ -1179,4 +1184,233 @@ void iommu_del_device(struct device *dev)
iommu_group_remove_device(dev);
}
EXPORT_SYMBOL_GPL(iommu_del_device);
+
+/*
+ * A simple iommu_table_group_ops which only allows reusing the existing
+ * iommu_table. This handles VFIO for POWER7 or the nested KVM.
+ * The ops does not allow creating windows and only allows reusing the existing
+ * one if it matches table_group->tce32_start/tce32_size/page_shift.
+ */
+
+/*
+ * Bytes needed for a single-level table covering @window_size with pages of
+ * 1 << @page_shift bytes, i.e. (window_size >> page_shift) * 8.
+ * Multi-level requests (@levels > 1) yield ~0U.
+ */
+static unsigned long spapr_tce_get_table_size(__u32 page_shift,
+		__u64 window_size, __u32 levels)
+{
+	if (levels > 1)
+		return ~0U;
+
+	return window_size >> (page_shift - 3);
+}
+
+/*
+ * "Create" window @num by handing out a reference to the table that already
+ * sits in slot 0 of @table_group. Only window 0 is allowed (-EPERM otherwise)
+ * and the request must match the existing table's page shift, size and
+ * number of levels (-EINVAL otherwise).
+ */
+static long spapr_tce_create_table(struct iommu_table_group *table_group, int num,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		struct iommu_table **ptbl)
+{
+	struct iommu_table *existing = table_group->tables[0];
+
+	if (num > 0)
+		return -EPERM;
+
+	if (existing->it_page_shift != page_shift)
+		return -EINVAL;
+	if (existing->it_size != (window_size >> page_shift))
+		return -EINVAL;
+	if (existing->it_indirect_levels != levels - 1)
+		return -EINVAL;
+
+	*ptbl = iommu_tce_table_get(existing);
+	return 0;
+}
+
+/*
+ * Accept @tbl for window @num only if it is the table already installed in
+ * that slot; anything else is refused with -EPERM.
+ */
+static long spapr_tce_set_window(struct iommu_table_group *table_group,
+		int num, struct iommu_table *tbl)
+{
+	if (tbl != table_group->tables[num])
+		return -EPERM;
+
+	return 0;
+}
+
+/* No-op: always reports success without touching @table_group. */
+static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num)
+{
+ return 0;
+}
+
+/*
+ * Take ownership of every table in @table_group that has an allocation
+ * bitmap (it_map). If one of them cannot be taken, the tables taken so far
+ * are released again and the error from iommu_take_ownership() is returned.
+ * Returns 0 on success.
+ */
+static long spapr_tce_take_ownership(struct iommu_table_group *table_group)
+{
+	int i, j, rc = 0;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = table_group->tables[i];
+
+		if (!tbl || !tbl->it_map)
+			continue;
+
+		rc = iommu_take_ownership(tbl);
+		if (!rc)
+			continue;
+
+		/*
+		 * Roll back. Apply the same filter as the forward pass: slots
+		 * that were skipped above were never taken, and an empty slot
+		 * would be a NULL dereference in iommu_release_ownership().
+		 */
+		for (j = 0; j < i; ++j) {
+			struct iommu_table *taken = table_group->tables[j];
+
+			if (!taken || !taken->it_map)
+				continue;
+
+			iommu_release_ownership(taken);
+		}
+		return rc;
+	}
+	return 0;
+}
+
+/*
+ * Clear every table present in @table_group and, for those with an
+ * allocation bitmap (it_map), hand ownership back.
+ */
+static void spapr_tce_release_ownership(struct iommu_table_group *table_group)
+{
+	int idx;
+
+	for (idx = 0; idx < IOMMU_TABLE_GROUP_MAX_TABLES; ++idx) {
+		struct iommu_table *tbl = table_group->tables[idx];
+
+		if (tbl) {
+			iommu_table_clear(tbl);
+			if (tbl->it_map)
+				iommu_release_ownership(tbl);
+		}
+	}
+}
+
+/* Table group callbacks, all implemented by the spapr_tce_*() helpers in this file. */
+struct iommu_table_group_ops spapr_tce_table_group_ops = {
+ .get_table_size = spapr_tce_get_table_size,
+ .create_table = spapr_tce_create_table,
+ .set_window = spapr_tce_set_window,
+ .unset_window = spapr_tce_unset_window,
+ .take_ownership = spapr_tce_take_ownership,
+ .release_ownership = spapr_tce_release_ownership,
+};
+
+/*
+ * A simple iommu_ops to allow less cruft in generic VFIO code.
+ */
+
+/*
+ * Attach @dev to the blocking domain: take ownership of the tables of the
+ * device's IOMMU group. Returns -ENODEV if the device has no group,
+ * otherwise whatever the group's take_ownership() callback returns.
+ */
+static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
+					       struct device *dev)
+{
+	struct iommu_table_group *table_group;
+	struct iommu_group *grp;
+	int ret;
+
+	grp = iommu_group_get(dev);
+	if (!grp)
+		return -ENODEV;
+
+	table_group = iommu_group_get_iommudata(grp);
+	ret = table_group->ops->take_ownership(table_group);
+
+	/* Drop the reference taken by iommu_group_get() */
+	iommu_group_put(grp);
+
+	return ret;
+}
+
+/*
+ * Return @dev to platform DMA: release ownership of the tables of the
+ * device's IOMMU group. Does nothing if the device has no group.
+ */
+static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
+{
+	struct iommu_group *grp = iommu_group_get(dev);
+	struct iommu_table_group *table_group;
+
+	if (!grp)
+		return;
+
+	table_group = iommu_group_get_iommudata(grp);
+	table_group->ops->release_ownership(table_group);
+
+	/* Balance iommu_group_get(), otherwise the group reference leaks */
+	iommu_group_put(grp);
+}
+
+/* Domain ops of the blocking domain; attach_dev is the only callback set. */
+static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
+ .attach_dev = spapr_tce_blocking_iommu_attach_dev,
+};
+
+/* Cache coherency is the only capability reported; @dev is not consulted. */
+static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
+{
+	return cap == IOMMU_CAP_CACHE_COHERENCY;
+}
+
+/*
+ * Allocate a zeroed domain wired to the blocking domain ops. Only
+ * IOMMU_DOMAIN_BLOCKED is supported; any other @type, or an allocation
+ * failure, yields NULL.
+ */
+static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
+{
+	struct iommu_domain *dom = NULL;
+
+	if (type == IOMMU_DOMAIN_BLOCKED) {
+		dom = kzalloc(sizeof(*dom), GFP_KERNEL);
+		if (dom)
+			dom->ops = &spapr_tce_blocking_domain_ops;
+	}
+
+	return dom;
+}
+
+/*
+ * Map a PCI device to the iommu_device embedded in its PHB (the bus'
+ * sysdata). Non-PCI devices are rejected with ERR_PTR(-EPERM).
+ */
+static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
+{
+	struct pci_controller *hose;
+
+	if (!dev_is_pci(dev))
+		return ERR_PTR(-EPERM);
+
+	hose = to_pci_dev(dev)->bus->sysdata;
+
+	return &hose->iommu;
+}
+
+/* Intentionally empty: spapr_tce_iommu_probe_device() allocates nothing per device. */
+static void spapr_tce_iommu_release_device(struct device *dev)
+{
+}
+
+/*
+ * Ask the PHB's controller_ops for the IOMMU group of @dev. Returns
+ * ERR_PTR(-ENOENT) if the controller provides no device_group callback.
+ * NOTE(review): @dev is converted with to_pci_dev() without a dev_is_pci()
+ * check here - presumably only reached after probe_device accepted it; confirm.
+ */
+static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	struct pci_controller *hose = pdev->bus->sysdata;
+
+	if (hose->controller_ops.device_group)
+		return hose->controller_ops.device_group(hose, pdev);
+
+	return ERR_PTR(-ENOENT);
+}
+
+/* iommu_ops registered for every PHB by spapr_tce_setup_phb_iommus_initcall(). */
+static const struct iommu_ops spapr_tce_iommu_ops = {
+ .capable = spapr_tce_iommu_capable,
+ .domain_alloc = spapr_tce_iommu_domain_alloc,
+ .probe_device = spapr_tce_iommu_probe_device,
+ .release_device = spapr_tce_iommu_release_device,
+ .device_group = spapr_tce_iommu_device_group,
+ .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
+};
+
+/*
+ * sysfs attribute group "spapr-tce-iommu" passed to iommu_device_sysfs_add();
+ * it currently carries no attributes (the list is just the NULL terminator).
+ */
+static struct attribute *spapr_tce_iommu_attrs[] = {
+ NULL,
+};
+
+static struct attribute_group spapr_tce_iommu_group = {
+ .name = "spapr-tce-iommu",
+ .attrs = spapr_tce_iommu_attrs,
+};
+
+static const struct attribute_group *spapr_tce_iommu_groups[] = {
+ &spapr_tce_iommu_group,
+ NULL,
+};
+
+/*
+ * This registers IOMMU devices of PHBs. This needs to happen
+ * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and
+ * before subsys_initcall(iommu_subsys_init).
+ *
+ * Every controller on hose_list gets a sysfs entry named "iommu-phb%04x"
+ * (from its global_number) and is registered with spapr_tce_iommu_ops.
+ * The return values of iommu_device_sysfs_add() and iommu_device_register()
+ * are not checked; the initcall always returns 0.
+ */
+static int __init spapr_tce_setup_phb_iommus_initcall(void)
+{
+ struct pci_controller *hose;
+
+ list_for_each_entry(hose, &hose_list, list_node) {
+ iommu_device_sysfs_add(&hose->iommu, hose->parent,
+ spapr_tce_iommu_groups, "iommu-phb%04x",
+ hose->global_number);
+ iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops,
+ hose->parent);
+ }
+ return 0;
+}
+postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall);
+
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c9535f2760b5..6f7d4edaa0bc 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -206,7 +206,11 @@ static __always_inline void call_do_softirq(const void *sp)
asm volatile (
PPC_STLU " %%r1, %[offset](%[sp]) ;"
"mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
"bl %[callee] ;"
+#endif
PPC_LL " %%r1, 0(%%r1) ;"
: // Outputs
: // Inputs
@@ -259,7 +263,11 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
PPC_STLU " %%r1, %[offset](%[sp]) ;"
"mr %%r4, %%r1 ;"
"mr %%r1, %[sp] ;"
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ "bl %[callee]@notoc ;"
+#else
"bl %[callee] ;"
+#endif
PPC_LL " %%r1, 0(%%r1) ;"
: // Outputs
"+r" (r3)
diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index c788c55512ed..938e66829eae 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -348,13 +348,12 @@ EXPORT_SYMBOL(arch_local_irq_restore);
* already the case when ppc_md.power_save is called). The function
* will return whether to enter power save or just return.
*
- * In the former case, it will have notified lockdep of interrupts
- * being re-enabled and generally sanitized the lazy irq state,
- * and in the latter case it will leave with interrupts hard
+ * In the former case, it will have generally sanitized the lazy irq
+ * state, and in the latter case it will leave with interrupts hard
* disabled and marked as such, so the local_irq_enable() call
* in arch_cpu_idle() will properly re-enable everything.
*/
-bool prep_irq_for_idle(void)
+__cpuidle bool prep_irq_for_idle(void)
{
/*
* First we need to hard disable to ensure no interrupt
@@ -370,9 +369,6 @@ bool prep_irq_for_idle(void)
if (lazy_irq_pending())
return false;
- /* Tell lockdep we are about to re-enable */
- trace_hardirqs_on();
-
/*
* Mark interrupts as soft-enabled and clear the
* PACA_IRQ_HARD_DIS from the pending mask since we
diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c
index dc746611ebc0..85bdd7d3652f 100644
--- a/arch/powerpc/kernel/isa-bridge.c
+++ b/arch/powerpc/kernel/isa-bridge.c
@@ -55,80 +55,49 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size)
}
}
+/*
+ * Parse the "ranges" of an ISA bridge node and map its IO space (at most 64K).
+ * @phb_io_base_phys is the physical IO base of the parent PHB, or 0 when the
+ * caller has none (non-PCI bridge), in which case the translated CPU address
+ * of the first IO range is used instead.
+ * Returns 0, or -EINVAL if the IO range is untranslatable or not page aligned.
+ */
-static void pci_process_ISA_OF_ranges(struct device_node *isa_node,
- unsigned long phb_io_base_phys)
+static int process_ISA_OF_ranges(struct device_node *isa_node,
+ unsigned long phb_io_base_phys)
{
- /* We should get some saner parsing here and remove these structs */
- struct pci_address {
- u32 a_hi;
- u32 a_mid;
- u32 a_lo;
- };
-
- struct isa_address {
- u32 a_hi;
- u32 a_lo;
- };
-
- struct isa_range {
- struct isa_address isa_addr;
- struct pci_address pci_addr;
- unsigned int size;
- };
-
- const struct isa_range *range;
- unsigned long pci_addr;
- unsigned int isa_addr;
unsigned int size;
- int rlen = 0;
+ struct of_range_parser parser;
+ struct of_range range;
- range = of_get_property(isa_node, "ranges", &rlen);
- if (range == NULL || (rlen < sizeof(struct isa_range)))
+ if (of_range_parser_init(&parser, isa_node))
goto inval_range;
- /* From "ISA Binding to 1275"
- * The ranges property is laid out as an array of elements,
- * each of which comprises:
- * cells 0 - 1: an ISA address
- * cells 2 - 4: a PCI address
- * (size depending on dev->n_addr_cells)
- * cell 5: the size of the range
- */
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) {
- range++;
- rlen -= sizeof(struct isa_range);
- if (rlen < sizeof(struct isa_range))
- goto inval_range;
- }
- if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO)
- goto inval_range;
+ for_each_of_range(&parser, &range) {
+ if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO)
+ continue;
- isa_addr = range->isa_addr.a_lo;
- pci_addr = (unsigned long) range->pci_addr.a_mid << 32 |
- range->pci_addr.a_lo;
+ if (range.cpu_addr == OF_BAD_ADDR) {
+ pr_err("ISA: Bad CPU mapping: %s\n", __func__);
+ return -EINVAL;
+ }
- /* Assume these are both zero. Note: We could fix that and
- * do a proper parsing instead ... oh well, that will do for
- * now as nobody uses fancy mappings for ISA bridges
- */
- if ((pci_addr != 0) || (isa_addr != 0)) {
- printk(KERN_ERR "unexpected isa to pci mapping: %s\n",
- __func__);
- return;
- }
+ /* We need page alignment */
+ if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) {
+ pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node);
+ return -EINVAL;
+ }
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(range->size);
- if (size > 0x10000)
- size = 0x10000;
+ /* Align size and make sure it's cropped to 64K */
+ size = PAGE_ALIGN(range.size);
+ if (size > 0x10000)
+ size = 0x10000;
- remap_isa_base(phb_io_base_phys, size);
- return;
+ if (!phb_io_base_phys)
+ phb_io_base_phys = range.cpu_addr;
+
+ remap_isa_base(phb_io_base_phys, size);
+ return 0;
+ }
inval_range:
- printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
- "mapping 64k\n");
- remap_isa_base(phb_io_base_phys, 0x10000);
+ /*
+ * No usable IO range. Fall back to a default 64K window, but only when
+ * the caller supplied a PHB IO base: with a base of 0 there is nothing
+ * meaningful to map.
+ */
+ if (phb_io_base_phys) {
+ pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n");
+ remap_isa_base(phb_io_base_phys, 0x10000);
+ }
+ return 0;
}
@@ -170,7 +139,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
isa_bridge_devnode = np;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(np, hose->io_base_phys);
+ process_ISA_OF_ranges(np, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
@@ -186,75 +155,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose)
*/
void __init isa_bridge_init_non_pci(struct device_node *np)
{
- const __be32 *ranges, *pbasep = NULL;
- int rlen, i, rs;
- u32 na, ns, pna;
- u64 cbase, pbase, size = 0;
+ int ret;
/* If we already have an ISA bridge, bail off */
if (isa_bridge_devnode != NULL)
return;
- pna = of_n_addr_cells(np);
- if (of_property_read_u32(np, "#address-cells", &na) ||
- of_property_read_u32(np, "#size-cells", &ns)) {
- pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n",
- np);
- return;
- }
-
- /* Check it's a supported address format */
- if (na != 2 || ns != 1) {
- pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n",
- np);
- return;
- }
- rs = na + ns + pna;
-
- /* Grab the ranges property */
- ranges = of_get_property(np, "ranges", &rlen);
- if (ranges == NULL || rlen < rs) {
- pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n",
- np);
- return;
- }
-
- /* Parse it. We are only looking for IO space */
- for (i = 0; (i + rs - 1) < rlen; i += rs) {
- if (be32_to_cpup(ranges + i) != 1)
- continue;
- cbase = be32_to_cpup(ranges + i + 1);
- size = of_read_number(ranges + i + na + pna, ns);
- pbasep = ranges + i + na;
- break;
- }
-
- /* Got something ? */
- if (!size || !pbasep) {
- pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n",
- np);
+ ret = process_ISA_OF_ranges(np, 0);
+ if (ret)
return;
- }
-
- /* Align size and make sure it's cropped to 64K */
- size = PAGE_ALIGN(size);
- if (size > 0x10000)
- size = 0x10000;
-
- /* Map pbase */
- pbase = of_translate_address(np, pbasep);
- if (pbase == OF_BAD_ADDR) {
- pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n",
- np);
- return;
- }
-
- /* We need page alignment */
- if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) {
- pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n",
- np);
- return;
- }
/* Got it */
isa_bridge_devnode = np;
@@ -263,7 +172,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np)
* and map it
*/
isa_io_base = ISA_IO_BASE;
- remap_isa_base(pbase, size);
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
@@ -282,7 +190,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev,
isa_bridge_pcidev = pdev;
/* Now parse the "ranges" property and setup the ISA mapping */
- pci_process_ISA_OF_ranges(devnode, hose->io_base_phys);
+ process_ISA_OF_ranges(devnode, hose->io_base_phys);
/* Set the global ISA io base to indicate we have an ISA bridge */
isa_io_base = ISA_IO_BASE;
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c
index f048c424c525..c9ad12461d44 100644
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np,
/* We only support ports that have a clock frequency properly
* encoded in the device-tree.
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* if reg-offset don't try to use it */
- if ((of_get_property(np, "reg-offset", NULL) != NULL))
+ if (of_property_present(np, "reg-offset"))
return -1;
/* if rtas uses this device, don't try to use it as well */
- if (of_get_property(np, "used-by-rtas", NULL) != NULL)
+ if (of_property_read_bool(np, "used-by-rtas"))
return -1;
/* Get the address */
@@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np,
* Note: Don't even try on P8 lpc, we know it's not directly mapped
*/
if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") ||
- of_get_property(isa_brg, "ranges", NULL)) {
+ of_property_present(isa_brg, "ranges")) {
taddr = of_translate_address(np, reg);
if (taddr == OF_BAD_ADDR)
taddr = 0;
@@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np,
* compatible UARTs on PCI need all sort of quirks (port offsets
* etc...) that this code doesn't know about
*/
- if (of_get_property(np, "clock-frequency", NULL) == NULL)
+ if (!of_property_present(np, "clock-frequency"))
return -1;
/* Get the PCI address. Assume BAR 0 */
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index c39c07a4c06e..2c9ac70aaf0c 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -432,7 +432,7 @@ _GLOBAL(kexec_sequence)
1:
/* copy dest pages, flush whole dest image */
mr r3,r29
- bl kexec_copy_flush /* (image) */
+ bl CFUNC(kexec_copy_flush) /* (image) */
/* turn off mmu now if not done earlier */
cmpdi r26,0
diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c
index ea6536171778..816a63fd71fb 100644
--- a/arch/powerpc/kernel/module_32.c
+++ b/arch/powerpc/kernel/module_32.c
@@ -163,8 +163,7 @@ static uint32_t do_plt_call(void *location,
pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location);
/* Init, or core PLT? */
- if (location >= mod->core_layout.base
- && location < mod->core_layout.base + mod->core_layout.size)
+ if (within_module_core((unsigned long)location, mod))
entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr;
else
entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr;
@@ -322,14 +321,14 @@ notrace int module_trampoline_target(struct module *mod, unsigned long addr,
int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs)
{
- module->arch.tramp = do_plt_call(module->core_layout.base,
+ module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base,
(unsigned long)ftrace_caller,
sechdrs, module);
if (!module->arch.tramp)
return -ENOENT;
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- module->arch.tramp_regs = do_plt_call(module->core_layout.base,
+ module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base,
(unsigned long)ftrace_regs_caller,
sechdrs, module);
if (!module->arch.tramp_regs)
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 2ac78d207f77..92570289ce08 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -101,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func)
/* Like PPC32, we need little trampolines to do > 24-bit jumps (into
the kernel itself). But on PPC64, these need to be used for every
jump, actually, to reset r2 (TOC+0x8000). */
-struct ppc64_stub_entry
-{
- /* 28 byte jump instruction sequence (7 instructions). We only
- * need 6 instructions on ABIv2 but we always allocate 7 so
- * so we don't have to modify the trampoline load instruction. */
+struct ppc64_stub_entry {
+ /*
+ * 28 byte jump instruction sequence (7 instructions) that can
+ * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
+ * with PCREL kernels that use prefix instructions in the stub.
+ */
u32 jump[7];
/* Used by ftrace to identify stubs */
u32 magic;
/* Data for the above code */
func_desc_t funcdata;
+} __aligned(8);
+
+/* One GOT slot: a 64-bit address. got_for_addr() treats addr == 0 as a free slot. */
+struct ppc64_got_entry {
+ u64 addr;
};
/*
* PPC64 uses 24 bit jumps, but we need to jump into other modules or
* the kernel which may be further. So we jump to a stub.
*
- * For ELFv1 we need to use this to set up the new r2 value (aka TOC
- * pointer). For ELFv2 it's the callee's responsibility to set up the
- * new r2, but for both we need to save the old r2.
+ * Target address and TOC are loaded from function descriptor in the
+ * ppc64_stub_entry.
+ *
+ * r12 is used to generate the target address, which is required for the
+ * ELFv2 global entry point calling convention.
*
- * We could simply patch the new r2 value and function pointer into
- * the stub, but it's significantly shorter to put these values at the
- * end of the stub code, and patch the stub address (32-bits relative
- * to the TOC ptr, r2) into the stub.
+ * TOC handling:
+ * - PCREL does not have a TOC.
+ * - ELFv2 non-PCREL just has to save r2, the callee is responsible for
+ * setting its own TOC pointer at the global entry address.
+ * - ELFv1 must load the new TOC pointer from the function descriptor.
*/
static u32 ppc64_stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* pld r12,addr */
+ PPC_PREFIX_8LS | __PPC_PRFX_R(1),
+ PPC_INST_PLD | ___PPC_RT(_R12),
+#else
PPC_RAW_ADDIS(_R11, _R2, 0),
PPC_RAW_ADDI(_R11, _R11, 0),
/* Save current r2 value in magic place on the stack. */
@@ -136,13 +149,17 @@ static u32 ppc64_stub_insns[] = {
/* Set up new r2 from function descriptor */
PPC_RAW_LD(_R2, _R11, 40),
#endif
+#endif
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR(),
};
-/* Count how many different 24-bit relocations (different symbol,
- different addend) */
-static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
+/*
+ * Count how many different r_type relocations (different symbol,
+ * different addend).
+ */
+static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num,
+ unsigned long r_type)
{
unsigned int i, r_info, r_addend, _count_relocs;
@@ -151,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
r_info = 0;
r_addend = 0;
for (i = 0; i < num; i++)
- /* Only count 24-bit relocs, others don't need stubs */
- if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 &&
+ /* Only count r_type relocs, others don't need stubs */
+ if (ELF64_R_TYPE(rela[i].r_info) == r_type &&
(r_info != ELF64_R_SYM(rela[i].r_info) ||
r_addend != rela[i].r_addend)) {
_count_relocs++;
@@ -213,7 +230,14 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
relocs += count_relocs((void *)sechdrs[i].sh_addr,
sechdrs[i].sh_size
- / sizeof(Elf64_Rela));
+ / sizeof(Elf64_Rela),
+ R_PPC_REL24);
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_REL24_NOTOC);
+#endif
}
}
@@ -230,6 +254,95 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr,
return relocs * sizeof(struct ppc64_stub_entry);
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/*
+ * Count the relocations in @rela whose symbol lives in section @pcpu.
+ * An entry with the same symbol and addend as the most recently counted
+ * one is not counted again.
+ */
+static int count_pcpu_relocs(const Elf64_Shdr *sechdrs,
+			     const Elf64_Rela *rela, unsigned int num,
+			     unsigned int symindex, unsigned int pcpu)
+{
+	unsigned int i, last_sym = 0, last_addend = 0, count = 0;
+
+	for (i = 0; i < num; i++) {
+		/* This is the symbol it is referring to */
+		Elf64_Sym *sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
+			+ ELF64_R_SYM(rela[i].r_info);
+
+		if (sym->st_shndx != pcpu)
+			continue;
+
+		/* Same symbol and addend as the last one counted */
+		if (last_sym == ELF64_R_SYM(rela[i].r_info) &&
+		    last_addend == rela[i].r_addend)
+			continue;
+
+		count++;
+		last_sym = ELF64_R_SYM(rela[i].r_info);
+		last_addend = rela[i].r_addend;
+	}
+
+	return count;
+}
+
+/*
+ * Get size of potential GOT required: an upper bound, in bytes, derived from
+ * the GOT_PCREL34 relocations plus (on SMP) the relocations against the
+ * module's percpu section, over all SHT_RELA sections.
+ * Side effect: every SHT_RELA section is sorted in place.
+ */
+static unsigned long get_got_size(const Elf64_Ehdr *hdr,
+ const Elf64_Shdr *sechdrs,
+ struct module *me)
+{
+ /* One extra reloc so it's always 0-addr terminated */
+ unsigned long relocs = 1;
+ unsigned int i, symindex = 0;
+
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symindex = i;
+ break;
+ }
+ }
+ WARN_ON_ONCE(!symindex);
+
+ /* Every relocated section... */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_RELA) {
+ pr_debug("Found relocations in section %u\n", i);
+ pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela));
+
+ /*
+ * Sort the relocation information based on a symbol and
+ * addend key. The sort costs O(n*log n), but it lets
+ * count_relocs() and count_pcpu_relocs() detect
+ * duplicates by comparing against the previous entry,
+ * i.e. in linear O(n) time.
+ */
+ sort((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size / sizeof(Elf64_Rela),
+ sizeof(Elf64_Rela), relacmp, NULL);
+
+ relocs += count_relocs((void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ R_PPC64_GOT_PCREL34);
+
+ /*
+ * Percpu data access typically gets linked with
+ * REL34 relocations, but the percpu section gets
+ * moved at load time and requires that to be
+ * converted to GOT linkage.
+ */
+ if (IS_ENABLED(CONFIG_SMP) && symindex)
+ relocs += count_pcpu_relocs(sechdrs,
+ (void *)sechdrs[i].sh_addr,
+ sechdrs[i].sh_size
+ / sizeof(Elf64_Rela),
+ symindex, me->arch.pcpu_section);
+ }
+ }
+
+ pr_debug("Looks like a total of %lu GOT entries, max\n", relocs);
+ return relocs * sizeof(struct ppc64_got_entry);
+}
+#else /* CONFIG_PPC_KERNEL_PCREL */
+
/* Still needed for ELFv2, for .TOC. */
static void dedotify_versions(struct modversion_info *vers,
unsigned long size)
@@ -279,6 +392,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs,
}
return NULL;
}
+#endif /* CONFIG_PPC_KERNEL_PCREL */
bool module_init_section(const char *name)
{
@@ -297,6 +411,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
for (i = 1; i < hdr->e_shnum; i++) {
if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
me->arch.stubs_section = i;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0)
+ me->arch.pcpu_section = i;
+ else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) {
+ me->arch.got_section = i;
+ if (sechdrs[i].sh_addralign < 8)
+ sechdrs[i].sh_addralign = 8;
+ }
+#else
else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) {
me->arch.toc_section = i;
if (sechdrs[i].sh_addralign < 8)
@@ -311,6 +434,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
sechdrs[i].sh_size / sizeof(Elf64_Sym),
(void *)hdr
+ sechdrs[sechdrs[i].sh_link].sh_offset);
+#endif
}
if (!me->arch.stubs_section) {
@@ -318,26 +442,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
return -ENOEXEC;
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ if (!me->arch.got_section) {
+ pr_err("%s: doesn't contain .mygot.\n", me->name);
+ return -ENOEXEC;
+ }
+
+ /* Override the got size */
+ sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me);
+#else
/* If we don't have a .toc, just use .stubs. We need to set r2
to some reasonable value in case the module calls out to
other functions via a stub, or if a function pointer escapes
the module by some means. */
if (!me->arch.toc_section)
me->arch.toc_section = me->arch.stubs_section;
+#endif
/* Override the stubs size */
sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs);
+
return 0;
}
#ifdef CONFIG_MPROFILE_KERNEL
static u32 stub_insns[] = {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)),
+ PPC_RAW_NOP(), /* align the prefix insn */
+ /*
+ * paddi r12,r12,addr
+ * The immediate is left as 0 here; create_ftrace_stub() ORs the offset
+ * from kernelbase into these two words (jump[2] and jump[3]).
+ */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(0),
+ PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR(),
+#else
PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)),
PPC_RAW_ADDIS(_R12, _R12, 0),
PPC_RAW_ADDI(_R12, _R12, 0),
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR(),
+#endif
};
/*
@@ -358,18 +503,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry,
{
long reladdr;
- memcpy(entry->jump, stub_insns, sizeof(stub_insns));
-
- /* Stub uses address relative to kernel toc (from the paca) */
- reladdr = addr - kernel_toc_addr();
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address of %ps out of range of kernel_toc.\n",
- me->name, (void *)addr);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
return 0;
}
- entry->jump[1] |= PPC_HA(reladdr);
- entry->jump[2] |= PPC_LO(reladdr);
+ BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump));
+ memcpy(entry->jump, stub_insns, sizeof(stub_insns));
+
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to kernel base (from the paca) */
+ reladdr = addr - local_paca->kernelbase;
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %ps out of range of 34-bit relative address.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[2] |= IMM_H18(reladdr);
+ entry->jump[3] |= IMM_L(reladdr);
+ } else {
+ /* Stub uses address relative to kernel toc (from the paca) */
+ reladdr = addr - kernel_toc_addr();
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address of %ps out of range of kernel_toc.\n",
+ me->name, (void *)addr);
+ return 0;
+ }
+
+ entry->jump[1] |= PPC_HA(reladdr);
+ entry->jump[2] |= PPC_LO(reladdr);
+ }
/* Even though we don't use funcdata in the stub, it's needed elsewhere. */
entry->funcdata = func_desc(addr);
@@ -415,7 +579,11 @@ static bool is_mprofile_ftrace_call(const char *name)
*/
static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me)
{
+#ifndef CONFIG_PPC_KERNEL_PCREL
return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000;
+#else /* PCREL does not have a TOC, so there is no r2 value to compute */
+ return -1;
+#endif /* !CONFIG_PPC_KERNEL_PCREL */
}
/* Patch stub to reference function and correct r2 value. */
@@ -432,28 +600,53 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
if (is_mprofile_ftrace_call(name))
return create_ftrace_stub(entry, addr, me);
+ if ((unsigned long)entry->jump % 8 != 0) {
+ pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name);
+ return 0;
+ }
+
+ BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump));
for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) {
if (patch_instruction(&entry->jump[i],
ppc_inst(ppc64_stub_insns[i])))
return 0;
}
- /* Stub uses address relative to r2. */
- reladdr = (unsigned long)entry - my_r2(sechdrs, me);
- if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
- pr_err("%s: Address %p of stub out of range of %p.\n",
- me->name, (void *)reladdr, (void *)my_r2);
- return 0;
- }
- pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ /* Stub uses address relative to itself! */
+ reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+ BUILD_BUG_ON(reladdr != 32);
+ if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+ pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+ me->name, (void *)reladdr);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
- if (patch_instruction(&entry->jump[0],
- ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
- return 0;
+ /* May not even need this if we're relative to 0 */
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+ entry->jump[1] | IMM_L(reladdr))))
+ return 0;
- if (patch_instruction(&entry->jump[1],
- ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
- return 0;
+ } else {
+ /* Stub uses address relative to r2. */
+ reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("%s: Address %p of stub out of range of %p.\n",
+ me->name, (void *)reladdr, (void *)my_r2);
+ return 0;
+ }
+ pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+ if (patch_instruction(&entry->jump[0],
+ ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+ return 0;
+
+ if (patch_instruction(&entry->jump[1],
+ ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+ return 0;
+ }
// func_desc_t is 8 bytes if ABIv2, else 16 bytes
desc = func_desc(addr);
@@ -497,6 +690,37 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
return (unsigned long)&stubs[i];
}
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/*
+ * Return the address of the GOT entry that holds @addr, claiming the first
+ * free (zero) entry if @addr is not in the GOT yet. Returns 0, after a
+ * warning, if the GOT is full. @name is currently unused.
+ */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+				  unsigned long addr,
+				  struct module *me,
+				  const char *name)
+{
+	struct ppc64_got_entry *got;
+	unsigned int i, num_got;
+
+	num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
+	got = (void *)sechdrs[me->arch.got_section].sh_addr;
+
+	/*
+	 * Find the entry for this address or, if that fails, the next
+	 * available one. The index is bounded by the section size before
+	 * got[i] is touched, so a full table is never accessed past its end.
+	 */
+	for (i = 0; i < num_got; i++) {
+		if (!got[i].addr) {
+			got[i].addr = addr;
+			return (unsigned long)&got[i];
+		}
+
+		if (got[i].addr == addr)
+			return (unsigned long)&got[i];
+	}
+
+	WARN_ON(1);
+	return 0;
+}
+#endif
+
/* We expect a noop next: if it is, replace it with instruction to
restore r2. */
static int restore_r2(const char *name, u32 *instruction, struct module *me)
@@ -504,6 +728,9 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
u32 *prev_insn = instruction - 1;
u32 insn_val = *instruction;
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ return 0;
+
if (is_mprofile_ftrace_call(name))
return 0;
@@ -549,6 +776,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
pr_debug("Applying ADD relocate section %u to %u\n", relsec,
sechdrs[relsec].sh_info);
+#ifndef CONFIG_PPC_KERNEL_PCREL
/* First time we're called, we can fix up .TOC. */
if (!me->arch.toc_fixed) {
sym = find_dot_toc(sechdrs, strtab, symindex);
@@ -558,7 +786,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
sym->st_value = my_r2(sechdrs, me);
me->arch.toc_fixed = true;
}
-
+#endif
for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
/* This is where to make the change */
location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
@@ -586,6 +814,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(unsigned long *)location = value;
break;
+#ifndef CONFIG_PPC_KERNEL_PCREL
case R_PPC64_TOC:
*(unsigned long *)location = my_r2(sechdrs, me);
break;
@@ -645,8 +874,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
= (*((uint16_t *) location) & ~0xffff)
| (value & 0xffff);
break;
+#endif
case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ /* PCREL still generates REL24 for mcount */
+ case R_PPC64_REL24_NOTOC:
+#endif
/* FIXME: Handle weak symbols here --RR */
if (sym->st_shndx == SHN_UNDEF ||
sym->st_shndx == SHN_LIVEPATCH) {
@@ -694,6 +928,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*(u32 *)location = value;
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_PCREL34: {
+ unsigned long absvalue = value;
+
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+
+ if (value + 0x200000000 > 0x3ffffffff) {
+ if (sym->st_shndx != me->arch.pcpu_section) {
+ pr_err("%s: REL34 %li out of range!\n",
+ me->name, (long)value);
+ return -ENOEXEC;
+ }
+
+ /*
+ * per-cpu section is special cased because
+ * it is moved during loading, so has to be
+ * converted to use GOT.
+ */
+ value = got_for_addr(sechdrs, absvalue, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+
+ /* Turn pla into pld */
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+ (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+ return -EFAULT;
+ }
+
+ if (patch_instruction((u32 *)location,
+ ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
+ (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+ return -EFAULT;
+
+ break;
+ }
+
+#else
case R_PPC64_TOCSAVE:
/*
* Marker reloc indicates we don't have to save r2.
@@ -701,8 +976,12 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
* it.
*/
break;
+#endif
case R_PPC64_ENTRY:
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+ break;
+
/*
* Optimize ELFv2 large code model entry point if
* the TOC is within 2GB range of current location.
@@ -745,6 +1024,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
| (value & 0xffff);
break;
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ case R_PPC64_GOT_PCREL34:
+ value = got_for_addr(sechdrs, value, me,
+ strtab + sym->st_name);
+ if (!value)
+ return -ENOENT;
+ value -= (unsigned long)location;
+ ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) |
+ ((value >> 16) & 0x3ffff);
+ ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) |
+ (value & 0xffff);
+ break;
+#endif
+
default:
pr_err("%s: Unknown ADD relocation: %lu\n",
me->name,
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index be8db402e963..cda4e00b67c1 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -191,7 +191,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu)
#endif
new_paca->lock_token = 0x8000;
new_paca->paca_index = cpu;
+#ifndef CONFIG_PPC_KERNEL_PCREL
new_paca->kernel_toc = kernel_toc_addr();
+#endif
new_paca->kernelbase = (unsigned long) _stext;
/* Only set MSR:IR/DR when MMU is initialized */
new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR);
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index d67cf79bf5d0..e88d7c9feeec 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -880,6 +880,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
static void pcibios_fixup_resources(struct pci_dev *dev)
{
struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct resource *res;
int i;
if (!hose) {
@@ -891,9 +892,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev)
if (dev->is_virtfn)
return;
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- struct resource *res = dev->resource + i;
+ pci_dev_for_each_resource(dev, res, i) {
struct pci_bus_region reg;
+
if (!res->flags)
continue;
@@ -1452,11 +1453,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus)
struct pci_bus *child_bus;
list_for_each_entry(dev, &bus->devices, bus_list) {
+ struct resource *r;
int i;
- for (i = 0; i < PCI_NUM_RESOURCES; i++) {
- struct resource *r = &dev->resource[i];
-
+ pci_dev_for_each_resource(dev, r, i) {
if (r->parent || !r->start || !r->flags)
continue;
@@ -1705,19 +1705,20 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb);
static void fixup_hide_host_resource_fsl(struct pci_dev *dev)
{
- int i, class = dev->class >> 8;
+ int class = dev->class >> 8;
/* When configured as agent, programming interface = 1 */
int prog_if = dev->class & 0xf;
+ struct resource *r;
if ((class == PCI_CLASS_PROCESSOR_POWERPC ||
class == PCI_CLASS_BRIDGE_OTHER) &&
(dev->hdr_type == PCI_HEADER_TYPE_NORMAL) &&
(prog_if == 0) &&
(dev->bus->parent == NULL)) {
- for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
- dev->resource[i].start = 0;
- dev->resource[i].end = 0;
- dev->resource[i].flags = 0;
+ pci_dev_for_each_resource(dev, r) {
+ r->start = 0;
+ r->end = 0;
+ r->flags = 0;
}
}
}
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index fd42059ae2a5..e27342ef128b 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -73,7 +73,7 @@ static int __init pcibios_init(void)
return 0;
}
-subsys_initcall(pcibios_init);
+subsys_initcall_sync(pcibios_init);
int pcibios_unmap_io_space(struct pci_bus *bus)
{
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4b29ac5ddac6..1fefafb2b29b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1630,7 +1630,7 @@ void arch_setup_new_exec(void)
}
#ifdef CONFIG_PPC64
-/**
+/*
* Assign a TIDR (thread ID) for task @t and set it in the thread
* structure. For now, we only support setting TIDR for 'current' task.
*
@@ -1738,68 +1738,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
- unsigned long clone_flags = args->flags;
- unsigned long usp = args->stack;
- unsigned long tls = args->tls;
- struct pt_regs *childregs, *kregs;
+ struct pt_regs *kregs; /* Switch frame regs */
extern void ret_from_fork(void);
extern void ret_from_fork_scv(void);
- extern void ret_from_kernel_thread(void);
+ extern void ret_from_kernel_user_thread(void);
+ extern void start_kernel_thread(void);
void (*f)(void);
unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
- struct thread_info *ti = task_thread_info(p);
#ifdef CONFIG_HAVE_HW_BREAKPOINT
int i;
#endif
klp_init_thread_info(p);
- /* Create initial stack frame. */
- sp -= STACK_USER_INT_FRAME_SIZE;
- *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
-
- /* Copy registers */
- childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
- if (unlikely(args->fn)) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
/* kernel thread */
+
+ /* Create initial minimum stack frame. */
+ sp -= STACK_FRAME_MIN_SIZE;
((unsigned long *)sp)[0] = 0;
- memset(childregs, 0, sizeof(struct pt_regs));
- childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE;
- /* function */
- if (args->fn)
- childregs->gpr[14] = ppc_function_entry((void *)args->fn);
-#ifdef CONFIG_PPC64
- clear_tsk_thread_flag(p, TIF_32BIT);
- childregs->softe = IRQS_ENABLED;
-#endif
- childregs->gpr[15] = (unsigned long)args->fn_arg;
+
+ f = start_kernel_thread;
p->thread.regs = NULL; /* no user register state */
- ti->flags |= _TIF_RESTOREALL;
- f = ret_from_kernel_thread;
+ clear_tsk_compat_task(p);
} else {
/* user thread */
- struct pt_regs *regs = current_pt_regs();
- *childregs = *regs;
- if (usp)
- childregs->gpr[1] = usp;
- ((unsigned long *)sp)[0] = childregs->gpr[1];
- p->thread.regs = childregs;
- /* 64s sets this in ret_from_fork */
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
- childregs->gpr[3] = 0; /* Result from fork() */
- if (clone_flags & CLONE_SETTLS) {
- if (!is_32bit_task())
- childregs->gpr[13] = tls;
+ struct pt_regs *childregs;
+
+ /* Create initial user return stack frame. */
+ sp -= STACK_USER_INT_FRAME_SIZE;
+ *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
+
+ childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
+
+ if (unlikely(args->fn)) {
+ /*
+ * A user space thread, but it first runs a kernel
+ * thread, and then returns as though it had called
+ * execve rather than fork, so user regs will be
+ * filled in (e.g., by kernel_execve()).
+ */
+ ((unsigned long *)sp)[0] = 0;
+ memset(childregs, 0, sizeof(struct pt_regs));
+#ifdef CONFIG_PPC64
+ childregs->softe = IRQS_ENABLED;
+#endif
+ f = ret_from_kernel_user_thread;
+ } else {
+ struct pt_regs *regs = current_pt_regs();
+ unsigned long clone_flags = args->flags;
+ unsigned long usp = args->stack;
+
+ /* Copy registers */
+ *childregs = *regs;
+ if (usp)
+ childregs->gpr[1] = usp;
+ ((unsigned long *)sp)[0] = childregs->gpr[1];
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON_ONCE(childregs->softe != IRQS_ENABLED);
+#endif
+ if (clone_flags & CLONE_SETTLS) {
+ unsigned long tls = args->tls;
+
+ if (!is_32bit_task())
+ childregs->gpr[13] = tls;
+ else
+ childregs->gpr[2] = tls;
+ }
+
+ if (trap_is_scv(regs))
+ f = ret_from_fork_scv;
else
- childregs->gpr[2] = tls;
+ f = ret_from_fork;
}
- if (trap_is_scv(regs))
- f = ret_from_fork_scv;
- else
- f = ret_from_fork;
+ childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
+ p->thread.regs = childregs;
}
- childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
/*
* The way this works is that at some point in the future
@@ -1813,6 +1828,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
sp -= STACK_SWITCH_FRAME_SIZE;
((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
+ kregs->nip = ppc_function_entry(f);
+ if (unlikely(args->fn)) {
+ /*
+ * Put kthread fn, arg parameters in non-volatile GPRs in the
+ * switch frame so they are loaded by _switch before it returns
+ * to start_kernel_thread or ret_from_kernel_user_thread.
+ */
+ kregs->gpr[14] = ppc_function_entry((void *)args->fn);
+ kregs->gpr[15] = (unsigned long)args->fn_arg;
+ }
p->thread.ksp = sp;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
@@ -1840,22 +1865,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
p->thread.dscr_inherit = current->thread.dscr_inherit;
p->thread.dscr = mfspr(SPRN_DSCR);
}
- if (cpu_has_feature(CPU_FTR_HAS_PPR))
- childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
- /*
- * Run with the current AMR value of the kernel
- */
-#ifdef CONFIG_PPC_PKEY
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
- kregs->amr = AMR_KUAP_BLOCKED;
-
- if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP))
- kregs->iamr = AMR_KUEP_BLOCKED;
-#endif
- kregs->nip = ppc_function_entry(f);
return 0;
}
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 2087a785f05f..5fff0d04b23f 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset,
static int ppr_get(struct task_struct *target, const struct user_regset *regset,
struct membuf to)
{
+ if (!target->thread.regs)
+ return -EINVAL;
+
return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64));
}
@@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count, const void *kbuf,
const void __user *ubuf)
{
+ if (!target->thread.regs)
+ return -EINVAL;
+
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.regs->ppr, 0, sizeof(u64));
}
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 31175b34856a..c087eeee320f 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/kconfig.h>
#include <linux/kernel.h>
+#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -68,7 +69,7 @@ struct rtas_filter {
* functions are believed to have no users on
* ppc64le, and we want to keep it that way. It does
* not make sense for this to be set when @filter
- * is false.
+ * is NULL.
*/
struct rtas_function {
s32 token;
@@ -453,6 +454,16 @@ static struct rtas_function rtas_function_table[] __ro_after_init = {
},
};
+/*
+ * Nearly all RTAS calls need to be serialized. All uses of the
+ * default rtas_args block must hold rtas_lock.
+ *
+ * Exceptions to the RTAS serialization requirement (e.g. stop-self)
+ * must use a separate rtas_args structure.
+ */
+static DEFINE_RAW_SPINLOCK(rtas_lock);
+static struct rtas_args rtas_args;
+
/**
* rtas_function_token() - RTAS function token lookup.
* @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
@@ -560,6 +571,9 @@ static void __do_enter_rtas(struct rtas_args *args)
static void __do_enter_rtas_trace(struct rtas_args *args)
{
const char *name = NULL;
+
+ if (args == &rtas_args)
+ lockdep_assert_held(&rtas_lock);
/*
* If the tracepoints that consume the function name aren't
* active, avoid the lookup.
@@ -619,16 +633,6 @@ static void do_enter_rtas(struct rtas_args *args)
struct rtas_t rtas;
-/*
- * Nearly all RTAS calls need to be serialized. All uses of the
- * default rtas_args block must hold rtas_lock.
- *
- * Exceptions to the RTAS serialization requirement (e.g. stop-self)
- * must use a separate rtas_args structure.
- */
-static DEFINE_RAW_SPINLOCK(rtas_lock);
-static struct rtas_args rtas_args;
-
DEFINE_SPINLOCK(rtas_data_buf_lock);
EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
@@ -951,6 +955,8 @@ static char *__fetch_rtas_last_error(char *altbuf)
u32 bufsz;
char *buf = NULL;
+ lockdep_assert_held(&rtas_lock);
+
if (token == -1)
return NULL;
@@ -981,7 +987,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
}
if (buf)
- memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
+ memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
}
return buf;
@@ -1016,6 +1022,23 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
do_enter_rtas(args);
}
+/**
+ * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
+ * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
+ * constraints.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @...: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_function_token().
+ *
+ * This function is similar to rtas_call(), but must be used with a
+ * limited set of RTAS calls specifically exempted from the general
+ * requirement that only one RTAS call may be in progress at any
+ * time. Examples include stop-self and ibm,nmi-interlock.
+ */
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
{
va_list list;
@@ -1091,6 +1114,7 @@ static bool token_is_restricted_errinjct(s32 token)
*/
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
+ struct pin_cookie cookie;
va_list list;
int i;
unsigned long flags;
@@ -1117,6 +1141,8 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
}
raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
+
/* We use the global rtas args buffer */
args = &rtas_args;
@@ -1134,6 +1160,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
outputs[i] = be32_to_cpu(args->rets[i + 1]);
ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
+ lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
@@ -1765,6 +1792,7 @@ err:
/* We assume to be passed big endian arguments */
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
{
+ struct pin_cookie cookie;
struct rtas_args args;
unsigned long flags;
char *buff_copy, *errbuf = NULL;
@@ -1833,6 +1861,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
buff_copy = get_errorlog_buffer();
raw_spin_lock_irqsave(&rtas_lock, flags);
+ cookie = lockdep_pin_lock(&rtas_lock);
rtas_args = args;
do_enter_rtas(&rtas_args);
@@ -1843,6 +1872,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
if (be32_to_cpu(args.rets[0]) == -1)
errbuf = __fetch_rtas_last_error(buff_copy);
+ lockdep_unpin_lock(&rtas_lock, cookie);
raw_spin_unlock_irqrestore(&rtas_lock, flags);
if (buff_copy) {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index e77734e5a127..d2a446216444 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -630,13 +630,14 @@ static __init void probe_machine(void)
for (machine_id = &__machine_desc_start;
machine_id < &__machine_desc_end;
machine_id++) {
- DBG(" %s ...", machine_id->name);
+ DBG(" %s ...\n", machine_id->name);
+ if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible))
+ continue;
memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls));
- if (ppc_md.probe()) {
- DBG(" match !\n");
- break;
- }
- DBG("\n");
+ if (ppc_md.probe && !ppc_md.probe())
+ continue;
+ DBG(" %s match !\n", machine_id->name);
+ break;
}
/* What can we do if we didn't find ? */
if (machine_id >= &__machine_desc_end) {
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index b2e0d3ce4261..246201d0d879 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -480,7 +480,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-void panic_smp_self_stop(void)
+void __noreturn panic_smp_self_stop(void)
{
hard_irq_disable();
spin_begin();
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 6b90f10a6c81..265801a3e94c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -61,6 +61,8 @@
#include <asm/kup.h>
#include <asm/fadump.h>
+#include <trace/events/ipi.h>
+
#ifdef DEBUG
#include <asm/udbg.h>
#define DBG(fmt...) udbg_printf(fmt)
@@ -364,12 +366,12 @@ static inline void do_message_pass(int cpu, int msg)
#endif
}
-void smp_send_reschedule(int cpu)
+void arch_smp_send_reschedule(int cpu)
{
if (likely(smp_ops))
do_message_pass(cpu, PPC_MSG_RESCHEDULE);
}
-EXPORT_SYMBOL_GPL(smp_send_reschedule);
+EXPORT_SYMBOL_GPL(arch_smp_send_reschedule);
void arch_send_call_function_single_ipi(int cpu)
{
@@ -1611,7 +1613,7 @@ void start_secondary(void *unused)
if (IS_ENABLED(CONFIG_PPC32))
setup_kup();
- mmgrab(&init_mm);
+ mmgrab_lazy_tlb(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
@@ -1752,7 +1754,7 @@ void __cpu_die(unsigned int cpu)
smp_ops->cpu_die(cpu);
}
-void arch_cpu_idle_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
/*
* Disable on the down path. This will be re-enabled by
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index ef9a61718940..0f39a6b84132 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -217,13 +217,18 @@ static DEVICE_ATTR(dscr_default, 0600,
static void __init sysfs_create_dscr_default(void)
{
if (cpu_has_feature(CPU_FTR_DSCR)) {
+ struct device *dev_root;
int cpu;
dscr_default = spr_default_dscr;
for_each_possible_cpu(cpu)
paca_ptrs[cpu]->dscr_default = dscr_default;
- device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default);
+ dev_root = bus_get_dev_root(&cpu_subsys);
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_dscr_default);
+ put_device(dev_root);
+ }
}
}
#endif /* CONFIG_PPC64 */
@@ -746,7 +751,12 @@ static DEVICE_ATTR(svm, 0444, show_svm, NULL);
static void __init create_svm_file(void)
{
- device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+ struct device *dev_root = bus_get_dev_root(&cpu_subsys);
+
+ if (dev_root) {
+ device_create_file(dev_root, &dev_attr_svm);
+ put_device(dev_root);
+ }
}
#else
static void __init create_svm_file(void)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 9d8665910350..df20cf201f74 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -887,7 +887,11 @@ void __init time_init(void)
unsigned shift;
/* Normal PowerPC with timebase register */
- ppc_md.calibrate_decr();
+ if (ppc_md.calibrate_decr)
+ ppc_md.calibrate_decr();
+ else
+ generic_calibrate_decr();
+
printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n",
ppc_tb_freq / 1000000, ppc_tb_freq % 1000000);
printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n",
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index 7b85c3b460a3..a47f30373423 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -194,6 +194,8 @@ __ftrace_make_nop(struct module *mod,
* get corrupted.
*
* Use a b +8 to jump over the load.
+ * XXX: could make PCREL depend on MPROFILE_KERNEL
+ * XXX: check PCREL && MPROFILE_KERNEL calling sequence
*/
if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32))
pop = ppc_inst(PPC_RAW_NOP());
@@ -725,6 +727,15 @@ int __init ftrace_dyn_arch_init(void)
{
int i;
unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ u32 stub_insns[] = {
+ /* pla r12,addr */
+ PPC_PREFIX_MLS | __PPC_PRFX_R(1),
+ PPC_INST_PADDI | ___PPC_RT(_R12),
+ PPC_RAW_MTCTR(_R12),
+ PPC_RAW_BCTR()
+ };
+#else
u32 stub_insns[] = {
PPC_RAW_LD(_R12, _R13, PACATOC),
PPC_RAW_ADDIS(_R12, _R12, 0),
@@ -732,6 +743,8 @@ int __init ftrace_dyn_arch_init(void)
PPC_RAW_MTCTR(_R12),
PPC_RAW_BCTR()
};
+#endif
+
unsigned long addr;
long reladdr;
@@ -740,19 +753,36 @@ int __init ftrace_dyn_arch_init(void)
else
addr = ppc_global_function_entry((void *)ftrace_caller);
- reladdr = addr - kernel_toc_addr();
+ if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+ for (i = 0; i < 2; i++) {
+ reladdr = addr - (unsigned long)tramp[i];
- if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) {
- pr_err("Address of %ps out of range of kernel_toc.\n",
+ if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) {
+ pr_err("Address of %ps out of range of pcrel address.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][0] |= IMM_H18(reladdr);
+ tramp[i][1] |= IMM_L(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+ } else {
+ reladdr = addr - kernel_toc_addr();
+
+ if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
(void *)addr);
- return -1;
- }
+ return -1;
+ }
- for (i = 0; i < 2; i++) {
- memcpy(tramp[i], stub_insns, sizeof(stub_insns));
- tramp[i][1] |= PPC_HA(reladdr);
- tramp[i][2] |= PPC_LO(reladdr);
- add_ftrace_tramp((unsigned long)tramp[i]);
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
}
return 0;
diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index 66f723f53be2..4c3f34485f08 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -2,7 +2,7 @@
# List of files in the vdso, has to be asm only for now
-ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
+# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index 0c4ecc8fec5a..48fc6658053a 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -38,7 +38,11 @@
.else
addi r4, r5, VDSO_DATA_OFFSET
.endif
- bl DOTSYM(\funct)
+#ifdef __powerpc64__
+ bl CFUNC(DOTSYM(\funct))
+#else
+ bl \funct
+#endif
PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
#ifdef __powerpc64__
PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ffe5d90abe17..fcc0ad6d9c7b 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -177,10 +177,16 @@ fpone:
fphalf:
.quad 0x3fe0000000000000 /* 0.5 */
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name) \
+ pla r11,name@pcrel; \
+ lfd fr,0(r11)
+#else
#define LDCONST(fr, name) \
addis r11,r2,name@toc@ha; \
lfd fr,name@toc@l(r11)
#endif
+#endif
.text
/*
* Internal routine to enable floating point and set FPSCR to 0.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ee86753e444e..13614f0b269c 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -169,12 +169,18 @@ SECTIONS
}
#else /* CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
*(.toc1)
}
+#endif
.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+ *(.got)
+#else
*(.got .toc)
+#endif
}
SOFT_MASK_TABLE(8)