Diffstat (limited to 'arch/powerpc/kernel')
33 files changed, 1054 insertions, 439 deletions
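Most of the mechanical churn below wraps "bl" calls to C functions in CFUNC(). The macro is used but not defined in this diff; assuming the usual arch/powerpc/include/asm/ppc_asm.h definition, it marks the callee @notoc on CONFIG_PPC_KERNEL_PCREL kernels (so the linker knows no TOC save/restore stub is needed) and reduces to the bare symbol otherwise:

/*
 * Assumed sketch of CFUNC(), for reference while reading the hunks
 * below; the actual definition lives in asm/ppc_asm.h and is not
 * part of this diff.
 */
#ifdef CONFIG_PPC_KERNEL_PCREL
#define CFUNC(name)	name@notoc
#else
#define CFUNC(name)	name
#endif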
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d24a59a98c0c..9f14d95b8b32 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -185,7 +185,9 @@ int main(void) offsetof(struct task_struct, thread_info)); OFFSET(PACASAVEDMSR, paca_struct, saved_msr); OFFSET(PACAR1, paca_struct, saved_r1); +#ifndef CONFIG_PPC_KERNEL_PCREL OFFSET(PACATOC, paca_struct, kernel_toc); +#endif OFFSET(PACAKBASE, paca_struct, kernelbase); OFFSET(PACAKMSR, paca_struct, kernel_msr); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 2769889219bf..19e46fd623b0 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -235,7 +235,7 @@ int __init btext_find_display(int allow_nonstdout) return rc; for_each_node_by_type(np, "display") { - if (of_get_property(np, "linux,opened", NULL)) { + if (of_property_read_bool(np, "linux,opened")) { printk("trying %pOF ...\n", np); rc = btext_initialize(np); printk("result: %d\n", rc); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 5604c9a1ac22..47f0dd9a45ad 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -183,12 +183,11 @@ syscall_exit_finish: ret_from_fork: REST_NVGPRS(r1) bl schedule_tail - li r3,0 + li r3,0 /* fork() return value */ b ret_from_syscall - .globl ret_from_kernel_thread -ret_from_kernel_thread: - REST_NVGPRS(r1) + .globl ret_from_kernel_user_thread +ret_from_kernel_user_thread: bl schedule_tail mtctr r14 mr r3,r15 @@ -197,6 +196,22 @@ ret_from_kernel_thread: li r3,0 b ret_from_syscall + .globl start_kernel_thread +start_kernel_thread: + bl schedule_tail + mtctr r14 + mr r3,r15 + PPC440EP_ERR42 + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. + */ +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 + + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6441a1ba57ac..c33c8ebf8641 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1075,7 +1075,7 @@ EXC_COMMON_BEGIN(system_reset_common) __GEN_COMMON_BODY system_reset addi r3,r1,STACK_INT_FRAME_REGS - bl system_reset_exception + bl CFUNC(system_reset_exception) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 @@ -1223,9 +1223,9 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) addi r3,r1,STACK_INT_FRAME_REGS BEGIN_FTR_SECTION - bl machine_check_early_boot + bl CFUNC(machine_check_early_boot) END_FTR_SECTION(0, 1) // nop out after boot - bl machine_check_early + bl CFUNC(machine_check_early) std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) @@ -1286,7 +1286,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) * Queue up the MCE event so that we can log it later, while * returning from kernel or opal call. */ - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) MACHINE_CHECK_HANDLER_WINDUP RFI_TO_KERNEL @@ -1312,7 +1312,7 @@ EXC_COMMON_BEGIN(machine_check_common) */ GEN_COMMON machine_check addi r3,r1,STACK_INT_FRAME_REGS - bl machine_check_exception_async + bl CFUNC(machine_check_exception_async) b interrupt_return_srr @@ -1322,7 +1322,7 @@ EXC_COMMON_BEGIN(machine_check_common) * done. 
Queue the event then call the idle code to do the wake up. */ EXC_COMMON_BEGIN(machine_check_idle_common) - bl machine_check_queue_event + bl CFUNC(machine_check_queue_event) /* * GPR-loss wakeups are relatively straightforward, because the @@ -1361,7 +1361,7 @@ EXC_COMMON_BEGIN(unrecoverable_mce) BEGIN_FTR_SECTION li r10,0 /* clear MSR_RI */ mtmsrd r10,1 - bl disable_machine_check + bl CFUNC(disable_machine_check) END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) ld r10,PACAKMSR(r13) li r3,MSR_ME @@ -1378,14 +1378,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * the early handler which is a true NMI. */ addi r3,r1,STACK_INT_FRAME_REGS - bl machine_check_exception + bl CFUNC(machine_check_exception) /* * We will not reach here. Even if we did, there is no way out. * Call unrecoverable_exception and die. */ addi r3,r1,STACK_INT_FRAME_REGS - bl unrecoverable_exception + bl CFUNC(unrecoverable_exception) b . @@ -1440,16 +1440,16 @@ EXC_COMMON_BEGIN(data_access_common) bne- 1f #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr -1: bl do_break +1: bl CFUNC(do_break) /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. @@ -1493,7 +1493,7 @@ EXC_COMMON_BEGIN(data_access_slb_common) BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_INT_FRAME_REGS - bl do_slb_fault + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1507,7 +1507,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #endif std r3,RESULT(r1) addi r3,r1,STACK_INT_FRAME_REGS - bl do_bad_segment_interrupt + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1541,12 +1541,12 @@ EXC_COMMON_BEGIN(instruction_access_common) addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_64S_HASH_MMU BEGIN_MMU_FTR_SECTION - bl do_hash_fault + bl CFUNC(do_hash_fault) MMU_FTR_SECTION_ELSE - bl do_page_fault + bl CFUNC(do_page_fault) ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #else - bl do_page_fault + bl CFUNC(do_page_fault) #endif b interrupt_return_srr @@ -1581,7 +1581,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common) BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ addi r3,r1,STACK_INT_FRAME_REGS - bl do_slb_fault + bl CFUNC(do_slb_fault) cmpdi r3,0 bne- 1f b fast_interrupt_return_srr @@ -1595,7 +1595,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) #endif std r3,RESULT(r1) addi r3,r1,STACK_INT_FRAME_REGS - bl do_bad_segment_interrupt + bl CFUNC(do_bad_segment_interrupt) b interrupt_return_srr @@ -1649,7 +1649,7 @@ EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100) EXC_COMMON_BEGIN(hardware_interrupt_common) GEN_COMMON hardware_interrupt addi r3,r1,STACK_INT_FRAME_REGS - bl do_IRQ + bl CFUNC(do_IRQ) BEGIN_FTR_SECTION b interrupt_return_hsrr FTR_SECTION_ELSE @@ -1679,7 +1679,7 @@ EXC_VIRT_END(alignment, 0x4600, 0x100) EXC_COMMON_BEGIN(alignment_common) GEN_COMMON alignment addi r3,r1,STACK_INT_FRAME_REGS - bl alignment_exception + bl CFUNC(alignment_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -1745,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common) .Ldo_program_check: addi r3,r1,STACK_INT_FRAME_REGS - bl program_check_exception + bl CFUNC(program_check_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may 
change GPRs */ b interrupt_return_srr @@ -1777,7 +1777,7 @@ EXC_COMMON_BEGIN(fp_unavailable_common) GEN_COMMON fp_unavailable bne 1f /* if from user, just load it up */ addi r3,r1,STACK_INT_FRAME_REGS - bl kernel_fp_unavailable_exception + bl CFUNC(kernel_fp_unavailable_exception) 0: trap EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0 1: @@ -1790,12 +1790,12 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif - bl load_up_fpu + bl CFUNC(load_up_fpu) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl fp_unavailable_tm + bl CFUNC(fp_unavailable_tm) b interrupt_return_srr #endif @@ -1839,7 +1839,7 @@ EXC_VIRT_END(decrementer, 0x4900, 0x80) EXC_COMMON_BEGIN(decrementer_common) GEN_COMMON decrementer addi r3,r1,STACK_INT_FRAME_REGS - bl timer_interrupt + bl CFUNC(timer_interrupt) b interrupt_return_srr @@ -1925,9 +1925,9 @@ EXC_COMMON_BEGIN(doorbell_super_common) GEN_COMMON doorbell_super addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_srr @@ -2091,7 +2091,7 @@ EXC_VIRT_END(single_step, 0x4d00, 0x100) EXC_COMMON_BEGIN(single_step_common) GEN_COMMON single_step addi r3,r1,STACK_INT_FRAME_REGS - bl single_step_exception + bl CFUNC(single_step_exception) b interrupt_return_srr @@ -2126,9 +2126,9 @@ EXC_COMMON_BEGIN(h_data_storage_common) GEN_COMMON h_data_storage addi r3,r1,STACK_INT_FRAME_REGS BEGIN_MMU_FTR_SECTION - bl do_bad_page_fault_segv + bl CFUNC(do_bad_page_fault_segv) MMU_FTR_SECTION_ELSE - bl unknown_exception + bl CFUNC(unknown_exception) ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b interrupt_return_hsrr @@ -2154,7 +2154,7 @@ EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20) EXC_COMMON_BEGIN(h_instr_storage_common) GEN_COMMON h_instr_storage addi r3,r1,STACK_INT_FRAME_REGS - bl unknown_exception + bl CFUNC(unknown_exception) b interrupt_return_hsrr @@ -2177,7 +2177,7 @@ EXC_VIRT_END(emulation_assist, 0x4e40, 0x20) EXC_COMMON_BEGIN(emulation_assist_common) GEN_COMMON emulation_assist addi r3,r1,STACK_INT_FRAME_REGS - bl emulation_assist_interrupt + bl CFUNC(emulation_assist_interrupt) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_hsrr @@ -2237,7 +2237,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) __GEN_COMMON_BODY hmi_exception_early addi r3,r1,STACK_INT_FRAME_REGS - bl hmi_exception_realmode + bl CFUNC(hmi_exception_realmode) cmpdi cr0,r3,0 bne 1f @@ -2255,7 +2255,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common) EXC_COMMON_BEGIN(hmi_exception_common) GEN_COMMON hmi_exception addi r3,r1,STACK_INT_FRAME_REGS - bl handle_hmi_exception + bl CFUNC(handle_hmi_exception) b interrupt_return_hsrr @@ -2290,9 +2290,9 @@ EXC_COMMON_BEGIN(h_doorbell_common) GEN_COMMON h_doorbell addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_PPC_DOORBELL - bl doorbell_exception + bl CFUNC(doorbell_exception) #else - bl unknown_async_exception + bl CFUNC(unknown_async_exception) #endif b interrupt_return_hsrr @@ -2325,7 +2325,7 @@ EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20) EXC_COMMON_BEGIN(h_virt_irq_common) GEN_COMMON h_virt_irq addi r3,r1,STACK_INT_FRAME_REGS - bl do_IRQ + bl CFUNC(do_IRQ) b interrupt_return_hsrr @@ -2374,10 +2374,10 @@ EXC_COMMON_BEGIN(performance_monitor_common) lbz r4,PACAIRQSOFTMASK(r13) cmpdi r4,IRQS_ENABLED bne 1f - bl performance_monitor_exception_async + bl 
CFUNC(performance_monitor_exception_async) b interrupt_return_srr 1: - bl performance_monitor_exception_nmi + bl CFUNC(performance_monitor_exception_nmi) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 mtmsrd r9,1 @@ -2421,19 +2421,19 @@ BEGIN_FTR_SECTION bne- 2f END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69) #endif - bl load_up_altivec + bl CFUNC(load_up_altivec) b fast_interrupt_return_srr #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl altivec_unavailable_tm + bl CFUNC(altivec_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif addi r3,r1,STACK_INT_FRAME_REGS - bl altivec_unavailable_exception + bl CFUNC(altivec_unavailable_exception) b interrupt_return_srr @@ -2475,14 +2475,14 @@ BEGIN_FTR_SECTION #ifdef CONFIG_PPC_TRANSACTIONAL_MEM 2: /* User process was in a transaction */ addi r3,r1,STACK_INT_FRAME_REGS - bl vsx_unavailable_tm + bl CFUNC(vsx_unavailable_tm) b interrupt_return_srr #endif 1: END_FTR_SECTION_IFSET(CPU_FTR_VSX) #endif addi r3,r1,STACK_INT_FRAME_REGS - bl vsx_unavailable_exception + bl CFUNC(vsx_unavailable_exception) b interrupt_return_srr @@ -2509,7 +2509,7 @@ EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20) EXC_COMMON_BEGIN(facility_unavailable_common) GEN_COMMON facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS - bl facility_unavailable_exception + bl CFUNC(facility_unavailable_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ b interrupt_return_srr @@ -2537,7 +2537,7 @@ EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20) EXC_COMMON_BEGIN(h_facility_unavailable_common) GEN_COMMON h_facility_unavailable addi r3,r1,STACK_INT_FRAME_REGS - bl facility_unavailable_exception + bl CFUNC(facility_unavailable_exception) /* XXX Shouldn't be necessary in practice */ HANDLER_RESTORE_NVGPRS() b interrupt_return_hsrr @@ -2568,7 +2568,7 @@ EXC_VIRT_NONE(0x5200, 0x100) EXC_COMMON_BEGIN(cbe_system_error_common) GEN_COMMON cbe_system_error addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_system_error_exception + bl CFUNC(cbe_system_error_exception) b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ @@ -2599,7 +2599,7 @@ EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100) EXC_COMMON_BEGIN(instruction_breakpoint_common) GEN_COMMON instruction_breakpoint addi r3,r1,STACK_INT_FRAME_REGS - bl instruction_breakpoint_exception + bl CFUNC(instruction_breakpoint_exception) b interrupt_return_srr @@ -2721,7 +2721,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXC_COMMON_BEGIN(denorm_exception_common) GEN_COMMON denorm_exception addi r3,r1,STACK_INT_FRAME_REGS - bl unknown_exception + bl CFUNC(unknown_exception) b interrupt_return_hsrr @@ -2738,7 +2738,7 @@ EXC_VIRT_NONE(0x5600, 0x100) EXC_COMMON_BEGIN(cbe_maintenance_common) GEN_COMMON cbe_maintenance addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_maintenance_exception + bl CFUNC(cbe_maintenance_exception) b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ @@ -2764,10 +2764,10 @@ EXC_COMMON_BEGIN(altivec_assist_common) GEN_COMMON altivec_assist addi r3,r1,STACK_INT_FRAME_REGS #ifdef CONFIG_ALTIVEC - bl altivec_assist_exception + bl CFUNC(altivec_assist_exception) HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */ #else - bl unknown_exception + bl CFUNC(unknown_exception) #endif b interrupt_return_srr @@ -2785,7 +2785,7 @@ EXC_VIRT_NONE(0x5800, 0x100) EXC_COMMON_BEGIN(cbe_thermal_common) GEN_COMMON cbe_thermal addi r3,r1,STACK_INT_FRAME_REGS - bl cbe_thermal_exception + bl 
CFUNC(cbe_thermal_exception) b interrupt_return_hsrr #else /* CONFIG_CBE_RAS */ @@ -2818,7 +2818,7 @@ EXC_COMMON_BEGIN(soft_nmi_common) __GEN_COMMON_BODY soft_nmi addi r3,r1,STACK_INT_FRAME_REGS - bl soft_nmi_interrupt + bl CFUNC(soft_nmi_interrupt) /* Clear MSR_RI before setting SRR0 and SRR1. */ li r9,0 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 1febb56ebaeb..f132d8704263 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -76,6 +76,13 @@ * 2. The kernel is entered at __start */ +/* + * boot_from_prom and prom_init run at the physical address. Everything + * after prom and kexec entry run at the virtual address (PAGE_OFFSET). + * Secondaries run at the virtual address from generic_secondary_common_init + * onward. + */ + OPEN_FIXED_SECTION(first_256B, 0x0, 0x100) USE_FIXED_SECTION(first_256B) /* @@ -303,13 +310,11 @@ _GLOBAL(fsl_secondary_thread_init) /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - /* Book3E initialization */ mr r3,r24 bl book3e_secondary_thread_init + bl relative_toc + b generic_secondary_common_init #endif /* CONFIG_PPC_BOOK3E_64 */ @@ -325,22 +330,24 @@ _GLOBAL(fsl_secondary_thread_init) */ _GLOBAL(generic_secondary_smp_init) FIXUP_ENDIAN + + li r13,0 + + /* Poison TOC */ + li r2,-1 + mr r24,r3 mr r25,r4 /* turn on 64-bit mode */ bl enable_64b_mode - /* get a valid TOC pointer, wherever we're mapped at */ - bl relative_toc - tovirt(r2,r2) - #ifdef CONFIG_PPC_BOOK3E_64 /* Book3E initialization */ mr r3,r24 mr r4,r25 bl book3e_secondary_core_init - + /* Now NIA and r2 are relocated to PAGE_OFFSET if not already */ /* * After common core init has finished, check if the current thread is the * one we wanted to boot. If not, start the specified thread and stop the @@ -378,6 +385,16 @@ _GLOBAL(generic_secondary_smp_init) 10: b 10b 20: +#else + /* Now the MMU is off, can branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: + bl relative_toc #endif generic_secondary_common_init: @@ -492,6 +509,8 @@ SYM_FUNC_START_LOCAL(start_initialization_book3s) /* Switch off MMU if not already off */ bl __mmu_off + /* Now the MMU is off, can return to our PAGE_OFFSET address */ + tovirt(r25,r25) mtlr r25 blr SYM_FUNC_END(start_initialization_book3s) @@ -515,14 +534,8 @@ __start_initialization_multiplatform: /* Zero r13 (paca) so early program check / mce don't use it */ li r13,0 - /* Get TOC pointer (current runtime address) */ - bl relative_toc - - /* find out where we are now */ - bcl 20,31,$+4 -0: mflr r26 /* r26 = runtime addr here */ - addis r26,r26,(_stext - 0b)@ha - addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Poison TOC */ + li r2,-1 /* * Are we booted from a PROM Of-type client-interface ? @@ -540,16 +553,41 @@ __start_initialization_multiplatform: mr r29,r9 #endif + /* Get TOC pointer (current runtime address) */ + bl relative_toc + + /* These functions return to the virtual (PAGE_OFFSET) address */ #ifdef CONFIG_PPC_BOOK3E_64 bl start_initialization_book3e #else bl start_initialization_book3s #endif /* CONFIG_PPC_BOOK3E_64 */ + + /* Get TOC pointer, virtual */ + bl relative_toc + + /* find out where we are now */ + + /* OPAL doesn't pass base address in r4, have to derive it. 
*/ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + b __after_prom_start __REF __boot_from_prom: #ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE + /* Get TOC pointer, non-virtual */ + bl relative_toc + + /* find out where we are now */ + bcl 20,31,$+4 +0: mflr r26 /* r26 = runtime addr here */ + addis r26,r26,(_stext - 0b)@ha + addi r26,r26,(_stext - 0b)@l /* current runtime base addr */ + /* Save parameters */ mr r31,r3 mr r30,r4 @@ -579,7 +617,7 @@ __boot_from_prom: /* Do all of the interaction with OF client interface */ mr r8,r26 - bl prom_init + bl CFUNC(prom_init) #endif /* #CONFIG_PPC_OF_BOOT_TRAMPOLINE */ /* We never return. We also hit that trap if trying to boot @@ -590,18 +628,11 @@ __boot_from_prom: __after_prom_start: #ifdef CONFIG_RELOCATABLE /* process relocations for the final address of the kernel */ - lis r25,PAGE_OFFSET@highest /* compute virtual base of kernel */ - sldi r25,r25,32 -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) -#if defined(CONFIG_PPC_BOOK3E_64) - tophys(r26,r26) -#endif cmplwi cr0,r7,1 /* flagged to stay where we are ? */ - bne 1f - add r25,r25,r26 + mr r25,r26 /* then use current kernel base */ + beq 1f + LOAD_REG_IMMEDIATE(r25, PAGE_OFFSET) /* else use static kernel base */ 1: mr r3,r25 bl relocate #if defined(CONFIG_PPC_BOOK3E_64) @@ -617,14 +648,8 @@ __after_prom_start: * * Note: This process overwrites the OF exception vectors. */ - li r3,0 /* target addr */ -#ifdef CONFIG_PPC_BOOK3E_64 - tovirt(r3,r3) /* on booke, we already run at PAGE_OFFSET */ -#endif + LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) mr. r4,r26 /* In some cases the loader may */ -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r4,r4) -#endif beq 9f /* have already put us at zero */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ @@ -635,9 +660,6 @@ __after_prom_start: * variable __run_at_load, if it is set the kernel is treated as relocatable * kernel, otherwise it will be moved to PHYSICAL_START */ -#if defined(CONFIG_PPC_BOOK3E_64) - tovirt(r26,r26) /* on booke, we already run at PAGE_OFFSET */ -#endif lwz r7,(FIXED_SYMBOL_ABS_ADDR(__run_at_load))(r26) cmplwi cr0,r7,1 bne 3f @@ -756,9 +778,15 @@ _GLOBAL(pmac_secondary_start) sync slbia - /* get TOC pointer (real address) */ + /* Branch to our PAGE_OFFSET address */ + bcl 20,31,$+4 +1: mflr r11 + addi r11,r11,(2f - 1b) + tovirt(r11, r11) + mtctr r11 + bctr +2: bl relative_toc - tovirt(r2,r2) /* Copy some CPU settings from CPU 0 */ bl __restore_cpu_ppc970 @@ -817,7 +845,7 @@ __secondary_start: * can turn it on below. This is a call to C, which is OK, we're still * running on the emergency stack. */ - bl early_setup_secondary + bl CFUNC(early_setup_secondary) /* * The primary has initialized our kernel stack for us in the paca, grab @@ -856,7 +884,7 @@ start_secondary_prolog: LOAD_PACA_TOC() li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . /* * Reset stack pointer and call start_secondary @@ -867,7 +895,7 @@ _GLOBAL(start_secondary_resume) ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ li r3,0 std r3,0(r1) /* Zero the stack frame pointer */ - bl start_secondary + bl CFUNC(start_secondary) b . #endif @@ -897,10 +925,15 @@ SYM_FUNC_END(enable_64b_mode) * TOC in -mcmodel=medium mode. 
After we relocate to 0 but before * the MMU is on we need our TOC to be a virtual address otherwise * these pointers will be real addresses which may get stored and - * accessed later with the MMU on. We use tovirt() at the call - * sites to handle this. + * accessed later with the MMU on. We branch to the virtual address + * while still in real mode then call relative_toc again to handle + * this. */ _GLOBAL(relative_toc) +#ifdef CONFIG_PPC_KERNEL_PCREL + tdnei r2,-1 + blr +#else mflr r0 bcl 20,31,$+4 0: mflr r11 @@ -911,15 +944,15 @@ _GLOBAL(relative_toc) .balign 8 p_toc: .8byte .TOC. - 0b +#endif /* * This is where the main kernel code starts. */ __REF start_here_multiplatform: - /* set up the TOC */ - bl relative_toc - tovirt(r2,r2) + /* Adjust TOC for moved kernel. Could adjust when moving it instead. */ + bl relative_toc /* Clear out the BSS. It may have been done in prom_init, * already but that's irrelevant since prom_init will soon @@ -972,7 +1005,7 @@ start_here_multiplatform: */ #ifdef CONFIG_KASAN - bl kasan_early_init + bl CFUNC(kasan_early_init) #endif /* Restore parameters passed from prom_init/kexec */ mr r3,r31 @@ -1005,7 +1038,7 @@ start_here_common: stb r0,PACAIRQHAPPENED(r13) /* Generic kernel entry */ - bl start_kernel + bl CFUNC(start_kernel) /* Not reached */ 0: trap diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 37d43c172676..b6b5b01a173c 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -5,6 +5,7 @@ #include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */ #include <asm/kvm_asm.h> #include <asm/kvm_booke_hv_asm.h> +#include <asm/thread_info.h> /* for THREAD_SHIFT */ #ifdef __ASSEMBLY__ diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index b9a725abc596..b1c0418b25c8 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -107,19 +107,11 @@ static struct ctl_table powersave_nap_ctl_table[] = { }, {} }; -static struct ctl_table powersave_nap_sysctl_root[] = { - { - .procname = "kernel", - .mode = 0555, - .child = powersave_nap_ctl_table, - }, - {} -}; static int __init register_powersave_nap_sysctl(void) { - register_sysctl_table(powersave_nap_sysctl_root); + register_sysctl("kernel", powersave_nap_ctl_table); return 0; } diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index 0ec1581619db..e34c72285b4e 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -95,7 +95,7 @@ static notrace void booke_load_dbcr0(void) #endif } -static void check_return_regs_valid(struct pt_regs *regs) +static notrace void check_return_regs_valid(struct pt_regs *regs) { #ifdef CONFIG_PPC_BOOK3S_64 unsigned long trap, srr0, srr1; diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index fccc34489add..bd863702d812 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -101,12 +101,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR) * state of kernel code. 
*/ SANITIZE_SYSCALL_GPRS() - bl system_call_exception + bl CFUNC(system_call_exception) .Lsyscall_vectored_\name\()_exit: addi r4,r1,STACK_INT_FRAME_REGS li r5,1 /* scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Lsyscall_vectored_\name\()_rst_start: lbz r11,PACAIRQHAPPENED(r13) @@ -185,7 +185,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart) addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_vectored_\name\()_rst_start 1: @@ -286,12 +286,12 @@ END_BTB_FLUSH_SECTION * state of kernel code. */ SANITIZE_SYSCALL_GPRS() - bl system_call_exception + bl CFUNC(system_call_exception) .Lsyscall_exit: addi r4,r1,STACK_INT_FRAME_REGS li r5,0 /* !scv */ - bl syscall_exit_prepare + bl CFUNC(syscall_exit_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ #ifdef CONFIG_PPC_BOOK3S .Lsyscall_rst_start: @@ -372,7 +372,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart) addi r4,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl syscall_exit_restart + bl CFUNC(syscall_exit_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Lsyscall_rst_start 1: @@ -401,7 +401,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr) li r3,0 /* 0 return value, no EMULATE_STACK_STORE */ bne+ .Lfast_kernel_interrupt_return_srr addi r3,r1,STACK_INT_FRAME_REGS - bl unrecoverable_exception + bl CFUNC(unrecoverable_exception) b . /* should not get here */ #else bne .Lfast_user_interrupt_return_srr @@ -419,7 +419,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()) interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user) addi r3,r1,STACK_INT_FRAME_REGS - bl interrupt_exit_user_prepare + bl CFUNC(interrupt_exit_user_prepare) #ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS cmpdi r3,0 bne- .Lrestore_nvgprs_\srr @@ -523,7 +523,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart) addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_user_restart + bl CFUNC(interrupt_exit_user_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_user_rst_start 1: @@ -536,7 +536,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr interrupt_return_\srr\()_kernel: _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel) addi r3,r1,STACK_INT_FRAME_REGS - bl interrupt_exit_kernel_prepare + bl CFUNC(interrupt_exit_kernel_prepare) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ .Linterrupt_return_\srr\()_kernel_rst_start: @@ -705,7 +705,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart) addi r3,r1,STACK_INT_FRAME_REGS li r11,IRQS_ALL_DISABLED stb r11,PACAIRQSOFTMASK(r13) - bl interrupt_exit_kernel_restart + bl CFUNC(interrupt_exit_kernel_restart) std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */ b .Linterrupt_return_\srr\()_kernel_rst_start 1: @@ -727,21 +727,20 @@ DEFINE_FIXED_SYMBOL(__end_soft_masked, text) #ifdef CONFIG_PPC_BOOK3S _GLOBAL(ret_from_fork_scv) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 /* fork() return value */ b .Lsyscall_vectored_common_exit #endif _GLOBAL(ret_from_fork) - bl schedule_tail - REST_NVGPRS(r1) + bl CFUNC(schedule_tail) + HANDLER_RESTORE_NVGPRS() li r3,0 
/* fork() return value */ b .Lsyscall_exit -_GLOBAL(ret_from_kernel_thread) - bl schedule_tail - REST_NVGPRS(r1) +_GLOBAL(ret_from_kernel_user_thread) + bl CFUNC(schedule_tail) mtctr r14 mr r3,r15 #ifdef CONFIG_PPC64_ELF_ABI_V2 @@ -749,4 +748,25 @@ _GLOBAL(ret_from_kernel_thread) #endif bctrl li r3,0 + /* + * It does not matter whether this returns via the scv or sc path + * because it returns as execve() and therefore has no calling ABI + * (i.e., it sets registers according to the exec()ed entry point). + */ b .Lsyscall_exit + +_GLOBAL(start_kernel_thread) + bl CFUNC(schedule_tail) + mtctr r14 + mr r3,r15 +#ifdef CONFIG_PPC64_ELF_ABI_V2 + mr r12,r14 +#endif + bctrl + /* + * This must not return. We actually want to BUG here, not WARN, + * because BUG will exit the process which is what the kernel thread + * should have done, which may give some hope of continuing. + */ +100: trap + EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0 diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index ee95937bdaf1..0089dd49b4cb 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -35,6 +35,7 @@ #include <asm/vio.h> #include <asm/tce.h> #include <asm/mmu_context.h> +#include <asm/ppc-pci.h> #define DBG(...) @@ -1086,7 +1087,7 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -int iommu_take_ownership(struct iommu_table *tbl) +static int iommu_take_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; int ret = 0; @@ -1118,9 +1119,8 @@ int iommu_take_ownership(struct iommu_table *tbl) return ret; } -EXPORT_SYMBOL_GPL(iommu_take_ownership); -void iommu_release_ownership(struct iommu_table *tbl) +static void iommu_release_ownership(struct iommu_table *tbl) { unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; @@ -1137,7 +1137,6 @@ void iommu_release_ownership(struct iommu_table *tbl) spin_unlock(&tbl->pools[i].lock); spin_unlock_irqrestore(&tbl->large_pool.lock, flags); } -EXPORT_SYMBOL_GPL(iommu_release_ownership); int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { @@ -1158,8 +1157,14 @@ int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) pr_debug("%s: Adding %s to iommu group %d\n", __func__, dev_name(dev), iommu_group_id(table_group->group)); - - return iommu_group_add_device(table_group->group, dev); + /* + * This is still not adding devices via the IOMMU bus notifier because + * of pcibios_init() from arch/powerpc/kernel/pci_64.c which calls + * pcibios_scan_phb() first (and this guy adds devices and triggers + * the notifier) and only then it calls pci_bus_add_devices() which + * configures DMA for buses which also creates PEs and IOMMU groups. + */ + return iommu_probe_device(dev); } EXPORT_SYMBOL_GPL(iommu_add_device); @@ -1179,4 +1184,233 @@ void iommu_del_device(struct device *dev) iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_del_device); + +/* + * A simple iommu_table_group_ops which only allows reusing the existing + * iommu_table. This handles VFIO for POWER7 or the nested KVM. + * The ops does not allow creating windows and only allows reusing the existing + * one if it matches table_group->tce32_start/tce32_size/page_shift. 
+ */ +static unsigned long spapr_tce_get_table_size(__u32 page_shift, + __u64 window_size, __u32 levels) +{ + unsigned long size; + + if (levels > 1) + return ~0U; + size = window_size >> (page_shift - 3); + return size; +} + +static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, + __u32 page_shift, __u64 window_size, __u32 levels, + struct iommu_table **ptbl) +{ + struct iommu_table *tbl = table_group->tables[0]; + + if (num > 0) + return -EPERM; + + if (tbl->it_page_shift != page_shift || + tbl->it_size != (window_size >> page_shift) || + tbl->it_indirect_levels != levels - 1) + return -EINVAL; + + *ptbl = iommu_tce_table_get(tbl); + return 0; +} + +static long spapr_tce_set_window(struct iommu_table_group *table_group, + int num, struct iommu_table *tbl) +{ + return tbl == table_group->tables[num] ? 0 : -EPERM; +} + +static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) +{ + return 0; +} + +static long spapr_tce_take_ownership(struct iommu_table_group *table_group) +{ + int i, j, rc = 0; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl || !tbl->it_map) + continue; + + rc = iommu_take_ownership(tbl); + if (!rc) + continue; + + for (j = 0; j < i; ++j) + iommu_release_ownership(table_group->tables[j]); + return rc; + } + return 0; +} + +static void spapr_tce_release_ownership(struct iommu_table_group *table_group) +{ + int i; + + for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { + struct iommu_table *tbl = table_group->tables[i]; + + if (!tbl) + continue; + + iommu_table_clear(tbl); + if (tbl->it_map) + iommu_release_ownership(tbl); + } +} + +struct iommu_table_group_ops spapr_tce_table_group_ops = { + .get_table_size = spapr_tce_get_table_size, + .create_table = spapr_tce_create_table, + .set_window = spapr_tce_set_window, + .unset_window = spapr_tce_unset_window, + .take_ownership = spapr_tce_take_ownership, + .release_ownership = spapr_tce_release_ownership, +}; + +/* + * A simple iommu_ops to allow less cruft in generic VFIO code. 
+ */ +static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom, + struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + int ret = -EINVAL; + + if (!grp) + return -ENODEV; + + table_group = iommu_group_get_iommudata(grp); + ret = table_group->ops->take_ownership(table_group); + iommu_group_put(grp); + + return ret; +} + +static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev) +{ + struct iommu_group *grp = iommu_group_get(dev); + struct iommu_table_group *table_group; + + table_group = iommu_group_get_iommudata(grp); + table_group->ops->release_ownership(table_group); +} + +static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = { + .attach_dev = spapr_tce_blocking_iommu_attach_dev, +}; + +static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap) +{ + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: + return true; + default: + break; + } + + return false; +} + +static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type) +{ + struct iommu_domain *dom; + + if (type != IOMMU_DOMAIN_BLOCKED) + return NULL; + + dom = kzalloc(sizeof(*dom), GFP_KERNEL); + if (!dom) + return NULL; + + dom->ops = &spapr_tce_blocking_domain_ops; + + return dom; +} + +static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev) +{ + struct pci_dev *pdev; + struct pci_controller *hose; + + if (!dev_is_pci(dev)) + return ERR_PTR(-EPERM); + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + return &hose->iommu; +} + +static void spapr_tce_iommu_release_device(struct device *dev) +{ +} + +static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev) +{ + struct pci_controller *hose; + struct pci_dev *pdev; + + pdev = to_pci_dev(dev); + hose = pdev->bus->sysdata; + + if (!hose->controller_ops.device_group) + return ERR_PTR(-ENOENT); + + return hose->controller_ops.device_group(hose, pdev); +} + +static const struct iommu_ops spapr_tce_iommu_ops = { + .capable = spapr_tce_iommu_capable, + .domain_alloc = spapr_tce_iommu_domain_alloc, + .probe_device = spapr_tce_iommu_probe_device, + .release_device = spapr_tce_iommu_release_device, + .device_group = spapr_tce_iommu_device_group, + .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma, +}; + +static struct attribute *spapr_tce_iommu_attrs[] = { + NULL, +}; + +static struct attribute_group spapr_tce_iommu_group = { + .name = "spapr-tce-iommu", + .attrs = spapr_tce_iommu_attrs, +}; + +static const struct attribute_group *spapr_tce_iommu_groups[] = { + &spapr_tce_iommu_group, + NULL, +}; + +/* + * This registers IOMMU devices of PHBs. This needs to happen + * after core_initcall(iommu_init) + postcore_initcall(pci_driver_init) and + * before subsys_initcall(iommu_subsys_init). 
+ */ +static int __init spapr_tce_setup_phb_iommus_initcall(void) +{ + struct pci_controller *hose; + + list_for_each_entry(hose, &hose_list, list_node) { + iommu_device_sysfs_add(&hose->iommu, hose->parent, + spapr_tce_iommu_groups, "iommu-phb%04x", + hose->global_number); + iommu_device_register(&hose->iommu, &spapr_tce_iommu_ops, + hose->parent); + } + return 0; +} +postcore_initcall_sync(spapr_tce_setup_phb_iommus_initcall); + #endif /* CONFIG_IOMMU_API */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index c9535f2760b5..6f7d4edaa0bc 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -206,7 +206,11 @@ static __always_inline void call_do_softirq(const void *sp) asm volatile ( PPC_STLU " %%r1, %[offset](%[sp]) ;" "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else "bl %[callee] ;" +#endif PPC_LL " %%r1, 0(%%r1) ;" : // Outputs : // Inputs @@ -259,7 +263,11 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp) PPC_STLU " %%r1, %[offset](%[sp]) ;" "mr %%r4, %%r1 ;" "mr %%r1, %[sp] ;" +#ifdef CONFIG_PPC_KERNEL_PCREL + "bl %[callee]@notoc ;" +#else "bl %[callee] ;" +#endif PPC_LL " %%r1, 0(%%r1) ;" : // Outputs "+r" (r3) diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c index c788c55512ed..938e66829eae 100644 --- a/arch/powerpc/kernel/irq_64.c +++ b/arch/powerpc/kernel/irq_64.c @@ -348,13 +348,12 @@ EXPORT_SYMBOL(arch_local_irq_restore); * already the case when ppc_md.power_save is called). The function * will return whether to enter power save or just return. * - * In the former case, it will have notified lockdep of interrupts - * being re-enabled and generally sanitized the lazy irq state, - * and in the latter case it will leave with interrupts hard + * In the former case, it will have generally sanitized the lazy irq + * state, and in the latter case it will leave with interrupts hard * disabled and marked as such, so the local_irq_enable() call * in arch_cpu_idle() will properly re-enable everything. 
*/ -bool prep_irq_for_idle(void) +__cpuidle bool prep_irq_for_idle(void) { /* * First we need to hard disable to ensure no interrupt @@ -370,9 +369,6 @@ bool prep_irq_for_idle(void) if (lazy_irq_pending()) return false; - /* Tell lockdep we are about to re-enable */ - trace_hardirqs_on(); - /* * Mark interrupts as soft-enabled and clear the * PACA_IRQ_HARD_DIS from the pending mask since we diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index dc746611ebc0..85bdd7d3652f 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -55,80 +55,49 @@ static void remap_isa_base(phys_addr_t pa, unsigned long size) } } -static void pci_process_ISA_OF_ranges(struct device_node *isa_node, - unsigned long phb_io_base_phys) +static int process_ISA_OF_ranges(struct device_node *isa_node, + unsigned long phb_io_base_phys) { - /* We should get some saner parsing here and remove these structs */ - struct pci_address { - u32 a_hi; - u32 a_mid; - u32 a_lo; - }; - - struct isa_address { - u32 a_hi; - u32 a_lo; - }; - - struct isa_range { - struct isa_address isa_addr; - struct pci_address pci_addr; - unsigned int size; - }; - - const struct isa_range *range; - unsigned long pci_addr; - unsigned int isa_addr; unsigned int size; - int rlen = 0; + struct of_range_parser parser; + struct of_range range; - range = of_get_property(isa_node, "ranges", &rlen); - if (range == NULL || (rlen < sizeof(struct isa_range))) + if (of_range_parser_init(&parser, isa_node)) goto inval_range; - /* From "ISA Binding to 1275" - * The ranges property is laid out as an array of elements, - * each of which comprises: - * cells 0 - 1: an ISA address - * cells 2 - 4: a PCI address - * (size depending on dev->n_addr_cells) - * cell 5: the size of the range - */ - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) { - range++; - rlen -= sizeof(struct isa_range); - if (rlen < sizeof(struct isa_range)) - goto inval_range; - } - if ((range->isa_addr.a_hi & ISA_SPACE_MASK) != ISA_SPACE_IO) - goto inval_range; + for_each_of_range(&parser, &range) { + if ((range.flags & ISA_SPACE_MASK) != ISA_SPACE_IO) + continue; - isa_addr = range->isa_addr.a_lo; - pci_addr = (unsigned long) range->pci_addr.a_mid << 32 | - range->pci_addr.a_lo; + if (range.cpu_addr == OF_BAD_ADDR) { + pr_err("ISA: Bad CPU mapping: %s\n", __func__); + return -EINVAL; + } - /* Assume these are both zero. Note: We could fix that and - * do a proper parsing instead ... 
oh well, that will do for - * now as nobody uses fancy mappings for ISA bridges - */ - if ((pci_addr != 0) || (isa_addr != 0)) { - printk(KERN_ERR "unexpected isa to pci mapping: %s\n", - __func__); - return; - } + /* We need page alignment */ + if ((range.bus_addr & ~PAGE_MASK) || (range.cpu_addr & ~PAGE_MASK)) { + pr_warn("ISA: bridge %pOF has non aligned IO range\n", isa_node); + return -EINVAL; + } - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(range->size); - if (size > 0x10000) - size = 0x10000; + /* Align size and make sure it's cropped to 64K */ + size = PAGE_ALIGN(range.size); + if (size > 0x10000) + size = 0x10000; - remap_isa_base(phb_io_base_phys, size); - return; + if (!phb_io_base_phys) + phb_io_base_phys = range.cpu_addr; + + remap_isa_base(phb_io_base_phys, size); + return 0; + } inval_range: - printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " - "mapping 64k\n"); - remap_isa_base(phb_io_base_phys, 0x10000); + if (!phb_io_base_phys) { + pr_err("no ISA IO ranges or unexpected isa range, mapping 64k\n"); + remap_isa_base(phb_io_base_phys, 0x10000); + } + return 0; } @@ -170,7 +139,7 @@ void __init isa_bridge_find_early(struct pci_controller *hose) isa_bridge_devnode = np; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(np, hose->io_base_phys); + process_ISA_OF_ranges(np, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; @@ -186,75 +155,15 @@ void __init isa_bridge_find_early(struct pci_controller *hose) */ void __init isa_bridge_init_non_pci(struct device_node *np) { - const __be32 *ranges, *pbasep = NULL; - int rlen, i, rs; - u32 na, ns, pna; - u64 cbase, pbase, size = 0; + int ret; /* If we already have an ISA bridge, bail off */ if (isa_bridge_devnode != NULL) return; - pna = of_n_addr_cells(np); - if (of_property_read_u32(np, "#address-cells", &na) || - of_property_read_u32(np, "#size-cells", &ns)) { - pr_warn("ISA: Non-PCI bridge %pOF is missing address format\n", - np); - return; - } - - /* Check it's a supported address format */ - if (na != 2 || ns != 1) { - pr_warn("ISA: Non-PCI bridge %pOF has unsupported address format\n", - np); - return; - } - rs = na + ns + pna; - - /* Grab the ranges property */ - ranges = of_get_property(np, "ranges", &rlen); - if (ranges == NULL || rlen < rs) { - pr_warn("ISA: Non-PCI bridge %pOF has absent or invalid ranges\n", - np); - return; - } - - /* Parse it. We are only looking for IO space */ - for (i = 0; (i + rs - 1) < rlen; i += rs) { - if (be32_to_cpup(ranges + i) != 1) - continue; - cbase = be32_to_cpup(ranges + i + 1); - size = of_read_number(ranges + i + na + pna, ns); - pbasep = ranges + i + na; - break; - } - - /* Got something ? 
*/ - if (!size || !pbasep) { - pr_warn("ISA: Non-PCI bridge %pOF has no usable IO range\n", - np); + ret = process_ISA_OF_ranges(np, 0); + if (ret) return; - } - - /* Align size and make sure it's cropped to 64K */ - size = PAGE_ALIGN(size); - if (size > 0x10000) - size = 0x10000; - - /* Map pbase */ - pbase = of_translate_address(np, pbasep); - if (pbase == OF_BAD_ADDR) { - pr_warn("ISA: Non-PCI bridge %pOF failed to translate IO base\n", - np); - return; - } - - /* We need page alignment */ - if ((cbase & ~PAGE_MASK) || (pbase & ~PAGE_MASK)) { - pr_warn("ISA: Non-PCI bridge %pOF has non aligned IO range\n", - np); - return; - } /* Got it */ isa_bridge_devnode = np; @@ -263,7 +172,6 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - remap_isa_base(pbase, size); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -282,7 +190,7 @@ static void isa_bridge_find_late(struct pci_dev *pdev, isa_bridge_pcidev = pdev; /* Now parse the "ranges" property and setup the ISA mapping */ - pci_process_ISA_OF_ranges(devnode, hose->io_base_phys); + process_ISA_OF_ranges(devnode, hose->io_base_phys); /* Set the global ISA io base to indicate we have an ISA bridge */ isa_io_base = ISA_IO_BASE; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index f048c424c525..c9ad12461d44 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -171,15 +171,15 @@ static int __init add_legacy_soc_port(struct device_node *np, /* We only support ports that have a clock frequency properly * encoded in the device-tree. */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* if reg-offset don't try to use it */ - if ((of_get_property(np, "reg-offset", NULL) != NULL)) + if (of_property_present(np, "reg-offset")) return -1; /* if rtas uses this device, don't try to use it as well */ - if (of_get_property(np, "used-by-rtas", NULL) != NULL) + if (of_property_read_bool(np, "used-by-rtas")) return -1; /* Get the address */ @@ -237,7 +237,7 @@ static int __init add_legacy_isa_port(struct device_node *np, * Note: Don't even try on P8 lpc, we know it's not directly mapped */ if (!of_device_is_compatible(isa_brg, "ibm,power8-lpc") || - of_get_property(isa_brg, "ranges", NULL)) { + of_property_present(isa_brg, "ranges")) { taddr = of_translate_address(np, reg); if (taddr == OF_BAD_ADDR) taddr = 0; @@ -268,7 +268,7 @@ static int __init add_legacy_pci_port(struct device_node *np, * compatible UARTs on PCI need all sort of quirks (port offsets * etc...) that this code doesn't know about */ - if (of_get_property(np, "clock-frequency", NULL) == NULL) + if (!of_property_present(np, "clock-frequency")) return -1; /* Get the PCI address. 
Assume BAR 0 */ diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index c39c07a4c06e..2c9ac70aaf0c 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -432,7 +432,7 @@ _GLOBAL(kexec_sequence) 1: /* copy dest pages, flush whole dest image */ mr r3,r29 - bl kexec_copy_flush /* (image) */ + bl CFUNC(kexec_copy_flush) /* (image) */ /* turn off mmu now if not done earlier */ cmpdi r26,0 diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index ea6536171778..816a63fd71fb 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -163,8 +163,7 @@ static uint32_t do_plt_call(void *location, pr_debug("Doing plt for call to 0x%x at 0x%x\n", val, (unsigned int)location); /* Init, or core PLT? */ - if (location >= mod->core_layout.base - && location < mod->core_layout.base + mod->core_layout.size) + if (within_module_core((unsigned long)location, mod)) entry = (void *)sechdrs[mod->arch.core_plt_section].sh_addr; else entry = (void *)sechdrs[mod->arch.init_plt_section].sh_addr; @@ -322,14 +321,14 @@ notrace int module_trampoline_target(struct module *mod, unsigned long addr, int module_finalize_ftrace(struct module *module, const Elf_Shdr *sechdrs) { - module->arch.tramp = do_plt_call(module->core_layout.base, + module->arch.tramp = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_caller, sechdrs, module); if (!module->arch.tramp) return -ENOENT; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - module->arch.tramp_regs = do_plt_call(module->core_layout.base, + module->arch.tramp_regs = do_plt_call(module->mem[MOD_TEXT].base, (unsigned long)ftrace_regs_caller, sechdrs, module); if (!module->arch.tramp_regs) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 2ac78d207f77..92570289ce08 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -101,32 +101,45 @@ static unsigned long stub_func_addr(func_desc_t func) /* Like PPC32, we need little trampolines to do > 24-bit jumps (into the kernel itself). But on PPC64, these need to be used for every jump, actually, to reset r2 (TOC+0x8000). */ -struct ppc64_stub_entry -{ - /* 28 byte jump instruction sequence (7 instructions). We only - * need 6 instructions on ABIv2 but we always allocate 7 so - * so we don't have to modify the trampoline load instruction. */ +struct ppc64_stub_entry { + /* + * 28 byte jump instruction sequence (7 instructions) that can + * hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned + * with PCREL kernels that use prefix instructions in the stub. + */ u32 jump[7]; /* Used by ftrace to identify stubs */ u32 magic; /* Data for the above code */ func_desc_t funcdata; +} __aligned(8); + +struct ppc64_got_entry { + u64 addr; }; /* * PPC64 uses 24 bit jumps, but we need to jump into other modules or * the kernel which may be further. So we jump to a stub. * - * For ELFv1 we need to use this to set up the new r2 value (aka TOC - * pointer). For ELFv2 it's the callee's responsibility to set up the - * new r2, but for both we need to save the old r2. + * Target address and TOC are loaded from function descriptor in the + * ppc64_stub_entry. + * + * r12 is used to generate the target address, which is required for the + * ELFv2 global entry point calling convention. 
* - * We could simply patch the new r2 value and function pointer into - * the stub, but it's significantly shorter to put these values at the - * end of the stub code, and patch the stub address (32-bits relative - * to the TOC ptr, r2) into the stub. + * TOC handling: + * - PCREL does not have a TOC. + * - ELFv2 non-PCREL just has to save r2, the callee is responsible for + * setting its own TOC pointer at the global entry address. + * - ELFv1 must load the new TOC pointer from the function descriptor. */ static u32 ppc64_stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + /* pld r12,addr */ + PPC_PREFIX_8LS | __PPC_PRFX_R(1), + PPC_INST_PLD | ___PPC_RT(_R12), +#else PPC_RAW_ADDIS(_R11, _R2, 0), PPC_RAW_ADDI(_R11, _R11, 0), /* Save current r2 value in magic place on the stack. */ @@ -136,13 +149,17 @@ static u32 ppc64_stub_insns[] = { /* Set up new r2 from function descriptor */ PPC_RAW_LD(_R2, _R11, 40), #endif +#endif PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), }; -/* Count how many different 24-bit relocations (different symbol, - different addend) */ -static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) +/* + * Count how many different r_type relocations (different symbol, + * different addend). + */ +static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num, + unsigned long r_type) { unsigned int i, r_info, r_addend, _count_relocs; @@ -151,8 +168,8 @@ static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num) r_info = 0; r_addend = 0; for (i = 0; i < num; i++) - /* Only count 24-bit relocs, others don't need stubs */ - if (ELF64_R_TYPE(rela[i].r_info) == R_PPC_REL24 && + /* Only count r_type relocs, others don't need stubs */ + if (ELF64_R_TYPE(rela[i].r_info) == r_type && (r_info != ELF64_R_SYM(rela[i].r_info) || r_addend != rela[i].r_addend)) { _count_relocs++; @@ -213,7 +230,14 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, relocs += count_relocs((void *)sechdrs[i].sh_addr, sechdrs[i].sh_size - / sizeof(Elf64_Rela)); + / sizeof(Elf64_Rela), + R_PPC_REL24); +#ifdef CONFIG_PPC_KERNEL_PCREL + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_REL24_NOTOC); +#endif } } @@ -230,6 +254,95 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, return relocs * sizeof(struct ppc64_stub_entry); } +#ifdef CONFIG_PPC_KERNEL_PCREL +static int count_pcpu_relocs(const Elf64_Shdr *sechdrs, + const Elf64_Rela *rela, unsigned int num, + unsigned int symindex, unsigned int pcpu) +{ + unsigned int i, r_info, r_addend, _count_relocs; + + _count_relocs = 0; + r_info = 0; + r_addend = 0; + + for (i = 0; i < num; i++) { + Elf64_Sym *sym; + + /* This is the symbol it is referring to */ + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info); + + if (sym->st_shndx == pcpu && + (r_info != ELF64_R_SYM(rela[i].r_info) || + r_addend != rela[i].r_addend)) { + _count_relocs++; + r_info = ELF64_R_SYM(rela[i].r_info); + r_addend = rela[i].r_addend; + } + } + + return _count_relocs; +} + +/* Get size of potential GOT required. */ +static unsigned long get_got_size(const Elf64_Ehdr *hdr, + const Elf64_Shdr *sechdrs, + struct module *me) +{ + /* One extra reloc so it's always 0-addr terminated */ + unsigned long relocs = 1; + unsigned int i, symindex = 0; + + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_SYMTAB) { + symindex = i; + break; + } + } + WARN_ON_ONCE(!symindex); + + /* Every relocated section... 
*/ + for (i = 1; i < hdr->e_shnum; i++) { + if (sechdrs[i].sh_type == SHT_RELA) { + pr_debug("Found relocations in section %u\n", i); + pr_debug("Ptr: %p. Number: %llu\n", (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela)); + + /* + * Sort the relocation information based on a symbol and + * addend key. This is a stable O(n*log n) complexity + * algorithm but it will reduce the complexity of + * count_relocs() to linear complexity O(n) + */ + sort((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size / sizeof(Elf64_Rela), + sizeof(Elf64_Rela), relacmp, NULL); + + relocs += count_relocs((void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + R_PPC64_GOT_PCREL34); + + /* + * Percpu data access typically gets linked with + * REL34 relocations, but the percpu section gets + * moved at load time and requires that to be + * converted to GOT linkage. + */ + if (IS_ENABLED(CONFIG_SMP) && symindex) + relocs += count_pcpu_relocs(sechdrs, + (void *)sechdrs[i].sh_addr, + sechdrs[i].sh_size + / sizeof(Elf64_Rela), + symindex, me->arch.pcpu_section); + } + } + + pr_debug("Looks like a total of %lu GOT entries, max\n", relocs); + return relocs * sizeof(struct ppc64_got_entry); +} +#else /* CONFIG_PPC_KERNEL_PCREL */ + /* Still needed for ELFv2, for .TOC. */ static void dedotify_versions(struct modversion_info *vers, unsigned long size) @@ -279,6 +392,7 @@ static Elf64_Sym *find_dot_toc(Elf64_Shdr *sechdrs, } return NULL; } +#endif /* CONFIG_PPC_KERNEL_PCREL */ bool module_init_section(const char *name) { @@ -297,6 +411,15 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, for (i = 1; i < hdr->e_shnum; i++) { if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) me->arch.stubs_section = i; +#ifdef CONFIG_PPC_KERNEL_PCREL + else if (strcmp(secstrings + sechdrs[i].sh_name, ".data..percpu") == 0) + me->arch.pcpu_section = i; + else if (strcmp(secstrings + sechdrs[i].sh_name, ".mygot") == 0) { + me->arch.got_section = i; + if (sechdrs[i].sh_addralign < 8) + sechdrs[i].sh_addralign = 8; + } +#else else if (strcmp(secstrings + sechdrs[i].sh_name, ".toc") == 0) { me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) @@ -311,6 +434,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, sechdrs[i].sh_size / sizeof(Elf64_Sym), (void *)hdr + sechdrs[sechdrs[i].sh_link].sh_offset); +#endif } if (!me->arch.stubs_section) { @@ -318,26 +442,47 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return -ENOEXEC; } +#ifdef CONFIG_PPC_KERNEL_PCREL + if (!me->arch.got_section) { + pr_err("%s: doesn't contain .mygot.\n", me->name); + return -ENOEXEC; + } + + /* Override the got size */ + sechdrs[me->arch.got_section].sh_size = get_got_size(hdr, sechdrs, me); +#else /* If we don't have a .toc, just use .stubs. We need to set r2 to some reasonable value in case the module calls out to other functions via a stub, or if a function pointer escapes the module by some means. 
*/ if (!me->arch.toc_section) me->arch.toc_section = me->arch.stubs_section; +#endif /* Override the stubs size */ sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + return 0; } #ifdef CONFIG_MPROFILE_KERNEL static u32 stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernelbase)), + PPC_RAW_NOP(), /* align the prefix insn */ + /* paddi r12,r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(0), + PPC_INST_PADDI | ___PPC_RT(_R12) | ___PPC_RA(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR(), +#else PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR(), +#endif }; /* @@ -358,18 +503,37 @@ static inline int create_ftrace_stub(struct ppc64_stub_entry *entry, { long reladdr; - memcpy(entry->jump, stub_insns, sizeof(stub_insns)); - - /* Stub uses address relative to kernel toc (from the paca) */ - reladdr = addr - kernel_toc_addr(); - if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { - pr_err("%s: Address of %ps out of range of kernel_toc.\n", - me->name, (void *)addr); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); return 0; } - entry->jump[1] |= PPC_HA(reladdr); - entry->jump[2] |= PPC_LO(reladdr); + BUILD_BUG_ON(sizeof(stub_insns) > sizeof(entry->jump)); + memcpy(entry->jump, stub_insns, sizeof(stub_insns)); + + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + /* Stub uses address relative to kernel base (from the paca) */ + reladdr = addr - local_paca->kernelbase; + if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) { + pr_err("%s: Address of %ps out of range of 34-bit relative address.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[2] |= IMM_H18(reladdr); + entry->jump[3] |= IMM_L(reladdr); + } else { + /* Stub uses address relative to kernel toc (from the paca) */ + reladdr = addr - kernel_toc_addr(); + if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) { + pr_err("%s: Address of %ps out of range of kernel_toc.\n", + me->name, (void *)addr); + return 0; + } + + entry->jump[1] |= PPC_HA(reladdr); + entry->jump[2] |= PPC_LO(reladdr); + } /* Even though we don't use funcdata in the stub, it's needed elsewhere. */ entry->funcdata = func_desc(addr); @@ -415,7 +579,11 @@ static bool is_mprofile_ftrace_call(const char *name) */ static inline unsigned long my_r2(const Elf64_Shdr *sechdrs, struct module *me) { +#ifndef CONFIG_PPC_KERNEL_PCREL return (sechdrs[me->arch.toc_section].sh_addr & ~0xfful) + 0x8000; +#else + return -1; +#endif } /* Patch stub to reference function and correct r2 value. */ @@ -432,28 +600,53 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, if (is_mprofile_ftrace_call(name)) return create_ftrace_stub(entry, addr, me); + if ((unsigned long)entry->jump % 8 != 0) { + pr_err("%s: Address of stub entry is not 8-byte aligned\n", me->name); + return 0; + } + + BUILD_BUG_ON(sizeof(ppc64_stub_insns) > sizeof(entry->jump)); for (i = 0; i < ARRAY_SIZE(ppc64_stub_insns); i++) { if (patch_instruction(&entry->jump[i], ppc_inst(ppc64_stub_insns[i]))) return 0; } - /* Stub uses address relative to r2. 
 */
-	reladdr = (unsigned long)entry - my_r2(sechdrs, me);
-	if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
-		pr_err("%s: Address %p of stub out of range of %p.\n",
-		       me->name, (void *)reladdr, (void *)my_r2);
-		return 0;
-	}
-	pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) {
+		/* Stub uses address relative to itself! */
+		reladdr = 0 + offsetof(struct ppc64_stub_entry, funcdata);
+		BUILD_BUG_ON(reladdr != 32);
+		if (reladdr > 0x1FFFFFFFFL || reladdr < -0x200000000L) {
+			pr_err("%s: Address of %p out of range of 34-bit relative address.\n",
+			       me->name, (void *)reladdr);
+			return 0;
+		}
+		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
 
-	if (patch_instruction(&entry->jump[0],
-			      ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
-		return 0;
+		/* May not even need this if we're relative to 0 */
+		if (patch_instruction(&entry->jump[0],
+				      ppc_inst_prefix(entry->jump[0] | IMM_H18(reladdr),
+						      entry->jump[1] | IMM_L(reladdr))))
+			return 0;
 
-	if (patch_instruction(&entry->jump[1],
-			      ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
-		return 0;
+	} else {
+		/* Stub uses address relative to r2. */
+		reladdr = (unsigned long)entry - my_r2(sechdrs, me);
+		if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+			pr_err("%s: Address %p of stub out of range of %p.\n",
+			       me->name, (void *)reladdr,
+			       (void *)my_r2(sechdrs, me));
+			return 0;
+		}
+		pr_debug("Stub %p get data from reladdr %li\n", entry, reladdr);
+
+		if (patch_instruction(&entry->jump[0],
+				      ppc_inst(entry->jump[0] | PPC_HA(reladdr))))
+			return 0;
+
+		if (patch_instruction(&entry->jump[1],
+				      ppc_inst(entry->jump[1] | PPC_LO(reladdr))))
+			return 0;
+	}
 
 	// func_desc_t is 8 bytes if ABIv2, else 16 bytes
 	desc = func_desc(addr);
@@ -497,6 +690,37 @@ static unsigned long stub_for_addr(const Elf64_Shdr *sechdrs,
 	return (unsigned long)&stubs[i];
 }
 
+#ifdef CONFIG_PPC_KERNEL_PCREL
+/* Look up or create the GOT entry that holds a given address */
+static unsigned long got_for_addr(const Elf64_Shdr *sechdrs,
+				  unsigned long addr,
+				  struct module *me,
+				  const char *name)
+{
+	struct ppc64_got_entry *got;
+	unsigned int i, num_got;
+
+	if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+		return addr;
+
+	num_got = sechdrs[me->arch.got_section].sh_size / sizeof(*got);
+
+	/* Find the entry for this address, or failing that, the next free one */
+	got = (void *)sechdrs[me->arch.got_section].sh_addr;
+	for (i = 0; got[i].addr; i++) {
+		if (WARN_ON(i >= num_got))
+			return 0;
+
+		if (got[i].addr == addr)
+			return (unsigned long)&got[i];
+	}
+
+	got[i].addr = addr;
+
+	return (unsigned long)&got[i];
+}
+#endif
+
 /* We expect a noop next: if it is, replace it with instruction to
    restore r2. */
 static int restore_r2(const char *name, u32 *instruction, struct module *me)
@@ -504,6 +728,9 @@ static int restore_r2(const char *name, u32 *instruction, struct module *me)
 	u32 *prev_insn = instruction - 1;
 	u32 insn_val = *instruction;
 
+	if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+		return 0;
+
 	if (is_mprofile_ftrace_call(name))
 		return 0;
 
@@ -549,6 +776,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	pr_debug("Applying ADD relocate section %u to %u\n",
 		 relsec, sechdrs[relsec].sh_info);
 
+#ifndef CONFIG_PPC_KERNEL_PCREL
 	/* First time we're called, we can fix up .TOC. */
 	if (!me->arch.toc_fixed) {
 		sym = find_dot_toc(sechdrs, strtab, symindex);
@@ -558,7 +786,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 		sym->st_value = my_r2(sechdrs, me);
 		me->arch.toc_fixed = true;
 	}
-
+#endif
 	for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rela); i++) {
 		/* This is where to make the change */
 		location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr
 			+ rela[i].r_offset;
@@ -586,6 +814,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			*(unsigned long *)location = value;
 			break;
 
+#ifndef CONFIG_PPC_KERNEL_PCREL
 		case R_PPC64_TOC:
 			*(unsigned long *)location = my_r2(sechdrs, me);
 			break;
@@ -645,8 +874,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 				= (*((uint16_t *) location) & ~0xffff)
 				| (value & 0xffff);
 			break;
+#endif
 
 		case R_PPC_REL24:
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		/* PCREL still generates REL24 for mcount */
+		case R_PPC64_REL24_NOTOC:
+#endif
 			/* FIXME: Handle weak symbols here --RR */
 			if (sym->st_shndx == SHN_UNDEF ||
 			    sym->st_shndx == SHN_LIVEPATCH) {
@@ -694,6 +928,47 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			*(u32 *)location = value;
 			break;
 
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		case R_PPC64_PCREL34: {
+			unsigned long absvalue = value;
+
+			/* Convert value to relative */
+			value -= (unsigned long)location;
+
+			if (value + 0x200000000 > 0x3ffffffff) {
+				if (sym->st_shndx != me->arch.pcpu_section) {
+					pr_err("%s: REL34 %li out of range!\n",
+					       me->name, (long)value);
+					return -ENOEXEC;
+				}
+
+				/*
+				 * per-cpu section is special cased because
+				 * it is moved during loading, so has to be
+				 * converted to use GOT.
+				 */
+				value = got_for_addr(sechdrs, absvalue, me,
+						     strtab + sym->st_name);
+				if (!value)
+					return -ENOENT;
+				value -= (unsigned long)location;
+
+				/* Turn pla into pld */
+				if (patch_instruction((u32 *)location,
+						      ppc_inst_prefix((*(u32 *)location & ~0x02000000),
+								      (*((u32 *)location + 1) & ~0xf8000000) | 0xe4000000)))
+					return -EFAULT;
+			}
+
+			if (patch_instruction((u32 *)location,
+					      ppc_inst_prefix((*(u32 *)location & ~0x3ffff) | IMM_H18(value),
							      (*((u32 *)location + 1) & ~0xffff) | IMM_L(value))))
+				return -EFAULT;
+
+			break;
+		}
+
+#else
 		case R_PPC64_TOCSAVE:
 			/*
 			 * Marker reloc indicates we don't have to save r2.
 			 * That would only save us one instruction, so ignore
 			 * it.
 			 */
 			break;
+#endif
 
 		case R_PPC64_ENTRY:
+			if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL))
+				break;
+
 			/*
 			 * Optimize ELFv2 large code model entry point if
 			 * the TOC is within 2GB range of current location.
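The R_PPC64_PCREL34 case above and the R_PPC64_GOT_PCREL34 case in the next hunk share one encoding step: a signed 34-bit displacement is range-checked, then split across the two words of a prefixed instruction, the high 18 bits into the prefix immediate field and the low 16 bits into the suffix. A minimal standalone sketch of that split, with stand-in macros modelled on the kernel's IMM_H18()/IMM_L() (the zeroed instruction words and the sample displacement are illustrative, not real encodings):

#include <stdint.h>
#include <stdio.h>

/* Stand-ins modelled on the kernel's IMM_H18()/IMM_L() helpers. */
#define IMM_H18(v)	(((v) >> 16) & 0x3ffff)	/* bits 33..16 of the displacement */
#define IMM_L(v)	((v) & 0xffff)		/* bits 15..0 */

/* The same bounds the relocation cases check before patching. */
static int fits_pcrel34(int64_t d)
{
	return d <= 0x1FFFFFFFFLL && d >= -0x200000000LL;
}

int main(void)
{
	int64_t reladdr = 0x123456789LL;	/* displacement: target - location */
	uint32_t prefix = 0, suffix = 0;	/* immediate fields of a pla/pld pair, zeroed */

	if (!fits_pcrel34(reladdr))
		return 1;	/* out of range: fail, or fall back to the GOT */

	prefix |= IMM_H18(reladdr);
	suffix |= IMM_L(reladdr);
	printf("prefix imm 0x%05x, suffix imm 0x%04x\n", prefix, suffix);
	return 0;
}

The same bounds appear in create_stub() and the ftrace trampolines: a target outside [-0x200000000, 0x1FFFFFFFF] cannot be reached with a single prefixed instruction, so the code either fails the relocation or, for the per-cpu section, reroutes it through the GOT.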
@@ -745,6 +1024,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, | (value & 0xffff); break; +#ifdef CONFIG_PPC_KERNEL_PCREL + case R_PPC64_GOT_PCREL34: + value = got_for_addr(sechdrs, value, me, + strtab + sym->st_name); + if (!value) + return -ENOENT; + value -= (unsigned long)location; + ((uint32_t *)location)[0] = (((uint32_t *)location)[0] & ~0x3ffff) | + ((value >> 16) & 0x3ffff); + ((uint32_t *)location)[1] = (((uint32_t *)location)[1] & ~0xffff) | + (value & 0xffff); + break; +#endif + default: pr_err("%s: Unknown ADD relocation: %lu\n", me->name, diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index be8db402e963..cda4e00b67c1 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -191,7 +191,9 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) #endif new_paca->lock_token = 0x8000; new_paca->paca_index = cpu; +#ifndef CONFIG_PPC_KERNEL_PCREL new_paca->kernel_toc = kernel_toc_addr(); +#endif new_paca->kernelbase = (unsigned long) _stext; /* Only set MSR:IR/DR when MMU is initialized */ new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d67cf79bf5d0..e88d7c9feeec 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -880,6 +880,7 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) static void pcibios_fixup_resources(struct pci_dev *dev) { struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct resource *res; int i; if (!hose) { @@ -891,9 +892,9 @@ static void pcibios_fixup_resources(struct pci_dev *dev) if (dev->is_virtfn) return; - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - struct resource *res = dev->resource + i; + pci_dev_for_each_resource(dev, res, i) { struct pci_bus_region reg; + if (!res->flags) continue; @@ -1452,11 +1453,10 @@ void pcibios_claim_one_bus(struct pci_bus *bus) struct pci_bus *child_bus; list_for_each_entry(dev, &bus->devices, bus_list) { + struct resource *r; int i; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - struct resource *r = &dev->resource[i]; - + pci_dev_for_each_resource(dev, r, i) { if (r->parent || !r->start || !r->flags) continue; @@ -1705,19 +1705,20 @@ EXPORT_SYMBOL_GPL(pcibios_scan_phb); static void fixup_hide_host_resource_fsl(struct pci_dev *dev) { - int i, class = dev->class >> 8; + int class = dev->class >> 8; /* When configured as agent, programming interface = 1 */ int prog_if = dev->class & 0xf; + struct resource *r; if ((class == PCI_CLASS_PROCESSOR_POWERPC || class == PCI_CLASS_BRIDGE_OTHER) && (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) && (prog_if == 0) && (dev->bus->parent == NULL)) { - for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; + pci_dev_for_each_resource(dev, r) { + r->start = 0; + r->end = 0; + r->flags = 0; } } } diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index fd42059ae2a5..e27342ef128b 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -73,7 +73,7 @@ static int __init pcibios_init(void) return 0; } -subsys_initcall(pcibios_init); +subsys_initcall_sync(pcibios_init); int pcibios_unmap_io_space(struct pci_bus *bus) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4b29ac5ddac6..1fefafb2b29b 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1630,7 +1630,7 @@ void arch_setup_new_exec(void) } #ifdef 
CONFIG_PPC64 -/** +/* * Assign a TIDR (thread ID) for task @t and set it in the thread * structure. For now, we only support setting TIDR for 'current' task. * @@ -1738,68 +1738,83 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp) */ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) { - unsigned long clone_flags = args->flags; - unsigned long usp = args->stack; - unsigned long tls = args->tls; - struct pt_regs *childregs, *kregs; + struct pt_regs *kregs; /* Switch frame regs */ extern void ret_from_fork(void); extern void ret_from_fork_scv(void); - extern void ret_from_kernel_thread(void); + extern void ret_from_kernel_user_thread(void); + extern void start_kernel_thread(void); void (*f)(void); unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE; - struct thread_info *ti = task_thread_info(p); #ifdef CONFIG_HAVE_HW_BREAKPOINT int i; #endif klp_init_thread_info(p); - /* Create initial stack frame. */ - sp -= STACK_USER_INT_FRAME_SIZE; - *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; - - /* Copy registers */ - childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); - if (unlikely(args->fn)) { + if (unlikely(p->flags & PF_KTHREAD)) { /* kernel thread */ + + /* Create initial minimum stack frame. */ + sp -= STACK_FRAME_MIN_SIZE; ((unsigned long *)sp)[0] = 0; - memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE; - /* function */ - if (args->fn) - childregs->gpr[14] = ppc_function_entry((void *)args->fn); -#ifdef CONFIG_PPC64 - clear_tsk_thread_flag(p, TIF_32BIT); - childregs->softe = IRQS_ENABLED; -#endif - childregs->gpr[15] = (unsigned long)args->fn_arg; + + f = start_kernel_thread; p->thread.regs = NULL; /* no user register state */ - ti->flags |= _TIF_RESTOREALL; - f = ret_from_kernel_thread; + clear_tsk_compat_task(p); } else { /* user thread */ - struct pt_regs *regs = current_pt_regs(); - *childregs = *regs; - if (usp) - childregs->gpr[1] = usp; - ((unsigned long *)sp)[0] = childregs->gpr[1]; - p->thread.regs = childregs; - /* 64s sets this in ret_from_fork */ - if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64)) - childregs->gpr[3] = 0; /* Result from fork() */ - if (clone_flags & CLONE_SETTLS) { - if (!is_32bit_task()) - childregs->gpr[13] = tls; + struct pt_regs *childregs; + + /* Create initial user return stack frame. */ + sp -= STACK_USER_INT_FRAME_SIZE; + *(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER; + + childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS); + + if (unlikely(args->fn)) { + /* + * A user space thread, but it first runs a kernel + * thread, and then returns as though it had called + * execve rather than fork, so user regs will be + * filled in (e.g., by kernel_execve()). 
+ */ + ((unsigned long *)sp)[0] = 0; + memset(childregs, 0, sizeof(struct pt_regs)); +#ifdef CONFIG_PPC64 + childregs->softe = IRQS_ENABLED; +#endif + f = ret_from_kernel_user_thread; + } else { + struct pt_regs *regs = current_pt_regs(); + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + + /* Copy registers */ + *childregs = *regs; + if (usp) + childregs->gpr[1] = usp; + ((unsigned long *)sp)[0] = childregs->gpr[1]; +#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG + WARN_ON_ONCE(childregs->softe != IRQS_ENABLED); +#endif + if (clone_flags & CLONE_SETTLS) { + unsigned long tls = args->tls; + + if (!is_32bit_task()) + childregs->gpr[13] = tls; + else + childregs->gpr[2] = tls; + } + + if (trap_is_scv(regs)) + f = ret_from_fork_scv; else - childregs->gpr[2] = tls; + f = ret_from_fork; } - if (trap_is_scv(regs)) - f = ret_from_fork_scv; - else - f = ret_from_fork; + childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); + p->thread.regs = childregs; } - childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX); /* * The way this works is that at some point in the future @@ -1813,6 +1828,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) sp -= STACK_SWITCH_FRAME_SIZE; ((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE; kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS); + kregs->nip = ppc_function_entry(f); + if (unlikely(args->fn)) { + /* + * Put kthread fn, arg parameters in non-volatile GPRs in the + * switch frame so they are loaded by _switch before it returns + * to ret_from_kernel_thread. + */ + kregs->gpr[14] = ppc_function_entry((void *)args->fn); + kregs->gpr[15] = (unsigned long)args->fn_arg; + } p->thread.ksp = sp; #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -1840,22 +1865,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.dscr_inherit = current->thread.dscr_inherit; p->thread.dscr = mfspr(SPRN_DSCR); } - if (cpu_has_feature(CPU_FTR_HAS_PPR)) - childregs->ppr = DEFAULT_PPR; p->thread.tidr = 0; #endif - /* - * Run with the current AMR value of the kernel - */ -#ifdef CONFIG_PPC_PKEY - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) - kregs->amr = AMR_KUAP_BLOCKED; - - if (mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) - kregs->iamr = AMR_KUEP_BLOCKED; -#endif - kregs->nip = ppc_function_entry(f); return 0; } diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 2087a785f05f..5fff0d04b23f 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -290,6 +290,9 @@ static int gpr_set(struct task_struct *target, const struct user_regset *regset, static int ppr_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) { + if (!target->thread.regs) + return -EINVAL; + return membuf_write(&to, &target->thread.regs->ppr, sizeof(u64)); } @@ -297,6 +300,9 @@ static int ppr_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + if (!target->thread.regs) + return -EINVAL; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.regs->ppr, 0, sizeof(u64)); } diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 31175b34856a..c087eeee320f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -16,6 +16,7 @@ #include <linux/init.h> #include <linux/kconfig.h> #include <linux/kernel.h> +#include <linux/lockdep.h> #include <linux/memblock.h> #include <linux/of.h> #include 
<linux/of_fdt.h> @@ -68,7 +69,7 @@ struct rtas_filter { * functions are believed to have no users on * ppc64le, and we want to keep it that way. It does * not make sense for this to be set when @filter - * is false. + * is NULL. */ struct rtas_function { s32 token; @@ -453,6 +454,16 @@ static struct rtas_function rtas_function_table[] __ro_after_init = { }, }; +/* + * Nearly all RTAS calls need to be serialized. All uses of the + * default rtas_args block must hold rtas_lock. + * + * Exceptions to the RTAS serialization requirement (e.g. stop-self) + * must use a separate rtas_args structure. + */ +static DEFINE_RAW_SPINLOCK(rtas_lock); +static struct rtas_args rtas_args; + /** * rtas_function_token() - RTAS function token lookup. * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN. @@ -560,6 +571,9 @@ static void __do_enter_rtas(struct rtas_args *args) static void __do_enter_rtas_trace(struct rtas_args *args) { const char *name = NULL; + + if (args == &rtas_args) + lockdep_assert_held(&rtas_lock); /* * If the tracepoints that consume the function name aren't * active, avoid the lookup. @@ -619,16 +633,6 @@ static void do_enter_rtas(struct rtas_args *args) struct rtas_t rtas; -/* - * Nearly all RTAS calls need to be serialized. All uses of the - * default rtas_args block must hold rtas_lock. - * - * Exceptions to the RTAS serialization requirement (e.g. stop-self) - * must use a separate rtas_args structure. - */ -static DEFINE_RAW_SPINLOCK(rtas_lock); -static struct rtas_args rtas_args; - DEFINE_SPINLOCK(rtas_data_buf_lock); EXPORT_SYMBOL_GPL(rtas_data_buf_lock); @@ -951,6 +955,8 @@ static char *__fetch_rtas_last_error(char *altbuf) u32 bufsz; char *buf = NULL; + lockdep_assert_held(&rtas_lock); + if (token == -1) return NULL; @@ -981,7 +987,7 @@ static char *__fetch_rtas_last_error(char *altbuf) buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC); } if (buf) - memcpy(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); + memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX); } return buf; @@ -1016,6 +1022,23 @@ va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, do_enter_rtas(args); } +/** + * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization. + * @args: RTAS parameter block to be used for the call, must obey RTAS addressing + * constraints. + * @token: Identifies the function being invoked. + * @nargs: Number of input parameters. Does not include token. + * @nret: Number of output parameters, including the call status. + * @....: List of @nargs input parameters. + * + * Invokes the RTAS function indicated by @token, which the caller + * should obtain via rtas_function_token(). + * + * This function is similar to rtas_call(), but must be used with a + * limited set of RTAS calls specifically exempted from the general + * requirement that only one RTAS call may be in progress at any + * time. Examples include stop-self and ibm,nmi-interlock. + */ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...) { va_list list; @@ -1091,6 +1114,7 @@ static bool token_is_restricted_errinjct(s32 token) */ int rtas_call(int token, int nargs, int nret, int *outputs, ...) { + struct pin_cookie cookie; va_list list; int i; unsigned long flags; @@ -1117,6 +1141,8 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) 
} raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); + /* We use the global rtas args buffer */ args = &rtas_args; @@ -1134,6 +1160,7 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...) outputs[i] = be32_to_cpu(args->rets[i + 1]); ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0; + lockdep_unpin_lock(&rtas_lock, cookie); raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { @@ -1765,6 +1792,7 @@ err: /* We assume to be passed big endian arguments */ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) { + struct pin_cookie cookie; struct rtas_args args; unsigned long flags; char *buff_copy, *errbuf = NULL; @@ -1833,6 +1861,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) buff_copy = get_errorlog_buffer(); raw_spin_lock_irqsave(&rtas_lock, flags); + cookie = lockdep_pin_lock(&rtas_lock); rtas_args = args; do_enter_rtas(&rtas_args); @@ -1843,6 +1872,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) if (be32_to_cpu(args.rets[0]) == -1) errbuf = __fetch_rtas_last_error(buff_copy); + lockdep_unpin_lock(&rtas_lock, cookie); raw_spin_unlock_irqrestore(&rtas_lock, flags); if (buff_copy) { diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index e77734e5a127..d2a446216444 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -630,13 +630,14 @@ static __init void probe_machine(void) for (machine_id = &__machine_desc_start; machine_id < &__machine_desc_end; machine_id++) { - DBG(" %s ...", machine_id->name); + DBG(" %s ...\n", machine_id->name); + if (machine_id->compatible && !of_machine_is_compatible(machine_id->compatible)) + continue; memcpy(&ppc_md, machine_id, sizeof(struct machdep_calls)); - if (ppc_md.probe()) { - DBG(" match !\n"); - break; - } - DBG("\n"); + if (ppc_md.probe && !ppc_md.probe()) + continue; + DBG(" %s match !\n", machine_id->name); + break; } /* What can we do if we didn't find ? */ if (machine_id >= &__machine_desc_end) { diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index b2e0d3ce4261..246201d0d879 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -480,7 +480,7 @@ void early_setup_secondary(void) #endif /* CONFIG_SMP */ -void panic_smp_self_stop(void) +void __noreturn panic_smp_self_stop(void) { hard_irq_disable(); spin_begin(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 6b90f10a6c81..265801a3e94c 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,8 @@ #include <asm/kup.h> #include <asm/fadump.h> +#include <trace/events/ipi.h> + #ifdef DEBUG #include <asm/udbg.h> #define DBG(fmt...) udbg_printf(fmt) @@ -364,12 +366,12 @@ static inline void do_message_pass(int cpu, int msg) #endif } -void smp_send_reschedule(int cpu) +void arch_smp_send_reschedule(int cpu) { if (likely(smp_ops)) do_message_pass(cpu, PPC_MSG_RESCHEDULE); } -EXPORT_SYMBOL_GPL(smp_send_reschedule); +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); void arch_send_call_function_single_ipi(int cpu) { @@ -1611,7 +1613,7 @@ void start_secondary(void *unused) if (IS_ENABLED(CONFIG_PPC32)) setup_kup(); - mmgrab(&init_mm); + mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); @@ -1752,7 +1754,7 @@ void __cpu_die(unsigned int cpu) smp_ops->cpu_die(cpu); } -void arch_cpu_idle_dead(void) +void __noreturn arch_cpu_idle_dead(void) { /* * Disable on the down path. 
This will be re-enabled by diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index ef9a61718940..0f39a6b84132 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -217,13 +217,18 @@ static DEVICE_ATTR(dscr_default, 0600, static void __init sysfs_create_dscr_default(void) { if (cpu_has_feature(CPU_FTR_DSCR)) { + struct device *dev_root; int cpu; dscr_default = spr_default_dscr; for_each_possible_cpu(cpu) paca_ptrs[cpu]->dscr_default = dscr_default; - device_create_file(cpu_subsys.dev_root, &dev_attr_dscr_default); + dev_root = bus_get_dev_root(&cpu_subsys); + if (dev_root) { + device_create_file(dev_root, &dev_attr_dscr_default); + put_device(dev_root); + } } } #endif /* CONFIG_PPC64 */ @@ -746,7 +751,12 @@ static DEVICE_ATTR(svm, 0444, show_svm, NULL); static void __init create_svm_file(void) { - device_create_file(cpu_subsys.dev_root, &dev_attr_svm); + struct device *dev_root = bus_get_dev_root(&cpu_subsys); + + if (dev_root) { + device_create_file(dev_root, &dev_attr_svm); + put_device(dev_root); + } } #else static void __init create_svm_file(void) diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 9d8665910350..df20cf201f74 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -887,7 +887,11 @@ void __init time_init(void) unsigned shift; /* Normal PowerPC with timebase register */ - ppc_md.calibrate_decr(); + if (ppc_md.calibrate_decr) + ppc_md.calibrate_decr(); + else + generic_calibrate_decr(); + printk(KERN_DEBUG "time_init: decrementer frequency = %lu.%.6lu MHz\n", ppc_tb_freq / 1000000, ppc_tb_freq % 1000000); printk(KERN_DEBUG "time_init: processor frequency = %lu.%.6lu MHz\n", diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7b85c3b460a3..a47f30373423 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -194,6 +194,8 @@ __ftrace_make_nop(struct module *mod, * get corrupted. * * Use a b +8 to jump over the load. 
+ * XXX: could make PCREL depend on MPROFILE_KERNEL + * XXX: check PCREL && MPROFILE_KERNEL calling sequence */ if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) pop = ppc_inst(PPC_RAW_NOP()); @@ -725,6 +727,15 @@ int __init ftrace_dyn_arch_init(void) { int i; unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; +#ifdef CONFIG_PPC_KERNEL_PCREL + u32 stub_insns[] = { + /* pla r12,addr */ + PPC_PREFIX_MLS | __PPC_PRFX_R(1), + PPC_INST_PADDI | ___PPC_RT(_R12), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() + }; +#else u32 stub_insns[] = { PPC_RAW_LD(_R12, _R13, PACATOC), PPC_RAW_ADDIS(_R12, _R12, 0), @@ -732,6 +743,8 @@ int __init ftrace_dyn_arch_init(void) PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR() }; +#endif + unsigned long addr; long reladdr; @@ -740,19 +753,36 @@ int __init ftrace_dyn_arch_init(void) else addr = ppc_global_function_entry((void *)ftrace_caller); - reladdr = addr - kernel_toc_addr(); + if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + for (i = 0; i < 2; i++) { + reladdr = addr - (unsigned long)tramp[i]; - if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { - pr_err("Address of %ps out of range of kernel_toc.\n", + if (reladdr >= (long)SZ_8G || reladdr < -(long)SZ_8G) { + pr_err("Address of %ps out of range of pcrel address.\n", + (void *)addr); + return -1; + } + + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][0] |= IMM_H18(reladdr); + tramp[i][1] |= IMM_L(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + } else { + reladdr = addr - kernel_toc_addr(); + + if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", (void *)addr); - return -1; - } + return -1; + } - for (i = 0; i < 2; i++) { - memcpy(tramp[i], stub_insns, sizeof(stub_insns)); - tramp[i][1] |= PPC_HA(reladdr); - tramp[i][2] |= PPC_LO(reladdr); - add_ftrace_tramp((unsigned long)tramp[i]); + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } } return 0; diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 66f723f53be2..4c3f34485f08 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -2,7 +2,7 @@ # List of files in the vdso, has to be asm only for now -ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24 +# Include the generic Makefile to check the built vdso. 
 include $(srctree)/lib/vdso/Makefile
 
 obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S
index 0c4ecc8fec5a..48fc6658053a 100644
--- a/arch/powerpc/kernel/vdso/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso/gettimeofday.S
@@ -38,7 +38,11 @@
 	.else
 	addi	r4, r5, VDSO_DATA_OFFSET
 	.endif
-	bl	DOTSYM(\funct)
+#ifdef __powerpc64__
+	bl	CFUNC(DOTSYM(\funct))
+#else
+	bl	\funct
+#endif
 	PPC_LL	r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1)
 #ifdef __powerpc64__
 	PPC_LL	r2, PPC_MIN_STKFRM + STK_GOT(r1)
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index ffe5d90abe17..fcc0ad6d9c7b 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -177,10 +177,16 @@ fpone:
 fphalf:
 	.quad	0x3fe0000000000000	/* 0.5 */
 
+#ifdef CONFIG_PPC_KERNEL_PCREL
+#define LDCONST(fr, name)		\
+	pla	r11,name@pcrel;		\
+	lfd	fr,0(r11)
+#else
 #define LDCONST(fr, name)		\
 	addis	r11,r2,name@toc@ha;	\
 	lfd	fr,name@toc@l(r11)
 #endif
+#endif
 
 .text
 /*
  * Internal routine to enable floating point and set FPSCR to 0.
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index ee86753e444e..13614f0b269c 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -169,12 +169,18 @@ SECTIONS
 	}
 #else /* CONFIG_PPC32 */
+#ifndef CONFIG_PPC_KERNEL_PCREL
 	.toc1 : AT(ADDR(.toc1) - LOAD_OFFSET) {
 		*(.toc1)
 	}
+#endif
 
 	.got : AT(ADDR(.got) - LOAD_OFFSET) ALIGN(256) {
+#ifdef CONFIG_PPC_KERNEL_PCREL
+		*(.got)
+#else
 		*(.got .toc)
+#endif
 	}
 
 	SOFT_MASK_TABLE(8)
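Taken together, the module_64.c hunks above amount to a small private GOT: get_got_size() counts the distinct relocations that need GOT routing and sizes .mygot accordingly, then got_for_addr() hands out one zero-initialised 64-bit slot per unique target address, returning the existing slot on repeat lookups. A self-contained sketch of that allocation discipline (the fixed capacity and names here are hypothetical; the kernel derives its bound from the section size):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* One slot per unique target address, zero-initialised like the loaded section. */
struct got_entry { uint64_t addr; };

#define NUM_GOT 8	/* hypothetical capacity; the kernel computes this from sh_size */

static struct got_entry got[NUM_GOT];

/* Return the slot for addr, claiming the first free one; NULL when full
 * (the kernel WARNs and fails the relocation at that point instead). */
static struct got_entry *got_slot(uint64_t addr)
{
	size_t i;

	for (i = 0; i < NUM_GOT && got[i].addr; i++) {
		if (got[i].addr == addr)
			return &got[i];	/* repeat lookup: reuse the slot */
	}
	if (i == NUM_GOT)
		return NULL;
	got[i].addr = addr;		/* first lookup: claim the slot */
	return &got[i];
}

int main(void)
{
	/* Two lookups of the same address share one slot: prints 0 1 0. */
	printf("%td %td %td\n", got_slot(0x1000) - got, got_slot(0x2000) - got,
	       got_slot(0x1000) - got);
	return 0;
}

Because each unique address was counted once when the section was sized, the append-only scan can never legitimately run past the end; the WARN_ON in the kernel version guards against a miscount.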