mirror of
https://github.com/torvalds/linux.git
synced 2024-11-01 13:03:25 +01:00
ea49cdb26e
The assembly around swap_pages() in relocate_kernel() takes some time to follow, because it is easy to lose track of which register holds what as the instruction sequence grows long. Add a couple of comments to clarify the code around swap_pages() and improve readability. Signed-off-by: Kai Huang <kai.huang@intel.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Link: https://lore.kernel.org/all/8b52b0b8513a34b2a02fb4abb05c6700c2821475.1724573384.git.kai.huang@intel.com
322 lines
7.1 KiB
ArmAsm
322 lines
7.1 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* relocate_kernel.S - put the kernel image in place to boot
|
|
* Copyright (C) 2002-2005 Eric Biederman <ebiederm@xmission.com>
|
|
*/
|
|
|
|
#include <linux/linkage.h>
|
|
#include <linux/stringify.h>
|
|
#include <asm/alternative.h>
|
|
#include <asm/page_types.h>
|
|
#include <asm/kexec.h>
|
|
#include <asm/processor-flags.h>
|
|
#include <asm/pgtable_types.h>
|
|
#include <asm/nospec-branch.h>
|
|
#include <asm/unwind_hints.h>
|
|
|
|
/*
|
|
* Must be relocatable PIC code callable as a C function, in particular
|
|
* there must be a plain RET and not jump to return thunk.
|
|
*/
|
|
|
|
#define PTR(x) (x << 3)
|
|
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
|
|
/*
|
|
* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
|
|
* ~ control_page + PAGE_SIZE are used as data storage and stack for
|
|
* jumping back
|
|
*/
|
|
#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
|
|
|
|
/* Minimal CPU state */
|
|
#define RSP DATA(0x0)
|
|
#define CR0 DATA(0x8)
|
|
#define CR3 DATA(0x10)
|
|
#define CR4 DATA(0x18)
|
|
|
|
/* other data */
|
|
#define CP_PA_TABLE_PAGE DATA(0x20)
|
|
#define CP_PA_SWAP_PAGE DATA(0x28)
|
|
#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
|
|
|
|
.text
|
|
.align PAGE_SIZE
|
|
.code64
|
|
SYM_CODE_START_NOALIGN(relocate_range)
|
|
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * Entry point; arguments arrive in registers:
	 *   %rdi  indirection_page
	 *   %rsi  page_list
	 *   %rdx  start address
	 *   %rcx  preserve_context
	 *   %r8   host_mem_enc_active
	 *
	 * Registers handed on to identity_mapped:
	 *   %rdi, %rdx, %rcx  unchanged from entry
	 *   %r9   page_list[PA_TABLE_PAGE]
	 *   %r10  page_list[PA_SWAP_PAGE]
	 *   %r12  copy of the incoming %r8 (SME active flag)
	 *   %r13  CR4 as it was on entry
	 */

	/*
	 * Stash the callee-saved registers and the flags on the current
	 * stack.  virtual_mapped pops them in the reverse order when
	 * jumping back.
	 */
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15
	pushfq

	/*
	 * %r11 = page_list[VA_CONTROL_PAGE].  Record the stack pointer and
	 * the control registers in the data area of that page.
	 */
	movq	PTR(VA_CONTROL_PAGE)(%rsi), %r11
	movq	%rsp, RSP(%r11)
	movq	%cr0, %rax
	movq	%rax, CR0(%r11)
	movq	%cr3, %rax
	movq	%rax, CR3(%r11)
	movq	%cr4, %rax
	/* Keep CR4 in %r13 too: needed to pick the right paging mode later. */
	movq	%rax, %r13
	movq	%rax, CR4(%r11)

	/* Load an all-zero flags image; this also disables interrupts. */
	pushq	$0
	popfq

	/* %r8 is about to be reused, so move the SME active flag to %r12. */
	movq	%r8, %r12

	/*
	 * Fetch the physical addresses of the page table, the swap page and
	 * the control page now; this is impossible after the page table
	 * switch below.
	 */
	movq	PTR(PA_TABLE_PAGE)(%rsi), %r9
	movq	PTR(PA_SWAP_PAGE)(%rsi), %r10
	movq	PTR(PA_CONTROL_PAGE)(%rsi), %r8

	/* Information needed again when jumping back. */
	movq	%rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
	movq	%r10, CP_PA_SWAP_PAGE(%r11)
	movq	%r9, CP_PA_TABLE_PAGE(%r11)

	/* From here on run on the identity mapped page tables. */
	movq	%r9, %cr3

	/* New stack: grows down from the end of the physical control page. */
	leaq	PAGE_SIZE(%r8), %rsp

	/*
	 * "Return" into identity_mapped inside the control page:
	 * target = physical control page + offset of identity_mapped.
	 */
	addq	$(identity_mapped - relocate_kernel), %r8
	pushq	%r8
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(relocate_kernel)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * Entered by RET from relocate_kernel, running from the control page
	 * on the identity mapped page tables.  Live registers on entry:
	 *   %rdi  indirection_page (consumed by swap_pages)
	 *   %rdx  start address
	 *   %rcx  preserve_context
	 *   %r9   physical address of the page table
	 *   %r10  physical address of the swap page
	 *   %r12  SME active flag
	 *   %r13  original CR4 value
	 */

	/* Return address of 0, used when not preserving context ... */
	pushq	$0
	/* ... and on top of it the start address to jump to. */
	pushq	%rdx

	/*
	 * X86_CR4_CET has to be clear (if it was set) before CR0_WP can be
	 * cleared below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Put cr0 into a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Put cr4 into a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * %r13 holds the original CR4 value read in relocate_kernel();
	 * PAE is always set in that value.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) */
	movq	%r9, %cr3

	/*
	 * With SME active, stale encrypted cache lines could conflict with
	 * the now unencrypted memory used by kexec, so flush the caches
	 * before the kernel is copied.
	 */
	testq	%r12, %r12
	jz	.Lno_wbinvd
	wbinvd
.Lno_wbinvd:

	/* swap_pages clobbers %rcx: keep preserve_context in %r11. */
	movq	%rcx, %r11
	call	swap_pages

	/*
	 * A serializing instruction is required here to be certain of
	 * avoiding problems with self-modifying code.  Reloading %cr3
	 * (which flushes the TLB) is handy and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11
	jnz	.Lpreserve_context

	/*
	 * Not preserving context: set all of the registers to known values
	 * (leave %rsp alone), then RET to the start address pushed at
	 * entry.  The 0 pushed before it is left on top of the stack.
	 */
	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl	%ecx, %ecx
	xorl	%edx, %edx
	xorl	%esi, %esi
	xorl	%edi, %edi
	xorl	%ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lpreserve_context:
	/*
	 * Preserving context: take the start address back off the stack and
	 * call it with the stack placed at the end of the swap page.
	 */
	popq	%rdx
	leaq	PAGE_SIZE(%r10), %rsp
	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/* Back from the peer system: pick up its re-entry point. */
	movq	(%rsp), %rbp

	/*
	 * %r8 = address this code is currently running at; reload the values
	 * that relocate_kernel saved in the data area.
	 */
	leaq	relocate_kernel(%rip), %r8
	movq	CP_PA_TABLE_PAGE(%r8), %rax
	movq	CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
	movq	CP_PA_SWAP_PAGE(%r8), %r10
	movq	%rax, %cr3
	leaq	PAGE_SIZE(%r8), %rsp
	call	swap_pages

	/*
	 * Continue at virtual_mapped via its absolute address rather than a
	 * %rip-relative one.
	 */
	movq	$virtual_mapped, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)
|
|
|
|
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	/*
	 * Final stage of jumping back.  On entry:
	 *   %r8   base for the RSP/CR0/CR3/CR4 data slots, as set up by
	 *         identity_mapped
	 *   %rbp  re-entry point of the peer system
	 *
	 * Every slot is read before %cr3 is rewritten; %r8 is not used as
	 * an address afterwards.
	 */
	movq	CR4(%r8), %rax
	movq	%rax, %cr4
	movq	RSP(%r8), %rsp		/* stack saved by relocate_kernel */
	movq	CR3(%r8), %rax
	movq	CR0(%r8), %r8		/* last read through %r8 */
	movq	%rax, %cr3
	movq	%r8, %cr0

	/* Hand the peer's re-entry point back in %rax. */
	movq	%rbp, %rax

	/* Undo the pushes done on entry to relocate_kernel, in reverse. */
	popfq
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)
|
|
|
|
/* Do the copies */
|
|
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
|
|
UNWIND_HINT_END_OF_STACK
|
|
movq %rdi, %rcx /* Put the indirection_page in %rcx */
|
|
xorl %edi, %edi
|
|
xorl %esi, %esi
|
|
jmp 1f
|
|
|
|
0: /* top, read another word for the indirection page */
|
|
|
|
movq (%rbx), %rcx
|
|
addq $8, %rbx
|
|
1:
|
|
testb $0x1, %cl /* is it a destination page? */
|
|
jz 2f
|
|
movq %rcx, %rdi
|
|
andq $0xfffffffffffff000, %rdi
|
|
jmp 0b
|
|
2:
|
|
testb $0x2, %cl /* is it an indirection page? */
|
|
jz 2f
|
|
movq %rcx, %rbx
|
|
andq $0xfffffffffffff000, %rbx
|
|
jmp 0b
|
|
2:
|
|
testb $0x4, %cl /* is it the done indicator? */
|
|
jz 2f
|
|
jmp 3f
|
|
2:
|
|
testb $0x8, %cl /* is it the source indicator? */
|
|
jz 0b /* Ignore it otherwise */
|
|
movq %rcx, %rsi /* For ever source page do a copy */
|
|
andq $0xfffffffffffff000, %rsi
|
|
|
|
movq %rdi, %rdx /* Save destination page to %rdx */
|
|
movq %rsi, %rax /* Save source page to %rax */
|
|
|
|
/* copy source page to swap page */
|
|
movq %r10, %rdi
|
|
movl $512, %ecx
|
|
rep ; movsq
|
|
|
|
/* copy destination page to source page */
|
|
movq %rax, %rdi
|
|
movq %rdx, %rsi
|
|
movl $512, %ecx
|
|
rep ; movsq
|
|
|
|
/* copy swap page to destination page */
|
|
movq %rdx, %rdi
|
|
movq %r10, %rsi
|
|
movl $512, %ecx
|
|
rep ; movsq
|
|
|
|
lea PAGE_SIZE(%rax), %rsi
|
|
jmp 0b
|
|
3:
|
|
ANNOTATE_UNRET_SAFE
|
|
ret
|
|
int3
|
|
SYM_CODE_END(swap_pages)
|
|
|
|
.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
|
|
SYM_CODE_END(relocate_range);
|