# HG changeset patch
# User Ian Campbell <ian.campbell@citrix.com>
# Date 1329153968 0
# Node ID 63e88a26e1ef58c8e5a2b30a003ab7c3bc9c6b54
# Parent 7fd8f10cfd3eaf9f0982eb6fd49334a1e229ba98
arm: fixup hard tabs
Unfortunately the tool I was using to apply patches mangles hard tabs. This
patch corrects this in the affected files (which are fortunately limited to
.S files and files imported from Linux).

"git diff" and "git diff -b" against Stefano's v6 branch now produce the same
output -- i.e. they contain only the intervening development.
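As a sanity check, a whitespace-only fixup like this can be verified with
something along these lines (the "stefano-v6" branch name is illustrative):

    git diff stefano-v6 -- xen/arch/arm      # full diff
    git diff -b stefano-v6 -- xen/arch/arm   # -b ignores whitespace changes

If the two outputs match, no whitespace-only differences remain against the
reference branch.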
Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/dtb.S
--- a/xen/arch/arm/dtb.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/dtb.S Mon Feb 13 17:26:08 2012 +0000
@@ -1,2 +1,2 @@
- .section .dtb,#alloc
- .incbin CONFIG_DTB_FILE
+ .section .dtb,#alloc
+ .incbin CONFIG_DTB_FILE
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/dummy.S
--- a/xen/arch/arm/dummy.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/dummy.S Mon Feb 13 17:26:08 2012 +0000
@@ -1,13 +1,13 @@
/* Nothing is mapped at 1G, for the moment */
#define DUMMY(x) \
- .globl x; \
-x: .word 0xe7f000f0
-/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */
+ .globl x; \
+x: .word 0xe7f000f0
+/* x: mov r0, #0x40000000 ; str r0, [r0]; b x */
#define NOP(x) \
- .globl x; \
-x: mov pc, lr
-
+ .globl x; \
+x: mov pc, lr
+
DUMMY(alloc_pirq_struct);
DUMMY(alloc_vcpu_guest_context);
DUMMY(arch_do_domctl);
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/entry.S
--- a/xen/arch/arm/entry.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/entry.S Mon Feb 13 17:26:08 2012 +0000
@@ -1,69 +1,69 @@
#include <xen/config.h>
#include <asm/asm_defns.h>
-#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg]
-#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
+#define SAVE_ONE_BANKED(reg) mrs r11, reg; str r11, [sp, #UREGS_##reg]
+#define RESTORE_ONE_BANKED(reg) ldr r11, [sp, #UREGS_##reg]; msr reg, r11
#define SAVE_BANKED(mode) \
- SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode)
+ SAVE_ONE_BANKED(SP_##mode) ; SAVE_ONE_BANKED(LR_##mode) ; SAVE_ONE_BANKED(SPSR_##mode)
#define RESTORE_BANKED(mode) \
- RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
+ RESTORE_ONE_BANKED(SP_##mode) ; RESTORE_ONE_BANKED(LR_##mode) ; RESTORE_ONE_BANKED(SPSR_##mode)
-#define SAVE_ALL \
- sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \
- push {r0-r12}; /* Save R0-R12 */ \
- \
- mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \
- str r11, [sp, #UREGS_pc]; \
- \
- str lr, [sp, #UREGS_lr]; \
- \
- add r11, sp, #UREGS_kernel_sizeof+4; \
- str r11, [sp, #UREGS_sp]; \
- \
- mrs r11, SPSR_hyp; \
- str r11, [sp, #UREGS_cpsr]; \
- and r11, #PSR_MODE_MASK; \
- cmp r11, #PSR_MODE_HYP; \
- blne save_guest_regs
+#define SAVE_ALL \
+ sub sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */ \
+ push {r0-r12}; /* Save R0-R12 */ \
+ \
+ mrs r11, ELR_hyp; /* ELR_hyp is return address. */ \
+ str r11, [sp, #UREGS_pc]; \
+ \
+ str lr, [sp, #UREGS_lr]; \
+ \
+ add r11, sp, #UREGS_kernel_sizeof+4; \
+ str r11, [sp, #UREGS_sp]; \
+ \
+ mrs r11, SPSR_hyp; \
+ str r11, [sp, #UREGS_cpsr]; \
+ and r11, #PSR_MODE_MASK; \
+ cmp r11, #PSR_MODE_HYP; \
+ blne save_guest_regs
save_guest_regs:
- ldr r11, [sp, #UREGS_lr]
- str r11, [sp, #UREGS_LR_usr]
- ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */
- str r11, [sp, #UREGS_sp]
- SAVE_ONE_BANKED(SP_usr)
- SAVE_BANKED(svc)
- SAVE_BANKED(abt)
- SAVE_BANKED(und)
- SAVE_BANKED(irq)
- SAVE_BANKED(fiq)
- SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
- SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
- mov pc, lr
+ ldr r11, [sp, #UREGS_lr]
+ str r11, [sp, #UREGS_LR_usr]
+ ldr r11, =0xffffffff /* Clobber SP which is only valid for hypervisor frames. */
+ str r11, [sp, #UREGS_sp]
+ SAVE_ONE_BANKED(SP_usr)
+ SAVE_BANKED(svc)
+ SAVE_BANKED(abt)
+ SAVE_BANKED(und)
+ SAVE_BANKED(irq)
+ SAVE_BANKED(fiq)
+ SAVE_ONE_BANKED(R8_fiq); SAVE_ONE_BANKED(R9_fiq); SAVE_ONE_BANKED(R10_fiq)
+ SAVE_ONE_BANKED(R11_fiq); SAVE_ONE_BANKED(R12_fiq);
+ mov pc, lr
-#define DEFINE_TRAP_ENTRY(trap) \
- ALIGN; \
-trap_##trap: \
- SAVE_ALL; \
- adr lr, return_from_trap; \
- mov r0, sp; \
- mov r11, sp; \
- bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \
- b do_trap_##trap
+#define DEFINE_TRAP_ENTRY(trap) \
+ ALIGN; \
+trap_##trap: \
+ SAVE_ALL; \
+ adr lr, return_from_trap; \
+ mov r0, sp; \
+ mov r11, sp; \
+ bic sp, #7; /* Align the stack pointer (noop on guest trap) */ \
+ b do_trap_##trap
.globl hyp_traps_vector
- .align 5
+ .align 5
hyp_traps_vector:
- .word 0 /* 0x00 - Reset */
- b trap_undefined_instruction /* 0x04 - Undefined Instruction */
- b trap_supervisor_call /* 0x08 - Supervisor Call */
- b trap_prefetch_abort /* 0x0c - Prefetch Abort */
- b trap_data_abort /* 0x10 - Data Abort */
- b trap_hypervisor /* 0x14 - Hypervisor */
- b trap_irq /* 0x18 - IRQ */
- b trap_fiq /* 0x1c - FIQ */
+ .word 0 /* 0x00 - Reset */
+ b trap_undefined_instruction /* 0x04 - Undefined Instruction */
+ b trap_supervisor_call /* 0x08 - Supervisor Call */
+ b trap_prefetch_abort /* 0x0c - Prefetch Abort */
+ b trap_data_abort /* 0x10 - Data Abort */
+ b trap_hypervisor /* 0x14 - Hypervisor */
+ b trap_irq /* 0x18 - IRQ */
+ b trap_fiq /* 0x1c - FIQ */
DEFINE_TRAP_ENTRY(undefined_instruction)
DEFINE_TRAP_ENTRY(supervisor_call)
@@ -74,34 +74,34 @@ DEFINE_TRAP_ENTRY(irq)
DEFINE_TRAP_ENTRY(fiq)
ENTRY(return_from_trap)
- ldr r11, [sp, #UREGS_cpsr]
- and r11, #PSR_MODE_MASK
- cmp r11, #PSR_MODE_HYP
- beq return_to_hypervisor
+ ldr r11, [sp, #UREGS_cpsr]
+ and r11, #PSR_MODE_MASK
+ cmp r11, #PSR_MODE_HYP
+ beq return_to_hypervisor
ENTRY(return_to_guest)
- mov r11, sp
- bic sp, #7 /* Align the stack pointer */
- bl leave_hypervisor_tail
- ldr r11, [sp, #UREGS_pc]
- msr ELR_hyp, r11
- ldr r11, [sp, #UREGS_cpsr]
- msr SPSR_hyp, r11
- RESTORE_ONE_BANKED(SP_usr)
- RESTORE_BANKED(svc)
- RESTORE_BANKED(abt)
- RESTORE_BANKED(und)
- RESTORE_BANKED(irq)
- RESTORE_BANKED(fiq)
- RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq)
- RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
- ldr lr, [sp, #UREGS_LR_usr]
- pop {r0-r12}
- add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */
- eret
+ mov r11, sp
+ bic sp, #7 /* Align the stack pointer */
+ bl leave_hypervisor_tail
+ ldr r11, [sp, #UREGS_pc]
+ msr ELR_hyp, r11
+ ldr r11, [sp, #UREGS_cpsr]
+ msr SPSR_hyp, r11
+ RESTORE_ONE_BANKED(SP_usr)
+ RESTORE_BANKED(svc)
+ RESTORE_BANKED(abt)
+ RESTORE_BANKED(und)
+ RESTORE_BANKED(irq)
+ RESTORE_BANKED(fiq)
+ RESTORE_ONE_BANKED(R8_fiq); RESTORE_ONE_BANKED(R9_fiq); RESTORE_ONE_BANKED(R10_fiq)
+ RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
+ ldr lr, [sp, #UREGS_LR_usr]
+ pop {r0-r12}
+ add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */
+ eret
ENTRY(return_to_hypervisor)
- ldr lr, [sp, #UREGS_lr]
- pop {r0-r12}
- add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */
- eret
+ ldr lr, [sp, #UREGS_lr]
+ pop {r0-r12}
+ add sp, #(UREGS_R8_fiq - UREGS_sp); /* SP, LR, SPSR, PC */
+ eret
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/head.S
--- a/xen/arch/arm/head.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/head.S Mon Feb 13 17:26:08 2012 +0000
@@ -26,281 +26,281 @@
* Clobbers r0-r3. */
#ifdef EARLY_UART_ADDRESS
#define PRINT(_s) \
- adr r0, 98f ; \
- bl puts ; \
- b 99f ; \
-98: .asciz _s ; \
- .align 2 ; \
+ adr r0, 98f ; \
+ bl puts ; \
+ b 99f ; \
+98: .asciz _s ; \
+ .align 2 ; \
99:
#else
#define PRINT(s)
#endif
- .arm
+ .arm
- /* This must be the very first address in the loaded image.
- * It should be linked at XEN_VIRT_START, and loaded at any
- * 2MB-aligned address. All of text+data+bss must fit in 2MB,
- * or the initial pagetable code below will need adjustment. */
- .global start
+ /* This must be the very first address in the loaded image.
+ * It should be linked at XEN_VIRT_START, and loaded at any
+ * 2MB-aligned address. All of text+data+bss must fit in 2MB,
+ * or the initial pagetable code below will need adjustment. */
+ .global start
start:
- cpsid aif /* Disable all interrupts */
+ cpsid aif /* Disable all interrupts */
- /* Save the bootloader arguments in less-clobberable registers */
- mov r7, r1 /* r7 := ARM-linux machine type */
- mov r8, r2 /* r8 := ATAG base address */
+ /* Save the bootloader arguments in less-clobberable registers */
+ mov r7, r1 /* r7 := ARM-linux machine type */
+ mov r8, r2 /* r8 := ATAG base address */
- /* Find out where we are */
- ldr r0, =start
- adr r9, start /* r9 := paddr (start) */
- sub r10, r9, r0 /* r10 := phys-offset */
+ /* Find out where we are */
+ ldr r0, =start
+ adr r9, start /* r9 := paddr (start) */
+ sub r10, r9, r0 /* r10 := phys-offset */
- /* Using the DTB in the .dtb section? */
+ /* Using the DTB in the .dtb section? */
#ifdef CONFIG_DTB_FILE
- ldr r8, =_sdtb
- add r8, r10 /* r8 := paddr(DTB) */
+ ldr r8, =_sdtb
+ add r8, r10 /* r8 := paddr(DTB) */
#endif
#ifdef EARLY_UART_ADDRESS
- /* Say hello */
- ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */
- bl init_uart
+ /* Say hello */
+ ldr r11, =EARLY_UART_ADDRESS /* r11 := UART base address */
+ bl init_uart
#endif
- /* Check that this CPU has Hyp mode */
- mrc CP32(r0, ID_PFR1)
- and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */
- teq r0, #0x1000 /* Must == 0x1 or may be incompatible */
- beq 1f
- bl putn
- PRINT("- CPU doesn't support the virtualization extensions -\r\n")
- b fail
+ /* Check that this CPU has Hyp mode */
+ mrc CP32(r0, ID_PFR1)
+ and r0, r0, #0xf000 /* Bits 12-15 define virt extensions */
+ teq r0, #0x1000 /* Must == 0x1 or may be incompatible */
+ beq 1f
+ bl putn
+ PRINT("- CPU doesn't support the virtualization extensions -\r\n")
+ b fail
1:
- /* Check if we're already in it */
- mrs r0, cpsr
- and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */
- teq r0, #0x1a /* Hyp Mode? */
- bne 1f
- PRINT("- Started in Hyp mode -\r\n")
- b hyp
+ /* Check if we're already in it */
+ mrs r0, cpsr
+ and r0, r0, #0x1f /* Mode is in the low 5 bits of CPSR */
+ teq r0, #0x1a /* Hyp Mode? */
+ bne 1f
+ PRINT("- Started in Hyp mode -\r\n")
+ b hyp
1:
- /* Otherwise, it must have been Secure Supervisor mode */
- mrc CP32(r0, SCR)
- tst r0, #0x1 /* Not-Secure bit set? */
- beq 1f
- PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
- b fail
+ /* Otherwise, it must have been Secure Supervisor mode */
+ mrc CP32(r0, SCR)
+ tst r0, #0x1 /* Not-Secure bit set? */
+ beq 1f
+ PRINT("- CPU is not in Hyp mode or Secure state -\r\n")
+ b fail
1:
- /* OK, we're in Secure state. */
- PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
+ /* OK, we're in Secure state. */
+ PRINT("- Started in Secure state -\r\n- Entering Hyp mode -\r\n")
- /* Dance into Hyp mode */
- cpsid aif, #0x16 /* Enter Monitor mode */
- mrc CP32(r0, SCR)
- orr r0, r0, #0x100 /* Set HCE */
- orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */
- bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */
- mcr CP32(r0, SCR)
- /* Ugly: the system timer's frequency register is only
- * programmable in Secure state. Since we don't know where its
- * memory-mapped control registers live, we can't find out the
- * right frequency. Use the VE model's default frequency here. */
- ldr r0, =0x5f5e100 /* 100 MHz */
- mcr CP32(r0, CNTFRQ)
- ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */
- mcr CP32(r0, NSACR)
- /* Continuing ugliness: Set up the GIC so NS state owns interrupts */
- mov r0, #GIC_BASE_ADDRESS
- add r0, r0, #GIC_DR_OFFSET
- mov r1, #0
- str r1, [r0] /* Disable delivery in the distributor */
- add r0, r0, #0x80 /* GICD_IGROUP0 */
- mov r2, #0xffffffff /* All interrupts to group 1 */
- str r2, [r0]
- str r2, [r0, #4]
- str r2, [r0, #8]
- /* Must drop priority mask below 0x80 before entering NS state */
- mov r0, #GIC_BASE_ADDRESS
- add r0, r0, #GIC_CR_OFFSET
- ldr r1, =0xff
- str r1, [r0, #0x4] /* -> GICC_PMR */
- /* Reset a few config registers */
- mov r0, #0
- mcr CP32(r0, FCSEIDR)
- mcr CP32(r0, CONTEXTIDR)
- /* FIXME: ought to reset some other NS control regs here */
- adr r1, 1f
- adr r0, hyp /* Store paddr (hyp entry point) */
- str r0, [r1] /* where we can use it for RFE */
- isb /* Ensure we see the stored target address */
- rfeia r1 /* Enter Hyp mode */
+ /* Dance into Hyp mode */
+ cpsid aif, #0x16 /* Enter Monitor mode */
+ mrc CP32(r0, SCR)
+ orr r0, r0, #0x100 /* Set HCE */
+ orr r0, r0, #0xb1 /* Set SCD, AW, FW and NS */
+ bic r0, r0, #0xe /* Clear EA, FIQ and IRQ */
+ mcr CP32(r0, SCR)
+ /* Ugly: the system timer's frequency register is only
+ * programmable in Secure state. Since we don't know where its
+ * memory-mapped control registers live, we can't find out the
+ * right frequency. Use the VE model's default frequency here. */
+ ldr r0, =0x5f5e100 /* 100 MHz */
+ mcr CP32(r0, CNTFRQ)
+ ldr r0, =0x40c00 /* SMP, c11, c10 in non-secure mode */
+ mcr CP32(r0, NSACR)
+ /* Continuing ugliness: Set up the GIC so NS state owns interrupts */
+ mov r0, #GIC_BASE_ADDRESS
+ add r0, r0, #GIC_DR_OFFSET
+ mov r1, #0
+ str r1, [r0] /* Disable delivery in the distributor */
+ add r0, r0, #0x80 /* GICD_IGROUP0 */
+ mov r2, #0xffffffff /* All interrupts to group 1 */
+ str r2, [r0]
+ str r2, [r0, #4]
+ str r2, [r0, #8]
+ /* Must drop priority mask below 0x80 before entering NS state */
+ mov r0, #GIC_BASE_ADDRESS
+ add r0, r0, #GIC_CR_OFFSET
+ ldr r1, =0xff
+ str r1, [r0, #0x4] /* -> GICC_PMR */
+ /* Reset a few config registers */
+ mov r0, #0
+ mcr CP32(r0, FCSEIDR)
+ mcr CP32(r0, CONTEXTIDR)
+ /* FIXME: ought to reset some other NS control regs here */
+ adr r1, 1f
+ adr r0, hyp /* Store paddr (hyp entry point) */
+ str r0, [r1] /* where we can use it for RFE */
+ isb /* Ensure we see the stored target address */
+ rfeia r1 /* Enter Hyp mode */
-1: .word 0 /* PC to enter Hyp mode at */
- .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */
+1: .word 0 /* PC to enter Hyp mode at */
+ .word 0x000001da /* CPSR: LE, Abort/IRQ/FIQ off, Hyp */
hyp:
- PRINT("- Setting up control registers -\r\n")
+ PRINT("- Setting up control registers -\r\n")
- /* Set up memory attribute type tables */
- ldr r0, =MAIR0VAL
- ldr r1, =MAIR1VAL
- mcr CP32(r0, MAIR0)
- mcr CP32(r1, MAIR1)
- mcr CP32(r0, HMAIR0)
- mcr CP32(r1, HMAIR1)
+ /* Set up memory attribute type tables */
+ ldr r0, =MAIR0VAL
+ ldr r1, =MAIR1VAL
+ mcr CP32(r0, MAIR0)
+ mcr CP32(r1, MAIR1)
+ mcr CP32(r0, HMAIR0)
+ mcr CP32(r1, HMAIR1)
- /* Set up the HTCR:
- * PT walks use Outer-Shareable accesses,
- * PT walks are write-back, no-write-allocate in both cache levels,
- * Full 32-bit address space goes through this table. */
- ldr r0, =0x80002500
- mcr CP32(r0, HTCR)
+ /* Set up the HTCR:
+ * PT walks use Outer-Shareable accesses,
+ * PT walks are write-back, no-write-allocate in both cache levels,
+ * Full 32-bit address space goes through this table. */
+ ldr r0, =0x80002500
+ mcr CP32(r0, HTCR)
- /* Set up the HSCTLR:
- * Exceptions in LE ARM,
- * Low-latency IRQs disabled,
- * Write-implies-XN disabled (for now),
- * I-cache and d-cache enabled,
- * Alignment checking enabled,
- * MMU translation disabled (for now). */
- ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C)
- mcr CP32(r0, HSCTLR)
+ /* Set up the HSCTLR:
+ * Exceptions in LE ARM,
+ * Low-latency IRQs disabled,
+ * Write-implies-XN disabled (for now),
+ * I-cache and d-cache enabled,
+ * Alignment checking enabled,
+ * MMU translation disabled (for now). */
+ ldr r0, =(HSCTLR_BASE|SCTLR_A|SCTLR_C)
+ mcr CP32(r0, HSCTLR)
- /* Write Xen's PT's paddr into the HTTBR */
- ldr r4, =xen_pgtable
- add r4, r4, r10 /* r4 := paddr (xen_pagetable) */
- mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */
- mcrr CP64(r4, r5, HTTBR)
+ /* Write Xen's PT's paddr into the HTTBR */
+ ldr r4, =xen_pgtable
+ add r4, r4, r10 /* r4 := paddr (xen_pagetable) */
+ mov r5, #0 /* r4:r5 is paddr (xen_pagetable) */
+ mcrr CP64(r4, r5, HTTBR)
- /* Build the baseline idle pagetable's first-level entries */
- ldr r1, =xen_second
- add r1, r1, r10 /* r1 := paddr (xen_second) */
- mov r3, #0x0
- orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */
- orr r2, r2, #0x07f /* (+ rights for linear PT) */
- strd r2, r3, [r4, #0] /* Map it in slot 0 */
- add r2, r2, #0x1000
- strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */
- add r2, r2, #0x1000
- strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */
- add r2, r2, #0x1000
- strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */
+ /* Build the baseline idle pagetable's first-level entries */
+ ldr r1, =xen_second
+ add r1, r1, r10 /* r1 := paddr (xen_second) */
+ mov r3, #0x0
+ orr r2, r1, #0xe00 /* r2:r3 := table map of xen_second */
+ orr r2, r2, #0x07f /* (+ rights for linear PT) */
+ strd r2, r3, [r4, #0] /* Map it in slot 0 */
+ add r2, r2, #0x1000
+ strd r2, r3, [r4, #8] /* Map 2nd page in slot 1 */
+ add r2, r2, #0x1000
+ strd r2, r3, [r4, #16] /* Map 3rd page in slot 2 */
+ add r2, r2, #0x1000
+ strd r2, r3, [r4, #24] /* Map 4th page in slot 3 */
- /* Now set up the second-level entries */
- orr r2, r9, #0xe00
- orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */
- mov r4, r9, lsr #18 /* Slot for paddr(start) */
- strd r2, r3, [r1, r4] /* Map Xen there */
- ldr r4, =start
- lsr r4, #18 /* Slot for vaddr(start) */
- strd r2, r3, [r1, r4] /* Map Xen there too */
+ /* Now set up the second-level entries */
+ orr r2, r9, #0xe00
+ orr r2, r2, #0x07d /* r2:r3 := 2MB normal map of Xen */
+ mov r4, r9, lsr #18 /* Slot for paddr(start) */
+ strd r2, r3, [r1, r4] /* Map Xen there */
+ ldr r4, =start
+ lsr r4, #18 /* Slot for vaddr(start) */
+ strd r2, r3, [r1, r4] /* Map Xen there too */
#ifdef EARLY_UART_ADDRESS
- ldr r3, =(1<<(54-32)) /* NS for device mapping */
- lsr r2, r11, #21
- lsl r2, r2, #21 /* 2MB-aligned paddr of UART */
- orr r2, r2, #0xe00
- orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */
- add r4, r4, #8
- strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */
+ ldr r3, =(1<<(54-32)) /* NS for device mapping */
+ lsr r2, r11, #21
+ lsl r2, r2, #21 /* 2MB-aligned paddr of UART */
+ orr r2, r2, #0xe00
+ orr r2, r2, #0x071 /* r2:r3 := 2MB dev map including UART */
+ add r4, r4, #8
+ strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */
#else
- add r4, r4, #8 /* Skip over unused fixmap slot */
+ add r4, r4, #8 /* Skip over unused fixmap slot */
#endif
- mov r3, #0x0
- lsr r2, r8, #21
- lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */
- orr r2, r2, #0xf00
- orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. DTB */
- add r4, r4, #8
- strd r2, r3, [r1, r4] /* Map it in the early boot slot */
+ mov r3, #0x0
+ lsr r2, r8, #21
+ lsl r2, r2, #21 /* 2MB-aligned paddr of DTB */
+ orr r2, r2, #0xf00
+ orr r2, r2, #0x07d /* r2:r3 := 2MB RAM incl. DTB */
+ add r4, r4, #8
+ strd r2, r3, [r1, r4] /* Map it in the early boot slot */
- PRINT("- Turning on paging -\r\n")
+ PRINT("- Turning on paging -\r\n")
- ldr r1, =paging /* Explicit vaddr, not RIP-relative */
- mrc CP32(r0, HSCTLR)
- orr r0, r0, #0x1 /* Add in the MMU enable bit */
- dsb /* Flush PTE writes and finish reads */
- mcr CP32(r0, HSCTLR) /* now paging is enabled */
- isb /* Now, flush the icache */
- mov pc, r1 /* Get a proper vaddr into PC */
+ ldr r1, =paging /* Explicit vaddr, not RIP-relative */
+ mrc CP32(r0, HSCTLR)
+ orr r0, r0, #0x1 /* Add in the MMU enable bit */
+ dsb /* Flush PTE writes and finish reads */
+ mcr CP32(r0, HSCTLR) /* now paging is enabled */
+ isb /* Now, flush the icache */
+ mov pc, r1 /* Get a proper vaddr into PC */
paging:
#ifdef EARLY_UART_ADDRESS
- /* Recover the UART address in the new address space */
- lsl r11, #11
- lsr r11, #11 /* UART base's offset from 2MB base */
- adr r0, start
- add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */
- add r11, r11, r0 /* r11 := vaddr (UART base address) */
+ /* Recover the UART address in the new address space */
+ lsl r11, #11
+ lsr r11, #11 /* UART base's offset from 2MB base */
+ adr r0, start
+ add r0, r0, #0x200000 /* vaddr of the fixmap's 2MB slot */
+ add r11, r11, r0 /* r11 := vaddr (UART base address) */
#endif
- PRINT("- Entering C -\r\n")
+ PRINT("- Entering C -\r\n")
- ldr sp, =init_stack /* Supply a stack */
- add sp, #STACK_SIZE /* (which grows down from the top). */
- sub sp, #CPUINFO_sizeof /* Make room for CPU save record */
- mov r0, r10 /* Marshal args: - phys_offset */
- mov r1, r7 /* - machine type */
- mov r2, r8 /* - ATAG address */
- b start_xen /* and disappear into the land of C */
+ ldr sp, =init_stack /* Supply a stack */
+ add sp, #STACK_SIZE /* (which grows down from the top). */
+ sub sp, #CPUINFO_sizeof /* Make room for CPU save record */
+ mov r0, r10 /* Marshal args: - phys_offset */
+ mov r1, r7 /* - machine type */
+ mov r2, r8 /* - ATAG address */
+ b start_xen /* and disappear into the land of C */
/* Fail-stop
* r0: string explaining why */
-fail: PRINT("- Boot failed -\r\n")
-1: wfe
- b 1b
+fail: PRINT("- Boot failed -\r\n")
+1: wfe
+ b 1b
#ifdef EARLY_UART_ADDRESS
/* Bring up the UART. Specific to the PL011 UART.
* Clobbers r0-r2 */
init_uart:
- mov r1, #0x0
- str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */
- mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */
- str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */
- mov r1, #0x60 /* 8n1 */
- str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */
- ldr r1, =0x00000301 /* RXE | TXE | UARTEN */
- str r1, [r11, #0x30] /* -> UARTCR (Control Register) */
- adr r0, 1f
- b puts
-1: .asciz "- UART enabled -\r\n"
- .align 4
+ mov r1, #0x0
+ str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor fraction) */
+ mov r1, #0x4 /* 7.3728MHz / 0x4 == 16 * 115200 */
+ str r1, [r11, #0x24] /* -> UARTIBRD (Baud divisor integer) */
+ mov r1, #0x60 /* 8n1 */
+ str r1, [r11, #0x24] /* -> UARTLCR_H (Line control) */
+ ldr r1, =0x00000301 /* RXE | TXE | UARTEN */
+ str r1, [r11, #0x30] /* -> UARTCR (Control Register) */
+ adr r0, 1f
+ b puts
+1: .asciz "- UART enabled -\r\n"
+ .align 4
/* Print early debug messages. Specific to the PL011 UART.
* r0: Nul-terminated string to print.
* Clobbers r0-r2 */
puts:
- ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */
- tst r2, #0x8 /* Check BUSY bit */
- bne puts /* Wait for the UART to be ready */
- ldrb r2, [r0], #1 /* Load next char */
- teq r2, #0 /* Exit on nul*/
- moveq pc, lr
- str r2, [r11] /* -> UARTDR (Data Register) */
- b puts
+ ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */
+ tst r2, #0x8 /* Check BUSY bit */
+ bne puts /* Wait for the UART to be ready */
+ ldrb r2, [r0], #1 /* Load next char */
+ teq r2, #0 /* Exit on nul*/
+ moveq pc, lr
+ str r2, [r11] /* -> UARTDR (Data Register) */
+ b puts
/* Print a 32-bit number in hex. Specific to the PL011 UART.
* r0: Number to print.
* clobbers r0-r3 */
putn:
- adr r1, hex
- mov r3, #8
-1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */
- tst r2, #0x8 /* Check BUSY bit */
- bne 1b /* Wait for the UART to be ready */
- and r2, r0, #0xf0000000 /* Mask off the top nybble */
- ldrb r2, [r1, r2, lsr #28] /* Convert to a char */
- str r2, [r11] /* -> UARTDR (Data Register) */
- lsl r0, #4 /* Roll it through one nybble at a time */
- subs r3, r3, #1
- bne 1b
- adr r0, crlf /* Finish with a newline */
- b puts
+ adr r1, hex
+ mov r3, #8
+1: ldr r2, [r11, #0x18] /* <- UARTFR (Flag register) */
+ tst r2, #0x8 /* Check BUSY bit */
+ bne 1b /* Wait for the UART to be ready */
+ and r2, r0, #0xf0000000 /* Mask off the top nybble */
+ ldrb r2, [r1, r2, lsr #28] /* Convert to a char */
+ str r2, [r11] /* -> UARTDR (Data Register) */
+ lsl r0, #4 /* Roll it through one nybble at a time */
+ subs r3, r3, #1
+ bne 1b
+ adr r0, crlf /* Finish with a newline */
+ b puts
-crlf: .asciz "\r\n"
-hex: .ascii "0123456789abcdef"
- .align 2
+crlf: .asciz "\r\n"
+hex: .ascii "0123456789abcdef"
+ .align 2
#else /* EARLY_UART_ADDRESS */
@@ -308,6 +308,6 @@ init_uart:
.global early_puts
early_puts:
puts:
-putn: mov pc, lr
+putn: mov pc, lr
#endif /* EARLY_UART_ADDRESS */
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/bitops.h
--- a/xen/arch/arm/lib/bitops.h Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/bitops.h Mon Feb 13 17:26:08 2012 +0000
@@ -1,61 +1,61 @@
#include <xen/config.h>
#if __LINUX_ARM_ARCH__ >= 6
- .macro bitop, instr
- ands ip, r1, #3
- strneb r1, [ip] @ assert word-aligned
- mov r2, #1
- and r3, r0, #31 @ Get bit offset
- mov r0, r0, lsr #5
- add r1, r1, r0, lsl #2 @ Get word offset
- mov r3, r2, lsl r3
-1: ldrex r2, [r1]
- \instr r2, r2, r3
- strex r0, r2, [r1]
- cmp r0, #0
- bne 1b
- bx lr
- .endm
+ .macro bitop, instr
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ mov r2, #1
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+ mov r3, r2, lsl r3
+1: ldrex r2, [r1]
+ \instr r2, r2, r3
+ strex r0, r2, [r1]
+ cmp r0, #0
+ bne 1b
+ bx lr
+ .endm
- .macro testop, instr, store
- ands ip, r1, #3
- strneb r1, [ip] @ assert word-aligned
- mov r2, #1
- and r3, r0, #31 @ Get bit offset
- mov r0, r0, lsr #5
- add r1, r1, r0, lsl #2 @ Get word offset
- mov r3, r2, lsl r3 @ create mask
- smp_dmb
-1: ldrex r2, [r1]
- ands r0, r2, r3 @ save old value of bit
- \instr r2, r2, r3 @ toggle bit
- strex ip, r2, [r1]
- cmp ip, #0
- bne 1b
- smp_dmb
- cmp r0, #0
- movne r0, #1
-2: bx lr
- .endm
+ .macro testop, instr, store
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ mov r2, #1
+ and r3, r0, #31 @ Get bit offset
+ mov r0, r0, lsr #5
+ add r1, r1, r0, lsl #2 @ Get word offset
+ mov r3, r2, lsl r3 @ create mask
+ smp_dmb
+1: ldrex r2, [r1]
+ ands r0, r2, r3 @ save old value of bit
+ \instr r2, r2, r3 @ toggle bit
+ strex ip, r2, [r1]
+ cmp ip, #0
+ bne 1b
+ smp_dmb
+ cmp r0, #0
+ movne r0, #1
+2: bx lr
+ .endm
#else
- .macro bitop, name, instr
-ENTRY( \name )
-UNWIND( .fnstart )
- ands ip, r1, #3
- strneb r1, [ip] @ assert word-aligned
- and r2, r0, #31
- mov r0, r0, lsr #5
- mov r3, #1
- mov r3, r3, lsl r2
- save_and_disable_irqs ip
- ldr r2, [r1, r0, lsl #2]
- \instr r2, r2, r3
- str r2, [r1, r0, lsl #2]
- restore_irqs ip
- mov pc, lr
-UNWIND( .fnend )
-ENDPROC(\name )
- .endm
+ .macro bitop, name, instr
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r2, r0, #31
+ mov r0, r0, lsr #5
+ mov r3, #1
+ mov r3, r3, lsl r2
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]
+ \instr r2, r2, r3
+ str r2, [r1, r0, lsl #2]
+ restore_irqs ip
+ mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
/**
* testop - implement a test_and_xxx_bit operation.
@@ -65,23 +65,23 @@ ENDPROC(\name )
* Note: we can trivially conditionalise the store instruction
* to avoid dirtying the data cache.
*/
- .macro testop, name, instr, store
-ENTRY( \name )
-UNWIND( .fnstart )
- ands ip, r1, #3
- strneb r1, [ip] @ assert word-aligned
- and r3, r0, #31
- mov r0, r0, lsr #5
- save_and_disable_irqs ip
- ldr r2, [r1, r0, lsl #2]!
- mov r0, #1
- tst r2, r0, lsl r3
- \instr r2, r2, r0, lsl r3
- \store r2, [r1]
- moveq r0, #0
- restore_irqs ip
- mov pc, lr
-UNWIND( .fnend )
-ENDPROC(\name )
- .endm
+ .macro testop, name, instr, store
+ENTRY( \name )
+UNWIND( .fnstart )
+ ands ip, r1, #3
+ strneb r1, [ip] @ assert word-aligned
+ and r3, r0, #31
+ mov r0, r0, lsr #5
+ save_and_disable_irqs ip
+ ldr r2, [r1, r0, lsl #2]!
+ mov r0, #1
+ tst r2, r0, lsl r3
+ \instr r2, r2, r0, lsl r3
+ \store r2, [r1]
+ moveq r0, #0
+ restore_irqs ip
+ mov pc, lr
+UNWIND( .fnend )
+ENDPROC(\name )
+ .endm
#endif
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/changebit.S
--- a/xen/arch/arm/lib/changebit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/changebit.S Mon Feb 13 17:26:08 2012 +0000
@@ -14,5 +14,5 @@
.text
ENTRY(_change_bit)
- bitop eor
+ bitop eor
ENDPROC(_change_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/clearbit.S
--- a/xen/arch/arm/lib/clearbit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/clearbit.S Mon Feb 13 17:26:08 2012 +0000
@@ -15,5 +15,5 @@
.text
ENTRY(_clear_bit)
- bitop bic
+ bitop bic
ENDPROC(_clear_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/copy_template.S
--- a/xen/arch/arm/lib/copy_template.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/copy_template.S Mon Feb 13 17:26:08 2012 +0000
@@ -3,9 +3,9 @@
*
* Code template for optimized memory copy functions
*
- * Author: Nicolas Pitre
- * Created: Sep 28, 2005
- * Copyright: MontaVista Software, Inc.
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: MontaVista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -24,227 +24,227 @@
*
* ldr1w ptr reg abort
*
- * This loads one word from 'ptr', stores it in 'reg' and increments
- * 'ptr' to the next word. The 'abort' argument is used for fixup tables.
+ * This loads one word from 'ptr', stores it in 'reg' and increments
+ * 'ptr' to the next word. The 'abort' argument is used for fixup tables.
*
* ldr4w ptr reg1 reg2 reg3 reg4 abort
* ldr8w ptr, reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
*
- * This loads four or eight words starting from 'ptr', stores them
- * in provided registers and increments 'ptr' past those words.
- * The'abort' argument is used for fixup tables.
+ * This loads four or eight words starting from 'ptr', stores them
+ * in provided registers and increments 'ptr' past those words.
+ * The'abort' argument is used for fixup tables.
*
* ldr1b ptr reg cond abort
*
- * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
- * It also must apply the condition code if provided, otherwise the
- * "al" condition is assumed by default.
+ * Similar to ldr1w, but it loads a byte and increments 'ptr' one byte.
+ * It also must apply the condition code if provided, otherwise the
+ * "al" condition is assumed by default.
*
* str1w ptr reg abort
* str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
* str1b ptr reg cond abort
*
- * Same as their ldr* counterparts, but data is stored to 'ptr' location
- * rather than being loaded.
+ * Same as their ldr* counterparts, but data is stored to 'ptr' location
+ * rather than being loaded.
*
* enter reg1 reg2
*
- * Preserve the provided registers on the stack plus any additional
- * data as needed by the implementation including this code. Called
- * upon code entry.
+ * Preserve the provided registers on the stack plus any additional
+ * data as needed by the implementation including this code. Called
+ * upon code entry.
*
* exit reg1 reg2
*
- * Restore registers with the values previously saved with the
- * 'preserv' macro. Called upon code termination.
+ * Restore registers with the values previously saved with the
+ * 'preserv' macro. Called upon code termination.
*
* LDR1W_SHIFT
* STR1W_SHIFT
*
- * Correction to be applied to the "ip" register when branching into
- * the ldr1w or str1w instructions (some of these macros may expand to
- * than one 32bit instruction in Thumb-2)
+ * Correction to be applied to the "ip" register when branching into
+ * the ldr1w or str1w instructions (some of these macros may expand to
+ * than one 32bit instruction in Thumb-2)
*/
- enter r4, lr
+ enter r4, lr
- subs r2, r2, #4
- blt 8f
- ands ip, r0, #3
- PLD( pld [r1, #0] )
- bne 9f
- ands ip, r1, #3
- bne 10f
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #0] )
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
-1: subs r2, r2, #(28)
- stmfd sp!, {r5 - r8}
- blt 5f
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ blt 5f
- CALGN( ands ip, r0, #31 )
- CALGN( rsb r3, ip, #32 )
- CALGN( sbcnes r4, r3, r2 ) @ C is always set here
- CALGN( bcs 2f )
- CALGN( adr r4, 6f )
- CALGN( subs r2, r2, r3 ) @ C gets set
- CALGN( add pc, r4, ip )
+ CALGN( ands ip, r0, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, r3 ) @ C gets set
+ CALGN( add pc, r4, ip )
- PLD( pld [r1, #0] )
-2: PLD( subs r2, r2, #96 )
- PLD( pld [r1, #28] )
- PLD( blt 4f )
- PLD( pld [r1, #60] )
- PLD( pld [r1, #92] )
+ PLD( pld [r1, #0] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 4f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
-3: PLD( pld [r1, #124] )
-4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
- subs r2, r2, #32
- str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
- bge 3b
- PLD( cmn r2, #96 )
- PLD( bge 4b )
+3: PLD( pld [r1, #124] )
+4: ldr8w r1, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ subs r2, r2, #32
+ str8w r0, r3, r4, r5, r6, r7, r8, ip, lr, abort=20f
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
-5: ands ip, r2, #28
- rsb ip, ip, #32
+5: ands ip, r2, #28
+ rsb ip, ip, #32
#if LDR1W_SHIFT > 0
- lsl ip, ip, #LDR1W_SHIFT
+ lsl ip, ip, #LDR1W_SHIFT
#endif
- addne pc, pc, ip @ C is always clear here
- b 7f
+ addne pc, pc, ip @ C is always clear here
+ b 7f
6:
- .rept (1 << LDR1W_SHIFT)
- W(nop)
- .endr
- ldr1w r1, r3, abort=20f
- ldr1w r1, r4, abort=20f
- ldr1w r1, r5, abort=20f
- ldr1w r1, r6, abort=20f
- ldr1w r1, r7, abort=20f
- ldr1w r1, r8, abort=20f
- ldr1w r1, lr, abort=20f
+ .rept (1 << LDR1W_SHIFT)
+ W(nop)
+ .endr
+ ldr1w r1, r3, abort=20f
+ ldr1w r1, r4, abort=20f
+ ldr1w r1, r5, abort=20f
+ ldr1w r1, r6, abort=20f
+ ldr1w r1, r7, abort=20f
+ ldr1w r1, r8, abort=20f
+ ldr1w r1, lr, abort=20f
#if LDR1W_SHIFT < STR1W_SHIFT
- lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
+ lsl ip, ip, #STR1W_SHIFT - LDR1W_SHIFT
#elif LDR1W_SHIFT > STR1W_SHIFT
- lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
+ lsr ip, ip, #LDR1W_SHIFT - STR1W_SHIFT
#endif
- add pc, pc, ip
- nop
- .rept (1 << STR1W_SHIFT)
- W(nop)
- .endr
- str1w r0, r3, abort=20f
- str1w r0, r4, abort=20f
- str1w r0, r5, abort=20f
- str1w r0, r6, abort=20f
- str1w r0, r7, abort=20f
- str1w r0, r8, abort=20f
- str1w r0, lr, abort=20f
+ add pc, pc, ip
+ nop
+ .rept (1 << STR1W_SHIFT)
+ W(nop)
+ .endr
+ str1w r0, r3, abort=20f
+ str1w r0, r4, abort=20f
+ str1w r0, r5, abort=20f
+ str1w r0, r6, abort=20f
+ str1w r0, r7, abort=20f
+ str1w r0, r8, abort=20f
+ str1w r0, lr, abort=20f
- CALGN( bcs 2b )
+ CALGN( bcs 2b )
-7: ldmfd sp!, {r5 - r8}
+7: ldmfd sp!, {r5 - r8}
-8: movs r2, r2, lsl #31
- ldr1b r1, r3, ne, abort=21f
- ldr1b r1, r4, cs, abort=21f
- ldr1b r1, ip, cs, abort=21f
- str1b r0, r3, ne, abort=21f
- str1b r0, r4, cs, abort=21f
- str1b r0, ip, cs, abort=21f
+8: movs r2, r2, lsl #31
+ ldr1b r1, r3, ne, abort=21f
+ ldr1b r1, r4, cs, abort=21f
+ ldr1b r1, ip, cs, abort=21f
+ str1b r0, r3, ne, abort=21f
+ str1b r0, r4, cs, abort=21f
+ str1b r0, ip, cs, abort=21f
- exit r4, pc
+ exit r4, pc
-9: rsb ip, ip, #4
- cmp ip, #2
- ldr1b r1, r3, gt, abort=21f
- ldr1b r1, r4, ge, abort=21f
- ldr1b r1, lr, abort=21f
- str1b r0, r3, gt, abort=21f
- str1b r0, r4, ge, abort=21f
- subs r2, r2, ip
- str1b r0, lr, abort=21f
- blt 8b
- ands ip, r1, #3
- beq 1b
+9: rsb ip, ip, #4
+ cmp ip, #2
+ ldr1b r1, r3, gt, abort=21f
+ ldr1b r1, r4, ge, abort=21f
+ ldr1b r1, lr, abort=21f
+ str1b r0, r3, gt, abort=21f
+ str1b r0, r4, ge, abort=21f
+ subs r2, r2, ip
+ str1b r0, lr, abort=21f
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
-10: bic r1, r1, #3
- cmp ip, #2
- ldr1w r1, lr, abort=21f
- beq 17f
- bgt 18f
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr1w r1, lr, abort=21f
+ beq 17f
+ bgt 18f
- .macro forward_copy_shift pull push
+ .macro forward_copy_shift pull push
- subs r2, r2, #28
- blt 14f
+ subs r2, r2, #28
+ blt 14f
- CALGN( ands ip, r0, #31 )
- CALGN( rsb ip, ip, #32 )
- CALGN( sbcnes r4, ip, r2 ) @ C is always set here
- CALGN( subcc r2, r2, ip )
- CALGN( bcc 15f )
+ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
-11: stmfd sp!, {r5 - r9}
+11: stmfd sp!, {r5 - r9}
- PLD( pld [r1, #0] )
- PLD( subs r2, r2, #96 )
- PLD( pld [r1, #28] )
- PLD( blt 13f )
- PLD( pld [r1, #60] )
- PLD( pld [r1, #92] )
+ PLD( pld [r1, #0] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #28] )
+ PLD( blt 13f )
+ PLD( pld [r1, #60] )
+ PLD( pld [r1, #92] )
-12: PLD( pld [r1, #124] )
-13: ldr4w r1, r4, r5, r6, r7, abort=19f
- mov r3, lr, pull #\pull
- subs r2, r2, #32
- ldr4w r1, r8, r9, ip, lr, abort=19f
- orr r3, r3, r4, push #\push
- mov r4, r4, pull #\pull
- orr r4, r4, r5, push #\push
- mov r5, r5, pull #\pull
- orr r5, r5, r6, push #\push
- mov r6, r6, pull #\pull
- orr r6, r6, r7, push #\push
- mov r7, r7, pull #\pull
- orr r7, r7, r8, push #\push
- mov r8, r8, pull #\pull
- orr r8, r8, r9, push #\push
- mov r9, r9, pull #\pull
- orr r9, r9, ip, push #\push
- mov ip, ip, pull #\pull
- orr ip, ip, lr, push #\push
- str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
- bge 12b
- PLD( cmn r2, #96 )
- PLD( bge 13b )
+12: PLD( pld [r1, #124] )
+13: ldr4w r1, r4, r5, r6, r7, abort=19f
+ mov r3, lr, pull #\pull
+ subs r2, r2, #32
+ ldr4w r1, r8, r9, ip, lr, abort=19f
+ orr r3, r3, r4, push #\push
+ mov r4, r4, pull #\pull
+ orr r4, r4, r5, push #\push
+ mov r5, r5, pull #\pull
+ orr r5, r5, r6, push #\push
+ mov r6, r6, pull #\pull
+ orr r6, r6, r7, push #\push
+ mov r7, r7, pull #\pull
+ orr r7, r7, r8, push #\push
+ mov r8, r8, pull #\pull
+ orr r8, r8, r9, push #\push
+ mov r9, r9, pull #\pull
+ orr r9, r9, ip, push #\push
+ mov ip, ip, pull #\pull
+ orr ip, ip, lr, push #\push
+ str8w r0, r3, r4, r5, r6, r7, r8, r9, ip, , abort=19f
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
- ldmfd sp!, {r5 - r9}
+ ldmfd sp!, {r5 - r9}
-14: ands ip, r2, #28
- beq 16f
+14: ands ip, r2, #28
+ beq 16f
-15: mov r3, lr, pull #\pull
- ldr1w r1, lr, abort=21f
- subs ip, ip, #4
- orr r3, r3, lr, push #\push
- str1w r0, r3, abort=21f
- bgt 15b
- CALGN( cmp r2, #0 )
- CALGN( bge 11b )
+15: mov r3, lr, pull #\pull
+ ldr1w r1, lr, abort=21f
+ subs ip, ip, #4
+ orr r3, r3, lr, push #\push
+ str1w r0, r3, abort=21f
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
-16: sub r1, r1, #(\push / 8)
- b 8b
+16: sub r1, r1, #(\push / 8)
+ b 8b
- .endm
+ .endm
- forward_copy_shift pull=8 push=24
+ forward_copy_shift pull=8 push=24
-17: forward_copy_shift pull=16 push=16
+17: forward_copy_shift pull=16 push=16
-18: forward_copy_shift pull=24 push=8
+18: forward_copy_shift pull=24 push=8
/*
@@ -254,14 +254,14 @@ 18: forward_copy_shift p
* the exit macro.
*/
- .macro copy_abort_preamble
-19: ldmfd sp!, {r5 - r9}
- b 21f
-20: ldmfd sp!, {r5 - r8}
+ .macro copy_abort_preamble
+19: ldmfd sp!, {r5 - r9}
+ b 21f
+20: ldmfd sp!, {r5 - r8}
21:
- .endm
+ .endm
- .macro copy_abort_end
- ldmfd sp!, {r4, pc}
- .endm
+ .macro copy_abort_end
+ ldmfd sp!, {r4, pc}
+ .endm
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/div64.S
--- a/xen/arch/arm/lib/div64.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/div64.S Mon Feb 13 17:26:08 2012 +0000
@@ -3,9 +3,9 @@
*
* Optimized computation of 64-bit dividend / 32-bit divisor
*
- * Author: Nicolas Pitre
- * Created: Oct 5, 2003
- * Copyright: Monta Vista Software, Inc.
+ * Author: Nicolas Pitre
+ * Created: Oct 5, 2003
+ * Copyright: Monta Vista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,7 +14,7 @@
#include <xen/config.h>
#include "assembler.h"
-
+
#ifdef __ARMEB__
#define xh r0
#define xl r1
@@ -34,12 +34,12 @@
* This is meant to be used by do_div() from include/asm/div64.h only.
*
* Input parameters:
- * xh-xl = dividend (clobbered)
- * r4 = divisor (preserved)
+ * xh-xl = dividend (clobbered)
+ * r4 = divisor (preserved)
*
* Output values:
- * yh-yl = result
- * xh = remainder
+ * yh-yl = result
+ * xh = remainder
*
* Clobbered regs: xl, ip
*/
@@ -47,165 +47,165 @@
ENTRY(__do_div64)
UNWIND(.fnstart)
- @ Test for easy paths first.
- subs ip, r4, #1
- bls 9f @ divisor is 0 or 1
- tst ip, r4
- beq 8f @ divisor is power of 2
+ @ Test for easy paths first.
+ subs ip, r4, #1
+ bls 9f @ divisor is 0 or 1
+ tst ip, r4
+ beq 8f @ divisor is power of 2
- @ See if we need to handle upper 32-bit result.
- cmp xh, r4
- mov yh, #0
- blo 3f
+ @ See if we need to handle upper 32-bit result.
+ cmp xh, r4
+ mov yh, #0
+ blo 3f
- @ Align divisor with upper part of dividend.
- @ The aligned divisor is stored in yl preserving the original.
- @ The bit position is stored in ip.
+ @ Align divisor with upper part of dividend.
+ @ The aligned divisor is stored in yl preserving the original.
+ @ The bit position is stored in ip.
#if __LINUX_ARM_ARCH__ >= 5
- clz yl, r4
- clz ip, xh
- sub yl, yl, ip
- mov ip, #1
- mov ip, ip, lsl yl
- mov yl, r4, lsl yl
+ clz yl, r4
+ clz ip, xh
+ sub yl, yl, ip
+ mov ip, #1
+ mov ip, ip, lsl yl
+ mov yl, r4, lsl yl
#else
- mov yl, r4
- mov ip, #1
-1: cmp yl, #0x80000000
- cmpcc yl, xh
- movcc yl, yl, lsl #1
- movcc ip, ip, lsl #1
- bcc 1b
+ mov yl, r4
+ mov ip, #1
+1: cmp yl, #0x80000000
+ cmpcc yl, xh
+ movcc yl, yl, lsl #1
+ movcc ip, ip, lsl #1
+ bcc 1b
#endif
- @ The division loop for needed upper bit positions.
- @ Break out early if dividend reaches 0.
-2: cmp xh, yl
- orrcs yh, yh, ip
- subcss xh, xh, yl
- movnes ip, ip, lsr #1
- mov yl, yl, lsr #1
- bne 2b
+ @ The division loop for needed upper bit positions.
+ @ Break out early if dividend reaches 0.
+2: cmp xh, yl
+ orrcs yh, yh, ip
+ subcss xh, xh, yl
+ movnes ip, ip, lsr #1
+ mov yl, yl, lsr #1
+ bne 2b
- @ See if we need to handle lower 32-bit result.
-3: cmp xh, #0
- mov yl, #0
- cmpeq xl, r4
- movlo xh, xl
- movlo pc, lr
+ @ See if we need to handle lower 32-bit result.
+3: cmp xh, #0
+ mov yl, #0
+ cmpeq xl, r4
+ movlo xh, xl
+ movlo pc, lr
- @ The division loop for lower bit positions.
- @ Here we shift remainer bits leftwards rather than moving the
- @ divisor for comparisons, considering the carry-out bit as well.
- mov ip, #0x80000000
-4: movs xl, xl, lsl #1
- adcs xh, xh, xh
- beq 6f
- cmpcc xh, r4
-5: orrcs yl, yl, ip
- subcs xh, xh, r4
- movs ip, ip, lsr #1
- bne 4b
- mov pc, lr
+ @ The division loop for lower bit positions.
+ @ Here we shift remainer bits leftwards rather than moving the
+ @ divisor for comparisons, considering the carry-out bit as well.
+ mov ip, #0x80000000
+4: movs xl, xl, lsl #1
+ adcs xh, xh, xh
+ beq 6f
+ cmpcc xh, r4
+5: orrcs yl, yl, ip
+ subcs xh, xh, r4
+ movs ip, ip, lsr #1
+ bne 4b
+ mov pc, lr
- @ The top part of remainder became zero. If carry is set
- @ (the 33th bit) this is a false positive so resume the loop.
- @ Otherwise, if lower part is also null then we are done.
-6: bcs 5b
- cmp xl, #0
- moveq pc, lr
+ @ The top part of remainder became zero. If carry is set
+ @ (the 33th bit) this is a false positive so resume the loop.
+ @ Otherwise, if lower part is also null then we are done.
+6: bcs 5b
+ cmp xl, #0
+ moveq pc, lr
- @ We still have remainer bits in the low part. Bring them up.
+ @ We still have remainer bits in the low part. Bring them up.
#if __LINUX_ARM_ARCH__ >= 5
- clz xh, xl @ we know xh is zero here so...
- add xh, xh, #1
- mov xl, xl, lsl xh
- mov ip, ip, lsr xh
+ clz xh, xl @ we know xh is zero here so...
+ add xh, xh, #1
+ mov xl, xl, lsl xh
+ mov ip, ip, lsr xh
#else
-7: movs xl, xl, lsl #1
- mov ip, ip, lsr #1
- bcc 7b
+7: movs xl, xl, lsl #1
+ mov ip, ip, lsr #1
+ bcc 7b
#endif
- @ Current remainder is now 1. It is worthless to compare with
- @ divisor at this point since divisor can not be smaller than 3 here.
- @ If possible, branch for another shift in the division loop.
- @ If no bit position left then we are done.
- movs ip, ip, lsr #1
- mov xh, #1
- bne 4b
- mov pc, lr
+ @ Current remainder is now 1. It is worthless to compare with
+ @ divisor at this point since divisor can not be smaller than 3 here.
+ @ If possible, branch for another shift in the division loop.
+ @ If no bit position left then we are done.
+ movs ip, ip, lsr #1
+ mov xh, #1
+ bne 4b
+ mov pc, lr
-8: @ Division by a power of 2: determine what that divisor order is
- @ then simply shift values around
+8: @ Division by a power of 2: determine what that divisor order is
+ @ then simply shift values around
#if __LINUX_ARM_ARCH__ >= 5
- clz ip, r4
- rsb ip, ip, #31
+ clz ip, r4
+ rsb ip, ip, #31
#else
- mov yl, r4
- cmp r4, #(1 << 16)
- mov ip, #0
- movhs yl, yl, lsr #16
- movhs ip, #16
+ mov yl, r4
+ cmp r4, #(1 << 16)
+ mov ip, #0
+ movhs yl, yl, lsr #16
+ movhs ip, #16
- cmp yl, #(1 << 8)
- movhs yl, yl, lsr #8
- addhs ip, ip, #8
+ cmp yl, #(1 << 8)
+ movhs yl, yl, lsr #8
+ addhs ip, ip, #8
- cmp yl, #(1 << 4)
- movhs yl, yl, lsr #4
- addhs ip, ip, #4
+ cmp yl, #(1 << 4)
+ movhs yl, yl, lsr #4
+ addhs ip, ip, #4
- cmp yl, #(1 << 2)
- addhi ip, ip, #3
- addls ip, ip, yl, lsr #1
+ cmp yl, #(1 << 2)
+ addhi ip, ip, #3
+ addls ip, ip, yl, lsr #1
#endif
- mov yh, xh, lsr ip
- mov yl, xl, lsr ip
- rsb ip, ip, #32
- ARM( orr yl, yl, xh, lsl ip )
- THUMB( lsl xh, xh, ip )
- THUMB( orr yl, yl, xh )
- mov xh, xl, lsl ip
- mov xh, xh, lsr ip
- mov pc, lr
+ mov yh, xh, lsr ip
+ mov yl, xl, lsr ip
+ rsb ip, ip, #32
+ ARM( orr yl, yl, xh, lsl ip )
+ THUMB( lsl xh, xh, ip )
+ THUMB( orr yl, yl, xh )
+ mov xh, xl, lsl ip
+ mov xh, xh, lsr ip
+ mov pc, lr
- @ eq -> division by 1: obvious enough...
-9: moveq yl, xl
- moveq yh, xh
- moveq xh, #0
- moveq pc, lr
+ @ eq -> division by 1: obvious enough...
+9: moveq yl, xl
+ moveq yh, xh
+ moveq xh, #0
+ moveq pc, lr
UNWIND(.fnend)
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
- @ Division by 0:
- str lr, [sp, #-8]!
- bl __div0
+ @ Division by 0:
+ str lr, [sp, #-8]!
+ bl __div0
- @ as wrong as it could be...
- mov yl, #0
- mov yh, #0
- mov xh, #0
- ldr pc, [sp], #8
+ @ as wrong as it could be...
+ mov yl, #0
+ mov yh, #0
+ mov xh, #0
+ ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(__do_div64)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/findbit.S
--- a/xen/arch/arm/lib/findbit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/findbit.S Mon Feb 13 17:26:08 2012 +0000
@@ -24,20 +24,20 @@
* Prototype: int find_first_zero_bit(void *addr, unsigned int maxbit);
*/
ENTRY(_find_first_zero_bit_le)
- teq r1, #0
- beq 3f
- mov r2, #0
+ teq r1, #0
+ beq 3f
+ mov r2, #0
1:
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
- eors r3, r3, #0xff @ invert bits
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
-2: cmp r2, r1 @ any more?
- blo 1b
-3: mov r0, r1 @ no free bits
- mov pc, lr
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eors r3, r3, #0xff @ invert bits
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ mov pc, lr
ENDPROC(_find_first_zero_bit_le)
/*
@@ -45,19 +45,19 @@ ENDPROC(_find_first_zero_bit_le)
 * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_zero_bit_le)
- teq r1, #0
- beq 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
- eor r3, r3, #0xff @ now looking for a 1 bit
- movs r3, r3, lsr ip @ shift off unused bits
- bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
- add r2, r2, #1 @ align bit pointer
- b 2b @ loop for next bit
+ teq r1, #0
+ beq 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eor r3, r3, #0xff @ now looking for a 1 bit
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
ENDPROC(_find_next_zero_bit_le)
/*
@@ -65,20 +65,20 @@ ENDPROC(_find_next_zero_bit_le)
 * Prototype: int find_first_bit(const unsigned long *addr, unsigned int maxbit);
*/
ENTRY(_find_first_bit_le)
- teq r1, #0
- beq 3f
- mov r2, #0
+ teq r1, #0
+ beq 3f
+ mov r2, #0
1:
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
- movs r3, r3
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
-2: cmp r2, r1 @ any more?
- blo 1b
-3: mov r0, r1 @ no free bits
- mov pc, lr
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ mov pc, lr
ENDPROC(_find_first_bit_le)
/*
@@ -86,87 +86,87 @@ ENDPROC(_find_first_bit_le)
 * Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_bit_le)
- teq r1, #0
- beq 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- ARM( ldrb r3, [r0, r2, lsr #3] )
- THUMB( lsr r3, r2, #3 )
- THUMB( ldrb r3, [r0, r3] )
- movs r3, r3, lsr ip @ shift off unused bits
- bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
- add r2, r2, #1 @ align bit pointer
- b 2b @ loop for next bit
+ teq r1, #0
+ beq 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ ARM( ldrb r3, [r0, r2, lsr #3] )
+ THUMB( lsr r3, r2, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
ENDPROC(_find_next_bit_le)
#ifdef __ARMEB__
ENTRY(_find_first_zero_bit_be)
- teq r1, #0
- beq 3f
- mov r2, #0
-1: eor r3, r2, #0x18 @ big endian byte ordering
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- eors r3, r3, #0xff @ invert bits
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
-2: cmp r2, r1 @ any more?
- blo 1b
-3: mov r0, r1 @ no free bits
- mov pc, lr
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1: eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eors r3, r3, #0xff @ invert bits
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ mov pc, lr
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
- teq r1, #0
- beq 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- eor r3, r2, #0x18 @ big endian byte ordering
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- eor r3, r3, #0xff @ now looking for a 1 bit
- movs r3, r3, lsr ip @ shift off unused bits
- bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
- add r2, r2, #1 @ align bit pointer
- b 2b @ loop for next bit
+ teq r1, #0
+ beq 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ eor r3, r3, #0xff @ now looking for a 1 bit
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
ENDPROC(_find_next_zero_bit_be)
ENTRY(_find_first_bit_be)
- teq r1, #0
- beq 3f
- mov r2, #0
-1: eor r3, r2, #0x18 @ big endian byte ordering
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- movs r3, r3
- bne .L_found @ any now set - found zero bit
- add r2, r2, #8 @ next bit pointer
-2: cmp r2, r1 @ any more?
- blo 1b
-3: mov r0, r1 @ no free bits
- mov pc, lr
+ teq r1, #0
+ beq 3f
+ mov r2, #0
+1: eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3
+ bne .L_found @ any now set - found zero bit
+ add r2, r2, #8 @ next bit pointer
+2: cmp r2, r1 @ any more?
+ blo 1b
+3: mov r0, r1 @ no free bits
+ mov pc, lr
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
- teq r1, #0
- beq 3b
- ands ip, r2, #7
- beq 1b @ If new byte, goto old routine
- eor r3, r2, #0x18 @ big endian byte ordering
- ARM( ldrb r3, [r0, r3, lsr #3] )
- THUMB( lsr r3, #3 )
- THUMB( ldrb r3, [r0, r3] )
- movs r3, r3, lsr ip @ shift off unused bits
- bne .L_found
- orr r2, r2, #7 @ if zero, then no bits here
- add r2, r2, #1 @ align bit pointer
- b 2b @ loop for next bit
+ teq r1, #0
+ beq 3b
+ ands ip, r2, #7
+ beq 1b @ If new byte, goto old routine
+ eor r3, r2, #0x18 @ big endian byte ordering
+ ARM( ldrb r3, [r0, r3, lsr #3] )
+ THUMB( lsr r3, #3 )
+ THUMB( ldrb r3, [r0, r3] )
+ movs r3, r3, lsr ip @ shift off unused bits
+ bne .L_found
+ orr r2, r2, #7 @ if zero, then no bits here
+ add r2, r2, #1 @ align bit pointer
+ b 2b @ loop for next bit
ENDPROC(_find_next_bit_be)
#endif
@@ -176,23 +176,23 @@ ENDPROC(_find_next_bit_be)
*/
.L_found:
#if __LINUX_ARM_ARCH__ >= 5
- rsb r0, r3, #0
- and r3, r3, r0
- clz r3, r3
- rsb r3, r3, #31
- add r0, r2, r3
+ rsb r0, r3, #0
+ and r3, r3, r0
+ clz r3, r3
+ rsb r3, r3, #31
+ add r0, r2, r3
#else
- tst r3, #0x0f
- addeq r2, r2, #4
- movne r3, r3, lsl #4
- tst r3, #0x30
- addeq r2, r2, #2
- movne r3, r3, lsl #2
- tst r3, #0x40
- addeq r2, r2, #1
- mov r0, r2
+ tst r3, #0x0f
+ addeq r2, r2, #4
+ movne r3, r3, lsl #4
+ tst r3, #0x30
+ addeq r2, r2, #2
+ movne r3, r3, lsl #2
+ tst r3, #0x40
+ addeq r2, r2, #1
+ mov r0, r2
#endif
- cmp r1, r0 @ Clamp to maxbit
- movlo r0, r1
- mov pc, lr
+ cmp r1, r0 @ Clamp to maxbit
+ movlo r0, r1
+ mov pc, lr
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/lib1funcs.S
--- a/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/lib1funcs.S Mon Feb 13 17:26:08 2012 +0000
@@ -40,64 +40,64 @@ Boston, MA 02111-1307, USA. */
#if __LINUX_ARM_ARCH__ >= 5
- clz \curbit, \divisor
- clz \result, \dividend
- sub \result, \curbit, \result
- mov \curbit, #1
- mov \divisor, \divisor, lsl \result
- mov \curbit, \curbit, lsl \result
- mov \result, #0
-
+ clz \curbit, \divisor
+ clz \result, \dividend
+ sub \result, \curbit, \result
+ mov \curbit, #1
+ mov \divisor, \divisor, lsl \result
+ mov \curbit, \curbit, lsl \result
+ mov \result, #0
+
#else
- @ Initially shift the divisor left 3 bits if possible,
- @ set curbit accordingly. This allows for curbit to be located
- @ at the left end of each 4 bit nibbles in the division loop
- @ to save one loop in most cases.
- tst \divisor, #0xe0000000
- moveq \divisor, \divisor, lsl #3
- moveq \curbit, #8
- movne \curbit, #1
+ @ Initially shift the divisor left 3 bits if possible,
+ @ set curbit accordingly. This allows for curbit to be located
+ @ at the left end of each 4 bit nibbles in the division loop
+ @ to save one loop in most cases.
+ tst \divisor, #0xe0000000
+ moveq \divisor, \divisor, lsl #3
+ moveq \curbit, #8
+ movne \curbit, #1
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- movlo \curbit, \curbit, lsl #4
- blo 1b
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ movlo \curbit, \curbit, lsl #4
+ blo 1b
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- movlo \curbit, \curbit, lsl #1
- blo 1b
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ movlo \curbit, \curbit, lsl #1
+ blo 1b
- mov \result, #0
+ mov \result, #0
#endif
- @ Division loop
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- orrhs \result, \result, \curbit
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- orrhs \result, \result, \curbit, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- orrhs \result, \result, \curbit, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- orrhs \result, \result, \curbit, lsr #3
- cmp \dividend, #0 @ Early termination?
- movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
- movne \divisor, \divisor, lsr #4
- bne 1b
+ @ Division loop
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ orrhs \result, \result, \curbit
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ orrhs \result, \result, \curbit, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ orrhs \result, \result, \curbit, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ orrhs \result, \result, \curbit, lsr #3
+ cmp \dividend, #0 @ Early termination?
+ movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
+ movne \divisor, \divisor, lsr #4
+ bne 1b
.endm
@@ -106,27 +106,27 @@ 1: cmp \dividend, \divisor
#if __LINUX_ARM_ARCH__ >= 5
- clz \order, \divisor
- rsb \order, \order, #31
+ clz \order, \divisor
+ rsb \order, \order, #31
#else
- cmp \divisor, #(1 << 16)
- movhs \divisor, \divisor, lsr #16
- movhs \order, #16
- movlo \order, #0
+ cmp \divisor, #(1 << 16)
+ movhs \divisor, \divisor, lsr #16
+ movhs \order, #16
+ movlo \order, #0
- cmp \divisor, #(1 << 8)
- movhs \divisor, \divisor, lsr #8
- addhs \order, \order, #8
+ cmp \divisor, #(1 << 8)
+ movhs \divisor, \divisor, lsr #8
+ addhs \order, \order, #8
- cmp \divisor, #(1 << 4)
- movhs \divisor, \divisor, lsr #4
- addhs \order, \order, #4
+ cmp \divisor, #(1 << 4)
+ movhs \divisor, \divisor, lsr #4
+ addhs \order, \order, #4
- cmp \divisor, #(1 << 2)
- addhi \order, \order, #3
- addls \order, \order, \divisor, lsr #1
+ cmp \divisor, #(1 << 2)
+ addhi \order, \order, #3
+ addls \order, \order, \divisor, lsr #1
#endif
@@ -137,69 +137,69 @@ 1: cmp \dividend, \divisor
#if __LINUX_ARM_ARCH__ >= 5
- clz \order, \divisor
- clz \spare, \dividend
- sub \order, \order, \spare
- mov \divisor, \divisor, lsl \order
+ clz \order, \divisor
+ clz \spare, \dividend
+ sub \order, \order, \spare
+ mov \divisor, \divisor, lsl \order
#else
- mov \order, #0
+ mov \order, #0
- @ Unless the divisor is very big, shift it up in multiples of
- @ four bits, since this is the amount of unwinding in the main
- @ division loop. Continue shifting until the divisor is
- @ larger than the dividend.
-1: cmp \divisor, #0x10000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #4
- addlo \order, \order, #4
- blo 1b
+ @ Unless the divisor is very big, shift it up in multiples of
+ @ four bits, since this is the amount of unwinding in the main
+ @ division loop. Continue shifting until the divisor is
+ @ larger than the dividend.
+1: cmp \divisor, #0x10000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #4
+ addlo \order, \order, #4
+ blo 1b
- @ For very big divisors, we must shift it a bit at a time, or
- @ we will be in danger of overflowing.
-1: cmp \divisor, #0x80000000
- cmplo \divisor, \dividend
- movlo \divisor, \divisor, lsl #1
- addlo \order, \order, #1
- blo 1b
+ @ For very big divisors, we must shift it a bit at a time, or
+ @ we will be in danger of overflowing.
+1: cmp \divisor, #0x80000000
+ cmplo \divisor, \dividend
+ movlo \divisor, \divisor, lsl #1
+ addlo \order, \order, #1
+ blo 1b
#endif
- @ Perform all needed substractions to keep only the reminder.
- @ Do comparisons in batch of 4 first.
- subs \order, \order, #3 @ yes, 3 is intended here
- blt 2f
+ @ Perform all needed substractions to keep only the reminder.
+ @ Do comparisons in batch of 4 first.
+ subs \order, \order, #3 @ yes, 3 is intended here
+ blt 2f
-1: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- cmp \dividend, \divisor, lsr #1
- subhs \dividend, \dividend, \divisor, lsr #1
- cmp \dividend, \divisor, lsr #2
- subhs \dividend, \dividend, \divisor, lsr #2
- cmp \dividend, \divisor, lsr #3
- subhs \dividend, \dividend, \divisor, lsr #3
- cmp \dividend, #1
- mov \divisor, \divisor, lsr #4
- subges \order, \order, #4
- bge 1b
+1: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ cmp \dividend, \divisor, lsr #1
+ subhs \dividend, \dividend, \divisor, lsr #1
+ cmp \dividend, \divisor, lsr #2
+ subhs \dividend, \dividend, \divisor, lsr #2
+ cmp \dividend, \divisor, lsr #3
+ subhs \dividend, \dividend, \divisor, lsr #3
+ cmp \dividend, #1
+ mov \divisor, \divisor, lsr #4
+ subges \order, \order, #4
+ bge 1b
- tst \order, #3
- teqne \dividend, #0
- beq 5f
+ tst \order, #3
+ teqne \dividend, #0
+ beq 5f
- @ Either 1, 2 or 3 comparison/substractions are left.
-2: cmn \order, #2
- blt 4f
- beq 3f
- cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-3: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
- mov \divisor, \divisor, lsr #1
-4: cmp \dividend, \divisor
- subhs \dividend, \dividend, \divisor
+ @ Either 1, 2 or 3 comparison/substractions are left.
+2: cmn \order, #2
+ blt 4f
+ beq 3f
+ cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+3: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
+ mov \divisor, \divisor, lsr #1
+4: cmp \dividend, \divisor
+ subhs \dividend, \dividend, \divisor
5:
.endm
@@ -208,27 +208,27 @@ ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
- subs r2, r1, #1
- moveq pc, lr
- bcc Ldiv0
- cmp r0, r1
- bls 11f
- tst r1, r2
- beq 12f
+ subs r2, r1, #1
+ moveq pc, lr
+ bcc Ldiv0
+ cmp r0, r1
+ bls 11f
+ tst r1, r2
+ beq 12f
- ARM_DIV_BODY r0, r1, r2, r3
+ ARM_DIV_BODY r0, r1, r2, r3
- mov r0, r2
- mov pc, lr
+ mov r0, r2
+ mov pc, lr
-11: moveq r0, #1
- movne r0, #0
- mov pc, lr
+11: moveq r0, #1
+ movne r0, #0
+ mov pc, lr
-12: ARM_DIV2_ORDER r1, r2
+12: ARM_DIV2_ORDER r1, r2
- mov r0, r0, lsr r2
- mov pc, lr
+ mov r0, r0, lsr r2
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
@@ -237,17 +237,17 @@ ENDPROC(__aeabi_uidiv)
ENTRY(__umodsi3)
UNWIND(.fnstart)
- subs r2, r1, #1 @ compare divisor with 1
- bcc Ldiv0
- cmpne r0, r1 @ compare dividend with divisor
- moveq r0, #0
- tsthi r1, r2 @ see if divisor is power of 2
- andeq r0, r0, r2
- movls pc, lr
+ subs r2, r1, #1 @ compare divisor with 1
+ bcc Ldiv0
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ movls pc, lr
- ARM_MOD_BODY r0, r1, r2, r3
+ ARM_MOD_BODY r0, r1, r2, r3
- mov pc, lr
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
@@ -256,40 +256,40 @@ ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)
- cmp r1, #0
- eor ip, r0, r1 @ save the sign of the result.
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- subs r2, r1, #1 @ division by 1 or -1 ?
- beq 10f
- movs r3, r0
- rsbmi r3, r0, #0 @ positive dividend value
- cmp r3, r1
- bls 11f
- tst r1, r2 @ divisor is power of 2 ?
- beq 12f
+ cmp r1, #0
+ eor ip, r0, r1 @ save the sign of the result.
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ subs r2, r1, #1 @ division by 1 or -1 ?
+ beq 10f
+ movs r3, r0
+ rsbmi r3, r0, #0 @ positive dividend value
+ cmp r3, r1
+ bls 11f
+ tst r1, r2 @ divisor is power of 2 ?
+ beq 12f
- ARM_DIV_BODY r3, r1, r0, r2
+ ARM_DIV_BODY r3, r1, r0, r2
- cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
+ cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
-10: teq ip, r0 @ same sign ?
- rsbmi r0, r0, #0
- mov pc, lr
+10: teq ip, r0 @ same sign ?
+ rsbmi r0, r0, #0
+ mov pc, lr
-11: movlo r0, #0
- moveq r0, ip, asr #31
- orreq r0, r0, #1
- mov pc, lr
+11: movlo r0, #0
+ moveq r0, ip, asr #31
+ orreq r0, r0, #1
+ mov pc, lr
-12: ARM_DIV2_ORDER r1, r2
+12: ARM_DIV2_ORDER r1, r2
- cmp ip, #0
- mov r0, r3, lsr r2
- rsbmi r0, r0, #0
- mov pc, lr
+ cmp ip, #0
+ mov r0, r3, lsr r2
+ rsbmi r0, r0, #0
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__divsi3)
@@ -298,23 +298,23 @@ ENDPROC(__aeabi_idiv)
ENTRY(__modsi3)
UNWIND(.fnstart)
- cmp r1, #0
- beq Ldiv0
- rsbmi r1, r1, #0 @ loops below use unsigned.
- movs ip, r0 @ preserve sign of dividend
- rsbmi r0, r0, #0 @ if negative make positive
- subs r2, r1, #1 @ compare divisor with 1
- cmpne r0, r1 @ compare dividend with divisor
- moveq r0, #0
- tsthi r1, r2 @ see if divisor is power of 2
- andeq r0, r0, r2
- bls 10f
+ cmp r1, #0
+ beq Ldiv0
+ rsbmi r1, r1, #0 @ loops below use unsigned.
+ movs ip, r0 @ preserve sign of dividend
+ rsbmi r0, r0, #0 @ if negative make positive
+ subs r2, r1, #1 @ compare divisor with 1
+ cmpne r0, r1 @ compare dividend with divisor
+ moveq r0, #0
+ tsthi r1, r2 @ see if divisor is power of 2
+ andeq r0, r0, r2
+ bls 10f
- ARM_MOD_BODY r0, r1, r2, r3
+ ARM_MOD_BODY r0, r1, r2, r3
-10: cmp ip, #0
- rsbmi r0, r0, #0
- mov pc, lr
+10: cmp ip, #0
+ rsbmi r0, r0, #0
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__modsi3)
@@ -323,56 +323,56 @@ ENDPROC(__modsi3)
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr} )
+UNWIND(.save {r0, r1, ip, lr} )
- stmfd sp!, {r0, r1, ip, lr}
- bl __aeabi_uidiv
- ldmfd sp!, {r1, r2, ip, lr}
- mul r3, r0, r2
- sub r1, r1, r3
- mov pc, lr
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_uidiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
-UNWIND(.save {r0, r1, ip, lr} )
- stmfd sp!, {r0, r1, ip, lr}
- bl __aeabi_idiv
- ldmfd sp!, {r1, r2, ip, lr}
- mul r3, r0, r2
- sub r1, r1, r3
- mov pc, lr
+UNWIND(.save {r0, r1, ip, lr} )
+ stmfd sp!, {r0, r1, ip, lr}
+ bl __aeabi_idiv
+ ldmfd sp!, {r1, r2, ip, lr}
+ mul r3, r0, r2
+ sub r1, r1, r3
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)
ENTRY(__aeabi_uldivmod)
UNWIND(.fnstart)
-UNWIND(.save {lr} )
- sub sp, sp, #8
- stmfd sp!, {sp, lr}
- bl __qdivrem
- ldr lr, [sp, #4]
- add sp, sp, #8
- ldmfd sp!, {r2, r3}
- mov pc, lr
+UNWIND(.save {lr} )
+ sub sp, sp, #8
+ stmfd sp!, {sp, lr}
+ bl __qdivrem
+ ldr lr, [sp, #4]
+ add sp, sp, #8
+ ldmfd sp!, {r2, r3}
+ mov pc, lr
UNWIND(.fnend)
ENDPROC(__aeabi_uldivmod)
ENTRY(__aeabi_ldivmod)
UNWIND(.fnstart)
-UNWIND(.save {lr} )
- sub sp, sp, #16
- stmfd sp!, {sp, lr}
- bl __ldivmod_helper
- ldr lr, [sp, #4]
- add sp, sp, #16
- ldmfd sp!, {r2, r3}
- mov pc, lr
-
+UNWIND(.save {lr} )
+ sub sp, sp, #16
+ stmfd sp!, {sp, lr}
+ bl __ldivmod_helper
+ ldr lr, [sp, #4]
+ add sp, sp, #16
+ ldmfd sp!, {r2, r3}
+ mov pc, lr
+
UNWIND(.fnend)
ENDPROC(__aeabi_ldivmod)
#endif
@@ -381,9 +381,9 @@ Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
- str lr, [sp, #-8]!
- bl __div0
- mov r0, #0 @ About as wrong as it could be.
- ldr pc, [sp], #8
+ str lr, [sp, #-8]!
+ bl __div0
+ mov r0, #0 @ About as wrong as it could be.
+ ldr pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)
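
The ARM_DIV_BODY macro above is a classic shift-and-subtract divider: the divisor is shifted up until it passes the dividend (in 4-bit steps, then 1-bit steps near the top of the word), after which the unrolled loop subtracts it back out, accumulating four quotient bits per iteration; __aeabi_uidivmod then recovers the remainder as dividend - quotient * divisor (the "mul r3, r0, r2; sub r1, r1, r3" sequence). The C below is an editor's sketch of that algorithm, not part of the patch; udiv32 is an invented name and, as in the assembler, a zero divisor is the caller's problem (the entry points branch to Ldiv0 for that case).

    /* Editor's sketch: shift-and-subtract division as unrolled by ARM_DIV_BODY. */
    unsigned int udiv32(unsigned int dividend, unsigned int divisor,
                        unsigned int *remainder)
    {
        unsigned int curbit = 1, result = 0;

        /* Shift the divisor up until it is just above the dividend,
         * taking care not to shift the top bit out. */
        while (divisor < dividend && !(divisor & 0x80000000u)) {
            divisor <<= 1;
            curbit <<= 1;
        }

        /* Subtract it back out, one quotient bit per step
         * (ARM_DIV_BODY does four of these per loop iteration). */
        while (curbit) {
            if (dividend >= divisor) {
                dividend -= divisor;
                result |= curbit;
            }
            divisor >>= 1;
            curbit >>= 1;
        }

        if (remainder)
            *remainder = dividend;  /* what __aeabi_uidivmod hands back in r1 */
        return result;
    }
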
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memcpy.S
--- a/xen/arch/arm/lib/memcpy.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/memcpy.S Mon Feb 13 17:26:08 2012 +0000
@@ -1,9 +1,9 @@
/*
* linux/arch/arm/lib/memcpy.S
*
- * Author: Nicolas Pitre
- * Created: Sep 28, 2005
- * Copyright: MontaVista Software, Inc.
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: MontaVista Software, Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -13,46 +13,46 @@
#include <xen/config.h>
#include "assembler.h"
-#define LDR1W_SHIFT 0
-#define STR1W_SHIFT 0
+#define LDR1W_SHIFT 0
+#define STR1W_SHIFT 0
- .macro ldr1w ptr reg abort
- W(ldr) \reg, [\ptr], #4
- .endm
+ .macro ldr1w ptr reg abort
+ W(ldr) \reg, [\ptr], #4
+ .endm
- .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
- ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
- .endm
+ .macro ldr4w ptr reg1 reg2 reg3 reg4 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4}
+ .endm
- .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
- .endm
+ .macro ldr8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ ldmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
- .macro ldr1b ptr reg cond=al abort
- ldr\cond\()b \reg, [\ptr], #1
- .endm
+ .macro ldr1b ptr reg cond=al abort
+ ldr\cond\()b \reg, [\ptr], #1
+ .endm
- .macro str1w ptr reg abort
- W(str) \reg, [\ptr], #4
- .endm
+ .macro str1w ptr reg abort
+ W(str) \reg, [\ptr], #4
+ .endm
- .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
- stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
- .endm
+ .macro str8w ptr reg1 reg2 reg3 reg4 reg5 reg6 reg7 reg8 abort
+ stmia \ptr!, {\reg1, \reg2, \reg3, \reg4, \reg5, \reg6, \reg7, \reg8}
+ .endm
- .macro str1b ptr reg cond=al abort
- str\cond\()b \reg, [\ptr], #1
- .endm
+ .macro str1b ptr reg cond=al abort
+ str\cond\()b \reg, [\ptr], #1
+ .endm
- .macro enter reg1 reg2
- stmdb sp!, {r0, \reg1, \reg2}
- .endm
+ .macro enter reg1 reg2
+ stmdb sp!, {r0, \reg1, \reg2}
+ .endm
- .macro exit reg1 reg2
- ldmfd sp!, {r0, \reg1, \reg2}
- .endm
+ .macro exit reg1 reg2
+ ldmfd sp!, {r0, \reg1, \reg2}
+ .endm
- .text
+ .text
/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
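
The macros above are the building blocks the rest of memcpy.S is written in terms of: ldr8w/str8w move eight words per ldmia/stmia, ldr4w four, ldr1w/str1w a single word, and ldr1b/str1b pick up the odd bytes, while enter/exit save and restore the scratch registers. A rough C rendering of that shape, offered as an editor's sketch only and assuming word-aligned pointers (copy_words_then_bytes is an invented name):

    #include <stddef.h>
    #include <stdint.h>

    void *copy_words_then_bytes(void *dest, const void *src, size_t n)
    {
        uint32_t *d = dest;
        const uint32_t *s = src;

        /* ldr8w/str8w: eight words (32 bytes) per iteration. */
        while (n >= 32) {
            d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
            d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
            d += 8; s += 8; n -= 32;
        }

        /* ldr1b/str1b: whatever is left, byte by byte. */
        {
            uint8_t *db = (uint8_t *)d;
            const uint8_t *sb = (const uint8_t *)s;

            while (n--)
                *db++ = *sb++;
        }
        return dest;
    }
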
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memmove.S
--- a/xen/arch/arm/lib/memmove.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/memmove.S Mon Feb 13 17:26:08 2012 +0000
@@ -1,9 +1,9 @@
/*
* linux/arch/arm/lib/memmove.S
*
- * Author: Nicolas Pitre
- * Created: Sep 28, 2005
- * Copyright: (C) MontaVista Software Inc.
+ * Author: Nicolas Pitre
+ * Created: Sep 28, 2005
+ * Copyright: (C) MontaVista Software Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -14,7 +14,7 @@
#include "assembler.h"
- .text
+ .text
/*
* Prototype: void *memmove(void *dest, const void *src, size_t n);
@@ -29,172 +29,172 @@
ENTRY(memmove)
- subs ip, r0, r1
- cmphi r2, ip
- bls memcpy
+ subs ip, r0, r1
+ cmphi r2, ip
+ bls memcpy
- stmfd sp!, {r0, r4, lr}
- add r1, r1, r2
- add r0, r0, r2
- subs r2, r2, #4
- blt 8f
- ands ip, r0, #3
- PLD( pld [r1, #-4] )
- bne 9f
- ands ip, r1, #3
- bne 10f
+ stmfd sp!, {r0, r4, lr}
+ add r1, r1, r2
+ add r0, r0, r2
+ subs r2, r2, #4
+ blt 8f
+ ands ip, r0, #3
+ PLD( pld [r1, #-4] )
+ bne 9f
+ ands ip, r1, #3
+ bne 10f
-1: subs r2, r2, #(28)
- stmfd sp!, {r5 - r8}
- blt 5f
+1: subs r2, r2, #(28)
+ stmfd sp!, {r5 - r8}
+ blt 5f
- CALGN( ands ip, r0, #31 )
- CALGN( sbcnes r4, ip, r2 ) @ C is always set here
- CALGN( bcs 2f )
- CALGN( adr r4, 6f )
- CALGN( subs r2, r2, ip ) @ C is set here
- CALGN( rsb ip, ip, #32 )
- CALGN( add pc, r4, ip )
+ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, ip ) @ C is set here
+ CALGN( rsb ip, ip, #32 )
+ CALGN( add pc, r4, ip )
- PLD( pld [r1, #-4] )
-2: PLD( subs r2, r2, #96 )
- PLD( pld [r1, #-32] )
- PLD( blt 4f )
- PLD( pld [r1, #-64] )
- PLD( pld [r1, #-96] )
+ PLD( pld [r1, #-4] )
+2: PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #-32] )
+ PLD( blt 4f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
-3: PLD( pld [r1, #-128] )
-4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
- subs r2, r2, #32
- stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
- bge 3b
- PLD( cmn r2, #96 )
- PLD( bge 4b )
+3: PLD( pld [r1, #-128] )
+4: ldmdb r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ subs r2, r2, #32
+ stmdb r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
+ bge 3b
+ PLD( cmn r2, #96 )
+ PLD( bge 4b )
-5: ands ip, r2, #28
- rsb ip, ip, #32
- addne pc, pc, ip @ C is always clear here
- b 7f
-6: W(nop)
- W(ldr) r3, [r1, #-4]!
- W(ldr) r4, [r1, #-4]!
- W(ldr) r5, [r1, #-4]!
- W(ldr) r6, [r1, #-4]!
- W(ldr) r7, [r1, #-4]!
- W(ldr) r8, [r1, #-4]!
- W(ldr) lr, [r1, #-4]!
+5: ands ip, r2, #28
+ rsb ip, ip, #32
+ addne pc, pc, ip @ C is always clear here
+ b 7f
+6: W(nop)
+ W(ldr) r3, [r1, #-4]!
+ W(ldr) r4, [r1, #-4]!
+ W(ldr) r5, [r1, #-4]!
+ W(ldr) r6, [r1, #-4]!
+ W(ldr) r7, [r1, #-4]!
+ W(ldr) r8, [r1, #-4]!
+ W(ldr) lr, [r1, #-4]!
- add pc, pc, ip
- nop
- W(nop)
- W(str) r3, [r0, #-4]!
- W(str) r4, [r0, #-4]!
- W(str) r5, [r0, #-4]!
- W(str) r6, [r0, #-4]!
- W(str) r7, [r0, #-4]!
- W(str) r8, [r0, #-4]!
- W(str) lr, [r0, #-4]!
+ add pc, pc, ip
+ nop
+ W(nop)
+ W(str) r3, [r0, #-4]!
+ W(str) r4, [r0, #-4]!
+ W(str) r5, [r0, #-4]!
+ W(str) r6, [r0, #-4]!
+ W(str) r7, [r0, #-4]!
+ W(str) r8, [r0, #-4]!
+ W(str) lr, [r0, #-4]!
- CALGN( bcs 2b )
+ CALGN( bcs 2b )
-7: ldmfd sp!, {r5 - r8}
+7: ldmfd sp!, {r5 - r8}
-8: movs r2, r2, lsl #31
- ldrneb r3, [r1, #-1]!
- ldrcsb r4, [r1, #-1]!
- ldrcsb ip, [r1, #-1]
- strneb r3, [r0, #-1]!
- strcsb r4, [r0, #-1]!
- strcsb ip, [r0, #-1]
- ldmfd sp!, {r0, r4, pc}
+8: movs r2, r2, lsl #31
+ ldrneb r3, [r1, #-1]!
+ ldrcsb r4, [r1, #-1]!
+ ldrcsb ip, [r1, #-1]
+ strneb r3, [r0, #-1]!
+ strcsb r4, [r0, #-1]!
+ strcsb ip, [r0, #-1]
+ ldmfd sp!, {r0, r4, pc}
-9: cmp ip, #2
- ldrgtb r3, [r1, #-1]!
- ldrgeb r4, [r1, #-1]!
- ldrb lr, [r1, #-1]!
- strgtb r3, [r0, #-1]!
- strgeb r4, [r0, #-1]!
- subs r2, r2, ip
- strb lr, [r0, #-1]!
- blt 8b
- ands ip, r1, #3
- beq 1b
+9: cmp ip, #2
+ ldrgtb r3, [r1, #-1]!
+ ldrgeb r4, [r1, #-1]!
+ ldrb lr, [r1, #-1]!
+ strgtb r3, [r0, #-1]!
+ strgeb r4, [r0, #-1]!
+ subs r2, r2, ip
+ strb lr, [r0, #-1]!
+ blt 8b
+ ands ip, r1, #3
+ beq 1b
-10: bic r1, r1, #3
- cmp ip, #2
- ldr r3, [r1, #0]
- beq 17f
- blt 18f
+10: bic r1, r1, #3
+ cmp ip, #2
+ ldr r3, [r1, #0]
+ beq 17f
+ blt 18f
- .macro backward_copy_shift push pull
+ .macro backward_copy_shift push pull
- subs r2, r2, #28
- blt 14f
+ subs r2, r2, #28
+ blt 14f
- CALGN( ands ip, r0, #31 )
- CALGN( sbcnes r4, ip, r2 ) @ C is always set here
- CALGN( subcc r2, r2, ip )
- CALGN( bcc 15f )
+ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
-11: stmfd sp!, {r5 - r9}
+11: stmfd sp!, {r5 - r9}
- PLD( pld [r1, #-4] )
- PLD( subs r2, r2, #96 )
- PLD( pld [r1, #-32] )
- PLD( blt 13f )
- PLD( pld [r1, #-64] )
- PLD( pld [r1, #-96] )
+ PLD( pld [r1, #-4] )
+ PLD( subs r2, r2, #96 )
+ PLD( pld [r1, #-32] )
+ PLD( blt 13f )
+ PLD( pld [r1, #-64] )
+ PLD( pld [r1, #-96] )
-12: PLD( pld [r1, #-128] )
-13: ldmdb r1!, {r7, r8, r9, ip}
- mov lr, r3, push #\push
- subs r2, r2, #32
- ldmdb r1!, {r3, r4, r5, r6}
- orr lr, lr, ip, pull #\pull
- mov ip, ip, push #\push
- orr ip, ip, r9, pull #\pull
- mov r9, r9, push #\push
- orr r9, r9, r8, pull #\pull
- mov r8, r8, push #\push
- orr r8, r8, r7, pull #\pull
- mov r7, r7, push #\push
- orr r7, r7, r6, pull #\pull
- mov r6, r6, push #\push
- orr r6, r6, r5, pull #\pull
- mov r5, r5, push #\push
- orr r5, r5, r4, pull #\pull
- mov r4, r4, push #\push
- orr r4, r4, r3, pull #\pull
- stmdb r0!, {r4 - r9, ip, lr}
- bge 12b
- PLD( cmn r2, #96 )
- PLD( bge 13b )
+12: PLD( pld [r1, #-128] )
+13: ldmdb r1!, {r7, r8, r9, ip}
+ mov lr, r3, push #\push
+ subs r2, r2, #32
+ ldmdb r1!, {r3, r4, r5, r6}
+ orr lr, lr, ip, pull #\pull
+ mov ip, ip, push #\push
+ orr ip, ip, r9, pull #\pull
+ mov r9, r9, push #\push
+ orr r9, r9, r8, pull #\pull
+ mov r8, r8, push #\push
+ orr r8, r8, r7, pull #\pull
+ mov r7, r7, push #\push
+ orr r7, r7, r6, pull #\pull
+ mov r6, r6, push #\push
+ orr r6, r6, r5, pull #\pull
+ mov r5, r5, push #\push
+ orr r5, r5, r4, pull #\pull
+ mov r4, r4, push #\push
+ orr r4, r4, r3, pull #\pull
+ stmdb r0!, {r4 - r9, ip, lr}
+ bge 12b
+ PLD( cmn r2, #96 )
+ PLD( bge 13b )
- ldmfd sp!, {r5 - r9}
+ ldmfd sp!, {r5 - r9}
-14: ands ip, r2, #28
- beq 16f
+14: ands ip, r2, #28
+ beq 16f
-15: mov lr, r3, push #\push
- ldr r3, [r1, #-4]!
- subs ip, ip, #4
- orr lr, lr, r3, pull #\pull
- str lr, [r0, #-4]!
- bgt 15b
- CALGN( cmp r2, #0 )
- CALGN( bge 11b )
+15: mov lr, r3, push #\push
+ ldr r3, [r1, #-4]!
+ subs ip, ip, #4
+ orr lr, lr, r3, pull #\pull
+ str lr, [r0, #-4]!
+ bgt 15b
+ CALGN( cmp r2, #0 )
+ CALGN( bge 11b )
-16: add r1, r1, #(\pull / 8)
- b 8b
+16: add r1, r1, #(\pull / 8)
+ b 8b
- .endm
+ .endm
- backward_copy_shift push=8 pull=24
+ backward_copy_shift push=8 pull=24
-17: backward_copy_shift push=16 pull=16
+17: backward_copy_shift push=16 pull=16
-18: backward_copy_shift push=24 pull=8
+18: backward_copy_shift push=24 pull=8
ENDPROC(memmove)
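
The first three instructions of memmove above ("subs ip, r0, r1; cmphi r2, ip; bls memcpy") compute dest - src as an unsigned value and fall back to memcpy whenever a forward copy cannot clobber bytes that have not been read yet; only a genuinely overlapping forward copy falls through to the code that walks both buffers backwards from the end. An editor's C sketch of that dispatch, not part of the patch (memmove_sketch is an invented name and the byte-wise tail stands in for the word-sized loops in the assembler):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    void *memmove_sketch(void *dst, const void *src, size_t n)
    {
        /* "subs ip, r0, r1; cmphi r2, ip; bls memcpy":
         * if dst - src (unsigned) >= n, a forward copy is safe. */
        if ((uintptr_t)dst - (uintptr_t)src >= n)
            return memcpy(dst, src, n);

        /* Destructive overlap: copy backwards, highest address first. */
        {
            uint8_t *d = (uint8_t *)dst + n;
            const uint8_t *s = (const uint8_t *)src + n;

            while (n--)
                *--d = *--s;
        }
        return dst;
    }
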
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memset.S
--- a/xen/arch/arm/lib/memset.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/memset.S Mon Feb 13 17:26:08 2012 +0000
@@ -14,33 +14,33 @@
#include "assembler.h"
- .text
- .align 5
- .word 0
+ .text
+ .align 5
+ .word 0
-1: subs r2, r2, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r1, [r0], #1 @ 1
- strleb r1, [r0], #1 @ 1
- strb r1, [r0], #1 @ 1
- add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
+1: subs r2, r2, #4 @ 1 do we have enough
+ blt 5f @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r1, [r0], #1 @ 1
+ strleb r1, [r0], #1 @ 1
+ strb r1, [r0], #1 @ 1
+ add r2, r2, r3 @ 1 (r2 = r2 - (4 - r3))
/*
* The pointer is now aligned and the length is adjusted. Try doing the
* memset again.
*/
ENTRY(memset)
- ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
+ ands r3, r0, #3 @ 1 unaligned?
+ bne 1b @ 1
/*
* we know that the pointer in r0 is aligned to a word boundary.
*/
- orr r1, r1, r1, lsl #8
- orr r1, r1, r1, lsl #16
- mov r3, r1
- cmp r2, #16
- blt 4f
+ orr r1, r1, r1, lsl #8
+ orr r1, r1, r1, lsl #16
+ mov r3, r1
+ cmp r2, #16
+ blt 4f
#if ! CALGN(1)+0
@@ -48,26 +48,26 @@ ENTRY(memset)
* We need an extra register for this loop - save the return address and
* use the LR
*/
- str lr, [sp, #-4]!
- mov ip, r1
- mov lr, r1
+ str lr, [sp, #-4]!
+ mov ip, r1
+ mov lr, r1
-2: subs r2, r2, #64
- stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
- stmgeia r0!, {r1, r3, ip, lr}
- bgt 2b
- ldmeqfd sp!, {pc} @ Now <64 bytes to go.
+2: subs r2, r2, #64
+ stmgeia r0!, {r1, r3, ip, lr} @ 64 bytes at a time.
+ stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia r0!, {r1, r3, ip, lr}
+ stmgeia r0!, {r1, r3, ip, lr}
+ bgt 2b
+ ldmeqfd sp!, {pc} @ Now <64 bytes to go.
/*
 * No need to correct the count; we're only testing bits from now on
*/
- tst r2, #32
- stmneia r0!, {r1, r3, ip, lr}
- stmneia r0!, {r1, r3, ip, lr}
- tst r2, #16
- stmneia r0!, {r1, r3, ip, lr}
- ldr lr, [sp], #4
+ tst r2, #32
+ stmneia r0!, {r1, r3, ip, lr}
+ stmneia r0!, {r1, r3, ip, lr}
+ tst r2, #16
+ stmneia r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
#else
@@ -76,54 +76,54 @@ 2: subs r2, r2, #64
* whole cache lines at once.
*/
- stmfd sp!, {r4-r7, lr}
- mov r4, r1
- mov r5, r1
- mov r6, r1
- mov r7, r1
- mov ip, r1
- mov lr, r1
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r7, r1
+ mov ip, r1
+ mov lr, r1
- cmp r2, #96
- tstgt r0, #31
- ble 3f
+ cmp r2, #96
+ tstgt r0, #31
+ ble 3f
- and ip, r0, #31
- rsb ip, ip, #32
- sub r2, r2, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- tst ip, #(1 << 30)
- mov ip, r1
- strne r1, [r0], #4
+ and ip, r0, #31
+ rsb ip, ip, #32
+ sub r2, r2, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ tst ip, #(1 << 30)
+ mov ip, r1
+ strne r1, [r0], #4
-3: subs r2, r2, #64
- stmgeia r0!, {r1, r3-r7, ip, lr}
- stmgeia r0!, {r1, r3-r7, ip, lr}
- bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
+3: subs r2, r2, #64
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
- tst r2, #32
- stmneia r0!, {r1, r3-r7, ip, lr}
- tst r2, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
+ tst r2, #32
+ stmneia r0!, {r1, r3-r7, ip, lr}
+ tst r2, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
#endif
-4: tst r2, #8
- stmneia r0!, {r1, r3}
- tst r2, #4
- strne r1, [r0], #4
+4: tst r2, #8
+ stmneia r0!, {r1, r3}
+ tst r2, #4
+ strne r1, [r0], #4
/*
 * When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
-5: tst r2, #2
- strneb r1, [r0], #1
- strneb r1, [r0], #1
- tst r2, #1
- strneb r1, [r0], #1
- mov pc, lr
+5: tst r2, #2
+ strneb r1, [r0], #1
+ strneb r1, [r0], #1
+ tst r2, #1
+ strneb r1, [r0], #1
+ mov pc, lr
ENDPROC(memset)
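
Before the bulk stores, memset above spreads the fill byte across a whole register with "orr r1, r1, r1, lsl #8" and "orr r1, r1, r1, lsl #16", then mirrors that word into further registers so each stmgeia/stmneia writes 16 or 64 bytes per instruction. An editor's sketch of just the replication step, not part of the patch (replicate_byte is an invented name):

    #include <stdint.h>

    /* Editor's sketch: the two-orr byte replication used by memset. */
    static inline uint32_t replicate_byte(uint8_t c)
    {
        uint32_t w = c;

        w |= w << 8;   /* 0x000000cc -> 0x0000cccc */
        w |= w << 16;  /* 0x0000cccc -> 0xcccccccc */
        return w;
    }
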
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/memzero.S
--- a/xen/arch/arm/lib/memzero.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/memzero.S Mon Feb 13 17:26:08 2012 +0000
@@ -12,35 +12,35 @@
#include "assembler.h"
- .text
- .align 5
- .word 0
+ .text
+ .align 5
+ .word 0
/*
* Align the pointer in r0. r3 contains the number of bytes that we are
* mis-aligned by, and r1 is the number of bytes. If r1 < 4, then we
 * don't bother; we use byte stores instead.
*/
-1: subs r1, r1, #4 @ 1 do we have enough
- blt 5f @ 1 bytes to align with?
- cmp r3, #2 @ 1
- strltb r2, [r0], #1 @ 1
- strleb r2, [r0], #1 @ 1
- strb r2, [r0], #1 @ 1
- add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
+1: subs r1, r1, #4 @ 1 do we have enough
+ blt 5f @ 1 bytes to align with?
+ cmp r3, #2 @ 1
+ strltb r2, [r0], #1 @ 1
+ strleb r2, [r0], #1 @ 1
+ strb r2, [r0], #1 @ 1
+ add r1, r1, r3 @ 1 (r1 = r1 - (4 - r3))
/*
* The pointer is now aligned and the length is adjusted. Try doing the
* memzero again.
*/
ENTRY(__memzero)
- mov r2, #0 @ 1
- ands r3, r0, #3 @ 1 unaligned?
- bne 1b @ 1
+ mov r2, #0 @ 1
+ ands r3, r0, #3 @ 1 unaligned?
+ bne 1b @ 1
/*
* r3 = 0, and we know that the pointer in r0 is aligned to a word boundary.
*/
- cmp r1, #16 @ 1 we can skip this chunk if we
- blt 4f @ 1 have < 16 bytes
+ cmp r1, #16 @ 1 we can skip this chunk if we
+ blt 4f @ 1 have < 16 bytes
#if ! CALGN(1)+0
@@ -48,26 +48,26 @@ ENTRY(__memzero)
* We need an extra register for this loop - save the return address and
* use the LR
*/
- str lr, [sp, #-4]! @ 1
- mov ip, r2 @ 1
- mov lr, r2 @ 1
+ str lr, [sp, #-4]! @ 1
+ mov ip, r2 @ 1
+ mov lr, r2 @ 1
-3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- stmgeia r0!, {r2, r3, ip, lr} @ 4
- bgt 3b @ 1
- ldmeqfd sp!, {pc} @ 1/2 quick exit
+3: subs r1, r1, #64 @ 1 write 32 bytes out per loop
+ stmgeia r0!, {r2, r3, ip, lr} @ 4
+ stmgeia r0!, {r2, r3, ip, lr} @ 4
+ stmgeia r0!, {r2, r3, ip, lr} @ 4
+ stmgeia r0!, {r2, r3, ip, lr} @ 4
+ bgt 3b @ 1
+ ldmeqfd sp!, {pc} @ 1/2 quick exit
/*
 * No need to correct the count; we're only testing bits from now on
*/
- tst r1, #32 @ 1
- stmneia r0!, {r2, r3, ip, lr} @ 4
- stmneia r0!, {r2, r3, ip, lr} @ 4
- tst r1, #16 @ 1 16 bytes or more?
- stmneia r0!, {r2, r3, ip, lr} @ 4
- ldr lr, [sp], #4 @ 1
+ tst r1, #32 @ 1
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ tst r1, #16 @ 1 16 bytes or more?
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ ldr lr, [sp], #4 @ 1
#else
@@ -76,52 +76,52 @@ 3: subs r1, r1, #64 @
* whole cache lines at once.
*/
- stmfd sp!, {r4-r7, lr}
- mov r4, r2
- mov r5, r2
- mov r6, r2
- mov r7, r2
- mov ip, r2
- mov lr, r2
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r2
+ mov r5, r2
+ mov r6, r2
+ mov r7, r2
+ mov ip, r2
+ mov lr, r2
- cmp r1, #96
- andgts ip, r0, #31
- ble 3f
+ cmp r1, #96
+ andgts ip, r0, #31
+ ble 3f
- rsb ip, ip, #32
- sub r1, r1, ip
- movs ip, ip, lsl #(32 - 4)
- stmcsia r0!, {r4, r5, r6, r7}
- stmmiia r0!, {r4, r5}
- movs ip, ip, lsl #2
- strcs r2, [r0], #4
+ rsb ip, ip, #32
+ sub r1, r1, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ movs ip, ip, lsl #2
+ strcs r2, [r0], #4
-3: subs r1, r1, #64
- stmgeia r0!, {r2-r7, ip, lr}
- stmgeia r0!, {r2-r7, ip, lr}
- bgt 3b
- ldmeqfd sp!, {r4-r7, pc}
+3: subs r1, r1, #64
+ stmgeia r0!, {r2-r7, ip, lr}
+ stmgeia r0!, {r2-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
- tst r1, #32
- stmneia r0!, {r2-r7, ip, lr}
- tst r1, #16
- stmneia r0!, {r4-r7}
- ldmfd sp!, {r4-r7, lr}
+ tst r1, #32
+ stmneia r0!, {r2-r7, ip, lr}
+ tst r1, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
#endif
-4: tst r1, #8 @ 1 8 bytes or more?
- stmneia r0!, {r2, r3} @ 2
- tst r1, #4 @ 1 4 bytes or more?
- strne r2, [r0], #4 @ 1
+4: tst r1, #8 @ 1 8 bytes or more?
+ stmneia r0!, {r2, r3} @ 2
+ tst r1, #4 @ 1 4 bytes or more?
+ strne r2, [r0], #4 @ 1
/*
 * When we get here, we've got less than 4 bytes to zero. We
* may have an unaligned pointer as well.
*/
-5: tst r1, #2 @ 1 2 bytes or more?
- strneb r2, [r0], #1 @ 1
- strneb r2, [r0], #1 @ 1
- tst r1, #1 @ 1 a byte left over
- strneb r2, [r0], #1 @ 1
- mov pc, lr @ 1
+5: tst r1, #2 @ 1 2 bytes or more?
+ strneb r2, [r0], #1 @ 1
+ strneb r2, [r0], #1 @ 1
+ tst r1, #1 @ 1 a byte left over
+ strneb r2, [r0], #1 @ 1
+ mov pc, lr @ 1
ENDPROC(__memzero)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/setbit.S
--- a/xen/arch/arm/lib/setbit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/setbit.S Mon Feb 13 17:26:08 2012 +0000
@@ -11,8 +11,8 @@
#include "assembler.h"
#include "bitops.h"
- .text
+ .text
ENTRY(_set_bit)
- bitop orr
+ bitop orr
ENDPROC(_set_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testchangebit.S
--- a/xen/arch/arm/lib/testchangebit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/testchangebit.S Mon Feb 13 17:26:08 2012 +0000
@@ -14,5 +14,5 @@
.text
ENTRY(_test_and_change_bit)
- testop eor, str
+ testop eor, str
ENDPROC(_test_and_change_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testclearbit.S
--- a/xen/arch/arm/lib/testclearbit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/testclearbit.S Mon Feb 13 17:26:08 2012 +0000
@@ -14,5 +14,5 @@
.text
ENTRY(_test_and_clear_bit)
- testop bicne, strne
+ testop bicne, strne
ENDPROC(_test_and_clear_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/arch/arm/lib/testsetbit.S
--- a/xen/arch/arm/lib/testsetbit.S Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/arch/arm/lib/testsetbit.S Mon Feb 13 17:26:08 2012 +0000
@@ -14,5 +14,5 @@
.text
ENTRY(_test_and_set_bit)
- testop orreq, streq
+ testop orreq, streq
ENDPROC(_test_and_set_bit)
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/bitops.h
--- a/xen/include/asm-arm/bitops.h Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/include/asm-arm/bitops.h Mon Feb 13 17:26:08 2012 +0000
@@ -115,19 +115,19 @@ extern int _find_next_bit_be(const unsig
/*
* These are the little endian, atomic definitions.
*/
-#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
-#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
-#define find_first_bit(p,sz) _find_first_bit_le(p,sz)
-#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off)
+#define find_first_zero_bit(p,sz) _find_first_zero_bit_le(p,sz)
+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_le(p,sz,off)
+#define find_first_bit(p,sz) _find_first_bit_le(p,sz)
+#define find_next_bit(p,sz,off) _find_next_bit_le(p,sz,off)
#else
/*
* These are the big endian, atomic definitions.
*/
-#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
-#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
-#define find_first_bit(p,sz) _find_first_bit_be(p,sz)
-#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off)
+#define find_first_zero_bit(p,sz) _find_first_zero_bit_be(p,sz)
+#define find_next_zero_bit(p,sz,off) _find_next_zero_bit_be(p,sz,off)
+#define find_first_bit(p,sz) _find_first_bit_be(p,sz)
+#define find_next_bit(p,sz,off) _find_next_bit_be(p,sz,off)
#endif
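
These defines only select between the little- and big-endian implementations; the arguments stay (bitmap pointer, size in bits[, starting offset]). A brief editor's sketch of the usual way they are consumed, assuming the conventional behaviour that the helpers return the bitmap size when nothing is found (count_set_bits and the <xen/bitops.h> include path are illustrative, not taken from the patch):

    #include <xen/bitops.h>   /* assumed include path for the macros above */

    static unsigned int count_set_bits(const unsigned long *map, unsigned int nbits)
    {
        unsigned int bit, count = 0;

        for (bit = find_first_bit(map, nbits);
             bit < nbits;
             bit = find_next_bit(map, nbits, bit + 1))
            count++;

        return count;
    }
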
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/div64.h
--- a/xen/include/asm-arm/div64.h Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/include/asm-arm/div64.h Mon Feb 13 17:26:08 2012 +0000
@@ -10,9 +10,9 @@
*
* uint32_t do_div(uint64_t *n, uint32_t base)
* {
- * uint32_t remainder = *n % base;
- * *n = *n / base;
- * return remainder;
+ * uint32_t remainder = *n % base;
+ * *n = *n / base;
+ * return remainder;
* }
*
* In other words, a 64-bit dividend with a 32-bit divisor producing
@@ -29,22 +29,22 @@
#define __xh "r1"
#endif
-#define __do_div_asm(n, base) \
-({ \
- register unsigned int __base asm("r4") = base; \
- register unsigned long long __n asm("r0") = n; \
- register unsigned long long __res asm("r2"); \
- register unsigned int __rem asm(__xh); \
- asm( __asmeq("%0", __xh) \
- __asmeq("%1", "r2")
\
- __asmeq("%2", "r0")
\
- __asmeq("%3", "r4")
\
- "bl __do_div64" \
- : "=r" (__rem), "=r" (__res)
\
- : "r" (__n), "r" (__base)
\
- : "ip", "lr", "cc");
\
- n = __res; \
- __rem; \
+#define __do_div_asm(n, base) \
+({ \
+ register unsigned int __base asm("r4") = base; \
+ register unsigned long long __n asm("r0") = n; \
+ register unsigned long long __res asm("r2"); \
+ register unsigned int __rem asm(__xh); \
+ asm( __asmeq("%0", __xh) \
+ __asmeq("%1", "r2") \
+ __asmeq("%2", "r0") \
+ __asmeq("%3", "r4") \
+ "bl __do_div64" \
+ : "=r" (__rem), "=r" (__res) \
+ : "r" (__n), "r" (__base) \
+ : "ip", "lr", "cc"); \
+ n = __res; \
+ __rem; \
})
#if __GNUC__ < 4
@@ -71,155 +71,155 @@
* sufficiently recent to perform proper long long constant propagation.
 * (It is unfortunate that gcc doesn't perform all this internally.)
*/
-#define do_div(n, base) \
-({ \
- unsigned int __r, __b = (base); \
- if (!__builtin_constant_p(__b) || __b == 0) { \
- /* non-constant divisor (or zero): slow path */ \
- __r = __do_div_asm(n, __b); \
- } else if ((__b & (__b - 1)) == 0) { \
- /* Trivial: __b is constant and a power of 2 */ \
- /* gcc does the right thing with this code. */ \
- __r = n; \
- __r &= (__b - 1); \
- n /= __b; \
- } else { \
- /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \
- /* We rely on the fact that most of this code gets */ \
- /* optimized away at compile time due to constant */ \
- /* propagation and only a couple inline assembly */ \
- /* instructions should remain. Better avoid any */ \
- /* code construct that might prevent that. */ \
- unsigned long long __res, __x, __t, __m, __n = n; \
- unsigned int __c, __p, __z = 0; \
- /* preserve low part of n for reminder computation */ \
- __r = __n; \
- /* determine number of bits to represent __b */ \
- __p = 1 << __div64_fls(__b); \
- /* compute __m = ((__p << 64) + __b - 1) / __b */ \
- __m = (~0ULL / __b) * __p; \
- __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \
- /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \
- __x = ~0ULL / __b * __b - 1; \
- __res = (__m & 0xffffffff) * (__x & 0xffffffff); \
- __res >>= 32; \
- __res += (__m & 0xffffffff) * (__x >> 32); \
- __t = __res; \
- __res += (__x & 0xffffffff) * (__m >> 32); \
- __t = (__res < __t) ? (1ULL << 32) : 0; \
- __res = (__res >> 32) + __t; \
- __res += (__m >> 32) * (__x >> 32); \
- __res /= __p; \
- /* Now sanitize and optimize what we've got. */ \
- if (~0ULL % (__b / (__b & -__b)) == 0) { \
- /* those cases can be simplified with: */ \
- __n /= (__b & -__b); \
- __m = ~0ULL / (__b / (__b & -__b)); \
- __p = 1; \
- __c = 1; \
- } else if (__res != __x / __b) { \
- /* We can't get away without a correction */ \
- /* to compensate for bit truncation errors. */ \
- /* To avoid it we'd need an additional bit */ \
- /* to represent __m which would overflow it. */ \
- /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \
- __c = 1; \
- /* Compute __m = (__p << 64) / __b */ \
- __m = (~0ULL / __b) * __p; \
- __m += ((~0ULL % __b + 1) * __p) / __b; \
- } else { \
- /* Reduce __m/__p, and try to clear bit 31 */ \
- /* of __m when possible otherwise that'll */ \
- /* need extra overflow handling later. */ \
- unsigned int __bits = -(__m & -__m); \
- __bits |= __m >> 32; \
- __bits = (~__bits) << 1; \
- /* If __bits == 0 then setting bit 31 is */ \
- /* unavoidable. Simply apply the maximum */ \
- /* possible reduction in that case. */ \
- /* Otherwise the MSB of __bits indicates the */ \
- /* best reduction we should apply. */ \
- if (!__bits) { \
- __p /= (__m & -__m); \
- __m /= (__m & -__m); \
- } else { \
- __p >>= __div64_fls(__bits); \
- __m >>= __div64_fls(__bits); \
- } \
- /* No correction needed. */ \
- __c = 0; \
- } \
- /* Now we have a combination of 2 conditions: */ \
- /* 1) whether or not we need a correction (__c), and */ \
- /* 2) whether or not there might be an overflow in */ \
- /* the cross product (__m & ((1<<63) | (1<<31))) */ \
- /* Select the best insn combination to perform the */ \
- /* actual __m * __n / (__p << 64) operation. */ \
- if (!__c) { \
- asm ( "umull %Q0, %R0, %1, %Q2\n\t"
\
- "mov %Q0, #0"
\
- : "=&r" (__res)
\
- : "r" (__m), "r" (__n)
\
- : "cc" );
\
- } else if (!(__m & ((1ULL << 63) | (1ULL <<
31)))) { \
- __res = __m; \
- asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t"
\
- "mov %Q0, #0"
\
- : "+&r" (__res)
\
- : "r" (__m), "r" (__n)
\
- : "cc" );
\
- } else { \
- asm ( "umull %Q0, %R0, %Q1, %Q2\n\t"
\
- "cmn %Q0, %Q1\n\t"
\
- "adcs %R0, %R0, %R1\n\t"
\
- "adc %Q0, %3, #0"
\
- : "=&r" (__res)
\
- : "r" (__m), "r" (__n),
"r" (__z) \
- : "cc" );
\
- } \
- if (!(__m & ((1ULL << 63) | (1ULL << 31)))) {
\
- asm ( "umlal %R0, %Q0, %R1, %Q2\n\t"
\
- "umlal %R0, %Q0, %Q1, %R2\n\t"
\
- "mov %R0, #0\n\t"
\
- "umlal %Q0, %R0, %R1, %R2"
\
- : "+&r" (__res)
\
- : "r" (__m), "r" (__n)
\
- : "cc" );
\
- } else { \
- asm ( "umlal %R0, %Q0, %R2, %Q3\n\t"
\
- "umlal %R0, %1, %Q2, %R3\n\t"
\
- "mov %R0, #0\n\t"
\
- "adds %Q0, %1, %Q0\n\t"
\
- "adc %R0, %R0, #0\n\t"
\
- "umlal %Q0, %R0, %R2, %R3"
\
- : "+&r" (__res),
"+&r" (__z) \
- : "r" (__m), "r" (__n)
\
- : "cc" );
\
- } \
- __res /= __p; \
- /* The reminder can be computed with 32-bit regs */ \
- /* only, and gcc is good at that. */ \
- { \
- unsigned int __res0 = __res; \
- unsigned int __b0 = __b; \
- __r -= __res0 * __b0; \
- } \
- /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \
- n = __res; \
- } \
- __r; \
+#define do_div(n, base) \
+({ \
+ unsigned int __r, __b = (base); \
+ if (!__builtin_constant_p(__b) || __b == 0) { \
+ /* non-constant divisor (or zero): slow path */ \
+ __r = __do_div_asm(n, __b); \
+ } else if ((__b & (__b - 1)) == 0) { \
+ /* Trivial: __b is constant and a power of 2 */ \
+ /* gcc does the right thing with this code. */ \
+ __r = n; \
+ __r &= (__b - 1); \
+ n /= __b; \
+ } else { \
+ /* Multiply by inverse of __b: n/b = n*(p/b)/p */ \
+ /* We rely on the fact that most of this code gets */ \
+ /* optimized away at compile time due to constant */ \
+ /* propagation and only a couple inline assembly */ \
+ /* instructions should remain. Better avoid any */ \
+ /* code construct that might prevent that. */ \
+ unsigned long long __res, __x, __t, __m, __n = n; \
+ unsigned int __c, __p, __z = 0; \
+ /* preserve low part of n for reminder computation */ \
+ __r = __n; \
+ /* determine number of bits to represent __b */ \
+ __p = 1 << __div64_fls(__b); \
+ /* compute __m = ((__p << 64) + __b - 1) / __b */ \
+ __m = (~0ULL / __b) * __p; \
+ __m += (((~0ULL % __b + 1) * __p) + __b - 1) / __b; \
+ /* compute __res = __m*(~0ULL/__b*__b-1)/(__p << 64) */ \
+ __x = ~0ULL / __b * __b - 1; \
+ __res = (__m & 0xffffffff) * (__x & 0xffffffff); \
+ __res >>= 32; \
+ __res += (__m & 0xffffffff) * (__x >> 32); \
+ __t = __res; \
+ __res += (__x & 0xffffffff) * (__m >> 32); \
+ __t = (__res < __t) ? (1ULL << 32) : 0; \
+ __res = (__res >> 32) + __t; \
+ __res += (__m >> 32) * (__x >> 32); \
+ __res /= __p; \
+ /* Now sanitize and optimize what we've got. */ \
+ if (~0ULL % (__b / (__b & -__b)) == 0) { \
+ /* those cases can be simplified with: */ \
+ __n /= (__b & -__b); \
+ __m = ~0ULL / (__b / (__b & -__b)); \
+ __p = 1; \
+ __c = 1; \
+ } else if (__res != __x / __b) { \
+ /* We can't get away without a correction */ \
+ /* to compensate for bit truncation errors. */ \
+ /* To avoid it we'd need an additional bit */ \
+ /* to represent __m which would overflow it. */ \
+ /* Instead we do m=p/b and n/b=(n*m+m)/p. */ \
+ __c = 1; \
+ /* Compute __m = (__p << 64) / __b */ \
+ __m = (~0ULL / __b) * __p; \
+ __m += ((~0ULL % __b + 1) * __p) / __b; \
+ } else { \
+ /* Reduce __m/__p, and try to clear bit 31 */ \
+ /* of __m when possible otherwise that'll */ \
+ /* need extra overflow handling later. */ \
+ unsigned int __bits = -(__m & -__m); \
+ __bits |= __m >> 32; \
+ __bits = (~__bits) << 1; \
+ /* If __bits == 0 then setting bit 31 is */ \
+ /* unavoidable. Simply apply the maximum */ \
+ /* possible reduction in that case. */ \
+ /* Otherwise the MSB of __bits indicates the */ \
+ /* best reduction we should apply. */ \
+ if (!__bits) { \
+ __p /= (__m & -__m); \
+ __m /= (__m & -__m); \
+ } else { \
+ __p >>= __div64_fls(__bits); \
+ __m >>= __div64_fls(__bits); \
+ } \
+ /* No correction needed. */ \
+ __c = 0; \
+ } \
+ /* Now we have a combination of 2 conditions: */ \
+ /* 1) whether or not we need a correction (__c), and */ \
+ /* 2) whether or not there might be an overflow in */ \
+ /* the cross product (__m & ((1<<63) | (1<<31))) */ \
+ /* Select the best insn combination to perform the */ \
+ /* actual __m * __n / (__p << 64) operation. */ \
+ if (!__c) { \
+ asm ( "umull %Q0, %R0, %1, %Q2\n\t" \
+ "mov %Q0, #0" \
+ : "=&r" (__res) \
+ : "r" (__m), "r" (__n) \
+ : "cc" ); \
+ } else if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
+ __res = __m; \
+ asm ( "umlal %Q0, %R0, %Q1, %Q2\n\t" \
+ "mov %Q0, #0" \
+ : "+&r" (__res) \
+ : "r" (__m), "r" (__n) \
+ : "cc" ); \
+ } else { \
+ asm ( "umull %Q0, %R0, %Q1, %Q2\n\t" \
+ "cmn %Q0, %Q1\n\t" \
+ "adcs %R0, %R0, %R1\n\t" \
+ "adc %Q0, %3, #0" \
+ : "=&r" (__res) \
+ : "r" (__m), "r" (__n), "r" (__z) \
+ : "cc" ); \
+ } \
+ if (!(__m & ((1ULL << 63) | (1ULL << 31)))) { \
+ asm ( "umlal %R0, %Q0, %R1, %Q2\n\t" \
+ "umlal %R0, %Q0, %Q1, %R2\n\t" \
+ "mov %R0, #0\n\t" \
+ "umlal %Q0, %R0, %R1, %R2" \
+ : "+&r" (__res) \
+ : "r" (__m), "r" (__n) \
+ : "cc" ); \
+ } else { \
+ asm ( "umlal %R0, %Q0, %R2, %Q3\n\t" \
+ "umlal %R0, %1, %Q2, %R3\n\t" \
+ "mov %R0, #0\n\t" \
+ "adds %Q0, %1, %Q0\n\t" \
+ "adc %R0, %R0, #0\n\t" \
+ "umlal %Q0, %R0, %R2, %R3" \
+ : "+&r" (__res), "+&r" (__z) \
+ : "r" (__m), "r" (__n) \
+ : "cc" ); \
+ } \
+ __res /= __p; \
+ /* The reminder can be computed with 32-bit regs */ \
+ /* only, and gcc is good at that. */ \
+ { \
+ unsigned int __res0 = __res; \
+ unsigned int __b0 = __b; \
+ __r -= __res0 * __b0; \
+ } \
+ /* BUG_ON(__r >= __b || __res * __b + __r != n); */ \
+ n = __res; \
+ } \
+ __r; \
})
/* our own fls implementation to make sure constant propagation is fine */
-#define __div64_fls(bits) \
-({ \
- unsigned int __left = (bits), __nr = 0; \
- if (__left & 0xffff0000) __nr += 16, __left >>= 16; \
- if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \
- if (__left & 0x000000f0) __nr += 4, __left >>= 4; \
- if (__left & 0x0000000c) __nr += 2, __left >>= 2; \
- if (__left & 0x00000002) __nr += 1; \
- __nr; \
+#define __div64_fls(bits) \
+({ \
+ unsigned int __left = (bits), __nr = 0; \
+ if (__left & 0xffff0000) __nr += 16, __left >>= 16; \
+ if (__left & 0x0000ff00) __nr += 8, __left >>= 8; \
+ if (__left & 0x000000f0) __nr += 4, __left >>= 4; \
+ if (__left & 0x0000000c) __nr += 2, __left >>= 2; \
+ if (__left & 0x00000002) __nr += 1; \
+ __nr; \
})
#endif
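
As the comment at the top of this hunk spells out, do_div() divides a 64-bit value in place by a 32-bit divisor and evaluates to the 32-bit remainder; the large constant-divisor branch exists so that the compile-time case becomes a multiply by a precomputed reciprocal (n/b = n*(p/b)/p) rather than a call to __do_div64. A minimal editor's usage sketch, assuming the header is reachable as <asm/div64.h> in the usual way for files under xen/include/asm-arm (ns_to_us and the example values are illustrative):

    #include <stdint.h>
    #include <asm/div64.h>              /* assumed include path for do_div() */

    static uint32_t ns_to_us(uint64_t *val)
    {
        uint64_t n = *val;              /* do_div() updates its first argument in place */
        uint32_t rem = do_div(n, 1000); /* constant divisor: reciprocal path */

        *val = n;                       /* e.g. 1000000123 -> 1000000, rem = 123 */
        return rem;
    }
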
diff -r 7fd8f10cfd3e -r 63e88a26e1ef xen/include/asm-arm/numa.h
--- a/xen/include/asm-arm/numa.h Mon Feb 13 17:03:44 2012 +0000
+++ b/xen/include/asm-arm/numa.h Mon Feb 13 17:26:08 2012 +0000
@@ -3,7 +3,7 @@
/* Fake one node for now... */
#define cpu_to_node(cpu) 0
-#define node_to_cpumask(node) (cpu_online_map)
+#define node_to_cpumask(node) (cpu_online_map)
static inline __attribute__((pure)) int phys_to_nid(paddr_t addr)
{