>>> On 09.11.12 at 14:28, Jan Beulich wrote: > ... and use it as basis for a proper ioremap() on x86. > > Signed-off-by: Jan Beulich <jbeulich@suse.com> > > --- a/xen/arch/x86/mm.c > +++ b/xen/arch/x86/mm.c > @@ -100,6 +100,7 @@ > #include <xen/iocap.h> > #include <xen/guest_access.h> > #include <xen/pfn.h> > +#include <xen/vmap.h> > #include <xen/xmalloc.h> > #include <xen/efi.h> > #include <xen/grant_table.h> > @@ -5460,6 +5561,32 @@ void __set_fixmap( > map_pages_to_xen(fix_to_virt(idx), mfn, 1, flags); > } > > +void *__init arch_vmap_virt_end(void) > +{ > + return (void *)fix_to_virt(__end_of_fixed_addresses); > +} > + > +void __iomem *ioremap(paddr_t pa, size_t len) > +{ > + unsigned long pfn = PFN_DOWN(pa); > + void *va; > + > + WARN_ON(page_is_ram_type(pfn, RAM_TYPE_CONVENTIONAL)); > + > + /* The low first Mb is always mapped. */ > + if ( !((pa + len - 1) >> 20) ) > + va = __va(pa); > + else > + { > + unsigned int offs = pa & (PAGE_SIZE - 1); > + unsigned int nr = PFN_UP(offs + len); > + > + va = __vmap(&pfn, nr, 1, 1, PAGE_HYPERVISOR_NOCACHE) + offs; > + } > + > + return (void __force __iomem *)va; > +} > + > #ifdef MEMORY_GUARD > > void memguard_init(void) > --- a/xen/arch/x86/setup.c > +++ b/xen/arch/x86/setup.c > @@ -1104,6 +1104,7 @@ void __init __start_xen(unsigned long mb > end_boot_allocator(); > system_state = SYS_STATE_boot; > > + vm_init(); > vesa_init(); > > softirq_init(); > --- a/xen/common/Makefile > +++ b/xen/common/Makefile > @@ -36,6 +36,7 @@ obj-y += time.o > obj-y += timer.o > obj-y += trace.o > obj-y += version.o > +obj-y += vmap.o > obj-y += vsprintf.o > obj-y += wait.o > obj-y += xmalloc_tlsf.o > --- /dev/null > +++ b/xen/common/vmap.c > @@ -0,0 +1,204 @@ > +#ifdef VMAP_VIRT_START > +#include <xen/bitmap.h> > +#include <xen/cache.h> > +#include <xen/init.h> > +#include <xen/mm.h> > +#include <xen/pfn.h> > +#include <xen/spinlock.h> > +#include <xen/types.h> > +#include <xen/vmap.h> > +#include <asm/page.h> > + > +static 
DEFINE_SPINLOCK(vm_lock); > +static void *__read_mostly vm_base; > +#define vm_bitmap ((unsigned long *)vm_base) > +/* highest allocated bit in the bitmap */ > +static unsigned int __read_mostly vm_top; > +/* total number of bits in the bitmap */ > +static unsigned int __read_mostly vm_end; > +/* lowest known clear bit in the bitmap */ > +static unsigned int vm_low; > + > +void __init vm_init(void) > +{ > + unsigned int i, nr; > + unsigned long va; > + > + vm_base = (void *)VMAP_VIRT_START; > + vm_end = PFN_DOWN(arch_vmap_virt_end() - vm_base); > + vm_low = PFN_UP((vm_end + 7) / 8); > + nr = PFN_UP((vm_low + 7) / 8); > + vm_top = nr * PAGE_SIZE * 8; > + > + for ( i = 0, va = (unsigned long)vm_bitmap; i < nr; ++i, va += > PAGE_SIZE ) > + { > + struct page_info *pg = alloc_domheap_page(NULL, 0); > + > + map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR); > + clear_page((void *)va); > + } > + bitmap_fill(vm_bitmap, vm_low); > + > + /* Populate page tables for the bitmap if necessary. */ > + map_pages_to_xen(va, 0, vm_low - nr, MAP_SMALL_PAGES); > +} > + > +void *vm_alloc(unsigned int nr, unsigned int align) > +{ > + unsigned int start, bit; > + > + if ( !align ) > + align = 1; > + else if ( align & (align - 1) ) > + align &= -align; > + > + spin_lock(&vm_lock); > + for ( ; ; ) > + { > + struct page_info *pg; > + > + ASSERT(!test_bit(vm_low, vm_bitmap)); > + for ( start = vm_low; ; ) > + { > + bit = find_next_bit(vm_bitmap, vm_top, start + 1); > + if ( bit > vm_top ) > + bit = vm_top; > + /* > + * Note that this skips the first bit, making the > + * corresponding page a guard one. > + */ > + start = (start + align) & ~(align - 1); > + if ( start + nr <= bit ) > + break; > + start = bit < vm_top ? 
> + find_next_zero_bit(vm_bitmap, vm_top, bit + 1) : bit; > + if ( start >= vm_top ) > + break; > + } > + > + if ( start < vm_top ) > + break; > + > + spin_unlock(&vm_lock); > + > + if ( vm_top >= vm_end ) > + return NULL; > + > + pg = alloc_domheap_page(NULL, 0); > + if ( !pg ) > + return NULL; > + > + spin_lock(&vm_lock); > + > + if ( start >= vm_top ) > + { > + unsigned long va = (unsigned long)vm_bitmap + vm_top / 8; > + > + if ( !map_pages_to_xen(va, page_to_mfn(pg), 1, PAGE_HYPERVISOR) > ) > + { > + clear_page((void *)va); > + vm_top += PAGE_SIZE * 8; > + if ( vm_top > vm_end ) > + vm_top = vm_end; > + continue; > + } > + } > + > + free_domheap_page(pg); > + > + if ( start >= vm_top ) > + { > + spin_unlock(&vm_lock); > + return NULL; > + } > + } > + > + for ( bit = start; bit < start + nr; ++bit ) > + __set_bit(bit, vm_bitmap); > + if ( start <= vm_low + 2 ) > + vm_low = bit; > + spin_unlock(&vm_lock); > + > + return vm_base + start * PAGE_SIZE; > +} > + > +static unsigned int vm_index(const void *va) > +{ > + unsigned long addr = (unsigned long)va & ~(PAGE_SIZE - 1); > + unsigned int idx; > + > + if ( addr < VMAP_VIRT_START + (vm_end / 8) || > + addr >= VMAP_VIRT_START + vm_top * PAGE_SIZE ) > + return 0; > + > + idx = PFN_DOWN(va - vm_base); > + return !test_bit(idx - 1, vm_bitmap) && > + test_bit(idx, vm_bitmap) ? 
idx : 0; > +} > + > +static unsigned int vm_size(const void *va) > +{ > + unsigned int start = vm_index(va), end; > + > + if ( !start ) > + return 0; > + > + end = find_next_zero_bit(vm_bitmap, vm_top, start + 1); > + > + return min(end, vm_top) - start; > +} > + > +void vm_free(const void *va) > +{ > + unsigned int bit = vm_index(va); > + > + if ( !bit ) > + { > + WARN_ON(va != NULL); > + return; > + } > + > + spin_lock(&vm_lock); > + if ( bit < vm_low ) > + { > + vm_low = bit - 1; > + while ( !test_bit(vm_low - 1, vm_bitmap) ) > + --vm_low; > + } > + while ( __test_and_clear_bit(bit, vm_bitmap) ) > + if ( ++bit == vm_top ) > + break; > + spin_unlock(&vm_lock); > +} > + > +void *__vmap(const unsigned long *mfn, unsigned int granularity, > + unsigned int nr, unsigned int align, unsigned int flags) > +{ > + void *va = vm_alloc(nr * granularity, align); > + unsigned long cur = (unsigned long)va; > + > + for ( ; va && nr--; ++mfn, cur += PAGE_SIZE * granularity ) > + { > + if ( map_pages_to_xen(cur, *mfn, granularity, flags) ) > + { > + vunmap(va); > + va = NULL; > + } > + } > + > + return va; > +} > + > +void *vmap(const unsigned long *mfn, unsigned int nr) > +{ > + return __vmap(mfn, 1, nr, 1, PAGE_HYPERVISOR); > +} > + > +void vunmap(const void *va) > +{ > + unsigned long addr = (unsigned long)va; > + > + destroy_xen_mappings(addr, addr + PAGE_SIZE * vm_size(va)); > + vm_free(va); > +} > +#endif > --- a/xen/drivers/acpi/apei/erst.c > +++ b/xen/drivers/acpi/apei/erst.c > @@ -247,9 +247,6 @@ static int erst_exec_move_data(struct ap > { > int rc; > u64 offset; > -#ifdef CONFIG_X86 > - enum fixed_addresses idx; > -#endif > void *src, *dst; > > /* ioremap does not work in interrupt context */ > @@ -263,39 +260,11 @@ static int erst_exec_move_data(struct ap > if (rc) > return rc; > > -#ifdef CONFIG_X86 > - switch (ctx->var2) { > - case 0: > - return 0; > - case 1 ... 
PAGE_SIZE: > - break; > - default: > - printk(KERN_WARNING > - "MOVE_DATA cannot be used for %#"PRIx64" bytes of data\n", > - ctx->var2); > - return -EOPNOTSUPP; > - } > - > - src = __acpi_map_table(ctx->src_base + offset, ctx->var2); > -#else > src = ioremap(ctx->src_base + offset, ctx->var2); > -#endif > if (!src) > return -ENOMEM; > > -#ifdef CONFIG_X86 > - BUILD_BUG_ON(FIX_ACPI_PAGES < 4); > - idx = virt_to_fix((unsigned long)src + 2 * PAGE_SIZE); > - offset += ctx->dst_base; > - dst = (void *)fix_to_virt(idx) + (offset & ~PAGE_MASK); > - set_fixmap(idx, offset); > - if (PFN_DOWN(offset) != PFN_DOWN(offset + ctx->var2 - 1)) { > - idx = virt_to_fix((unsigned long)dst + PAGE_SIZE); > - set_fixmap(idx, offset + PAGE_SIZE); > - } > -#else > dst = ioremap(ctx->dst_base + offset, ctx->var2); > -#endif > if (dst) { > memmove(dst, src, ctx->var2); > iounmap(dst); > --- a/xen/drivers/video/vesa.c > +++ b/xen/drivers/video/vesa.c > @@ -10,6 +10,7 @@ > #include <xen/xmalloc.h> > #include <xen/kernel.h> > #include <xen/vga.h> > +#include <asm/io.h> > #include <asm/page.h> > #include "font.h" > > @@ -101,13 +102,11 @@ void __init vesa_init(void) > if ( !line_len ) > goto fail; > > - if ( map_pages_to_xen(IOREMAP_VIRT_START, > - vlfb_info.lfb_base >> PAGE_SHIFT, > - vram_remap >> PAGE_SHIFT, > - PAGE_HYPERVISOR_NOCACHE) ) > + lfb = ioremap(vlfb_info.lfb_base, vram_remap); > + if ( !lfb ) > goto fail; > > - lfb = memset((void *)IOREMAP_VIRT_START, 0, vram_remap); > + memset(lfb, 0, vram_remap); > > vga_puts = vesa_redraw_puts; > > --- a/xen/include/asm-x86/config.h > +++ b/xen/include/asm-x86/config.h > @@ -144,7 +144,7 @@ extern unsigned char boot_edid_info[128] > * 0xffff828000000000 - 0xffff82bfffffffff [256GB, 2^38 bytes, PML4:261] > * Machine-to-phys translation table. > * 0xffff82c000000000 - 0xffff82c3ffffffff [16GB, 2^34 bytes, PML4:261] > - * ioremap()/fixmap area. > + * vmap()/ioremap()/fixmap area. 
> * 0xffff82c400000000 - 0xffff82c43fffffff [1GB, 2^30 bytes, PML4:261] > * Compatibility machine-to-phys translation table. > * 0xffff82c440000000 - 0xffff82c47fffffff [1GB, 2^30 bytes, PML4:261] > @@ -205,11 +205,11 @@ extern unsigned char boot_edid_info[128] > /* Slot 261: machine-to-phys conversion table (256GB). */ > #define RDWR_MPT_VIRT_START (PML4_ADDR(261)) > #define RDWR_MPT_VIRT_END (RDWR_MPT_VIRT_START + MPT_VIRT_SIZE) > -/* Slot 261: ioremap()/fixmap area (16GB). */ > -#define IOREMAP_VIRT_START RDWR_MPT_VIRT_END > -#define IOREMAP_VIRT_END (IOREMAP_VIRT_START + GB(16)) > +/* Slot 261: vmap()/ioremap()/fixmap area (16GB). */ > +#define VMAP_VIRT_START RDWR_MPT_VIRT_END > +#define VMAP_VIRT_END (VMAP_VIRT_START + GB(16)) > /* Slot 261: compatibility machine-to-phys conversion table (1GB). */ > -#define RDWR_COMPAT_MPT_VIRT_START IOREMAP_VIRT_END > +#define RDWR_COMPAT_MPT_VIRT_START VMAP_VIRT_END > #define RDWR_COMPAT_MPT_VIRT_END (RDWR_COMPAT_MPT_VIRT_START + GB(1)) > /* Slot 261: high read-only compat machine-to-phys conversion table (1GB). > */ > #define HIRO_COMPAT_MPT_VIRT_START RDWR_COMPAT_MPT_VIRT_END > --- a/xen/include/asm-x86/fixmap.h > +++ b/xen/include/asm-x86/fixmap.h > @@ -15,7 +15,7 @@ > #include <xen/config.h> > #include <asm/page.h> > > -#define FIXADDR_TOP (IOREMAP_VIRT_END - PAGE_SIZE) > +#define FIXADDR_TOP (VMAP_VIRT_END - PAGE_SIZE) > > #ifndef __ASSEMBLY__ > > --- a/xen/include/asm-x86/io.h > +++ b/xen/include/asm-x86/io.h > @@ -1,14 +1,10 @@ > #ifndef _ASM_IO_H > #define _ASM_IO_H > > -#include <xen/config.h> > +#include <xen/vmap.h> > #include <xen/types.h> > #include <asm/page.h> > > -/* We don't need real ioremap() on Xen/x86. 
*/ > -#define ioremap(x,l) (__va(x)) > -#define iounmap(p) ((void)0) > - > #define readb(x) (*(volatile char *)(x)) > #define readw(x) (*(volatile short *)(x)) > #define readl(x) (*(volatile int *)(x)) > --- /dev/null > +++ b/xen/include/xen/vmap.h > @@ -0,0 +1,24 @@ > +#if !defined(__XEN_VMAP_H__) && defined(VMAP_VIRT_START) > +#define __XEN_VMAP_H__ > + > +#include <xen/types.h> > + > +void *vm_alloc(unsigned int nr, unsigned int align); > +void vm_free(const void *); > + > +void *__vmap(const unsigned long *mfn, unsigned int granularity, > + unsigned int nr, unsigned int align, unsigned int flags); > +void *vmap(const unsigned long *mfn, unsigned int nr); > +void vunmap(const void *); > + > +void __iomem *ioremap(paddr_t, size_t); > + > +static inline void iounmap(void __iomem *va) > +{ > + vunmap((void __force *)va); > +} > + > +void vm_init(void); > +void *arch_vmap_virt_end(void); > + > +#endif /* __XEN_VMAP_H__ */ > > >