thr3ads.net - Nouveau - [Nouveau] Fixing nouveau for >4k PAGE_SIZE

If this information is useful, please help other people find it:
Share via:

Benjamin Herrenschmidt

2013-Aug-11 09:35 UTC

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

On Sun, 2013-08-11 at 11:02 +0200, Maarten Lankhorst wrote:
> > diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
> > index 5c7433d..c314a5f 100644
> > --- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
> > +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
> > @@ -190,8 +190,8 @@ nv40_fifo_chan_ctor(struct nouveau_object *parent,
> >  	if (size < sizeof(*args))
> >  		return -EINVAL;
> >  
> > -	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0,
0xc00000,
> > -					  0x1000, args->pushbuf,
> > +	ret = nouveau_fifo_channel_create(parent, engine, oclass, 0,
0x800000,
> > +					  0x10000, args->pushbuf,
> >  					  (1ULL << NVDEV_ENGINE_DMAOBJ) |
> >  					  (1ULL << NVDEV_ENGINE_SW) |
> >  					  (1ULL << NVDEV_ENGINE_GR) |
> Why the size change?
This reverts the value to older ones, however that patch might not be
needed anymore (I was carrying it from Dave but if we don't map the
registers into userspace we shouldn't need to force align them).
> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
> > index ef3133e..5833851 100644
> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
> > @@ -84,10 +84,11 @@ nouveau_vm_map_sg_table(struct nouveau_vma *vma,
u64 delta, u64 length,
> >  {
> >  	struct nouveau_vm *vm = vma->vm;
> >  	struct nouveau_vmmgr *vmm = vm->vmm;
> > -	int big = vma->node->type != vmm->spg_shift;
> > +	u32 shift = vma->node->type;
> > +	int big = shift != vmm->spg_shift;
> >  	u32 offset = vma->node->offset + (delta >> 12);
> > -	u32 bits = vma->node->type - 12;
> > -	u32 num  = length >> vma->node->type;
> > +	u32 bits = shift - 12;
> > +	u32 num  = length >> shift;
> >  	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
> >  	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
> >  	u32 max  = 1 << (vmm->pgt_bits - bits);
> > @@ -98,7 +99,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma *vma, u64
delta, u64 length,
> >  
> >  	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
> >  		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
> > -		sglen = sg_dma_len(sg) >> PAGE_SHIFT;
> > +		sglen = sg_dma_len(sg) >> shift;
> >  
> >  		end = pte + sglen;
> >  		if (unlikely(end >= max))
> Please add a WARN_ON(big); in map_sg and map_sg_table if you do this.
So that's debatable :-)

The above code is map_sg. Arguably my patch *fixes* using it with card
large pages :-)

IE, Look at the "usual" case (PAGE_SHIFT=12). Today, the above means
sglen will be in units of small pages. But everything else in that loop
operates in units of whatever is represented by the pte, which can
either be 4k or larger. 

So adding "sglen" to "pte" was never right for shift != 12 before
(regardless of PAGE_SHIFT).

With my patch, it's more correct, so as such, adding a WARN_ON here
wouldn't be "if I do this" :-)

Now, the "big" case still cannot really work here with PAGE_SHIFT=12,
because that would require the sg segments to be multiple of
"shift" (the card large page) and that is not going to be the case.

So funnily enough, we *could* use card large pages of 64k ("big") if ...
we had PAGE_SHIFT=16 ... which we do on ppc64 :-) But not anywhere else.

But yes, the point remains, in the general case, that function cannot
work for the "big" case, so I wonder if we should catch it with a
WARN_ON and maybe simplify the code a bunch while at it...
> > @@ -106,7 +107,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma *vma,
u64 delta, u64 length,
> >  		len = end - pte;
> >  
> >  		for (m = 0; m < len; m++) {
> > -			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> > +			dma_addr_t addr = sg_dma_address(sg) + (m << shift);
> >  
> >  			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
> >  			num--;
> > @@ -121,7 +122,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma *vma,
u64 delta, u64 length,
> >  		}
> >  		if (m < sglen) {
> >  			for (; m < sglen; m++) {
> > -				dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> > +				dma_addr_t addr = sg_dma_address(sg) + (m << shift);
> >  
> >  				vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
> >  				num--;
> > @@ -136,6 +137,7 @@ finish:
> >  	vmm->flush(vm);
> >  }
> >  
> > +#if PAGE_SHIFT == 12
> >  void
> >  nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
> >  		  struct nouveau_mem *mem)
> > @@ -143,10 +145,11 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64
delta, u64 length,
> >  	struct nouveau_vm *vm = vma->vm;
> >  	struct nouveau_vmmgr *vmm = vm->vmm;
> >  	dma_addr_t *list = mem->pages;
> > -	int big = vma->node->type != vmm->spg_shift;
> > +	u32 shift = vma->node->type;
> > +	int big = shift != vmm->spg_shift;
> >  	u32 offset = vma->node->offset + (delta >> 12);
> > -	u32 bits = vma->node->type - 12;
> > -	u32 num  = length >> vma->node->type;
> > +	u32 bits = shift - 12;
> > +	u32 num  = length >> shift;
> >  	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
> >  	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
> >  	u32 max  = 1 << (vmm->pgt_bits - bits);
> > @@ -173,6 +176,52 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64
delta, u64 length,
So the above is the existing one which I kept mostly intact ... but
cannot work for shift != 12 either for the same reasons so here too, if
we could simplify that ...
  > >  	vmm->flush(vm);
> >  }
> > +#else
> > +void
> > +nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
> > +		  struct nouveau_mem *mem)
> > +{
> > +	struct nouveau_vm *vm = vma->vm;
> > +	struct nouveau_vmmgr *vmm = vm->vmm;
> > +	dma_addr_t *list = mem->pages;
> > +	u32 shift = vma->node->type;
> > +	int big = shift != vmm->spg_shift;
> > +	u32 offset = vma->node->offset + (delta >> 12);
> > +	u32 bits = shift - 12;
> > +	u32 num  = length >> shift;
> > +	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
> > +	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
> > +	u32 max  = 1 << (vmm->pgt_bits - bits);
> > +	u32 sub_cnt = 1 << (PAGE_SHIFT - shift);
> > +	u32 sub_rem = 0;
> > +	u64 phys = 0;
> > +
> > +
> > +	/* XXX This will not work for a big mapping ! */
> > +	WARN_ON_ONCE(big);
> > +
> > +	while (num) {
> > +		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
> > +
> > +		if (sub_rem == 0) {
> > +			phys = *(list++);
> > +			sub_rem = sub_cnt;
> > +		}
> > +		vmm->map_sg(vma, pgt, mem, pte, 1, &phys);
> > +
> > +		num  -= 1;
> > +		pte  += 1;
> > +		sub_rem -= 1;
> > +		phys += 1 << shift;
> > +		if (unlikely(pte >= max)) {
> > +			pde++;
> > +			pte = 0;
> > +		}
> > +	}
> > +
> > +	vmm->flush(vm);
> > +}
> > +#endif
> 
> Considering that map_sg in nv04-50 takes PAGE_SIZE pages, it would be
easier to fix
> map_sg for nv50 and nvc0, this would mean less fixing in
map_sg/map_sg_table.
Sorry, I'm not sure what you mean exactly ... The code *today* tries to
handle things at the low level (vmm->map_sg) and that cannot work the
way it's done. I'm removing that. Or rather, that cannot work unless we
can guarantee that there will be no crossing of PTE page boundary (no
crossing of pde) by the PAGE_SIZE page, which I think we cannot (see my
discussion in my email asking if we could enforce that somewhat).

Additionally the current code is broken in that the upper layer in
vm/base.c doesn't increment "pte" by the right amount.

Now, if those two assertions can be made always true:

 - Those two functions (map_sg and map_sg_table) never deal with the
"big" case.

 - An object is always mapped at a card address that is a multiple
of PAGE_SIZE (ie, individual PAGE_SIZE pages don't cross pde boundaries
when mapped)

Then we can probably simplify the code significantly *and* go back to
handling PAGE_SIZE pages in the vmm->map_sg() instead of breaking them
up a level above like I do. 
> I don't like the duplicate definition, and the extra for loop in map_sg
will be compiled out when page_size == 12.
Sort-of. Right now, my code prevents calling vmm->map_sg with more than
"1" as len which reduces perf in the PAGE_SHIFT=12 case, which is why I
did the duplication.

However this is just an illustration. I'm fully aware that this is not
good as-is, I'm just poking to see what you guys think is the best
approach to *properly* fix it.
> Oh fun fact:
> on nv50 if PAGE_SIZE = 64K you should just use large pages on the nvidia
card for everything. :D
I don't think I care at this stage but heh ... 
> I have no idea if it works for bar, but you could test..
> In subdev/bar/nv50.c kmap/umap change the 12 argument to 16, and change
vm->pgt[0].obj[0] to vm->pgt[0].obj[1] and vm->pgt[0].refcount[0] to
vm->pgt[0].refcount[1].
> 
> for nvc0 that would require 128K pages, but I believe it should work too.
I don't think we want to go there right now, there is little benefit for
the vast majority of platforms (x86 and ARM). Let's stick to making
ppc64 "work" and not bother too much with things that will never be used
in practice :-)

I'd rather make the code simpler by removing the "big" case from those
functions if it's never going to be used.

Cheers,
Ben.
> >  
> >  void
> >  nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64 length)
> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
> > index ed45437..f7e2311 100644
> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
> > @@ -39,14 +39,10 @@ nv04_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
> >  {
> >  	pte = 0x00008 + (pte * 4);
> >  	while (cnt) {
> > -		u32 page = PAGE_SIZE / NV04_PDMA_PAGE;
> >  		u32 phys = (u32)*list++;
> > -		while (cnt && page--) {
> > -			nv_wo32(pgt, pte, phys | 3);
> > -			phys += NV04_PDMA_PAGE;
> > -			pte += 4;
> > -			cnt -= 1;
> > -		}
> > +		nv_wo32(pgt, pte, phys | 3);
> > +		pte += 4;
> > +		cnt -= 1;
> >  	}
> >  }
> >  
> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
> > index 064c762..a78f624 100644
> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
> > @@ -43,14 +43,10 @@ nv41_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
> >  {
> >  	pte = pte * 4;
> >  	while (cnt) {
> > -		u32 page = PAGE_SIZE / NV41_GART_PAGE;
> >  		u64 phys = (u64)*list++;
> > -		while (cnt && page--) {
> > -			nv_wo32(pgt, pte, (phys >> 7) | 1);
> > -			phys += NV41_GART_PAGE;
> > -			pte += 4;
> > -			cnt -= 1;
> > -		}
> > +		nv_wo32(pgt, pte, (phys >> 7) | 1);
> > +		pte += 4;
> > +		cnt -= 1;
> >  	}
> >  }
> See above^, it's better if you could fixup nv50/nvc0.c instead.
> >  
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > index af20fba..694024d 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
> > @@ -226,7 +226,7 @@ nouveau_bo_new(struct drm_device *dev, int size,
int align,
> >  	nvbo->page_shift = 12;
> >  	if (drm->client.base.vm) {
> >  		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
> > -			nvbo->page_shift =
drm->client.base.vm->vmm->lpg_shift;
> > +			nvbo->page_shift = lpg_shift;
> >  	}
> >  
> >  	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
> Hm.. I don't know if it will be an issue here. But I'm concerned
about the cases where nouveau_vm can end up unaligned.
> This will not be an issue for the bar mappings, because they map the entire
bo, and bo's are always page aligned.
> See nouveau_bo_fixup_align.
> 
> But the channel vm mappings are no longer guaranteed to be page aligned. In
fact it's very likely they are all misaligned due to some vm allocations
> done when mapping a channel. This is only a problem on nv50+ though.
Probably not an issue you're hitting.
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > index ca5492a..494cf88 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
> > @@ -31,7 +31,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct
ttm_mem_reg *mem)
> >  {
> >  	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
> >  	struct nouveau_mem *node = mem->mm_node;
> > -	u64 size = mem->num_pages << 12;
> > +	u64 size = mem->num_pages << PAGE_SHIFT;
> >  
> >  	if (ttm->sg) {
> >  		node->sg = ttm->sg;
> > diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> > index 19e3757..f0629de 100644
> > --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
> > +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
> > @@ -252,8 +252,8 @@ nv04_gart_manager_new(struct ttm_mem_type_manager
*man,
> >  
> >  	node->page_shift = 12;
> >  
> > -	ret = nouveau_vm_get(man->priv, mem->num_pages << 12,
node->page_shift,
> > -			     NV_MEM_ACCESS_RW, &node->vma[0]);
> > +	ret = nouveau_vm_get(man->priv, mem->num_pages <<
PAGE_SHIFT,
> > +			     node->page_shift, NV_MEM_ACCESS_RW, &node->vma[0]);
> >  	if (ret) {
> >  		kfree(node);
> >  		return ret;
> >
> >

Ben Skeggs

2013-Aug-29 06:49 UTC

head link

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

On Sun, Aug 11, 2013 at 7:35 PM, Benjamin Herrenschmidt
<benh at kernel.crashing.org> wrote:
> On Sun, 2013-08-11 at 11:02 +0200, Maarten Lankhorst wrote:
>
>> > diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
>> > index 5c7433d..c314a5f 100644
>> > --- a/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
>> > +++ b/drivers/gpu/drm/nouveau/core/engine/fifo/nv40.c
>> > @@ -190,8 +190,8 @@ nv40_fifo_chan_ctor(struct nouveau_object
*parent,
>> >     if (size < sizeof(*args))
>> >             return -EINVAL;
>> >
>> > -   ret = nouveau_fifo_channel_create(parent, engine, oclass, 0,
0xc00000,
>> > -                                     0x1000, args->pushbuf,
>> > +   ret = nouveau_fifo_channel_create(parent, engine, oclass, 0,
0x800000,
>> > +                                     0x10000, args->pushbuf,
>> >                                       (1ULL <<
NVDEV_ENGINE_DMAOBJ) |
>> >                                       (1ULL <<
NVDEV_ENGINE_SW) |
>> >                                       (1ULL <<
NVDEV_ENGINE_GR) |
>> Why the size change?
>
> This reverts the value to older ones, however that patch might not be
> needed anymore (I was carrying it from Dave but if we don't map the
> registers into userspace we shouldn't need to force align them).
>
>> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
>> > index ef3133e..5833851 100644
>> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
>> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
>> > @@ -84,10 +84,11 @@ nouveau_vm_map_sg_table(struct nouveau_vma
*vma, u64 delta, u64 length,
>> >  {
>> >     struct nouveau_vm *vm = vma->vm;
>> >     struct nouveau_vmmgr *vmm = vm->vmm;
>> > -   int big = vma->node->type != vmm->spg_shift;
>> > +   u32 shift = vma->node->type;
>> > +   int big = shift != vmm->spg_shift;
>> >     u32 offset = vma->node->offset + (delta >> 12);
>> > -   u32 bits = vma->node->type - 12;
>> > -   u32 num  = length >> vma->node->type;
>> > +   u32 bits = shift - 12;
>> > +   u32 num  = length >> shift;
>> >     u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
>> >     u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
>> >     u32 max  = 1 << (vmm->pgt_bits - bits);
>> > @@ -98,7 +99,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma *vma,
u64 delta, u64 length,
>> >
>> >     for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
>> >             struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
>> > -           sglen = sg_dma_len(sg) >> PAGE_SHIFT;
>> > +           sglen = sg_dma_len(sg) >> shift;
>> >
>> >             end = pte + sglen;
>> >             if (unlikely(end >= max))
>> Please add a WARN_ON(big); in map_sg and map_sg_table if you do this.
>
> So that's debatable :-)
>
> The above code is map_sg. Arguably my patch *fixes* using it with card
> large pages :-)
>
> IE, Look at the "usual" case (PAGE_SHIFT=12). Today, the above means
> sglen will be in units of small pages. But everything else in that loop
> operates in units of whatever is represented by the pte, which can
> either be 4k or larger.
>
> So adding "sglen" to "pte" was never right for shift != 12 before
> (regardless of PAGE_SHIFT).
>
> With my patch, it's more correct, so as such, adding a WARN_ON here
> wouldn't be "if I do this" :-)
>
> Now, the "big" case still cannot really work here with PAGE_SHIFT=12,
> because that would require the sg segments to be multiple of
> "shift" (the card large page) and that is not going to be the case.
>
> So funnily enough, we *could* use card large pages of 64k ("big") if ...
> we had PAGE_SHIFT=16 ... which we do on ppc64 :-) But not anywhere else.
>
> But yes, the point remains, in the general case, that function cannot
> work for the "big" case, so I wonder if we should catch it with a
> WARN_ON and maybe simplify the code a bunch while at it...
>
>> > @@ -106,7 +107,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma
*vma, u64 delta, u64 length,
>> >             len = end - pte;
>> >
>> >             for (m = 0; m < len; m++) {
>> > -                   dma_addr_t addr = sg_dma_address(sg) + (m
<< PAGE_SHIFT);
>> > +                   dma_addr_t addr = sg_dma_address(sg) + (m
<< shift);
>> >
>> >                     vmm->map_sg(vma, pgt, mem, pte, 1,
&addr);
>> >                     num--;
>> > @@ -121,7 +122,7 @@ nouveau_vm_map_sg_table(struct nouveau_vma
*vma, u64 delta, u64 length,
>> >             }
>> >             if (m < sglen) {
>> >                     for (; m < sglen; m++) {
>> > -                           dma_addr_t addr = sg_dma_address(sg) +
(m << PAGE_SHIFT);
>> > +                           dma_addr_t addr = sg_dma_address(sg) +
(m << shift);
>> >
>> >                             vmm->map_sg(vma, pgt, mem, pte, 1,
&addr);
>> >                             num--;
>> > @@ -136,6 +137,7 @@ finish:
>> >     vmm->flush(vm);
>> >  }
>> >
>> > +#if PAGE_SHIFT == 12
>> >  void
>> >  nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
>> >               struct nouveau_mem *mem)
>> > @@ -143,10 +145,11 @@ nouveau_vm_map_sg(struct nouveau_vma *vma,
u64 delta, u64 length,
>> >     struct nouveau_vm *vm = vma->vm;
>> >     struct nouveau_vmmgr *vmm = vm->vmm;
>> >     dma_addr_t *list = mem->pages;
>> > -   int big = vma->node->type != vmm->spg_shift;
>> > +   u32 shift = vma->node->type;
>> > +   int big = shift != vmm->spg_shift;
>> >     u32 offset = vma->node->offset + (delta >> 12);
>> > -   u32 bits = vma->node->type - 12;
>> > -   u32 num  = length >> vma->node->type;
>> > +   u32 bits = shift - 12;
>> > +   u32 num  = length >> shift;
>> >     u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
>> >     u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
>> >     u32 max  = 1 << (vmm->pgt_bits - bits);
>> > @@ -173,6 +176,52 @@ nouveau_vm_map_sg(struct nouveau_vma *vma,
u64 delta, u64 length,
>
> So the above is the existing one which I kept mostly intact ... but
> cannot work for shift != 12 either for the same reasons so here too, if
> we could simplify that ...
>
>> >     vmm->flush(vm);
>> >  }
>> > +#else
>> > +void
>> > +nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
>> > +             struct nouveau_mem *mem)
>> > +{
>> > +   struct nouveau_vm *vm = vma->vm;
>> > +   struct nouveau_vmmgr *vmm = vm->vmm;
>> > +   dma_addr_t *list = mem->pages;
>> > +   u32 shift = vma->node->type;
>> > +   int big = shift != vmm->spg_shift;
>> > +   u32 offset = vma->node->offset + (delta >> 12);
>> > +   u32 bits = shift - 12;
>> > +   u32 num  = length >> shift;
>> > +   u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
>> > +   u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1))
>> bits;
>> > +   u32 max  = 1 << (vmm->pgt_bits - bits);
>> > +   u32 sub_cnt = 1 << (PAGE_SHIFT - shift);
>> > +   u32 sub_rem = 0;
>> > +   u64 phys = 0;
>> > +
>> > +
>> > +   /* XXX This will not work for a big mapping ! */
>> > +   WARN_ON_ONCE(big);
>> > +
>> > +   while (num) {
>> > +           struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
>> > +
>> > +           if (sub_rem == 0) {
>> > +                   phys = *(list++);
>> > +                   sub_rem = sub_cnt;
>> > +           }
>> > +           vmm->map_sg(vma, pgt, mem, pte, 1, &phys);
>> > +
>> > +           num  -= 1;
>> > +           pte  += 1;
>> > +           sub_rem -= 1;
>> > +           phys += 1 << shift;
>> > +           if (unlikely(pte >= max)) {
>> > +                   pde++;
>> > +                   pte = 0;
>> > +           }
>> > +   }
>> > +
>> > +   vmm->flush(vm);
>> > +}
>> > +#endif
>>
>> Considering that map_sg in nv04-50 takes PAGE_SIZE pages, it would be
easier to fix
>> map_sg for nv50 and nvc0, this would mean less fixing in
map_sg/map_sg_table.
>
> Sorry, I'm not sure what you mean exactly ... The code *today* tries to
> handle things at the low level (vmm->map_sg) and that cannot work the
> way it's done. I'm removing that. Or rather, that cannot work unless we
> can guarantee that there will be no crossing of PTE page boundary (no
> crossing of pde) by the PAGE_SIZE page, which I think we cannot (see my
> discussion in my email asking if we could enforce that somewhat).
>
> Additionally the current code is broken in that the upper layer in
> vm/base.c doesn't increment "pte" by the right amount.
>
> Now, if those two assertions can be made always true:
>
>  - Those two functions (map_sg and map_sg_table) never deal with the
> "big" case.
>
>  - An object is always mapped at a card address that is a multiple
> of PAGE_SIZE (ie, invividual PAGE_SIZE pages don't cross pde boundaries
> when mapped)
I think these two restrictions are reasonable to enforce, and we should do so.
>
> Then we can probably simplify the code significantly *and* go back to
> handling PAGE_SIZE pages in the vmm->map_sg() instead of breaking them
> up a level above like I do.
Sounds good!
>
>> I don't like the duplicate definition, and the extra for loop in
map_sg will be compiled out when page_size == 12.
>
> Sort-of. Right now, my code prevents calling vmm->map_sg with more than
> "1" as len which reduces perf in the PAGE_SHIFT=12 case, which is why I
> did the duplication.
>
> However this is just an illustration. I'm fully aware that this is not
> good as-is, I'm just poking to see what you guys think is the best
> approach to *properly* fix it.
>
>> Oh fun fact:
>> on nv50 if PAGE_SIZE = 64K you should just use large pages on the
nvidia card for everything. :D
>
> I don't think I care at this stage but heh ...
>
>> I have no idea if it works for bar, but you could test..
>> In subdev/bar/nv50.c kmap/umap change the 12 argument to 16, and change
vm->pgt[0].obj[0] to vm->pgt[0].obj[1] and vm->pgt[0].refcount[0] to
vm->pgt[0].refcount[1].
>>
>> for nvc0 that would require 128K pages, but I believe it should work
too.
>
> I don't think we want to go there right now, there is little benefit for
> the vast majority of platforms (x86 and ARM). Let's stick to making
> ppc64 "work" and not bother too much with things that will never be used
> in practice :-)
>
> I'd rather make the code simpler by removing the "big" case from those
> functions if it's never going to be used.
Fully agreed!

Ben.
>
> Cheers,
> Ben.
>
>> >
>> >  void
>> >  nouveau_vm_unmap_at(struct nouveau_vma *vma, u64 delta, u64
length)
>> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
>> > index ed45437..f7e2311 100644
>> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
>> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
>> > @@ -39,14 +39,10 @@ nv04_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
>> >  {
>> >     pte = 0x00008 + (pte * 4);
>> >     while (cnt) {
>> > -           u32 page = PAGE_SIZE / NV04_PDMA_PAGE;
>> >             u32 phys = (u32)*list++;
>> > -           while (cnt && page--) {
>> > -                   nv_wo32(pgt, pte, phys | 3);
>> > -                   phys += NV04_PDMA_PAGE;
>> > -                   pte += 4;
>> > -                   cnt -= 1;
>> > -           }
>> > +           nv_wo32(pgt, pte, phys | 3);
>> > +           pte += 4;
>> > +           cnt -= 1;
>> >     }
>> >  }
>> >
>> > diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
>> > index 064c762..a78f624 100644
>> > --- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
>> > +++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
>> > @@ -43,14 +43,10 @@ nv41_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
>> >  {
>> >     pte = pte * 4;
>> >     while (cnt) {
>> > -           u32 page = PAGE_SIZE / NV41_GART_PAGE;
>> >             u64 phys = (u64)*list++;
>> > -           while (cnt && page--) {
>> > -                   nv_wo32(pgt, pte, (phys >> 7) | 1);
>> > -                   phys += NV41_GART_PAGE;
>> > -                   pte += 4;
>> > -                   cnt -= 1;
>> > -           }
>> > +           nv_wo32(pgt, pte, (phys >> 7) | 1);
>> > +           pte += 4;
>> > +           cnt -= 1;
>> >     }
>> >  }
>> See above^, it's better if you could fixup nv50/nvc0.c instead.
>> >
>> > diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> > index af20fba..694024d 100644
>> > --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
>> > +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
>> > @@ -226,7 +226,7 @@ nouveau_bo_new(struct drm_device *dev, int
size, int align,
>> >     nvbo->page_shift = 12;
>> >     if (drm->client.base.vm) {
>> >             if (!(flags & TTM_PL_FLAG_TT) && size >
256 * 1024)
>> > -                   nvbo->page_shift =
drm->client.base.vm->vmm->lpg_shift;
>> > +                   nvbo->page_shift = lpg_shift;
>> >     }
>> >
>> >     nouveau_bo_fixup_align(nvbo, flags, &align, &size);
>> Hm.. I don't know if it will be an issue here. But I'm
concerned about the cases where nouveau_vm can end up unaligned.
>> This will not be an issue for the bar mappings, because they map the
entire bo, and bo's are always page aligned.
>> See nouveau_bo_fixup_align.
>>
>> But the channel vm mappings are no longer guaranteed to be page
aligned. In fact it's very likely they are all misaligned due to some vm
allocations
>> done when mapping a channel. This is only a problem on nv50+ though.
Probably not an issue you're hitting.
>> > diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>> > index ca5492a..494cf88 100644
>> > --- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>> > +++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
>> > @@ -31,7 +31,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct
ttm_mem_reg *mem)
>> >  {
>> >     struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be
*)ttm;
>> >     struct nouveau_mem *node = mem->mm_node;
>> > -   u64 size = mem->num_pages << 12;
>> > +   u64 size = mem->num_pages << PAGE_SHIFT;
>> >
>> >     if (ttm->sg) {
>> >             node->sg = ttm->sg;
>> > diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> > index 19e3757..f0629de 100644
>> > --- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> > +++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
>> > @@ -252,8 +252,8 @@ nv04_gart_manager_new(struct
ttm_mem_type_manager *man,
>> >
>> >     node->page_shift = 12;
>> >
>> > -   ret = nouveau_vm_get(man->priv, mem->num_pages <<
12, node->page_shift,
>> > -                        NV_MEM_ACCESS_RW, &node->vma[0]);
>> > +   ret = nouveau_vm_get(man->priv, mem->num_pages <<
PAGE_SHIFT,
>> > +                        node->page_shift, NV_MEM_ACCESS_RW,
&node->vma[0]);
>> >     if (ret) {
>> >             kfree(node);
>> >             return ret;
>> >
>> >
>
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel

Benjamin Herrenschmidt

2013-Nov-29 06:01 UTC

head link

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

On Thu, 2013-08-29 at 16:49 +1000, Ben Skeggs wrote:
> > Additionally the current code is broken in that the upper layer in
> > vm/base.c doesn't increment "pte" by the right amount.
> >
> > Now, if those two assertions can be made always true:
> >
> >  - Those two functions (map_sg and map_sg_table) never deal with the
> > "big" case.
> >
> >  - An object is always mapped at a card address that is a multiple
> > of PAGE_SIZE (ie, invividual PAGE_SIZE pages don't cross pde boundaries
> > when mapped)
> I think these two restrictions are reasonable to enforce, and we should do so.
> 
> >
> > Then we can probably simplify the code significantly *and* go back to
> > handling PAGE_SIZE pages in the vmm->map_sg() instead of breaking them
> > up a level above like I do.
> Sounds good!
Ok so I experimented with that approach a bit. The code could probably
use further simplifications and cleanups but it seems to work. Note that
I haven't been able to test much more than the fbdev and the DDX without
3d accel since GLX is currently broken on nouveau big endian for other
reasons (no visuals) since the Gallium BE rework by Ajax (plus the
nv30/40 merge also introduced artifacts and instabilities on BE which I
haven't tracked down either).

The basic idea here is that the backend vmm->map_sg operates on system
PAGE_SIZE, which allows to continue passing a page array down as we do
today.

That means however that only the nv04 and nv41 backends handle that case
right now, the other ones will have to be fixed eventually but the fix
is rather easy.

Now it's possible that I've missed cases where card objects might be
allocated in vram that isn't system PAGE_SIZE aligned, in which case we
will have breakage due to having a single system PAGE crossing a PDE
boundary, you'll have to help me here figure that out though I haven't
hit any of my WARN_ON's so far :-)

Patch isn't S-O-B yet, first let me know what you think of the approach
(and maybe check I didn't break x86 :-)

diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
index ef3133e..44dc050 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
@@ -82,55 +82,77 @@ void
 nouveau_vm_map_sg_table(struct nouveau_vma *vma, u64 delta, u64 length,
 			struct nouveau_mem *mem)
 {
+	/*
+	 * XXX Should the "12" in a couple of places here be replaced
+	 * with vmm->spg_shift for correctness ? Might help if we ever
+	 * support 64k card pages on 64k PAGE_SIZE systems
+	 */
 	struct nouveau_vm *vm = vma->vm;
 	struct nouveau_vmmgr *vmm = vm->vmm;
-	int big = vma->node->type != vmm->spg_shift;
 	u32 offset = vma->node->offset + (delta >> 12);
-	u32 bits = vma->node->type - 12;
-	u32 num  = length >> vma->node->type;
+	u32 shift = vma->node->type;
+	u32 order = PAGE_SHIFT - shift;
+	u32 num  = length >> PAGE_SHIFT;
 	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
-	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
-	u32 max  = 1 << (vmm->pgt_bits - bits);
-	unsigned m, sglen;
-	u32 end, len;
+	u32 pte  = offset & ((1 << vmm->pgt_bits) - 1);
+	u32 max  = 1 << vmm->pgt_bits;
+	u32 end, len, cardlen;
 	int i;
 	struct scatterlist *sg;
 
-	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
-		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
-		sglen = sg_dma_len(sg) >> PAGE_SHIFT;
+	/* We don't handle "big" pages here */
+	if (WARN_ON(shift != vmm->spg_shift || shift > PAGE_SHIFT))
+		return;
 
-		end = pte + sglen;
-		if (unlikely(end >= max))
-			end = max;
-		len = end - pte;
+	/* We don't handle objects that aren't PAGE_SIZE aligned either */
+	if (WARN_ON((offset << 12) & ~PAGE_MASK))
+		return;
 
-		for (m = 0; m < len; m++) {
-			dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+	/* Iterate sglist elements */
+	for_each_sg(mem->sg->sgl, sg, mem->sg->nents, i) {
+		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[0];
+		unsigned long m, sglen;
+		dma_addr_t addr;
 
-			vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
-			num--;
-			pte++;
+		/* Number of system pages and base address */
+		sglen = sg_dma_len(sg) >> PAGE_SHIFT;
+		addr = sg_dma_address(sg);
+
+		/* Iterate over system pages in the segment and
+		 * covered PDEs
+		 */
+		while(sglen) {
+			/* Number of card PTEs */
+			cardlen = sglen << order;
+			end = pte + cardlen;
+			if (unlikely(end > max))
+				end = max;
+			cardlen = end - pte;
 
-			if (num == 0)
-				goto finish;
-		}
-		if (unlikely(end >= max)) {
-			pde++;
-			pte = 0;
-		}
-		if (m < sglen) {
-			for (; m < sglen; m++) {
-				dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
+			/* Convert back to system pages after cropping */
+			len = cardlen >> order;
+
+			/* Ensure this fits system pages */
+			if (WARN_ON((len << order) != cardlen))
+				break;
 
+			/* For each system page in the segment */
+			for (m = 0; m < len; m++) {
 				vmm->map_sg(vma, pgt, mem, pte, 1, &addr);
+				addr += PAGE_SIZE;
 				num--;
-				pte++;
+				pte += (1u << order);
 				if (num == 0)
 					goto finish;
 			}
-		}
+			sglen -= len;
 
+			/* Wrap to next PDE ? */
+			if (unlikely(end >= max)) {
+				pde++;
+				pte = 0;
+			}
+		}
 	}
 finish:
 	vmm->flush(vm);
@@ -143,28 +165,44 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64
length,
 	struct nouveau_vm *vm = vma->vm;
 	struct nouveau_vmmgr *vmm = vm->vmm;
 	dma_addr_t *list = mem->pages;
-	int big = vma->node->type != vmm->spg_shift;
 	u32 offset = vma->node->offset + (delta >> 12);
-	u32 bits = vma->node->type - 12;
-	u32 num  = length >> vma->node->type;
+	u32 shift = vma->node->type;
+	u32 order = PAGE_SHIFT - shift;
+	u32 num  = length >> PAGE_SHIFT;
 	u32 pde  = (offset >> vmm->pgt_bits) - vm->fpde;
-	u32 pte  = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
-	u32 max  = 1 << (vmm->pgt_bits - bits);
-	u32 end, len;
+	u32 pte  = offset & ((1 << vmm->pgt_bits) - 1);
+	u32 max  = 1 << vmm->pgt_bits;
+	u32 end, len, cardlen;
+
+	/* We don't handle "big" pages here */
+	if (WARN_ON(shift != vmm->spg_shift || shift > PAGE_SHIFT))
+		return;
+
+	/* We dont' handle objects that aren't PAGE_SIZE aligned either */
+	if (WARN_ON((offset << 12) & ~PAGE_MASK))
+		return;
 
 	while (num) {
-		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
+		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[0];
 
-		end = (pte + num);
-		if (unlikely(end >= max))
+		/* "num" is remaining system pages, check how many fit
+		 * in the PDE
+		 */
+		end = (pte + (num << order));
+		if (unlikely(end > max))
 			end = max;
-		len = end - pte;
+		cardlen = end - pte;
+		len = cardlen >> order;
+
+		/* Ensure this fits system pages */
+		if (WARN_ON((len << order) != cardlen))
+			break;
 
 		vmm->map_sg(vma, pgt, mem, pte, len, list);
 
 		num  -= len;
-		pte  += len;
 		list += len;
+		pte  += cardlen;
 		if (unlikely(end >= max)) {
 			pde++;
 			pte = 0;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
index ed45437..c1e7c11 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv04.c
@@ -38,14 +38,13 @@ nv04_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
 	       struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list)
 {
 	pte = 0x00008 + (pte * 4);
-	while (cnt) {
+	while (cnt--) {
 		u32 page = PAGE_SIZE / NV04_PDMA_PAGE;
 		u32 phys = (u32)*list++;
-		while (cnt && page--) {
+		while (page--) {
 			nv_wo32(pgt, pte, phys | 3);
 			phys += NV04_PDMA_PAGE;
 			pte += 4;
-			cnt -= 1;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
index 064c762..09570d7 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/nv41.c
@@ -42,14 +42,13 @@ nv41_vm_map_sg(struct nouveau_vma *vma, struct
nouveau_gpuobj *pgt,
 	       struct nouveau_mem *mem, u32 pte, u32 cnt, dma_addr_t *list)
 {
 	pte = pte * 4;
-	while (cnt) {
+	while (cnt--) {
 		u32 page = PAGE_SIZE / NV41_GART_PAGE;
 		u64 phys = (u64)*list++;
-		while (cnt && page--) {
+		while (page--) {
 			nv_wo32(pgt, pte, (phys >> 7) | 1);
 			phys += NV41_GART_PAGE;
 			pte += 4;
-			cnt -= 1;
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c0fde6b..16dce89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -178,7 +178,7 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
 		*size = roundup(*size, (1 << nvbo->page_shift));
 		*align = max((1 <<  nvbo->page_shift), *align);
 	}
-
+	*align = roundup(*align, PAGE_SIZE);
 	*size = roundup(*size, PAGE_SIZE);
 }
 
@@ -221,7 +221,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->page_shift = 12;
 	if (drm->client.base.vm) {
 		if (!(flags & TTM_PL_FLAG_TT) && size > 256 * 1024)
-			nvbo->page_shift = drm->client.base.vm->vmm->lpg_shift;
+			nvbo->page_shift = lpg_shift;
 	}
 
 	nouveau_bo_fixup_align(nvbo, flags, &align, &size);
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 0843ebc..f255ff8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -31,7 +31,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
 	struct nouveau_mem *node = mem->mm_node;
-	u64 size = mem->num_pages << 12;
+	u64 size = mem->num_pages << PAGE_SHIFT;
 
 	if (ttm->sg) {
 		node->sg = ttm->sg;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c
b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 19e3757..b7fc456 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -252,8 +252,9 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	ret = nouveau_vm_get(man->priv, mem->num_pages << 12,
node->page_shift,
-			     NV_MEM_ACCESS_RW, &node->vma[0]);
+	ret = nouveau_vm_get(man->priv, mem->num_pages << PAGE_SHIFT,
+			     node->page_shift, NV_MEM_ACCESS_RW,
+			     &node->vma[0]);
 	if (ret) {
 		kfree(node);
 		return ret;

Reasonably Related Threads

Search for more reasonably related threads

Nouveau - Nov 2013 - Fixing nouveau for >4k PAGE_SIZE

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

[Nouveau] Fixing nouveau for >4k PAGE_SIZE

Reasonably Related Threads