Alexandre Courbot
2015-Sep-03 06:42 UTC
[Nouveau] [PATCH 0/3] New instmem implementation for Tegra
Due to the lack of implicit synchronization between CPU and GPU on Tegra systems (like what PCI provides for dGPUs), the instmem implementation of GK20A/GM20B relied on the slow, legacy PRAMIN so that CPU accesses used the same path as GPU, making sure we see the same data on both ends. The recent Nouveau refactoring introduced acquire/release functions on instmem that help us understand the bounds of accesses to instobjs. We can now use these to perform any needed synchronization, and use direct CPU accesses. For Tegra, provided that we use a write-combined CPU mapping, we just need to make sure the GPU L2 cache is flushed before we access instobjs and invalidated after. This patch series first introduces flush/invalidate hooks on LTC so we can manage the L2 ourselves, and an implementation for GF100+ GPUs. The last patch then changes GK20A's instmem implementation to use CPU mappings and manage the GPU L2 in the new acquire/release hooks. This results in a lot of L2 operations, but thankfully flushing/invalidating an empty L2 returns immediately, so the performance gain is largely positive. In the future it would be nice to move L2 management to a higher level though. Alexandre Courbot (3): ltc: add hooks for invalidate and flush ltc/gf100: add flush/invalidate functions instmem/gk20a: use direct CPU writes drm/nouveau/include/nvkm/subdev/ltc.h | 4 + drm/nouveau/nvkm/subdev/instmem/gk20a.c | 360 +++++++++++++++++++++++--------- drm/nouveau/nvkm/subdev/ltc/base.c | 14 ++ drm/nouveau/nvkm/subdev/ltc/gf100.c | 36 ++++ drm/nouveau/nvkm/subdev/ltc/gk104.c | 2 + drm/nouveau/nvkm/subdev/ltc/gm107.c | 2 + drm/nouveau/nvkm/subdev/ltc/priv.h | 5 + 7 files changed, 326 insertions(+), 97 deletions(-) -- 2.5.1
Alexandre Courbot
2015-Sep-03 06:42 UTC
[Nouveau] [PATCH 1/3] ltc: add hooks for invalidate and flush
These are useful for systems without a coherent CPU/GPU bus. For such systems we may need to maintain the L2 ourselves. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/include/nvkm/subdev/ltc.h | 3 +++ drm/nouveau/nvkm/subdev/ltc/base.c | 14 ++++++++++++++ drm/nouveau/nvkm/subdev/ltc/priv.h | 3 +++ 3 files changed, 20 insertions(+) diff --git a/drm/nouveau/include/nvkm/subdev/ltc.h b/drm/nouveau/include/nvkm/subdev/ltc.h index c773b5e958b4..5464fcf482f1 100644 --- a/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drm/nouveau/include/nvkm/subdev/ltc.h @@ -30,6 +30,9 @@ void nvkm_ltc_tags_clear(struct nvkm_ltc *, u32 first, u32 count); int nvkm_ltc_zbc_color_get(struct nvkm_ltc *, int index, const u32[4]); int nvkm_ltc_zbc_depth_get(struct nvkm_ltc *, int index, const u32); +void nvkm_ltc_invalidate(struct nvkm_ltc *); +void nvkm_ltc_flush(struct nvkm_ltc *); + int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); diff --git a/drm/nouveau/nvkm/subdev/ltc/base.c b/drm/nouveau/nvkm/subdev/ltc/base.c index 930d25b6e63c..85b1464c0194 100644 --- a/drm/nouveau/nvkm/subdev/ltc/base.c +++ b/drm/nouveau/nvkm/subdev/ltc/base.c @@ -67,6 +67,20 @@ nvkm_ltc_zbc_depth_get(struct nvkm_ltc *ltc, int index, const u32 depth) return index; } +void +nvkm_ltc_invalidate(struct nvkm_ltc *ltc) +{ + if (ltc->func->invalidate) + ltc->func->invalidate(ltc); +} + +void +nvkm_ltc_flush(struct nvkm_ltc *ltc) +{ + if (ltc->func->flush) + ltc->func->flush(ltc); +} + static void nvkm_ltc_intr(struct nvkm_subdev *subdev) { diff --git a/drm/nouveau/nvkm/subdev/ltc/priv.h b/drm/nouveau/nvkm/subdev/ltc/priv.h index 4e05037cc99f..6f66bd03f829 100644 --- a/drm/nouveau/nvkm/subdev/ltc/priv.h +++ b/drm/nouveau/nvkm/subdev/ltc/priv.h @@ -17,6 +17,9 @@ struct nvkm_ltc_func { int zbc; void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]); 
void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32); + + void (*invalidate)(struct nvkm_ltc *); + void (*flush)(struct nvkm_ltc *); }; int gf100_ltc_oneinit(struct nvkm_ltc *); -- 2.5.1
Alexandre Courbot
2015-Sep-03 06:42 UTC
[Nouveau] [PATCH 2/3] ltc/gf100: add flush/invalidate functions
Allow clients to manually flush and invalidate L2. This will be useful for Tegra systems for which we want to write instmem using the CPU. Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> --- drm/nouveau/include/nvkm/subdev/ltc.h | 1 + drm/nouveau/nvkm/subdev/ltc/gf100.c | 36 +++++++++++++++++++++++++++++++++++ drm/nouveau/nvkm/subdev/ltc/gk104.c | 2 ++ drm/nouveau/nvkm/subdev/ltc/gm107.c | 2 ++ drm/nouveau/nvkm/subdev/ltc/priv.h | 2 ++ 5 files changed, 43 insertions(+) diff --git a/drm/nouveau/include/nvkm/subdev/ltc.h b/drm/nouveau/include/nvkm/subdev/ltc.h index 5464fcf482f1..3d4dbbf9aab3 100644 --- a/drm/nouveau/include/nvkm/subdev/ltc.h +++ b/drm/nouveau/include/nvkm/subdev/ltc.h @@ -35,5 +35,6 @@ void nvkm_ltc_flush(struct nvkm_ltc *); int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); +int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); #endif diff --git a/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drm/nouveau/nvkm/subdev/ltc/gf100.c index 45ac765b753e..42ae77533865 100644 --- a/drm/nouveau/nvkm/subdev/ltc/gf100.c +++ b/drm/nouveau/nvkm/subdev/ltc/gf100.c @@ -122,6 +122,40 @@ gf100_ltc_intr(struct nvkm_ltc *ltc) } } +void +gf100_ltc_invalidate(struct nvkm_ltc *ltc) +{ + struct nvkm_device *device = ltc->subdev.device; + s64 taken; + + nvkm_wr32(device, 0x70004, 0x00000001); + if ((taken = nvkm_msec(device, 2000, + if ((nvkm_rd32(device, 0x70004) & 0x00000003) == 0x00000000) + break; + )) < 0) + nvkm_warn(&ltc->subdev, "L2 invalidate timeout\n"); + + if (taken > 0) + nvkm_debug(&ltc->subdev, "LTC invalidate took %lld ns\n", taken); +} + +void +gf100_ltc_flush(struct nvkm_ltc *ltc) +{ + struct nvkm_device *device = ltc->subdev.device; + s64 taken; + + nvkm_wr32(device, 0x70010, 0x00000001); + if ((taken = nvkm_msec(device, 2000, + if ((nvkm_rd32(device, 0x70010) & 0x00000003) == 0x00000000) + break; + 
)) < 0) + nvkm_warn(&ltc->subdev, "L2 flush timeout\n"); + + if (taken > 0) + nvkm_debug(&ltc->subdev, "LTC flush took %lld ns\n", taken); +} + /* TODO: Figure out tag memory details and drop the over-cautious allocation. */ int @@ -215,6 +249,8 @@ gf100_ltc = { .zbc = 16, .zbc_clear_color = gf100_ltc_zbc_clear_color, .zbc_clear_depth = gf100_ltc_zbc_clear_depth, + .invalidate = gf100_ltc_invalidate, + .flush = gf100_ltc_flush, }; int diff --git a/drm/nouveau/nvkm/subdev/ltc/gk104.c b/drm/nouveau/nvkm/subdev/ltc/gk104.c index 839e6b4c597b..b4f6e0034d58 100644 --- a/drm/nouveau/nvkm/subdev/ltc/gk104.c +++ b/drm/nouveau/nvkm/subdev/ltc/gk104.c @@ -45,6 +45,8 @@ gk104_ltc = { .zbc = 16, .zbc_clear_color = gf100_ltc_zbc_clear_color, .zbc_clear_depth = gf100_ltc_zbc_clear_depth, + .invalidate = gf100_ltc_invalidate, + .flush = gf100_ltc_flush, }; int diff --git a/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drm/nouveau/nvkm/subdev/ltc/gm107.c index 389331bb63ba..3043bbfd7384 100644 --- a/drm/nouveau/nvkm/subdev/ltc/gm107.c +++ b/drm/nouveau/nvkm/subdev/ltc/gm107.c @@ -138,6 +138,8 @@ gm107_ltc = { .zbc = 16, .zbc_clear_color = gm107_ltc_zbc_clear_color, .zbc_clear_depth = gm107_ltc_zbc_clear_depth, + .invalidate = gf100_ltc_invalidate, + .flush = gf100_ltc_flush, }; int diff --git a/drm/nouveau/nvkm/subdev/ltc/priv.h b/drm/nouveau/nvkm/subdev/ltc/priv.h index 6f66bd03f829..4e3755b82769 100644 --- a/drm/nouveau/nvkm/subdev/ltc/priv.h +++ b/drm/nouveau/nvkm/subdev/ltc/priv.h @@ -29,4 +29,6 @@ void gf100_ltc_cbc_clear(struct nvkm_ltc *, u32, u32); void gf100_ltc_cbc_wait(struct nvkm_ltc *); void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]); void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); +void gf100_ltc_invalidate(struct nvkm_ltc *); +void gf100_ltc_flush(struct nvkm_ltc *); #endif -- 2.5.1
Ben Skeggs
2015-Sep-03 07:09 UTC
[Nouveau] [PATCH 2/3] ltc/gf100: add flush/invalidate functions
On 3 September 2015 at 16:42, Alexandre Courbot <acourbot at nvidia.com> wrote:
> Allow clients to manually flush and invalidate L2. This will be useful > for Tegra systems for which we want to write instmem using the CPU. > > Signed-off-by: Alexandre Courbot <acourbot at nvidia.com> > --- > drm/nouveau/include/nvkm/subdev/ltc.h | 1 + > drm/nouveau/nvkm/subdev/ltc/gf100.c | 36 +++++++++++++++++++++++++++++++++++ > drm/nouveau/nvkm/subdev/ltc/gk104.c | 2 ++ > drm/nouveau/nvkm/subdev/ltc/gm107.c | 2 ++ > drm/nouveau/nvkm/subdev/ltc/priv.h | 2 ++ > 5 files changed, 43 insertions(+) > > diff --git a/drm/nouveau/include/nvkm/subdev/ltc.h b/drm/nouveau/include/nvkm/subdev/ltc.h > index 5464fcf482f1..3d4dbbf9aab3 100644 > --- a/drm/nouveau/include/nvkm/subdev/ltc.h > +++ b/drm/nouveau/include/nvkm/subdev/ltc.h > @@ -35,5 +35,6 @@ void nvkm_ltc_flush(struct nvkm_ltc *); > > int gf100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); > int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); > +int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); > int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); > #endif > diff --git a/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drm/nouveau/nvkm/subdev/ltc/gf100.c > index 45ac765b753e..42ae77533865 100644 > --- a/drm/nouveau/nvkm/subdev/ltc/gf100.c > +++ b/drm/nouveau/nvkm/subdev/ltc/gf100.c > @@ -122,6 +122,40 @@ gf100_ltc_intr(struct nvkm_ltc *ltc) > } > } > > +void > +gf100_ltc_invalidate(struct nvkm_ltc *ltc) > +{ > + struct nvkm_device *device = ltc->subdev.device; > + s64 taken; > + > + nvkm_wr32(device, 0x70004, 0x00000001); > + if ((taken = nvkm_msec(device, 2000,
I don't suppose you have access to information on more realistic timeouts? 
I'd like to improve all the potential 2s timeout values across the driver in general, to avoid things hanging for a long time when the GPU craps itself :)
> + if ((nvkm_rd32(device, 0x70004) & 0x00000003) == 0x00000000) > + break; > + )) < 0) > + nvkm_warn(&ltc->subdev, "L2 invalidate timeout\n"); > + > + if (taken > 0) > + nvkm_debug(&ltc->subdev, "LTC invalidate took %lld ns\n", taken); > +} > + > +void > +gf100_ltc_flush(struct nvkm_ltc *ltc) > +{ > + struct nvkm_device *device = ltc->subdev.device; > + s64 taken; > + > + nvkm_wr32(device, 0x70010, 0x00000001); > + if ((taken = nvkm_msec(device, 2000, > + if ((nvkm_rd32(device, 0x70010) & 0x00000003) == 0x00000000) > + break; > + )) < 0) > + nvkm_warn(&ltc->subdev, "L2 flush timeout\n"); > + > + if (taken > 0) > + nvkm_debug(&ltc->subdev, "LTC flush took %lld ns\n", taken); > +} > + > /* TODO: Figure out tag memory details and drop the over-cautious allocation. > */ > int > @@ -215,6 +249,8 @@ gf100_ltc = { > .zbc = 16, > .zbc_clear_color = gf100_ltc_zbc_clear_color, > .zbc_clear_depth = gf100_ltc_zbc_clear_depth, > + .invalidate = gf100_ltc_invalidate, > + .flush = gf100_ltc_flush, > }; > > int > diff --git a/drm/nouveau/nvkm/subdev/ltc/gk104.c b/drm/nouveau/nvkm/subdev/ltc/gk104.c > index 839e6b4c597b..b4f6e0034d58 100644 > --- a/drm/nouveau/nvkm/subdev/ltc/gk104.c > +++ b/drm/nouveau/nvkm/subdev/ltc/gk104.c > @@ -45,6 +45,8 @@ gk104_ltc = { > .zbc = 16, > .zbc_clear_color = gf100_ltc_zbc_clear_color, > .zbc_clear_depth = gf100_ltc_zbc_clear_depth, > + .invalidate = gf100_ltc_invalidate, > + .flush = gf100_ltc_flush, > }; > > int > diff --git a/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drm/nouveau/nvkm/subdev/ltc/gm107.c > index 389331bb63ba..3043bbfd7384 100644 > --- a/drm/nouveau/nvkm/subdev/ltc/gm107.c > +++ b/drm/nouveau/nvkm/subdev/ltc/gm107.c > @@ -138,6 +138,8 @@ gm107_ltc = { > .zbc = 16, > .zbc_clear_color = gm107_ltc_zbc_clear_color, > .zbc_clear_depth = gm107_ltc_zbc_clear_depth, > + .invalidate = 
gf100_ltc_invalidate, > + .flush = gf100_ltc_flush, > }; > > int > diff --git a/drm/nouveau/nvkm/subdev/ltc/priv.h b/drm/nouveau/nvkm/subdev/ltc/priv.h > index 6f66bd03f829..4e3755b82769 100644 > --- a/drm/nouveau/nvkm/subdev/ltc/priv.h > +++ b/drm/nouveau/nvkm/subdev/ltc/priv.h > @@ -29,4 +29,6 @@ void gf100_ltc_cbc_clear(struct nvkm_ltc *, u32, u32); > void gf100_ltc_cbc_wait(struct nvkm_ltc *); > void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]); > void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); > +void gf100_ltc_invalidate(struct nvkm_ltc *); > +void gf100_ltc_flush(struct nvkm_ltc *); > #endif > -- > 2.5.1 > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Possibly Parallel Threads
- [PATCH 0/3] New instmem implementation for Tegra
- [PATCH 2/3] ltc/gf100: add flush/invalidate functions
- [PATCH] ltc/gf100: use more reasonable timeout value
- [PATCH] instmem/gk20a: exclusively acquire instobjs
- [PATCH] drm/nouveau: gk20a: Turn instmem lock into mutex