Patch 1 and 2 implement wait-for-vblank, required to remove flicker when reclocking memory Patch 3 and 4 allow me to do things between waiting for VBLANK and disabling FB, like pause PFIFO and wait for the engines to idle. This minimises the time PFIFO is paused, thus maximises performance. The rest of the patches speak for themselves. As the actual memory reclocking script is still somewhat prone to changes, this will follow later. Please review and merge if positive! Thanks, Roy
Roy Spliet
2014-Sep-04 14:58 UTC
[Nouveau] [PATCH 1/8] nv50/display: Set VBLANK time in modeset script
Solves blinking on reclocking memory. The value set is an underestimate, but with non-reduced vblanking this should give us plenty of time Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/nv50_display.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c index 4c534b7..cfa7ecf 100644 --- a/drivers/gpu/drm/nouveau/nv50_display.c +++ b/drivers/gpu/drm/nouveau/nv50_display.c @@ -1070,7 +1070,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1; u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks; u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks; - u32 vblan2e = 0, vblan2s = 1; + u32 vblan2e = 0, vblan2s = 1, vblankus; u32 *push; int ret; @@ -1087,6 +1087,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, vblanke = vsynce + vbackp; vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace; vblanks = vactive - vfrontp - 1; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) { vblan2e = vactive + vsynce + vbackp; vblan2s = vblan2e + (mode->vdisplay * vscan / ilace); @@ -1100,17 +1101,22 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, push = evo_wait(mast, 64); if (push) { if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) { + /* XXX: Safe underestimate, even "0" works */ + vblankus = (vactive - mode->vdisplay - 2) * hactive; + vblankus *= 1000; + vblankus /= mode->clock; + evo_mthd(push, 0x0804 + (nv_crtc->index * 0x400), 2); evo_data(push, 0x00800000 | mode->clock); evo_data(push, (ilace == 2) ? 2 : 0); - evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 6); + evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 8); evo_data(push, 0x00000000); evo_data(push, (vactive << 16) | hactive); evo_data(push, ( vsynce << 16) | hsynce); evo_data(push, (vblanke << 16) | hblanke); evo_data(push, (vblanks << 16) | hblanks); evo_data(push, (vblan2e << 16) | vblan2s); - evo_mthd(push, 0x082c + (nv_crtc->index * 0x400), 1); + evo_data(push, vblankus); evo_data(push, 0x00000000); evo_mthd(push, 0x0900 + (nv_crtc->index * 0x400), 2); evo_data(push, 0x00000311); -- 1.9.3
Roy Spliet
2014-Sep-04 14:58 UTC
[Nouveau] [PATCH 2/8] pwr/memx/nva3: Implement "wait for VBLANK"
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/include/subdev/pwr.h | 1 + drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h | 25 +++++---- .../gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc | 60 ++++++++++++++++++++-- drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h | 1 + drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c | 35 ++++++++++++- 5 files changed, 107 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h index c5c92cb..b5286b3 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h @@ -76,5 +76,6 @@ void nouveau_memx_wr32(struct nouveau_memx *, u32 addr, u32 data); void nouveau_memx_wait(struct nouveau_memx *, u32 addr, u32 mask, u32 data, u32 nsec); void nouveau_memx_nsec(struct nouveau_memx *, u32 nsec); +void nouveau_memx_wait_vblank(struct nouveau_memx *); #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h index 2af9cfd..76290bb 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h @@ -105,14 +105,21 @@ ramfuc_nsec(struct ramfuc *ram, u32 nsec) nouveau_memx_nsec(ram->memx, nsec); } -#define ram_init(s,p) ramfuc_init(&(s)->base, (p)) -#define ram_exec(s,e) ramfuc_exec(&(s)->base, (e)) -#define ram_have(s,r) ((s)->r_##r.addr[0] != 0x000000) -#define ram_rd32(s,r) ramfuc_rd32(&(s)->base, &(s)->r_##r) -#define ram_wr32(s,r,d) ramfuc_wr32(&(s)->base, &(s)->r_##r, (d)) -#define ram_nuke(s,r) ramfuc_nuke(&(s)->base, &(s)->r_##r) -#define ram_mask(s,r,m,d) ramfuc_mask(&(s)->base, &(s)->r_##r, (m), (d)) -#define ram_wait(s,r,m,d,n) ramfuc_wait(&(s)->base, (r), (m), (d), (n)) -#define ram_nsec(s,n) ramfuc_nsec(&(s)->base, (n)) +static inline void +ramfuc_wait_vblank(struct ramfuc *ram) +{ + nouveau_memx_wait_vblank(ram->memx); +} + +#define ram_init(s,p) ramfuc_init(&(s)->base, (p)) +#define ram_exec(s,e) ramfuc_exec(&(s)->base, (e)) +#define ram_have(s,r) ((s)->r_##r.addr[0] != 0x000000) +#define ram_rd32(s,r) ramfuc_rd32(&(s)->base, &(s)->r_##r) +#define ram_wr32(s,r,d) ramfuc_wr32(&(s)->base, &(s)->r_##r, (d)) +#define ram_nuke(s,r) ramfuc_nuke(&(s)->base, &(s)->r_##r) +#define ram_mask(s,r,m,d) ramfuc_mask(&(s)->base, &(s)->r_##r, (m), (d)) +#define ram_wait(s,r,m,d,n) ramfuc_wait(&(s)->base, (r), (m), (d), (n)) +#define ram_nsec(s,n) ramfuc_nsec(&(s)->base, (n)) +#define ram_wait_vblank(s) ramfuc_wait_vblank(&(s)->base) #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc index d43741e..228ee0d 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc @@ -43,12 +43,13 @@ process(PROC_MEMX, #memx_init, #memx_recv) */ .b32 func memx_func_head: -handler(ENTER , 0x0001, 0x0000, #memx_func_enter) +handler(ENTER , 0x0000, 0x0000, #memx_func_enter) memx_func_next: handler(LEAVE , 0x0000, 0x0000, #memx_func_leave) handler(WR32 , 0x0000, 0x0002, #memx_func_wr32) handler(WAIT , 0x0004, 0x0000, #memx_func_wait) handler(DELAY , 0x0001, 0x0000, #memx_func_delay) +handler(VBLANK, 0x0001, 0x0000, #memx_func_wait_vblank) memx_func_tail: .equ #memx_func_size #memx_func_next - #memx_func_head @@ -67,7 +68,6 @@ memx_data_tail: // // $r15 - current (memx) // $r4 - packet length -// +00: bitmask of heads to wait for vblank on // $r3 - opcode desciption // $r0 - zero memx_func_enter: @@ -77,9 +77,7 @@ memx_func_enter: nv_iord($r6, NV_PPWR_OUTPUT) and $r6 NV_PPWR_OUTPUT_FB_PAUSE bra z #memx_func_enter_wait - //XXX: TODO - ld b32 $r6 D[$r1 + 0x00] - add b32 $r1 0x04 + ret // description @@ -97,6 +95,58 @@ memx_func_leave: bra nz #memx_func_leave_wait ret +#if NVKM_PPWR_CHIPSET < GF119 +// description +// +// $r15 - current (memx) +// $r4 - packet length +// +00: head to wait for vblank on +// $r3 - opcode desciption +// $r0 - zero +memx_func_wait_vblank: + ld b32 $r6 D[$r1 + 0x00] + cmp b32 $r6 0x0 + bra z #memx_func_wait_vblank_head0 + cmp b32 $r6 0x1 + bra z #memx_func_wait_vblank_head1 + bra #memx_func_wait_vblank_fini + + memx_func_wait_vblank_head1: + movw $r7 0x20 + bra #memx_func_wait_vblank_0 + + memx_func_wait_vblank_head0: + movw $r7 0x8 + + memx_func_wait_vblank_0: + nv_iord($r6, NV_PPWR_INPUT) + and $r6 $r7 + bra nz #memx_func_wait_vblank_0 + + memx_func_wait_vblank_1: + nv_iord($r6, NV_PPWR_INPUT) + and $r6 $r7 + bra z #memx_func_wait_vblank_1 + + memx_func_wait_vblank_fini: + add b32 $r1 0x4 + ret + +#else + +// XXX: currently no-op +// +// $r15 - current (memx) +// $r4 - packet length +// +00: head to wait for vblank on +// $r3 - opcode desciption +// $r0 - zero +memx_func_wait_vblank: + add b32 $r1 0x4 + ret + +#endif + // description // // $r15 - current (memx) diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h index 574acfa..80f8328 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h @@ -24,6 +24,7 @@ #define MEMX_WR32 2 #define MEMX_WAIT 3 #define MEMX_DELAY 4 +#define MEMX_VBLANK 5 /* I2C_: message identifiers */ #define I2C__MSG_RD08 0 diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c index 03de310..bcdc00f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c @@ -64,7 +64,7 @@ nouveau_memx_init(struct nouveau_pwr *ppwr, struct nouveau_memx **pmemx) } while (nv_rd32(ppwr, 0x10a580) != 0x00000003); nv_wr32(ppwr, 0x10a1c0, 0x01000000 | memx->base); nv_wr32(ppwr, 0x10a1c4, 0x00010000 | MEMX_ENTER); - nv_wr32(ppwr, 0x10a1c4, 0x00000000); + return 0; } @@ -118,4 +118,37 @@ nouveau_memx_nsec(struct nouveau_memx *memx, u32 nsec) memx_out(memx); /* fuc can't handle multiple */ } +void +nouveau_memx_wait_vblank(struct nouveau_memx *memx) +{ + struct nouveau_pwr *ppwr = memx->ppwr; + u32 heads, x, y, px = 0; + int i, head_sync; + + if (nv_device(ppwr)->chipset < 0xd0) { + heads = nv_rd32(ppwr, 0x610050); + for (i = 0; i < 2; i++) { + /* Heuristic: sync to head with biggest resolution */ + if (heads & (2 << (i << 3))) { + x = nv_rd32(ppwr, 0x610b40 + (0x540 * i)); + y = (x & 0xffff0000) >> 16; + x &= 0x0000ffff; + if ((x * y) > px) { + px = (x * y); + head_sync = i; + } + } + } + } + + if (px == 0) { + nv_debug(memx->ppwr, "WAIT VBLANK !NO ACTIVE HEAD\n"); + return; + } + + nv_debug(memx->ppwr, "WAIT VBLANK HEAD%d\n", head_sync); + memx_cmd(memx, MEMX_VBLANK, 1, (u32[]){ head_sync }); + memx_out(memx); /* fuc can't handle multiple */ +} + #endif -- 1.9.3
Roy Spliet
2014-Sep-04 14:58 UTC
[Nouveau] [PATCH 3/8] pwr/memx: Make FB disable and enable explicit
Needs to be done after wait-for-VBLANK, and NVA3 requires register writes in between. Rather than hard-coding register writes, just split out fb_disable and fb_enable. Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/include/subdev/pwr.h | 2 ++ drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h | 14 ++++++++++++++ drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc | 4 ++-- drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h | 4 ++-- drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c | 20 ++++++++++++++++++-- 5 files changed, 38 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h index b5286b3..94f8629 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h @@ -77,5 +77,7 @@ void nouveau_memx_wait(struct nouveau_memx *, u32 addr, u32 mask, u32 data, u32 nsec); void nouveau_memx_nsec(struct nouveau_memx *, u32 nsec); void nouveau_memx_wait_vblank(struct nouveau_memx *); +void nouveau_memx_fb_disable(struct nouveau_memx *); +void nouveau_memx_fb_enable(struct nouveau_memx *); #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h index 76290bb..430261a 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h @@ -111,6 +111,18 @@ ramfuc_wait_vblank(struct ramfuc *ram) nouveau_memx_wait_vblank(ram->memx); } +static inline void +ramfuc_fb_disable(struct ramfuc *ram) +{ + nouveau_memx_fb_disable(ram->memx); +} + +static inline void +ramfuc_fb_enable(struct ramfuc *ram) +{ + nouveau_memx_fb_enable(ram->memx); +} + #define ram_init(s,p) ramfuc_init(&(s)->base, (p)) #define ram_exec(s,e) ramfuc_exec(&(s)->base, (e)) #define ram_have(s,r) ((s)->r_##r.addr[0] != 0x000000) @@ -121,5 +133,7 @@ ramfuc_wait_vblank(struct ramfuc *ram) #define ram_wait(s,r,m,d,n) ramfuc_wait(&(s)->base, (r), (m), (d), (n)) #define ram_nsec(s,n) ramfuc_nsec(&(s)->base, (n)) #define ram_wait_vblank(s) ramfuc_wait_vblank(&(s)->base) +#define ram_fb_disable(s) ramfuc_fb_disable(&(s)->base) +#define ram_fb_enable(s) ramfuc_fb_enable(&(s)->base) #endif diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc index 228ee0d..9f2f57c 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc @@ -43,9 +43,9 @@ process(PROC_MEMX, #memx_init, #memx_recv) */ .b32 func memx_func_head: -handler(ENTER , 0x0000, 0x0000, #memx_func_enter) +handler(FB_OFF, 0x0000, 0x0000, #memx_func_enter) memx_func_next: -handler(LEAVE , 0x0000, 0x0000, #memx_func_leave) +handler(FB_ON , 0x0000, 0x0000, #memx_func_leave) handler(WR32 , 0x0000, 0x0002, #memx_func_wr32) handler(WAIT , 0x0004, 0x0000, #memx_func_wait) handler(DELAY , 0x0001, 0x0000, #memx_func_delay) diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h index 80f8328..50f9a38 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h @@ -19,8 +19,8 @@ #define MEMX_MSG_EXEC 1 /* MEMX: script opcode definitions */ -#define MEMX_ENTER 0 -#define MEMX_LEAVE 1 +#define MEMX_FB_OFF 0 +#define MEMX_FB_ON 1 #define MEMX_WR32 2 #define MEMX_WAIT 3 #define MEMX_DELAY 4 diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c index bcdc00f..060bbd2 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c @@ -63,7 +63,6 @@ nouveau_memx_init(struct nouveau_pwr *ppwr, struct nouveau_memx **pmemx) nv_wr32(ppwr, 0x10a580, 0x00000003); } while (nv_rd32(ppwr, 0x10a580) != 0x00000003); nv_wr32(ppwr, 0x10a1c0, 0x01000000 | memx->base); - nv_wr32(ppwr, 0x10a1c4, 0x00010000 | MEMX_ENTER); return 0; } @@ -79,7 +78,6 @@ nouveau_memx_fini(struct nouveau_memx **pmemx, bool exec) memx_out(memx); /* release data segment access */ - nv_wr32(ppwr, 0x10a1c4, 0x00000000 | MEMX_LEAVE); finish = nv_rd32(ppwr, 0x10a1c0) & 0x00ffffff; nv_wr32(ppwr, 0x10a580, 0x00000000); @@ -151,4 +149,22 @@ nouveau_memx_wait_vblank(struct nouveau_memx *memx) memx_out(memx); /* fuc can't handle multiple */ } +void +nouveau_memx_fb_disable(struct nouveau_memx *memx) +{ + struct nouveau_pwr *ppwr = memx->ppwr; + + nv_debug(memx->ppwr, " FB OFF\n"); + nv_wr32(ppwr, 0x10a1c4, MEMX_FB_OFF); +} + +void +nouveau_memx_fb_enable(struct nouveau_memx *memx) +{ + struct nouveau_pwr *ppwr = memx->ppwr; + + nv_debug(memx->ppwr, " FB ON\n"); + nv_wr32(ppwr, 0x10a1c4, MEMX_FB_ON); +} + #endif -- 1.9.3
Roy Spliet
2014-Sep-04 14:58 UTC
[Nouveau] [PATCH 4/8] fb/ramnve0: Disable FB before reclocking
This used to be done implicitly Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c index c5b46e3..9764792 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c @@ -998,6 +998,8 @@ nve0_ram_calc_xits(struct nouveau_fb *pfb, struct nouveau_ram_data *next) if (ret) return ret; + ram_fb_disable(fuc); + ram->mode = (next->freq > fuc->refpll.vco1.max_freq) ? 2 : 1; ram->from = ram_rd32(fuc, 0x1373f4) & 0x0000000f; @@ -1061,6 +1063,9 @@ nve0_ram_calc_xits(struct nouveau_fb *pfb, struct nouveau_ram_data *next) break; } + if (!ret) + ram_fb_enable(fuc); + return ret; } -- 1.9.3
Roy Spliet
2014-Sep-04 14:58 UTC
[Nouveau] [PATCH 5/8] pwr/memx: Return debugging information
Time measured from disabling FB to re-enabling, PPWR_IN reveals status of heads at the end of script. Helps debug various issues (like flicker). Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc | 16 ++++++++++++++++ drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c | 2 ++ 2 files changed, 18 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc index 9f2f57c..989fdc1 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc @@ -55,6 +55,11 @@ memx_func_tail: .equ #memx_func_size #memx_func_next - #memx_func_head .equ #memx_func_num (#memx_func_tail - #memx_func_head) / #memx_func_size +memx_ts_start: +.b32 0 +memx_ts_end: +.b32 0 + memx_data_head: .skip 0x0800 memx_data_tail: @@ -78,6 +83,9 @@ memx_func_enter: and $r6 NV_PPWR_OUTPUT_FB_PAUSE bra z #memx_func_enter_wait + nv_iord($r6, NV_PPWR_TIMER_LOW) + st b32 D[$r0 + #memx_ts_start] $r6 + ret // description @@ -87,6 +95,9 @@ memx_func_enter: // $r3 - opcode desciption // $r0 - zero memx_func_leave: + nv_iord($r6, NV_PPWR_TIMER_LOW) + st b32 D[$r0 + #memx_ts_end] $r6 + mov $r6 NV_PPWR_OUTPUT_CLR_FB_PAUSE nv_iowr(NV_PPWR_OUTPUT_CLR, $r6) memx_func_leave_wait: @@ -210,6 +221,7 @@ memx_exec: push $r13 mov b32 $r1 $r12 mov b32 $r2 $r11 + memx_exec_next: // fetch the packet header, and locate opcode info ld b32 $r3 D[$r1] @@ -226,6 +238,10 @@ memx_exec: bra l #memx_exec_next // send completion reply + ld b32 $r11 D[$r0 + #memx_ts_start] + ld b32 $r12 D[$r0 + #memx_ts_end] + sub b32 $r12 $r11 + nv_iord($r11, NV_PPWR_INPUT) pop $r13 pop $r14 call(send) diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c index 060bbd2..c20669b9e 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c @@ -87,6 +87,8 @@ nouveau_memx_fini(struct nouveau_memx **pmemx, bool exec) memx->base, finish); } + nv_debug(memx->ppwr, "Exec took %uns, PPWR_IN %08x", reply[0], reply[1]); + kfree(memx); return 0; } -- 1.9.3
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc index 989fdc1..744f2e9 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc +++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc @@ -76,6 +76,16 @@ memx_data_tail: // $r3 - opcode desciption // $r0 - zero memx_func_enter: +#if NVKM_PPWR_CHIPSET == GT215 + movw $r8 0x1610 + nv_rd32($r7, $r8) + imm32($r6, 0xfffffffc) + and $r7 $r6 + movw $r6 0x2 + or $r7 $r6 + nv_wr32($r8, $r7) +#endif + mov $r6 NV_PPWR_OUTPUT_SET_FB_PAUSE nv_iowr(NV_PPWR_OUTPUT_SET, $r6) memx_func_enter_wait: @@ -104,6 +114,15 @@ memx_func_leave: nv_iord($r6, NV_PPWR_OUTPUT) and $r6 NV_PPWR_OUTPUT_FB_PAUSE bra nz #memx_func_leave_wait + +#if NVKM_PPWR_CHIPSET == GT215 + movw $r8 0x1610 + nv_rd32($r7, $r8) + imm32($r6, 0xffffffcc) + and $r7 $r6 + nv_wr32($r8, $r7) +#endif + ret #if NVKM_PPWR_CHIPSET < GF119 -- 1.9.3
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c | 43 ++++++++++++++++++++------ 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c index ebd4cd9..f84c654 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * * Authors: Ben Skeggs <bskeggs at redhat.com> + * Roy Spliet <rspliet at eclipso.eu> */ #include <subdev/bios.h> @@ -70,30 +71,52 @@ int nouveau_sddr3_calc(struct nouveau_ram *ram) { struct nouveau_bios *bios = nouveau_bios(ram); - int WL, CL, WR; + int CWL, CL, WR, DLL = 0, ODT = 0; switch (!!ram->timing.data * ram->timing.version) { + case 0x10: + if (ram->timing.size < 0x17) { + /* XXX: NV50: Get CWL from the timing register */ + return -ENOSYS; + } + CWL = nv_ro08(bios, ram->timing.data + 0x13); + CL = nv_ro08(bios, ram->timing.data + 0x02); + WR = nv_ro08(bios, ram->timing.data + 0x00); + DLL = !(nv_ro08(bios, ram->ramcfg.data + 0x02) & 0x40); + ODT = nv_ro08(bios, ram->timing.data + 0x0e) & 0x07; + break; case 0x20: - WL = (nv_ro16(bios, ram->timing.data + 0x04) & 0x0f80) >> 7; - CL = nv_ro08(bios, ram->timing.data + 0x04) & 0x1f; - WR = nv_ro08(bios, ram->timing.data + 0x0a) & 0x7f; + CWL = (nv_ro16(bios, ram->timing.data + 0x04) & 0x0f80) >> 7; + CL = nv_ro08(bios, ram->timing.data + 0x04) & 0x1f; + WR = nv_ro08(bios, ram->timing.data + 0x0a) & 0x7f; + /* XXX: Get these values from the VBIOS instead */ + DLL = !(ram->mr[1] & 0x1); + ODT = (ram->mr[1] & 0x004) >> 2 | + (ram->mr[1] & 0x040) >> 5 | + (ram->mr[1] & 0x200) >> 7; break; default: return -ENOSYS; } - WL = ramxlat(ramddr3_cwl, WL); - CL = ramxlat(ramddr3_cl, CL); - WR = ramxlat(ramddr3_wr, WR); - if (WL < 0 || CL < 0 || WR < 0) + CWL = ramxlat(ramddr3_cwl, CWL); + CL = ramxlat(ramddr3_cl, CL); + WR = ramxlat(ramddr3_wr, WR); + if (CL < 0 || CWL < 0 || WR < 0) return -EINVAL; - ram->mr[0] &= ~0xe74; + ram->mr[0] &= ~0xf74; ram->mr[0] |= (WR & 0x07) << 9; ram->mr[0] |= (CL & 0x0e) << 3; ram->mr[0] |= (CL & 0x01) << 2; + ram->mr[1] &= ~0x245; + ram->mr[1] |= (ODT & 0x1) << 2; + ram->mr[1] |= (ODT & 0x2) << 5; + ram->mr[1] |= (ODT & 0x4) << 7; + ram->mr[1] |= !DLL; + ram->mr[2] &= ~0x038; - ram->mr[2] |= (WL & 0x07) << 3; + ram->mr[2] |= (CWL & 0x07) << 3; return 0; } -- 1.9.3
Signed-off-by: Roy Spliet <rspliet at eclipso.eu> --- drivers/gpu/drm/nouveau/Makefile | 1 + drivers/gpu/drm/nouveau/core/subdev/fb/priv.h | 1 + drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c | 96 ++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) create mode 100644 drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile index 8b307e1..8228e05 100644 --- a/drivers/gpu/drm/nouveau/Makefile +++ b/drivers/gpu/drm/nouveau/Makefile @@ -120,6 +120,7 @@ nouveau-y += core/subdev/fb/ramnvc0.o nouveau-y += core/subdev/fb/ramnve0.o nouveau-y += core/subdev/fb/ramgk20a.o nouveau-y += core/subdev/fb/ramgm107.o +nouveau-y += core/subdev/fb/sddr2.o nouveau-y += core/subdev/fb/sddr3.o nouveau-y += core/subdev/fb/gddr5.o nouveau-y += core/subdev/gpio/base.o diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h index 82273f8..60322e9 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h @@ -35,6 +35,7 @@ extern struct nouveau_oclass nve0_ram_oclass; extern struct nouveau_oclass gk20a_ram_oclass; extern struct nouveau_oclass gm107_ram_oclass; +int nouveau_sddr2_calc(struct nouveau_ram *ram); int nouveau_sddr3_calc(struct nouveau_ram *ram); int nouveau_gddr5_calc(struct nouveau_ram *ram, bool nuts); diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c new file mode 100644 index 0000000..f2f5f12 --- /dev/null +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c @@ -0,0 +1,96 @@ +/* + * Copyright 2014 Roy Spliet + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Roy Spliet <rspliet at eclipso.eu> + * Ben Skeggs + */ + +#include <subdev/bios.h> +#include "priv.h" + +struct ramxlat { + int id; + u8 enc; +}; + +static inline int +ramxlat(const struct ramxlat *xlat, int id) +{ + while (xlat->id >= 0) { + if (xlat->id == id) + return xlat->enc; + xlat++; + } + return -EINVAL; +} + +static const struct ramxlat +ramddr2_cl[] = { + { 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 }, + /* The following are available in some, but not all DDR2 docs */ + { 7, 7 }, + { -1 } +}; + +static const struct ramxlat +ramddr2_wr[] = { + { 2, 1 }, { 3, 2 }, { 4, 3 }, { 5, 4 }, { 6, 5 }, + /* The following are available in some, but not all DDR2 docs */ + { 7, 6 }, + { -1 } +}; + +int +nouveau_sddr2_calc(struct nouveau_ram *ram) +{ + struct nouveau_bios *bios = nouveau_bios(ram); + int CL, WR, DLL = 0, ODT = 0; + + switch (!!ram->timing.data * ram->timing.version) { + case 0x10: + CL = nv_ro08(bios, ram->timing.data + 0x02); + WR = nv_ro08(bios, ram->timing.data + 0x00); + DLL = !(nv_ro08(bios, ram->ramcfg.data + 0x02) & 0x40); + ODT = nv_ro08(bios, ram->timing.data + 0x0e) & 0x03; + break; + case 0x20: + CL = nv_ro08(bios, ram->timing.data + 0x04) & 0x1f; + WR = nv_ro08(bios, ram->timing.data + 0x0a) & 0x7f; + break; + default: + return -ENOSYS; + } + + CL = ramxlat(ramddr2_cl, CL); + WR = ramxlat(ramddr2_wr, WR); + if (CL < 0 || WR < 0) + return -EINVAL; + + ram->mr[0] &= ~0xe70; + ram->mr[0] |= (WR & 0x07) << 9; + ram->mr[0] |= (CL & 0x07) << 4; + + ram->mr[1] &= ~0x045; + ram->mr[1] |= (ODT & 0x1) << 2; + ram->mr[1] |= (ODT & 0x2) << 5; + ram->mr[1] |= !DLL; + return 0; +} -- 1.9.3
Ilia Mirkin
2014-Sep-04 15:08 UTC
[Nouveau] [PATCH 4/8] fb/ramnve0: Disable FB before reclocking
This should probably be folded into the previous patch to avoid breaking bisectability on nve0 On Thu, Sep 4, 2014 at 10:58 AM, Roy Spliet <rspliet at eclipso.eu> wrote:> This used to be done implicitly > > Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c | 5 +++++ > 1 file changed, 5 insertions(+) > > diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c > index c5b46e3..9764792 100644 > --- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c > +++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c > @@ -998,6 +998,8 @@ nve0_ram_calc_xits(struct nouveau_fb *pfb, struct nouveau_ram_data *next) > if (ret) > return ret; > > + ram_fb_disable(fuc); > + > ram->mode = (next->freq > fuc->refpll.vco1.max_freq) ? 2 : 1; > ram->from = ram_rd32(fuc, 0x1373f4) & 0x0000000f; > > @@ -1061,6 +1063,9 @@ nve0_ram_calc_xits(struct nouveau_fb *pfb, struct nouveau_ram_data *next) > break; > } > > + if (!ret) > + ram_fb_enable(fuc); > + > return ret; > } > > -- > 1.9.3 > > > > _______________________________________________ > Nouveau mailing list > Nouveau at lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/nouveau
Ben Skeggs
2014-Sep-05 22:49 UTC
[Nouveau] [PATCH 1/8] nv50/display: Set VBLANK time in modeset script
On Fri, Sep 5, 2014 at 12:58 AM, Roy Spliet <rspliet at eclipso.eu> wrote:> Solves blinking on reclocking memory. The value set is an underestimate, but with non-reduced vblanking this should give us plenty of timeHey Roy, I've merged all (squashed the kepler change into the commit where it's needed too) except this patch, which needs to be rebased on top of 3.17-rc. There's one minor nitpick inline too. Please also try and watch the wrapping of commit messages, I added newlines to make it easier to read them in git-log output. Thanks, Ben.> > Signed-off-by: Roy Spliet <rspliet at eclipso.eu> > --- > drivers/gpu/drm/nouveau/nv50_display.c | 12 +++++++++--- > 1 file changed, 9 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c > index 4c534b7..cfa7ecf 100644 > --- a/drivers/gpu/drm/nouveau/nv50_display.c > +++ b/drivers/gpu/drm/nouveau/nv50_display.c > @@ -1070,7 +1070,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, > u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1; > u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks; > u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks; > - u32 vblan2e = 0, vblan2s = 1; > + u32 vblan2e = 0, vblan2s = 1, vblankus; > u32 *push; > int ret; > > @@ -1087,6 +1087,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, > vblanke = vsynce + vbackp; > vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace; > vblanks = vactive - vfrontp - 1; > + > if (mode->flags & DRM_MODE_FLAG_INTERLACE) { > vblan2e = vactive + vsynce + vbackp; > vblan2s = vblan2e + (mode->vdisplay * vscan / ilace); > @@ -1100,17 +1101,22 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode, > push = evo_wait(mast, 64); > if (push) { > if (nv50_vers(mast) < NVD0_DISP_MAST_CLASS) { > + /* XXX: Safe underestimate, even "0" works */ > + vblankus = (vactive - mode->vdisplay - 2) * hactive; > + vblankus *= 1000; > + vblankus /= mode->clock;When you rebase, can you move this calculation into the common code please :)> + > evo_mthd(push, 0x0804 + (nv_crtc->index * 0x400), 2); > evo_data(push, 0x00800000 | mode->clock); > evo_data(push, (ilace == 2) ? 2 : 0); > - evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 6); > + evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 8); > evo_data(push, 0x00000000); > evo_data(push, (vactive << 16) | hactive); > evo_data(push, ( vsynce << 16) | hsynce); > evo_data(push, (vblanke << 16) | hblanke); > evo_data(push, (vblanks << 16) | hblanks); > evo_data(push, (vblan2e << 16) | vblan2s); > - evo_mthd(push, 0x082c + (nv_crtc->index * 0x400), 1); > + evo_data(push, vblankus); > evo_data(push, 0x00000000); > evo_mthd(push, 0x0900 + (nv_crtc->index * 0x400), 2); > evo_data(push, 0x00000311); > -- > 1.9.3 > > >
Apparently Analagous Threads
- [PATCH 4/8] fb/ramnve0: Disable FB before reclocking
- MEMX improvements + DDR 2/3 MR generation
- NVA3: Small misc mem reclocking fixes
- [PATCH] drm/nouveau/fb: Prevent inlining of ramfuc_reg
- [PATCH 1/2] fb: default NvMemExec to on, turning it off is used for debugging only