Francisco Jerez
2009-Nov-23 13:45 UTC
[Nouveau] [PATCH 1/3] drm/nouveau: Update the CRTC arbitration parameters on FB depth switch.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nv04_crtc.c | 37 +++++++++++++++++++++------------- 1 files changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c index 2ab9f30..0a5cfc1 100644 --- a/drivers/gpu/drm/nouveau/nv04_crtc.c +++ b/drivers/gpu/drm/nouveau/nv04_crtc.c @@ -106,10 +106,8 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); struct nv04_mode_state *state = &dev_priv->mode_reg; struct nv04_crtc_reg *regp = &state->crtc_reg[nv_crtc->index]; - struct drm_framebuffer *fb = crtc->fb; struct nouveau_pll_vals *pv = &regp->pllvals; struct pll_lims pll_lim; - int vclk, arb_burst, arb_fifo_lwm; if (get_pll_limits(dev, nv_crtc->index ? VPLL2 : VPLL1, &pll_lim)) return; @@ -130,8 +128,7 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod if (dev_priv->chipset > 0x40 && dot_clock <= (pll_lim.vco1.maxfreq / 2)) memset(&pll_lim.vco2, 0, sizeof(pll_lim.vco2)); - vclk = nouveau_calc_pll_mnp(dev, &pll_lim, dot_clock, pv); - if (!vclk) + if (!nouveau_calc_pll_mnp(dev, &pll_lim, dot_clock, pv)) return; state->pllsel &= PLLSEL_VPLL1_MASK | PLLSEL_VPLL2_MASK | PLLSEL_TV_MASK; @@ -152,13 +149,6 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod NV_TRACE(dev, "vpll: n %d m %d log2p %d\n", pv->N1, pv->M1, pv->log2P); - nouveau_calc_arb(dev, vclk, fb->bits_per_pixel, &arb_burst, &arb_fifo_lwm); - - regp->CRTC[NV_CIO_CRE_FF_INDEX] = arb_burst; - regp->CRTC[NV_CIO_CRE_FFLWM__INDEX] = arb_fifo_lwm & 0xff; - if (nv_arch(dev) >= NV_30) - regp->CRTC[NV_CIO_CRE_47] = arb_fifo_lwm >> 8; - nv_crtc->cursor.set_offset(nv_crtc, nv_crtc->cursor.offset); } @@ -775,10 +765,12 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) { struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc); - struct 
drm_nouveau_private *dev_priv = crtc->dev->dev_private; + struct drm_device *dev = crtc->dev; + struct drm_nouveau_private *dev_priv = dev->dev_private; struct nv04_crtc_reg *regp = &dev_priv->mode_reg.crtc_reg[nv_crtc->index]; struct drm_framebuffer *drm_fb = nv_crtc->base.fb; struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb); + int arb_burst, arb_lwm; int ret; ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM); @@ -797,13 +789,14 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, nv_crtc_gamma_load(crtc); } + /* Update the framebuffer format. */ regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] &= ~3; regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] |= (crtc->fb->depth + 1) / 8; regp->ramdac_gen_ctrl &= ~NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL; if (crtc->fb->depth == 16) regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL; crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_PIXEL_INDEX); - NVWriteRAMDAC(crtc->dev, nv_crtc->index, NV_PRAMDAC_GENERAL_CONTROL, + NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_GENERAL_CONTROL, regp->ramdac_gen_ctrl); regp->CRTC[NV_CIO_CR_OFFSET_INDEX] = drm_fb->pitch >> 3; @@ -812,9 +805,25 @@ nv04_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y, crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_RPC0_INDEX); crtc_wr_cio_state(crtc, regp, NV_CIO_CR_OFFSET_INDEX); + /* Update the framebuffer location. */ regp->fb_start = nv_crtc->fb.offset & ~3; regp->fb_start += (y * drm_fb->pitch) + (x * drm_fb->bits_per_pixel / 8); - NVWriteCRTC(crtc->dev, nv_crtc->index, NV_PCRTC_START, regp->fb_start); + NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_START, regp->fb_start); + + /* Update the arbitration parameters. 
*/ + nouveau_calc_arb(dev, crtc->mode.clock, drm_fb->bits_per_pixel, + &arb_burst, &arb_lwm); + + regp->CRTC[NV_CIO_CRE_FF_INDEX] = arb_burst; + regp->CRTC[NV_CIO_CRE_FFLWM__INDEX] = arb_lwm & 0xff; + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX); + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX); + + if (nv_arch(dev) >= NV_30) { + regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8; + crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47); + } + return 0; } -- 1.6.4.4
Francisco Jerez
2009-Nov-23 13:45 UTC
[Nouveau] [PATCH 2/3] drm/nouveau: Clean up the arbitration parameters calculation code.
Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nouveau_calc.c | 248 +++++++++----------------------- 1 files changed, 69 insertions(+), 179 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c index 3f80db8..5d48274 100644 --- a/drivers/gpu/drm/nouveau/nouveau_calc.c +++ b/drivers/gpu/drm/nouveau/nouveau_calc.c @@ -34,35 +34,31 @@ \****************************************************************************/ struct nv_fifo_info { - int graphics_lwm; - int video_lwm; - int graphics_burst_size; - int video_burst_size; - bool valid; + int lwm; + int burst; }; struct nv_sim_state { int pclk_khz; int mclk_khz; int nvclk_khz; - int pix_bpp; - bool enable_mp; - bool enable_video; + int bpp; int mem_page_miss; int mem_latency; int memory_type; int memory_width; + int two_heads; }; static void -nv4CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) +nv04_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb) { - int pagemiss, cas, width, video_enable, bpp; - int nvclks, mclks, pclks, vpagemiss, crtpagemiss, vbs; + int pagemiss, cas, width, bpp; + int nvclks, mclks, pclks, crtpagemiss; int found, mclk_extra, mclk_loop, cbs, m1, p1; - int mclk_freq, pclk_freq, nvclk_freq, mp_enable; - int us_m, us_n, us_p, video_drain_rate, crtc_drain_rate; - int vpm_us, us_video, vlwm, video_fill_us, cpm_us, us_crt, clwm; + int mclk_freq, pclk_freq, nvclk_freq; + int us_m, us_n, us_p, crtc_drain_rate; + int cpm_us, us_crt, clwm; pclk_freq = arb->pclk_khz; mclk_freq = arb->mclk_khz; @@ -70,107 +66,53 @@ nv4CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) pagemiss = arb->mem_page_miss; cas = arb->mem_latency; width = arb->memory_width >> 6; - video_enable = arb->enable_video; - bpp = arb->pix_bpp; - mp_enable = arb->enable_mp; - clwm = 0; - vlwm = 0; + bpp = arb->bpp; cbs = 128; + pclks = 2; - nvclks = 2; - nvclks += 2; - nvclks += 1; - mclks = 5; - mclks += 
3; - mclks += 1; - mclks += cas; - mclks += 1; - mclks += 1; - mclks += 1; - mclks += 1; + nvclks = 10; + mclks = 13 + cas; mclk_extra = 3; - nvclks += 2; - nvclks += 1; - nvclks += 1; - nvclks += 1; - if (mp_enable) - mclks += 4; - nvclks += 0; - pclks += 0; found = 0; - vbs = 0; - while (found != 1) { - fifo->valid = true; + + while (!found) { found = 1; + mclk_loop = mclks + mclk_extra; us_m = mclk_loop * 1000 * 1000 / mclk_freq; us_n = nvclks * 1000 * 1000 / nvclk_freq; us_p = nvclks * 1000 * 1000 / pclk_freq; - if (video_enable) { - video_drain_rate = pclk_freq * 2; - crtc_drain_rate = pclk_freq * bpp / 8; - vpagemiss = 2; - vpagemiss += 1; - crtpagemiss = 2; - vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - if (nvclk_freq * 2 > mclk_freq * width) - video_fill_us = cbs * 1000 * 1000 / 16 / nvclk_freq; - else - video_fill_us = cbs * 1000 * 1000 / (8 * width) / mclk_freq; - us_video = vpm_us + us_m + us_n + us_p + video_fill_us; - vlwm = us_video * video_drain_rate / (1000 * 1000); - vlwm++; - vbs = 128; - if (vlwm > 128) - vbs = 64; - if (vlwm > (256 - 64)) - vbs = 32; - if (nvclk_freq * 2 > mclk_freq * width) - video_fill_us = vbs * 1000 * 1000 / 16 / nvclk_freq; - else - video_fill_us = vbs * 1000 * 1000 / (8 * width) / mclk_freq; - cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - us_crt = us_video + video_fill_us + cpm_us + us_m + us_n + us_p; - clwm = us_crt * crtc_drain_rate / (1000 * 1000); - clwm++; - } else { - crtc_drain_rate = pclk_freq * bpp / 8; - crtpagemiss = 2; - crtpagemiss += 1; - cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - us_crt = cpm_us + us_m + us_n + us_p; - clwm = us_crt * crtc_drain_rate / (1000 * 1000); - clwm++; - } + + crtc_drain_rate = pclk_freq * bpp / 8; + crtpagemiss = 2; + crtpagemiss += 1; + cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; + us_crt = cpm_us + us_m + us_n + us_p; + clwm = us_crt * crtc_drain_rate / (1000 * 1000); + clwm++; + m1 = clwm + cbs - 512; p1 = m1 * 
pclk_freq / mclk_freq; p1 = p1 * bpp / 8; - if ((p1 < m1 && m1 > 0) || - (video_enable && (clwm > 511 || vlwm > 255)) || - (!video_enable && clwm > 519)) { - fifo->valid = false; + if ((p1 < m1 && m1 > 0) || clwm > 519) { found = !mclk_extra; mclk_extra--; } if (clwm < 384) clwm = 384; - if (vlwm < 128) - vlwm = 128; - fifo->graphics_lwm = clwm; - fifo->graphics_burst_size = 128; - fifo->video_lwm = vlwm + 15; - fifo->video_burst_size = vbs; + + fifo->lwm = clwm; + fifo->burst = cbs; } } static void -nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) +nv10_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb) { - int pagemiss, width, video_enable, bpp; + int pagemiss, width, bpp; int nvclks, mclks, pclks, vpagemiss, crtpagemiss; - int nvclk_fill; int found, mclk_extra, mclk_loop, cbs, m1; - int mclk_freq, pclk_freq, nvclk_freq, mp_enable; + int mclk_freq, pclk_freq, nvclk_freq; int us_m, us_m_min, us_n, us_p, crtc_drain_rate; int vus_m; int vpm_us, us_video, cpm_us, us_crt, clwm; @@ -184,9 +126,7 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) nvclk_freq = arb->nvclk_khz; pagemiss = arb->mem_page_miss; width = arb->memory_width / 64; - video_enable = arb->enable_video; - bpp = arb->pix_bpp; - mp_enable = arb->enable_mp; + bpp = arb->bpp; clwm = 0; cbs = 512; pclks = 4; /* lwm detect. */ @@ -210,29 +150,20 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) else mclks += 1; - if (!video_enable && arb->memory_width == 128) { - mclk_extra = (bpp == 32) ? 31 : 42; /* Margin of error */ - min_mclk_extra = 17; - } else { - mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */ - /* mclk_extra = 4; *//* Margin of error */ - min_mclk_extra = 18; - } + mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */ + min_mclk_extra = 18; nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. 
*/ nvclks += 1; /* fbi_d_rdv_n */ nvclks += 1; /* Fbi_d_rdata */ nvclks += 1; /* crtfifo load */ - if (mp_enable) - mclks += 4; /* Mp can get in with a burst of 8. */ /* Extra clocks determined by heuristics */ nvclks += 0; pclks += 0; found = 0; while (found != 1) { - fifo->valid = true; found = 1; mclk_loop = mclks + mclk_extra; us_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */ @@ -244,50 +175,24 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) vus_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */ - if (video_enable) { - crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */ - - vpagemiss = 1; /* self generating page miss */ - vpagemiss += 1; /* One higher priority before */ - - crtpagemiss = 2; /* self generating page miss */ - if (mp_enable) - crtpagemiss += 1; /* if MA0 conflict */ - - vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - - us_video = vpm_us + vus_m; /* Video has separate read return path */ - - cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - us_crt = us_video /* Wait for video */ - + cpm_us /* CRT Page miss */ - + us_m + us_n + us_p; /* other latency */ - - clwm = us_crt * crtc_drain_rate / (1000 * 1000); - clwm++; /* fixed point <= float_point - 1. Fixes that */ - } else { - crtc_drain_rate = pclk_freq * bpp / 8; /* bpp * pclk/8 */ - - crtpagemiss = 1; /* self generating page miss */ - crtpagemiss += 1; /* MA0 page miss */ - if (mp_enable) - crtpagemiss += 1; /* if MA0 conflict */ - cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - us_crt = cpm_us + us_m + us_n + us_p; - clwm = us_crt * crtc_drain_rate / (1000 * 1000); - clwm++; /* fixed point <= float_point - 1. 
Fixes that */ - - /* Finally, a heuristic check when width == 64 bits */ - if (width == 1) { - nvclk_fill = nvclk_freq * 8; - if (crtc_drain_rate * 100 >= nvclk_fill * 102) - clwm = 0xfff; /* Large number to fail */ - else if (crtc_drain_rate * 100 >= nvclk_fill * 98) { - clwm = 1024; - cbs = 512; - } - } - } + crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */ + + vpagemiss = 1; /* self generating page miss */ + vpagemiss += 1; /* One higher priority before */ + + crtpagemiss = 2; /* self generating page miss */ + + vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq; + + us_video = vpm_us + vus_m; /* Video has separate read return path */ + + cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; + us_crt = us_video /* Wait for video */ + + cpm_us /* CRT Page miss */ + + us_m + us_n + us_p; /* other latency */ + + clwm = us_crt * crtc_drain_rate / (1000 * 1000); + clwm++; /* fixed point <= float_point - 1. Fixes that */ /* * Overfill check: @@ -305,7 +210,6 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) p2 = p1clk * bpp / 8; /* bytes drained. */ if (p2 < m1 && m1 > 0) { - fifo->valid = false; found = 0; if (min_mclk_extra == 0) { if (cbs <= 32) @@ -315,7 +219,6 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) } else min_mclk_extra--; } else if (clwm > 1023) { /* Have some margin */ - fifo->valid = false; found = 0; if (min_mclk_extra == 0) found = 1; /* Can't adjust anymore! 
*/ @@ -330,17 +233,14 @@ nv10CalcArbitration(struct nv_fifo_info *fifo, struct nv_sim_state *arb) clwm = min_clwm; /* printf("CRT LWM: prog: 0x%x, bs: 256\n", clwm); */ - fifo->graphics_lwm = clwm; - fifo->graphics_burst_size = cbs; - - fifo->video_lwm = 1024; - fifo->video_burst_size = 512; + fifo->lwm = clwm; + fifo->burst = cbs; } } static void -nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp, - int *burst, int *lwm) +nv04_update_arb(struct drm_device *dev, int VClk, int bpp, + int *burst, int *lwm) { struct nv_fifo_info fifo_data; struct nv_sim_state sim_data; @@ -351,21 +251,19 @@ nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp, sim_data.pclk_khz = VClk; sim_data.mclk_khz = MClk; sim_data.nvclk_khz = NVClk; - sim_data.pix_bpp = bpp; - sim_data.enable_mp = false; + sim_data.bpp = bpp; + sim_data.two_heads = nv_two_heads(dev); if ((dev->pci_device & 0xffff) == 0x01a0 /*CHIPSET_NFORCE*/ || (dev->pci_device & 0xffff) == 0x01f0 /*CHIPSET_NFORCE2*/) { uint32_t type; pci_read_config_dword(pci_get_bus_and_slot(0, 1), 0x7c, &type); - sim_data.enable_video = false; sim_data.memory_type = (type >> 12) & 1; sim_data.memory_width = 64; sim_data.mem_latency = 3; sim_data.mem_page_miss = 10; } else { - sim_data.enable_video = (nv_arch(dev) != NV_04); sim_data.memory_type = nvReadFB(dev, NV_PFB_CFG0) & 0x1; sim_data.memory_width = (nvReadEXTDEV(dev, NV_PEXTDEV_BOOT_0) & 0x10) ? 
128 : 64; sim_data.mem_latency = cfg1 & 0xf; @@ -373,21 +271,16 @@ nv4_10UpdateArbitrationSettings(struct drm_device *dev, int VClk, int bpp, } if (nv_arch(dev) == NV_04) - nv4CalcArbitration(&fifo_data, &sim_data); + nv04_calc_arb(&fifo_data, &sim_data); else - nv10CalcArbitration(&fifo_data, &sim_data); - - if (fifo_data.valid) { - int b = fifo_data.graphics_burst_size >> 4; - *burst = 0; - while (b >>= 1) - (*burst)++; - *lwm = fifo_data.graphics_lwm >> 3; - } + nv10_calc_arb(&fifo_data, &sim_data); + + *burst = ilog2(fifo_data.burst >> 4); + *lwm = fifo_data.lwm >> 3; } static void -nv30UpdateArbitrationSettings(int *burst, int *lwm) +nv30_update_arb(int *burst, int *lwm) { unsigned int fifo_size, burst_size, graphics_lwm; @@ -395,10 +288,7 @@ nv30UpdateArbitrationSettings(int *burst, int *lwm) burst_size = 512; graphics_lwm = fifo_size - burst_size; - *burst = 0; - burst_size >>= 5; - while (burst_size >>= 1) - (*burst)++; + *burst = ilog2(burst_size >> 5); *lwm = graphics_lwm >> 3; } @@ -406,13 +296,13 @@ void nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm) { if (nv_arch(dev) < NV_30) - nv4_10UpdateArbitrationSettings(dev, vclk, bpp, burst, lwm); + nv04_update_arb(dev, vclk, bpp, burst, lwm); else if ((dev->pci_device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ || (dev->pci_device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) { *burst = 128; *lwm = 0x0480; } else - nv30UpdateArbitrationSettings(burst, lwm); + nv30_update_arb(burst, lwm); } static int -- 1.6.4.4
Francisco Jerez
2009-Nov-23 13:45 UTC
[Nouveau] [PATCH 3/3] drm/nv10-nv20: CRTC arbitration code rewrite.
The previous bandwidth calculation code was nv legacy and it had some issues besides being obfuscated: * It assumed a single-head setup (I worked around this in 5603fe7f, however it triggered bug 24820). * It could lead to unnecessarily conservative settings, because it assumed a 1kB FIFO size limit (like nv10/nv15, but not nv11/nv17). * It sometimes set unacceptably large FIFO burst values, screwing latency and causing some overlay corruption (bug 11993). Signed-off-by: Francisco Jerez <currojerez at riseup.net> --- drivers/gpu/drm/nouveau/nouveau_calc.c | 182 ++++++++++++-------------------- 1 files changed, 67 insertions(+), 115 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_calc.c b/drivers/gpu/drm/nouveau/nouveau_calc.c index 5d48274..1f85715 100644 --- a/drivers/gpu/drm/nouveau/nouveau_calc.c +++ b/drivers/gpu/drm/nouveau/nouveau_calc.c @@ -109,133 +109,85 @@ nv04_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb) static void nv10_calc_arb(struct nv_fifo_info *fifo, struct nv_sim_state *arb) { - int pagemiss, width, bpp; - int nvclks, mclks, pclks, vpagemiss, crtpagemiss; - int found, mclk_extra, mclk_loop, cbs, m1; - int mclk_freq, pclk_freq, nvclk_freq; - int us_m, us_m_min, us_n, us_p, crtc_drain_rate; - int vus_m; - int vpm_us, us_video, cpm_us, us_crt, clwm; - int clwm_rnd_down, min_clwm; - int m2us, us_pipe_min, p1clk, p2; - int min_mclk_extra; - int us_min_mclk_extra; - - pclk_freq = arb->pclk_khz; /* freq in KHz */ - mclk_freq = arb->mclk_khz; + int fill_rate, drain_rate; + int pclks, nvclks, mclks, xclks; + int pclk_freq, nvclk_freq, mclk_freq; + int fill_lat, extra_lat; + int max_burst_o, max_burst_l, burst; + int fifo_len, min_lwm, max_lwm; + const int burst_lat = 80; /* Maximum allowable latency due + * to the CRTC FIFO burst. 
(ns) */ + + pclk_freq = arb->pclk_khz; nvclk_freq = arb->nvclk_khz; - pagemiss = arb->mem_page_miss; - width = arb->memory_width / 64; - bpp = arb->bpp; - clwm = 0; - cbs = 512; + mclk_freq = arb->mclk_khz; + + fill_rate = mclk_freq * arb->memory_width / 8; /* kB/s */ + drain_rate = pclk_freq * arb->bpp / 8; /* kB/s */ + + fifo_len = arb->two_heads ? 1536 : 1024; /* B */ + + /* Fixed FIFO refill latency. */ + pclks = 4; /* lwm detect. */ - nvclks = 3; /* lwm -> sync. */ - nvclks += 2; /* fbi bus cycles (1 req + 1 busy) */ - mclks = 1; /* 2 edge sync. may be very close to edge so just put one. */ - mclks += 1; /* arb_hp_req */ - mclks += 5; /* ap_hp_req tiling pipeline */ - mclks += 2; /* tc_req latency fifo */ - mclks += 2; /* fb_cas_n_ memory request to fbio block */ - mclks += 7; /* sm_d_rdv data returned from fbio block */ - - /* fb.rd.d.Put_gc need to accumulate 256 bits for read */ - if (arb->memory_type == 0) { - if (arb->memory_width == 64) /* 64 bit bus */ - mclks += 4; - else - mclks += 2; - } else if (arb->memory_width == 64) /* 64 bit bus */ - mclks += 2; - else - mclks += 1; - mclk_extra = (bpp == 32) ? 8 : 4; /* Margin of error */ - min_mclk_extra = 18; + nvclks = 3 /* lwm -> sync. */ + + 2 /* fbi bus cycles (1 req + 1 busy) */ + + 1 /* 2 edge sync. may be very close to edge so + * just put one. */ + + 1 /* fbi_d_rdv_n */ + + 1 /* Fbi_d_rdata */ + + 1; /* crtfifo load */ - nvclks += 1; /* 2 edge sync. may be very close to edge so just put one. */ - nvclks += 1; /* fbi_d_rdv_n */ - nvclks += 1; /* Fbi_d_rdata */ - nvclks += 1; /* crtfifo load */ + mclks = 1 /* 2 edge sync. may be very close to edge so + * just put one. */ + + 1 /* arb_hp_req */ + + 5 /* tiling pipeline */ + + 2 /* latency fifo */ + + 2 /* memory request to fbio block */ + + 7; /* data returned from fbio block */ - /* Extra clocks determined by heuristics */ + /* Need to accumulate 256 bits for read */ + mclks += (arb->memory_type == 0 ? 
2 : 1) + * arb->memory_width / 32; - nvclks += 0; - pclks += 0; - found = 0; - while (found != 1) { - found = 1; - mclk_loop = mclks + mclk_extra; - us_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */ - us_m_min = mclks * 1000 * 1000 / mclk_freq; /* Minimum Mclk latency in us */ - us_min_mclk_extra = min_mclk_extra * 1000 * 1000 / mclk_freq; - us_n = nvclks * 1000 * 1000 / nvclk_freq; /* nvclk latency in us */ - us_p = pclks * 1000 * 1000 / pclk_freq; /* nvclk latency in us */ - us_pipe_min = us_m_min + us_n + us_p; + fill_lat = mclks * 1000 * 1000 / mclk_freq /* minimum mclk latency */ + + nvclks * 1000 * 1000 / nvclk_freq /* nvclk latency */ + + pclks * 1000 * 1000 / pclk_freq; /* pclk latency */ - vus_m = mclk_loop * 1000 * 1000 / mclk_freq; /* Mclk latency in us */ + /* Conditional FIFO refill latency. */ - crtc_drain_rate = pclk_freq * bpp / 8; /* MB/s */ + xclks = 2 * arb->mem_page_miss + mclks /* Extra latency due to + * the overlay. */ + + 2 * arb->mem_page_miss /* Extra pagemiss latency. */ + + (arb->bpp == 32 ? 8 : 4); /* Margin of error. */ - vpagemiss = 1; /* self generating page miss */ - vpagemiss += 1; /* One higher priority before */ + extra_lat = xclks * 1000 * 1000 / mclk_freq; - crtpagemiss = 2; /* self generating page miss */ + if (arb->two_heads) + /* Account for another CRTC. */ + extra_lat += fill_lat + extra_lat + burst_lat; - vpm_us = vpagemiss * pagemiss * 1000 * 1000 / mclk_freq; + /* FIFO burst */ - us_video = vpm_us + vus_m; /* Video has separate read return path */ + /* Max burst not leading to overflows. */ + max_burst_o = (1 + fifo_len - extra_lat * drain_rate / (1000 * 1000)) + * (fill_rate / 1000) / ((fill_rate - drain_rate) / 1000); - cpm_us = crtpagemiss * pagemiss * 1000 * 1000 / mclk_freq; - us_crt = us_video /* Wait for video */ - + cpm_us /* CRT Page miss */ - + us_m + us_n + us_p; /* other latency */ + /* Max burst value with an acceptable latency. 
*/ + max_burst_l = burst_lat * fill_rate / (1000 * 1000); - clwm = us_crt * crtc_drain_rate / (1000 * 1000); - clwm++; /* fixed point <= float_point - 1. Fixes that */ - - /* - * Overfill check: - */ - - clwm_rnd_down = (clwm / 8) * 8; - if (clwm_rnd_down < clwm) - clwm += 8; - - m1 = clwm + cbs - 1024; /* Amount of overfill */ - m2us = us_pipe_min + us_min_mclk_extra; - - /* pclk cycles to drain */ - p1clk = m2us * pclk_freq / (1000 * 1000); - p2 = p1clk * bpp / 8; /* bytes drained. */ - - if (p2 < m1 && m1 > 0) { - found = 0; - if (min_mclk_extra == 0) { - if (cbs <= 32) - found = 1; /* Can't adjust anymore! */ - else - cbs = cbs / 2; /* reduce the burst size */ - } else - min_mclk_extra--; - } else if (clwm > 1023) { /* Have some margin */ - found = 0; - if (min_mclk_extra == 0) - found = 1; /* Can't adjust anymore! */ - else - min_mclk_extra--; - } + fifo->burst = burst = rounddown_pow_of_two( + min(max_burst_l, min(max_burst_o, 1024))); - /* This correction works around a slight snow effect - * when the TV and VGA outputs are enabled simultaneously. */ - min_clwm = 1024 - cbs + 128 * pclk_freq / 100000; - if (clwm < min_clwm) - clwm = min_clwm; + /* FIFO low watermark */ - /* printf("CRT LWM: prog: 0x%x, bs: 256\n", clwm); */ - fifo->lwm = clwm; - fifo->burst = cbs; - } + min_lwm = (fill_lat + extra_lat) * drain_rate / (1000 * 1000) + 1; + max_lwm = fifo_len - burst + + fill_lat * drain_rate / (1000 * 1000) + + burst * drain_rate / fill_rate; + + fifo->lwm = min_lwm + 5 * (max_lwm - min_lwm) / 100; /* Empirical. */ } static void -- 1.6.4.4
Reasonably Related Threads
- [PATCH] drm/nouveau: remove set but unused variable.
- [PATCH 0/2] drm/nouveau: remove some set but not used variables
- [PATCH] drm: nouveau: remove a redundant local variable 'pclks'
- [PATCH 1/6] drm/i2c/ch7006: Fix some sparse warnings.
- [PATCHv2 1/6] drm/i2c/ch7006: Fix some sparse warnings.